never executed always true always false
1 {-# LANGUAGE LambdaCase #-}
2 {-# LANGUAGE RecordWildCards #-}
3
4 {-|
5 Description : Brainfuck compilation to IR
6 Copyright : (c) Sebastian Galkin, 2018
7 License : GPL-3
8
9 In this module we:
10
11 - Convert 'Text' into a Brainfuck intermediate representation (IR) consisting of lists of 'Op's.
12 - Provide optimization rules to speed up IR execution.
13 - Parse compiler command line options
14 -}
15 module HBF.Compiler
16 ( module HBF.Compiler
17 -- * Reexport from "BFP.Parser"
18 , BFP.ParseError
19 ) where
20
21 import Control.Monad (when)
22 import Control.Monad.Trans.State.Strict (State, execState, get, modify, put)
23 import qualified Data.Binary as B
24 import Data.ByteString.Lazy (ByteString)
25 import Data.Coerce (coerce)
26 import Data.Foldable (traverse_)
27 import Data.Functor.Identity (Identity)
28 import Data.Maybe (fromMaybe)
29 import Data.Semigroup (Semigroup (..), (<>))
30 import Data.Text.Lazy (Text)
31 import qualified Data.Text.Lazy.IO as TIO
32 import Data.Tuple (swap)
33 import Options.Applicative (Parser, ParserInfo, ParserResult,
34 argument, defaultPrefs,
35 execParserPure, fullDesc,
36 handleParseResult, header, help,
37 helper, info, long, metavar, option,
38 optional, progDesc, short, str,
39 switch, (<**>))
40 import System.Environment (getArgs)
41 import System.FilePath ((-<.>))
42 import qualified Text.Parsec as Parsec
43 import Text.Parsec.Pos (initialPos)
44
45 import qualified HBF.Parser as BFP
46 import HBF.Types
47
48 -- * Compilation
49 -- | Encode the compiled file into the given path.
50 saveCompilerOutput :: Program Optimized -> FilePath -> IO ()
51 saveCompilerOutput = flip B.encodeFile . instructions
52
53 -- | Use the given 'CompilerOptions' to parse, compile and optimize the text representation of a
54 -- Brainfuck program into the IR. 'cOptsSource' and 'cOptsOut' in the compiler options are ignored.
55 inMemoryCompile ::
56 CompilerOptions
57 -> Text
58 -> Either BFP.ParseError (Program Optimized, CompilationSummary)
59 inMemoryCompile opts code =
60 (\p -> (p, summarizeCompilation p)) . optimize opts <$> BFP.parseProgram code
61
62 -- | Compilation summary for the user. It contains overview information and
63 -- statistics about the compilation result.
64 newtype CompilationSummary = CompilationSummary
65 { compNumInstructions :: Int
66 } deriving (Show)
67
68 -- | Summarize a compiled program creating the 'CompilationSummary'
69 summarizeCompilation :: Program Optimized -> CompilationSummary
70 summarizeCompilation = CompilationSummary . length . instructions
71
72 -- | Use 'CompilerOptions' to read, compile, optimize, and save a program from/to the filesystem.
73 -- Input and output files are provided by 'cOptsSource' and 'cOptsOut'.
74 compile :: CompilerOptions -> IO (Either BFP.ParseError CompilationSummary)
75 compile opts@CompilerOptions {..} = do
76 when cOptsVerbose $ do
77 putStrLn "Compiler options:"
78 print opts
79 compileResult <- inMemoryCompile opts <$> TIO.readFile cOptsSource
80 either
81 (return . Left)
82 (\p -> save p >> (return . Right . snd) p)
83 compileResult
84 where
85 outPath = fromMaybe (cOptsSource -<.> "bfc") cOptsOut
86 save (program, _) = saveCompilerOutput program outPath
87
88 -- | Apply optimizations to the 'Unoptimized' program turning. The optimizations that
89 -- will be available are the ones specified by the 'CompilerOptions' given.
90 optimize :: CompilerOptions -> Program Unoptimized -> Program Optimized
91 optimize CompilerOptions {..} p = foldl (flip ($)) base optimizations
92 where
93 base = toIR p
94 opt condition f =
95 if condition
96 then f
97 else id
98 optimizations =
99 [ opt cOptsClearLoopOptimization clearOpt
100 , opt cOptsMulOptimization mulOpt
101 , opt cOptsScanOptimization scanOpt
102 , opt cOptsOffsetInstructionsOptimization offsetInstructionOpt
103 , opt cOptsFusionOptimization fusionOpt
104 ]
105
106 -- | Given a parsed program, turn it into an optimized one, but with the null optimization.
107 -- Effectively this is only a type change.
108 toIR :: Program Unoptimized -> Program Optimized
109 toIR = coerce
110
111 -- * Optimization
112 -- | Helper type to apply the Fuse optimization using a 'Monoid'.
113 newtype FusedProgram = Fused
114 { unfused :: Program Optimized
115 } deriving (Show)
116
117 -- | This 'Semigroup' for 'FusedProgram' does all the fusion optimization work.
118 -- When two contiguous optimizations can be fused into one, '<>' will reduce the
119 -- size of the list in the 'FusedProgram'.
120 --
121 -- Examples of fusable operations:
122 --
123 -- - (Inc a offset) (Inc b offset) -> (Inc (a+b) offset)
124 -- - (Move 3 offset) (Move (-3) offset) -> NoOp
125 -- - (Clear offset) (Clear offset) -> Clear offset
126 -- - (Scan Up offset) (Scan _ offset') -> Scan Up offset
127 instance Semigroup FusedProgram where
128 Fused (Program p1) <> Fused (Program p2) = Fused $ Program $ fuse p1 p2
129 where
130 fuse :: [Op] -> [Op] -> [Op]
131 fuse [] ops = ops
132 fuse ops [] = ops
133 fuse [op1] (op2:more) = join op1 op2 ++ more
134 fuse (op1:more) ops2 = op1 : fuse more ops2
135 join :: Op -> Op -> [Op]
136 join (Inc a n) (Inc b m)
137 | n == m = ifNotZero (flip Inc n) $ a + b
138 join (Move a) (Move b) = ifNotZero Move $ a + b
139 join (In a n) (In b m)
140 | n == m = ifNotZero (flip In n) $ a + b
141 join (Out a n) (Out b m)
142 | n == m = ifNotZero (flip Out n) $ a + b
143 join (Clear n) (Clear m)
144 | n == m = [Clear n]
145 -- once a scan is found, another one won't move the pointer
146 join (Scan Up o1) (Scan _ o2)
147 | o1 == o2 = [Scan Up o1]
148 join (Scan Down o1) (Scan _ o2)
149 | o1 == o2 = [Scan Down o1]
150 join a b = [a, b]
151 ifNotZero f n = [f n | n /= 0]
152
153 -- | Use the 'Semigroup' instance and an empty program as 'mempty'.
154 instance Monoid FusedProgram where
155 mempty = Fused mempty
156 mappend = (<>)
157
158 -- | Apply the fusion optimization using the 'FusedProgram' 'Monoid' instance.
159 --
160 -- The fusion optimization consist of turning multiple instructions into one. For example
161 -- if the original Brainfuck code contains '++++', this would be parsed as
162 --
163 -- @
164 --'Program' ['Inc' 1 0, 'Inc' 1 0, 'Inc' 1 0, 'Inc' 1 0]
165 -- @
166 --
167 -- but it would be fused to a single IR instruction: @Inc 4 0@.
168 --
169 -- >>> fusionOpt $ Program [Inc 1 0, Inc 1 0, Inc 1 0, Inc 1 0]
170 -- [Inc 4 0]
171 --
172 -- Similarly, other instructions,
173 -- like 'Move', 'In', 'Out', 'Clear' and 'Scan' can be fused as long as the offset at which they
174 -- must be applied is the same.
175 --
176 -- Non fusable operation remain unchanged:
177 --
178 -- >>> fusionOpt $ Program [Inc 1 0, Inc 1 1]
179 -- [Inc 1 0,Inc 1 1]
180 fusionOpt :: Program Optimized -> Program Optimized
181 fusionOpt = unfused . foldMap (Fused . Program . optimizeIn) . instructions
182 where
183 optimizeIn (Loop as) = [Loop inner | not (null inner)]
184 where
185 inner = instructions $ fusionOpt $ Program as
186 optimizeIn other = [other]
187
188 -- | Helper function used to implement optimizations
189 -- Iterate over all 'Program' instructions searching for 'Loop's. For each 'Loop'
190 -- apply 'f'. If 'f' returns a list of new operations, replace the original loop with
191 -- the new instructions. If 'f' returns 'Nothing', process recursively the loop instructions.
192 liftLoop :: ([Op] -> Maybe [Op]) -> Program o -> Program o
193 liftLoop f = Program . (>>= g) . instructions
194 where
195 g :: Op -> [Op]
196 g (Loop ops) =
197 fromMaybe ((: []) . Loop . instructions . liftLoop f $ Program ops) $
198 f ops
199 g other = [other]
200
201 -- | Basic optimization that turns the loop @[-]@ into a single instruction 'Clear'.
202 -- Useful because clearing a memory position is a pretty common operation in Brainfuck and
203 -- very expensive if treated as a loop.
204 --
205 -- >>> :set -XOverloadedStrings
206 -- >>> Right (res, _) = inMemoryCompile defaultCompilerOptions "[-]"
207 -- >>> res
208 -- [Clear 0]
209 clearOpt :: Program Optimized -> Program Optimized
210 clearOpt = liftLoop onLoops
211 where
212 onLoops :: [Op] -> Maybe [Op]
213 onLoops [Inc (-1) 0] = Just [Clear 0]
214 onLoops _ = Nothing
215
216 -- | Copy and multiply optimization. A very common usage of loops is to copy the value of a memory
217 -- position to a different: @[->>+<<]@ this will move the contents of the current memory position
218 -- to places to the right, also clearing the original position to zero. If we change the number of @+@
219 -- operations we get multiplication, if we have several groups of @++..@ operations we get multiple copies.
220 -- In the general case, for example:
221 --
222 -- >>> :set -XOverloadedStrings
223 -- >>> Right (res, _) = inMemoryCompile defaultCompilerOptions "[->+>++>++++<<<]"
224 -- >>> res
225 -- [Mul 1 0 1,Mul 2 0 2,Mul 4 0 3,Clear 0]
226 --
227 -- The original Brainfuck copies the current position one place to the right, doubles
228 -- the current position two places to the right, and quadruples the current position three places to the right;
229 -- finally zeroing the current position. With the mul optimization in this function, all that loop would be
230 -- replaced by 4 instructions.
231 mulOpt :: Program Optimized -> Program Optimized
232 mulOpt = liftLoop onLoops
233 where
234 onLoops :: [Op] -> Maybe [Op]
235 onLoops ops = makeOp <$> eitherToMaybe (Parsec.parse mulP "" ops)
236 where
237 makeOp :: [(MulFactor, MemOffset)] -> [Op]
238 makeOp = (++ [Clear 0]) . snd . foldl it (0, [])
239 where
240 it (totalOff, res) (fact, off) =
241 (totalOff + off, res ++ [Mul fact 0 (off + totalOff)]) -- todo very inefficient foldr
242
243 -- | Implement the scan optimization. Another common operation in Brainfuck is to search for the first zero
244 -- in the neighboring memory, either to the right or to the left @[>]@ or @[<]@. These loops can be replaced
245 -- for a more optimal search, represented as a single @'Scan' 'Up'@ or @'Scan' 'Down'@ instruction.
246 --
247 -- >>> scanOpt $ Program [Loop [Move 1]]
248 -- [Scan Up 0]
249 scanOpt :: Program Optimized -> Program Optimized
250 scanOpt = liftLoop onLoops
251 where
252 onLoops :: [Op] -> Maybe [Op]
253 onLoops [Move 1] = Just [Scan Up 0]
254 onLoops [Move (-1)] = Just [Scan Down 0]
255 onLoops _ = Nothing
256
257 -- | Helper datastructure to implement a stateful transformation in 'offsetInstructionOpt'.
258 data OffsetState = OffSt
259 { stOptimized :: [Op] -- ^ The optimized program so far
260 , stBatch :: [Op] -- ^ The current batch of instructions being optimized (between loops)
261 , stOffset :: MemOffset -- ^ The current offset since the last loop
262 } deriving (Show)
263
264 -- | Start state for 'offsetInstructionOpt'.
265 emptyState :: OffsetState
266 emptyState = OffSt [] [] 0
267
268 -- | Implement the offset instruction optimization. This is probably the most complex
269 -- optimization implemented in the library.
270 --
271 -- In streams of instructions between loops, there is no need to keep updating the current position
272 -- if we can keep track of where the different operations should be applied. This is a trade-off
273 -- of time (not updating the pointer) by space (keeping track of the offset in every operation).
274 -- For example the following unoptimized code
275 --
276 --
277 -- >>> offsetInstructionOpt $ Program [Loop [], Move 1, Inc 1 0, Move 2, Clear 0, Mul 2 0 1, Loop []]
278 -- [Loop [],Inc 1 1,Clear 3,Mul 2 3 1,Move 3,Loop []]
279 --
280 -- And the optimization eliminated one 'Move' instruction. In general, for larger programs the gain
281 -- will be more noticeable.
282 --
283 -- An important detail to take into account is that 'Scan' operations break the stream of operations
284 -- that can be optimized together, and turn the accumulated offset back to zero:
285 --
286 -- >>> offsetInstructionOpt $ Program [Loop [], Move 1, Inc 1 0, Scan Up 0, Inc 0 2, Loop []]
287 -- [Loop [],Inc 1 1,Scan Up 1,Inc 0 2,Loop []]
288 offsetInstructionOpt :: Program Optimized -> Program Optimized
289 offsetInstructionOpt -- We implement this as a stateful computation for code clarity
290 =
291 Program .
292 stOptimized .
293 (`execState` emptyState) .
294 (*> finishLastBatch) . traverse_ processOp . instructions
295 where
296 processOp :: Op -> State OffsetState ()
297 processOp (Loop l) = do
298 let newLoop = Loop (instructions $ offsetInstructionOpt (Program l))
299 finishBatch
300 modify $ \s@OffSt {..} -> s {stOptimized = newLoop : stOptimized}
301 processOp (Move n) = get >>= \s -> put s {stOffset = stOffset s + n}
302 processOp (Inc n off) = add off (Inc n)
303 processOp (In n off) = add off (In n)
304 processOp (Out n off) = add off (Out n)
305 processOp (Clear off) = add off Clear
306 processOp (Mul factor from to) = add from (\o -> Mul factor o to)
307 processOp (Scan d off) = do
308 OffSt {..} <- get
309 put
310 OffSt
311 { stOffset = 0
312 , stOptimized = stOptimized
313 , stBatch = Scan d (off + stOffset) : stBatch
314 }
315 add :: MemOffset -> (MemOffset -> Op) -> State OffsetState ()
316 add off op =
317 get >>= \s@OffSt {..} -> put s {stBatch = op (off + stOffset) : stBatch}
318 finishBatch :: State OffsetState ()
319 finishBatch = do
320 s@OffSt {..} <- get
321 let batch =
322 if stOffset /= 0
323 then Move stOffset : stBatch
324 else stBatch
325 put s {stBatch = [], stOffset = 0, stOptimized = batch ++ stOptimized}
326 finishLastBatch :: State OffsetState ()
327 finishLastBatch = do
328 finishBatch
329 modify $ \s@OffSt {..} -> s {stOptimized = reverse stOptimized}
330
331 -- * Loading Compiled Code
332 -- | Load a compiled program from 'saveCompilerOutput' output.
333 load :: ByteString -> Program Optimized
334 load = B.decode
335
336 -- | Load a compiled program saved with 'saveCompilerOutput'.
337 loadFile :: FilePath -> IO (Program Optimized)
338 loadFile = B.decodeFile
339
340 -- * Compiler Flags
341 -- | Command line flags to the Brainfuck compiler
342 data CompilerOptions = CompilerOptions
343 { cOptsOut :: Maybe FilePath -- ^ Where to put the compiled output, if 'Nothing' use the input basename with bfc extension
344 , cOptsFusionOptimization :: Bool -- ^ Enable fusion optimization
345 , cOptsClearLoopOptimization :: Bool -- ^ Enable clear loop optimization
346 , cOptsMulOptimization :: Bool -- ^ Enable mul loop optimization
347 , cOptsScanOptimization :: Bool -- ^ Enable scan loop optimization
348 , cOptsOffsetInstructionsOptimization :: Bool -- ^ Enable offset instructions optimization
349 , cOptsVerbose :: Bool -- ^ Output more debugging information
350 , cOptsSource :: FilePath -- ^ Input source to the compiler, this should be Brainfuck code
351 } deriving (Show)
352
353 optionsP :: Parser CompilerOptions
354 optionsP =
355 (\output disableAll fusion clear mul scan offset verbose source ->
356 CompilerOptions
357 { cOptsOut = output
358 , cOptsFusionOptimization = not disableAll || fusion
359 , cOptsClearLoopOptimization = not disableAll || clear
360 , cOptsMulOptimization = not disableAll || mul
361 , cOptsScanOptimization = not disableAll || scan
362 , cOptsOffsetInstructionsOptimization = not disableAll || offset
363 , cOptsVerbose = verbose
364 , cOptsSource = source
365 }) <$>
366 optional
367 (option
368 str
369 (long "output" <> short 'o' <> metavar "OUT" <>
370 help "Compiled output path")) <*>
371 switch
372 (long "disable-all-optimizations" <> short 'd' <>
373 help "Disable all optimizations") <*>
374 switch
375 (long "fusion" <>
376 help
377 "Reenable fusion optimization (turn multiple + or > into a single operation)") <*>
378 switch
379 (long "clear" <>
380 help "Reenable clear loop optimization (turn [-] into a single operation)") <*>
381 switch
382 (long "mul" <>
383 help
384 "Reenable mul loop optimization (turn [->++>+++<<] into [Mul(1, 2) Mul(2,3)] Clear operations)") <*>
385 switch
386 (long "scan" <>
387 help "Reenable scan loop optimization (turn [>] into ScanR operation)") <*>
388 switch
389 (long "offset" <>
390 help
391 "Reenable offset instructions optimization (turn >>+>->> into Inc 1 2, Inc (-1) 1, Move 1, Move 1, Move 1, Move 1, Move 1, operation)") <*>
392 switch
393 (long "verbose" <> short 'v' <> help "Output more debugging information") <*>
394 argument str (metavar "SRC" <> help "Input source code file")
395
396 options :: ParserInfo CompilerOptions
397 options =
398 info
399 (optionsP <**> helper)
400 (fullDesc <> progDesc "Compile Brainfuck code in SRC file" <>
401 header "An optimizing Brainfuck compiler and evaluator")
402
403 -- | Default compiler options: all optimizations, not verbose, no input or output files.
404 defaultCompilerOptions :: CompilerOptions
405 defaultCompilerOptions =
406 CompilerOptions
407 { cOptsOut = Nothing
408 , cOptsFusionOptimization = True
409 , cOptsClearLoopOptimization = True
410 , cOptsMulOptimization = True
411 , cOptsScanOptimization = True
412 , cOptsOffsetInstructionsOptimization = True
413 , cOptsVerbose = False
414 , cOptsSource = ""
415 }
416
417 -- | Compiler options: all optimizations off.
418 noOptimizationCompilerOptions :: CompilerOptions
419 noOptimizationCompilerOptions =
420 CompilerOptions
421 { cOptsOut = Nothing
422 , cOptsFusionOptimization = False
423 , cOptsClearLoopOptimization = False
424 , cOptsMulOptimization = False
425 , cOptsScanOptimization = False
426 , cOptsOffsetInstructionsOptimization = False
427 , cOptsVerbose = False
428 , cOptsSource = ""
429 }
430
431 -- | Parse a list of command line arguments
432 parsePure :: [String] -> ParserResult CompilerOptions
433 parsePure = execParserPure defaultPrefs options
434
435 -- | Parse a list of command line arguments printing errors to the stderr
436 unsafeParse :: [String] -> IO CompilerOptions
437 unsafeParse = handleParseResult . parsePure
438
439 -- | Parse command line arguments printing errors to the stderr
440 parse :: IO CompilerOptions
441 parse = getArgs >>= unsafeParse
442
443 ----------------------- implementation details ----------------------
444 -- * Implementation Detail: Parsing Lists of Instructions
445 -- | This parser is used to implement the mul optimization. See 'mulOpt'.
446 type ProgramParser a = Parsec.ParsecT [Op] () Identity a
447
448 -- | Parse successfully if the token satisfies the predicate.
449 satisfy' :: Show t => (t -> Bool) -> Parsec.ParsecT [t] () Identity t
450 satisfy' predicate = Parsec.token showTok posFromTok testTok
451 where
452 showTok t = show t
453 posFromTok _ = initialPos ""
454 testTok t =
455 if predicate t
456 then Just t
457 else Nothing
458
459 -- | Parse movement to the right (\>), returning the offset value.
460 --
461 -- >>> Parsec.parse mrightP "" [Move 3]
462 -- Right 3
463 --
464 -- >>> Data.Either.isLeft $ Parsec.parse mrightP "" [Move (-1)]
465 -- True
466 mrightP :: ProgramParser MemOffset
467 mrightP =
468 satisfy' isRight <&> \case
469 Move n -> n
470 _ -> undefined
471
472 -- | Parsemovement to the left (\<), returning the offset value.
473 --
474 -- >>> Parsec.parse mleftP "" [Move (-3)]
475 -- Right 3
476 --
477 -- >>> Data.Either.isLeft $ Parsec.parse mleftP "" [Move 1]
478 -- True
479 mleftP :: ProgramParser MemOffset
480 mleftP =
481 satisfy' isLeft <&> \case
482 Move n -> (negate n)
483 _ -> undefined
484
485 -- | Parse increment, returning total increment.
486 --
487 -- >>> Parsec.parse plusP "" [Inc 3 0]
488 -- Right 3
489 --
490 -- >>> Data.Either.isLeft $ Parsec.parse plusP "" [Inc (-2) 0]
491 -- True
492 plusP :: ProgramParser Int
493 plusP =
494 satisfy' isPlus <&> \case
495 Inc n 0 -> n
496 _ -> undefined
497
498 -- | Parse decrement, returning total decrement.
499 --
500 -- >>> Parsec.parse minusP "" [Inc (-3) 0]
501 -- Right 3
502 --
503 -- >>> Data.Either.isLeft $ Parsec.parse minusP "" [Inc 2 0]
504 -- True
505 minusP :: ProgramParser Int
506 minusP =
507 satisfy' isMinus <&> \case
508 Inc n 0 -> (negate n)
509 _ -> undefined
510
511 -- | Sum the result of a parser applied repeatedly
512 --
513 -- >>> Parsec.parse (summedP plusP) "" [Inc 3 0, Inc 1 0, Inc (-4) 0]
514 -- Right 4
515 summedP :: Num n => ProgramParser n -> ProgramParser n
516 summedP = fmap sum . Parsec.many1
517
518 -- | Full multiple copy/multiply operation parser. Returns the set of factors and relative, incremental offsets.
519 --
520 -- >>> Parsec.parse mulP "" [Inc (-1) 0, Move 1, Inc 2 0, Move 3, Inc 1 0, Move (-4)]
521 -- Right [(2,1),(1,3)]
522 mulP :: ProgramParser [(MulFactor, MemOffset)]
523 mulP = do
524 _ <- minusP
525 copies <- Parsec.many1 shiftFactorP
526 let totalShift = sum $ map fst copies
527 back <- summedP mleftP
528 Parsec.eof
529 if back == coerce totalShift
530 then return (fmap swap copies)
531 else Parsec.unexpected "number of left returns to close the loop"
532 where
533 shiftFactorP = (,) <$> summedP mrightP <*> fmap MulFactor (summedP plusP)
534
535 -- | Is the instruction a right movement?
536 isRight :: Op -> Bool
537 isRight (Move n)
538 | n > 0 = True
539 isRight _ = False
540
541 -- | Is the instruction a left movement?
542 isLeft :: Op -> Bool
543 isLeft (Move n)
544 | n < 0 = True
545 isLeft _ = False
546
547 -- | Is the instruction an increment?
548 isPlus :: Op -> Bool
549 isPlus (Inc n 0)
550 | n > 0 = True
551 isPlus _ = False
552
553 -- | Is the instruction a decrement?
554 isMinus :: Op -> Bool
555 isMinus (Inc n 0)
556 | n < 0 = True
557 isMinus _ = False