From ef88eb173fc87bfe1b87be533e4f574209d40b1d Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Tue, 26 Feb 2019 11:05:22 +0100 Subject: Initial import Disassembler seems to be working. --- .gitignore | 9 ++ LICENSE | 19 +++ README.rst | 49 +++++++ bower.json | 23 ++++ src/Asm.purs | 46 +++++++ src/Decode.purs | 341 ++++++++++++++++++++++++++++++++++++++++++++++++ src/InstructionSet.purs | 326 +++++++++++++++++++++++++++++++++++++++++++++ src/Machine.purs | 27 ++++ src/Main.purs | 99 ++++++++++++++ 9 files changed, 939 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.rst create mode 100644 bower.json create mode 100644 src/Asm.purs create mode 100644 src/Decode.purs create mode 100644 src/InstructionSet.purs create mode 100644 src/Machine.purs create mode 100644 src/Main.purs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..20e090a --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +/bower_components/ +/node_modules/ +/.pulp-cache/ +/output/ +/generated-docs/ +/.psc-package/ +/.psc* +/.purs* +/.psa* diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..cb69c84 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2019 EUMuLator contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..ab06b95 --- /dev/null +++ b/README.rst @@ -0,0 +1,49 @@ +EUMuLator +========= + +EUMuLator is a reimplementation of the EUMEL0 machine in PureScript_. +Currently it is compatible with the EUMEL0 BIT-A-encoded instruction set as of +version 1.8. For an introduction to the EUMEL operating system, see `this +page `__. + +.. Yes, this is my first PureScript project. + +.. _PureScript: http://www.purescript.org/ +.. _EUMuLator: https://github.com/promyloph/eumulator + +Project status +-------------- + +Works: + +- Disassembler + +Work-in-progress: + +- EUMEL0 virtual machine implementation + +Future work: + +- Browser intergation + +Usage +----- + +First `install PureScript`_. Then you’ll need a *Hintergrund* from EUMEL +version 1.8. Currently only `the base disk set’s Hintergrund version 1.8.7 +`__ is extractable by the script +``extractHintergrund.py``, found in the `tools repository`_. Extract the +dataspaces and run the disassembler: + +.. _tools repository: https://github.com/PromyLOPh/eumel-tools +.. _install PureScript: https://github.com/purescript/documentation/blob/master/guides/Getting-Started.md + +.. code:: bash + + linearizeDisk.py 03_eumel0.img 03_eumel0.img.linear + extractHintergrund.py 03_eumel0.img.linear + pulp run -- 0002_0004.ds + +Note that running the disassembler makes sense only for dataspace four, the +default dataspace, which contains each task’s code section. + diff --git a/bower.json b/bower.json new file mode 100644 index 0000000..154c223 --- /dev/null +++ b/bower.json @@ -0,0 +1,23 @@ +{ + "name": "EUMuLator", + "ignore": [ + "**/.*", + "node_modules", + "bower_components", + "output" + ], + "dependencies": { + "purescript-prelude": "^4.1.0", + "purescript-console": "^4.2.0", + "purescript-effect": "^2.0.1", + "purescript-node-process": "^6.0.0", + "purescript-node-fs": "^5.0.0", + "purescript-arrays": "^5.2.0", + "purescript-uint": "^5.1.0", + "purescript-arraybuffer": "^10.0.0", + "purescript-stringutils": "^0.0.8" + }, + "devDependencies": { + "purescript-psci-support": "^4.0.0" + } +} diff --git a/src/Asm.purs b/src/Asm.purs new file mode 100644 index 0000000..fcb60ab --- /dev/null +++ b/src/Asm.purs @@ -0,0 +1,46 @@ +-- Assembly/disassembly + +module Asm (toAsm) where + +import Prelude ((<>), show) +import Data.List +import Data.Int + +import InstructionSet + +-- | Convert Instruction to assembly mnemonic string + arguments +toAsm :: Instruction -> List String +toAsm (Instruction0 op) = show op:Nil +toAsm (InstructionI op imm) = show op:immediate imm:Nil +toAsm (InstructionC op code) = show op:codeAddress code:Nil +toAsm (InstructionM op m) = show op:modno m:Nil +toAsm (InstructionD op d) = show op:dataAddress d:Nil +toAsm (InstructionDD op d1 d2) = show op:dataAddress d1:dataAddress d2:Nil +toAsm (InstructionID op imm d) = show op:immediate imm:dataAddress d:Nil +toAsm (InstructionDI op d imm) = show op:dataAddress d:immediate imm:Nil +toAsm (InstructionDDD op d1 d2 d3) = show op:dataAddress d1:dataAddress d2:dataAddress d3:Nil +toAsm (InstructionDID op d1 imm d2) = show op:dataAddress d1:immediate imm:dataAddress d2:Nil +toAsm (InstructionIDD op imm d1 d2) = show op:immediate imm:dataAddress d1:dataAddress d2:Nil +toAsm (InstructionIIDD op i1 i2 d1 d2) = show op:immediate i1:immediate i2:dataAddress d1:dataAddress d2:Nil +toAsm (InstructionDDDD op d1 d2 d3 d4) = show op:dataAddress d1:dataAddress d2:dataAddress d3:dataAddress d4:Nil +toAsm (InstructionDDDC op d1 d2 d3 c) = show op:dataAddress d1:dataAddress d2:dataAddress d3:codeAddress c:Nil +toAsm (InstructionIIDDD op i1 i2 d1 d2 d3) = show op:immediate i1:immediate i2:dataAddress d1:dataAddress d2:dataAddress d3:Nil +toAsm (InstructionDDDDD op d1 d2 d3 d4 d5) = show op:dataAddress d1:dataAddress d2:dataAddress d3:dataAddress d4:dataAddress d5:Nil +toAsm (InstructionDDDDDDD op d1 d2 d3 d4 d5 d6 d7) = show op:dataAddress d1:dataAddress d2:dataAddress d3:dataAddress d4:dataAddress d5:dataAddress d6:dataAddress d7:Nil + +dataAddress :: DataAddress -> String +dataAddress (GlobalDataAddress a) = "*" <> (hex a) +dataAddress (LocalDataAddress a) = "=" <> (hex a) +dataAddress (LocalDataAddressRef a) = "<=" <> (hex a) <> ">" + +codeAddress a = "$" <> (hex a) + +immediate :: Immediate -> String +immediate v = "#" <> hex v + +modno :: Module -> String +modno v = "&" <> hex v + +hex :: Int -> String +hex v = toStringAs hexadecimal v <> "h" + diff --git a/src/Decode.purs b/src/Decode.purs new file mode 100644 index 0000000..c4198f8 --- /dev/null +++ b/src/Decode.purs @@ -0,0 +1,341 @@ +-- Decode BIT-A encoded EUMEL0 instuction set. +-- +-- library/entwurf-systemdokumentation-1982.djvu documents the basics of this +-- simple, word-based, variable-length encoding. +-- +-- Some instructions are not documented in the system documentation or differ +-- from the documentation there. Apparently the instruction set was changed +-- starting with EUMEL 1.7.5.4 (see comment in +-- src/devel/debugger/src/DEBUGGER.ELA). For the new/changed instructions see +-- src/devel/misc/unknown/src/0DISASS.ELA and +-- src/devel/debugger/doc/DEBUGGER.PRT + +module Decode (decode, DecodeError(..), Opcode, Word8) where + +import Prelude hiding (apply) +import Data.Tuple +import Data.Int.Bits +import Data.List +import Data.Either +import Data.Function (apply) + +import InstructionSet + +-- There is a Data.Word module, but it is outdated, so we cannot use that +type Word4 = Int +type Word8 = Int +type Word16 = Int +type Opcode = Word8 + +-- Number of words consumed by decoding +type Consumed = Int +type DecodeResult = Either DecodeError (Tuple Instruction Consumed) + +data DecodeError = + -- Expected number of words + InputTooShort Int + | UnknownOpcode Opcode + +-- | Decode arbitrary EUMEL0 instruction from a list of Word16, returns instruction and number of word16 consumed +-- XXX: would be nice to use generics here and automate argument parsing (since every argument can only be 16 bit) +decode :: List Word16 -> DecodeResult +decode (first:xs) = decode' high low xs + where + high = and (shr first 8) 0xff + low = and first 0xff +decode xs = makeInputTooShort 0 + +-- | Decode instruction with first Word16 split into two Word8. Helper for real decode. +decode' :: Word8 -> Word8 -> List Word16 -> DecodeResult +-- special instructions +decode' 0x7c v xs = makeInstID EQUIM (v:xs) +decode' 0xfc v xs = makeInstID MOVI (v:xs) +decode' 0x7d v xs = makeInstIDD MOVX (v:xs) +-- two args from opcode word + 2 additional args = 3 words, not 4 → decLen +decode' 0xfd v xs = decLen <$> makeInstIIDD PUTW (high:low:xs) where Tuple high low = getNibbles v -- XXX: high:low or low:high? +decode' 0x7e v xs = decLen <$> makeInstIIDD GETW (high:low:xs) where Tuple high low = getNibbles v +decode' 0xfe v xs = makeInstI PENTER (v:xs) +-- incLen, so escape code + opcode are consumed in addition to args +decode' 0x7f opcode xs = incLen <$> decodeSecondary opcode xs +-- long primary instruction with FFh prefix. this works iff every primary +-- instruction has at least one argument (they do) +decode' 0xff opcode xs = incLen <$> decodePrimary opcode xs +decode' high low xs = decodePrimary opcode (arg1:xs) + where Tuple opcode arg1 = (decodePrimaryOpcode high low) + +-- | Modify the consumed length of a decoded instruction +modifyLen :: (Consumed -> Consumed) -> Tuple Instruction Consumed -> Tuple Instruction Consumed +modifyLen f (Tuple inst len) = Tuple inst (f len) + +incLen = modifyLen ((+) 1) +decLen = modifyLen (flip (-) $ 1) + +-- | Get upper/lower nibble (4 bits) of the 8 bit word `v` +getNibbles :: Word8 -> Tuple Word4 Word4 +getNibbles v = Tuple high low + where + high = (and (shr v 4) 0xf) + low = (and v 0xf) + +-- Decode first Word16, split into two Word8, of primary instruction into opcode and argument +decodePrimaryOpcode :: Word8 -> Word8 -> Tuple Opcode Word16 +decodePrimaryOpcode high low = Tuple opcode arg1 + where + -- Mask high/low word according to sysdoc section 2.3 page 1. i is + -- opcode bit, d is data address bit. Having one d bit at the high + -- byte’s msb is intentional, see sysdoc section 2.1 page 36 (“Trick + -- 1”). This way small global and local addresses can be encoded here. + -- high low + -- diiiiidd dddddddd + opcode = high `and` 0x7c + arg1 = ((high `and` 0x83) `shl` 8) `or` low + +-- Decode primary instruction, based on decoded opcode. The first argument +-- (part of opcode word16) must be the head of @xs +decodePrimary :: Opcode -> List Word16 -> DecodeResult +decodePrimary 0x00 = makeInstI LN +decodePrimary 0x04 = makeInstI LN1 +decodePrimary 0x08 = makeInstDD MOV +decodePrimary 0x0c = makeInstD INC1 +decodePrimary 0x10 = makeInstD DEC1 +decodePrimary 0x14 = makeInstDD INC +decodePrimary 0x18 = makeInstDD DEC +decodePrimary 0x1c = makeInstDDD ADD +decodePrimary 0x20 = makeInstDDD SUB +decodePrimary 0x24 = makeInstD CLEAR +decodePrimary 0x28 = makeInstD TEST +decodePrimary 0x2c = makeInstDD EQU +decodePrimary 0x30 = makeInstDD LSEQ +decodePrimary 0x34 = makeInstDD FMOV +decodePrimary 0x38 = makeInstDDD FADD +decodePrimary 0x3c = makeInstDDD FSUB +decodePrimary 0x40 = makeInstDDD FMUL +decodePrimary 0x44 = makeInstDDD FDIV +decodePrimary 0x48 = makeInstDD FLSEQ +decodePrimary 0x4c = makeInstDD TMOV +decodePrimary 0x50 = makeInstDD TEQU +decodePrimary 0x54 = makeInstDD ULSEQ +decodePrimary 0x58 = makeInstDD DSACC +decodePrimary 0x5c = makeInstDD REF +decodePrimary 0x60 = makeInstIIDDD SUBS +decodePrimary 0x64 = makeInstDID SEL +decodePrimary 0x68 = makeInstD PPV +decodePrimary 0x6c = makeInstD PP +decodePrimary 0x70 = makeInstC B +decodePrimary 0x74 = makeInstC B1 +decodePrimary 0x78 = makeInstM CALL +decodePrimary x = const $ makeOpcodeUnknown x + +-- Decode secondary instruction, based on opcode (7fxxh) and tail +decodeSecondary :: Opcode -> List Word16 -> DecodeResult +decodeSecondary 0x00 = makeInst0 RTN +decodeSecondary 0x01 = makeInst0 RTNT +decodeSecondary 0x02 = makeInst0 RTNF +--decodeSecondary 0x03 = makeInst* ? -- nonexistent in 1.8 +decodeSecondary 0x04 = makeInst0 STOP +decodeSecondary 0x05 = makeInstC GOSUB +decodeSecondary 0x06 = makeInst0 KE +decodeSecondary 0x07 = makeInst0 GORET +decodeSecondary 0x08 = makeInstDD BCRD +decodeSecondary 0x09 = makeInstDD CRD +--decodeSecondary 0x0a = makeInstDD ECWR -- original sysdoc +decodeSecondary 0x0a = makeInstDDD ECWR +decodeSecondary 0x0b = makeInstDDD CWR +decodeSecondary 0x0c = makeInstDD CTT +decodeSecondary 0x0d = makeInstDDD GETC +decodeSecondary 0x0e = makeInstDDD FNONBL +decodeSecondary 0x0f = makeInstDD DREM256 +decodeSecondary 0x10 = makeInstDD AMUL256 +--decodeSecondary 0x11 = makeInst* ? +decodeSecondary 0x12 = makeInstD ISDIG +decodeSecondary 0x13 = makeInstD ISLD +decodeSecondary 0x14 = makeInstD ISLCAS +decodeSecondary 0x15 = makeInstD ISUCAS +decodeSecondary 0x16 = makeInstDDD GADDR +decodeSecondary 0x17 = makeInstDDD GCADDR +decodeSecondary 0x18 = makeInstD ISSHA +decodeSecondary 0x19 = makeInst0 SYSG +decodeSecondary 0x1a = makeInst0 GETTAB +decodeSecondary 0x1b = makeInst0 PUTTAB +decodeSecondary 0x1c = makeInst0 ERTAB +decodeSecondary 0x1d = makeInstM EXEC +decodeSecondary 0x1e = makeInstM PPROC +decodeSecondary 0x1f = makeInstD PCALL +decodeSecondary 0x20 = makeInstDI BRCOMP +decodeSecondary 0x21 = makeInstIDD MOVXX +decodeSecondary 0x22 = makeInstIDD ALIAS +decodeSecondary 0x23 = makeInstID MOVII +decodeSecondary 0x24 = makeInstDD FEQU +decodeSecondary 0x25 = makeInstDD TLSEQ +decodeSecondary 0x26 = makeInstDD FNEG +decodeSecondary 0x27 = makeInstDD NEG +decodeSecondary 0x28 = makeInstDDD IMULT +decodeSecondary 0x29 = makeInstDDD MUL +decodeSecondary 0x2a = makeInstDDD DIV +decodeSecondary 0x2b = makeInstDDD MOD +decodeSecondary 0x2c = makeInstDDD ITSUB +decodeSecondary 0x2d = makeInstDDD ITRPL +decodeSecondary 0x2e = makeInstDD DECOD +decodeSecondary 0x2f = makeInstDD ENCOD +decodeSecondary 0x30 = makeInstDDD SUBT1 +decodeSecondary 0x31 = makeInstDDDD SUBTFT +decodeSecondary 0x32 = makeInstDDD SUBTF +decodeSecondary 0x33 = makeInstDDD REPLAC +decodeSecondary 0x34 = makeInstDD CAT +decodeSecondary 0x35 = makeInstDD TLEN +decodeSecondary 0x36 = makeInstDDD POS +decodeSecondary 0x37 = makeInstDDDD POSF +decodeSecondary 0x38 = makeInstDDDDD POSFT +decodeSecondary 0x39 = makeInstDDDDDDD STRANL +decodeSecondary 0x3a = makeInstDDDDD POSIF +--decodeSecondary 0x3b = makeInst ? +decodeSecondary 0x3c = makeInstD OUT +decodeSecondary 0x3d = makeInstD COUT +decodeSecondary 0x3e = makeInstDD OUTF +decodeSecondary 0x3f = makeInstDDD OUTFT +decodeSecondary 0x40 = makeInstD INCHAR +decodeSecondary 0x41 = makeInstD INCETY +decodeSecondary 0x42 = makeInstD PAUSE +decodeSecondary 0x43 = makeInstDD GCPOS +decodeSecondary 0x44 = makeInstDD CATINP +decodeSecondary 0x45 = makeInstD NILDS +decodeSecondary 0x46 = makeInstDD DSCOPY +decodeSecondary 0x47 = makeInstD DSFORG +decodeSecondary 0x48 = makeInstDD DSWTYP +decodeSecondary 0x49 = makeInstDD DSRTYP +decodeSecondary 0x4a = makeInstDD DSHEAP +decodeSecondary 0x4b = makeInst0 ESTOP +decodeSecondary 0x4c = makeInst0 DSTOP +--decodeSecondary 0x4d = makeInst0 SETERR -- original sysdoc +decodeSecondary 0x4d = makeInstD SETERR +decodeSecondary 0x4e = makeInst0 ISERR +decodeSecondary 0x4f = makeInst0 CLRERR +decodeSecondary 0x50 = makeInstDD RPCB +--decodeSecondary 0x51 = makeInstDD WPCB -- original sysdoc +decodeSecondary 0x51 = makeInstDDD INFOPW +decodeSecondary 0x52 = makeInstDD TWCPU +decodeSecondary 0x53 = makeInstDD ROTATE +--decodeSecondary 0x54 = makeInst0 XREW -- original sysdoc +decodeSecondary 0x54 = makeInstDDDD CONTRL +--decodeSecondary 0x55 = makeInstDDD XWRITE +decodeSecondary 0x55 = makeInstDDDDD BLKOUT +--decodeSecondary 0x56 = makeInstDDD XREAD +decodeSecondary 0x56 = makeInstDDDDD BLKIN +--decodeSecondary 0x57 = makeInstD XSKIP +decodeSecondary 0x57 = makeInstDDD NXTDSP +--decodeSecondary 0x58 = makeInstD XERR +decodeSecondary 0x58 = makeInstDDD DSPAGS +decodeSecondary 0x59 = makeInstDD STORAGE +--decodeSecondary 0x5a = makeInst0 SHUTUP +decodeSecondary 0x5a = makeInstD SYSOP +decodeSecondary 0x5b = makeInst0 ARITS +decodeSecondary 0x5c = makeInst0 ARITU +decodeSecondary 0x5d = makeInstD HPSIZE +decodeSecondary 0x5e = makeInst0 GARB +--decodeSecondary 0x5f = makeInstD CHAEX +decodeSecondary 0x5f = makeInstDDDC TPBEGIN -- XXX: not sure how many bytes the last arg has +decodeSecondary 0x60 = makeInstDDD FSLD +decodeSecondary 0x61 = makeInstDD GEXP +decodeSecondary 0x62 = makeInstDD SEXP +decodeSecondary 0x63 = makeInstDD FLOOR +decodeSecondary 0x64 = makeInstDDD RTSUB +decodeSecondary 0x65 = makeInstDDD RTRPL +decodeSecondary 0x66 = makeInstDD CLOCK +decodeSecondary 0x67 = makeInstD SETNOW +decodeSecondary 0x68 = makeInstDDD TRPCB +decodeSecondary 0x69 = makeInstDDD TWPCB +decodeSecondary 0x6a = makeInstDD TCPU +decodeSecondary 0x6b = makeInstDD TSTAT +decodeSecondary 0x6c = makeInstD ACT +decodeSecondary 0x6d = makeInstD DEACT +decodeSecondary 0x6e = makeInstD THALT +decodeSecondary 0x6f = makeInstDD TBEG +decodeSecondary 0x70 = makeInstD TEND +decodeSecondary 0x71 = makeInstDDDD SEND +decodeSecondary 0x72 = makeInstDDD WAIT +decodeSecondary 0x73 = makeInstDDDD SWCALL +--decodeSecondary 0x74 = makeInstD CDBINT +decodeSecondary 0x74 = makeInstDD CDBINT +--decodeSecondary 0x75 = makeInstD CDBTXT +decodeSecondary 0x75 = makeInstDD CDBTXT +decodeSecondary 0x76 = makeInstD PNACT +decodeSecondary 0x77 = makeInstDDD PW +decodeSecondary 0x78 = makeInstDDD GW +decodeSecondary 0x79 = makeInstDDD XOR +decodeSecondary 0x7a = makeInstDDDD PPCALL +decodeSecondary 0x7b = makeInstD EXTASK +decodeSecondary 0x7c = makeInstDDD AND +decodeSecondary 0x7d = makeInstDDD OR +decodeSecondary 0x7e = makeInstD SESSION +decodeSecondary 0x7f = makeInstDDDDD SENDFT +decodeSecondary 0x80 = makeInstD DEFCOL +decodeSecondary 0x81 = makeInstDD ID +decodeSecondary x = const $ makeOpcodeUnknown x + +makeInputTooShort n = Left $ InputTooShort n +makeOpcodeUnknown x = Left $ UnknownOpcode x + +-- Instruction without argument +makeInst0 f _ = Right $ Tuple (Instruction0 f) 0 + +makeInstI f (arg1:_) = Right $ Tuple (InstructionI f arg1) 1 +makeInstI _ _ = makeInputTooShort 1 + +makeInstM f (arg1:_) = Right $ Tuple (InstructionM f arg1) 1 +makeInstM _ _ = makeInputTooShort 1 + +makeInstC f (arg1:_) = Right $ Tuple (InstructionC f arg1) 1 +makeInstC _ _ = makeInputTooShort 1 + +makeInstD f (arg1:_) = Right $ Tuple (InstructionD f (decodeDataAddress arg1)) 1 +makeInstD _ _ = makeInputTooShort 1 + +makeInstID f (arg1:arg2:_) = Right $ Tuple (InstructionID f arg1 (decodeDataAddress arg2)) 2 +makeInstID _ _ = makeInputTooShort 2 + +makeInstDI f (arg1:arg2:_) = Right $ Tuple (InstructionDI f (decodeDataAddress arg1) arg2) 2 +makeInstDI _ _ = makeInputTooShort 2 + +makeInstDD f (arg1:arg2:_) = Right $ Tuple (InstructionDD f (decodeDataAddress arg1) (decodeDataAddress arg2)) 2 +makeInstDD _ _ = makeInputTooShort 2 + +makeInstIDD f (arg1:arg2:arg3:_) = Right $ Tuple (InstructionIDD f arg1 (decodeDataAddress arg2) (decodeDataAddress arg3)) 3 +makeInstIDD _ _ = makeInputTooShort 3 + +makeInstDID f (arg1:arg2:arg3:_) = Right $ Tuple (InstructionDID f (decodeDataAddress arg1) arg2 (decodeDataAddress arg3)) 3 +makeInstDID _ _ = makeInputTooShort 3 + +makeInstDDD f (arg1:arg2:arg3:_) = Right $ Tuple (InstructionDDD f (decodeDataAddress arg1) (decodeDataAddress arg2) (decodeDataAddress arg3)) 3 +makeInstDDD _ _ = makeInputTooShort 3 + +makeInstIIDD f (arg1:arg2:arg3:arg4:_) = Right $ Tuple (InstructionIIDD f arg1 arg2 (decodeDataAddress arg3) (decodeDataAddress arg4)) 4 +makeInstIIDD _ _ = makeInputTooShort 4 + +makeInstDDDD f (arg1:arg2:arg3:arg4:_) = Right $ Tuple (InstructionDDDD f (decodeDataAddress arg1) (decodeDataAddress arg2) (decodeDataAddress arg3) (decodeDataAddress arg4)) 4 +makeInstDDDD _ _ = makeInputTooShort 4 + +makeInstDDDC f (arg1:arg2:arg3:arg4:_) = Right $ Tuple (InstructionDDDC f (decodeDataAddress arg1) (decodeDataAddress arg2) (decodeDataAddress arg3) (decodeCodeAddress arg4)) 4 +makeInstDDDC _ _ = makeInputTooShort 4 + +makeInstDDDDD f (arg1:arg2:arg3:arg4:arg5:_) = Right $ Tuple (InstructionDDDDD f (decodeDataAddress arg1) (decodeDataAddress arg2) (decodeDataAddress arg3) (decodeDataAddress arg4) (decodeDataAddress arg5)) 5 +makeInstDDDDD _ _ = makeInputTooShort 5 + +makeInstDDDDDDD f (arg1:arg2:arg3:arg4:arg5:arg6:arg7:_) = Right $ Tuple (InstructionDDDDDDD f (decodeDataAddress arg1) (decodeDataAddress arg2) (decodeDataAddress arg3) (decodeDataAddress arg4) (decodeDataAddress arg5) (decodeDataAddress arg6) (decodeDataAddress arg7)) 7 +makeInstDDDDDDD _ _ = makeInputTooShort 7 + +makeInstIIDDD f (arg1:arg2:arg3:arg4:arg5:_) = Right $ Tuple (InstructionIIDDD f arg1 arg2 (decodeDataAddress arg3) (decodeDataAddress arg4) (decodeDataAddress arg5)) 5 +makeInstIIDDD _ _ = makeInputTooShort 5 + +decodeDataAddress :: Word16 -> DataAddress +decodeDataAddress v | msb16 v == 0 = GlobalDataAddress v +-- else: msb is one and lsb is zoro +decodeDataAddress v | lsb v == 0 = LocalDataAddress ((v `and` 0x7fff) `div` 2) +-- else: msb one lsb one: +decodeDataAddress v | otherwise = LocalDataAddressRef ((v `and` 0x7fff) `div` 2) + +msb16 x = (x `shr` 15) `and` 1 +lsb x = x `and` 1 + +decodeCodeAddress :: Word16 -> CodeAddress +decodeCodeAddress v = v + diff --git a/src/InstructionSet.purs b/src/InstructionSet.purs new file mode 100644 index 0000000..7e01b05 --- /dev/null +++ b/src/InstructionSet.purs @@ -0,0 +1,326 @@ +-- | EUMEL0 instruction set data types +module InstructionSet where + +import Prelude +import Data.Show +import Data.Generic.Rep.Show +import Data.Generic.Rep + +-- in `Machine.wordsize` bytes +maxInstructionLen = 1+7 + +-- Operands: +-- Immediate, i.e. data encoded in instruction word +type Immediate = Int +-- Module number +type Module = Int +type CodeAddress = Int +type Procedure = Int + +-- A data memory address; XXX: uses word-addressing? +data DataAddress = + GlobalDataAddress Int + | LocalDataAddress Int + | LocalDataAddressRef Int + +-- No operand +data Op0 = + ARITS + | ARITU + | CLRERR + | DSTOP + | ERTAB + | ESTOP + | GARB + | GETTAB + | GORET + | ISERR + | KE + | PUTTAB + | RTN + | RTNF + | RTNT + | SHUTUP + | STOP + | SYSG + | XREW + +-- One immediate Operand +data OpI = + LN + | LN1 + | PENTER + +-- One CodeAddress operand +data OpC = + B + | B1 + | GOSUB + +-- One Module operand +data OpM = + CALL + | EXEC + | PPROC + +-- One DataAddress operand +data OpD = + ACT + | CHAEX + | CLEAR + | COUT + | DEACT + | DEC1 + | DEFCOL + | DSFORG + | EXTASK + | HPSIZE + | INC1 + | INCETY + | INCHAR + | ISDIG + | ISLCAS + | ISLD + | ISSHA + | ISUCAS + | NILDS + | OUT + | PAUSE + | PCALL + | PNACT + | PP + | PPV + | SESSION + | SETERR + | SETNOW + | SYSOP + | TEND + | TEST + | THALT + | XERR -- XXX not sure about the first letter, IpagesDT in src/devel/misc/unknown/src/0DISASS.ELA + | XSKIP + +-- Two DataAddress operands +data OpDD = + AMUL256 + | BCRD + | CAT + | CATINP + | CDBINT + | CDBTXT + | CLOCK + | CRD + | CTT + | DEC + | DECOD + | DREM256 + | DSACC + | DSCOPY + | DSHEAP + | DSRTYP + | DSWTYP + | ENCOD + | EQU + | FEQU + | FLOOR + | FLSEQ + | FMOV + | FNEG + | GCPOS + | GEXP + | INC + | ID + | LSEQ + | MOV + | NEG + | OUTF + | REF + | ROTATE + | RPCB + | SEXP + | STORAGE + | TBEG + | TCPU + | TEQU + | TLEN + | TLSEQ + | TMOV + | TSTAT + | TWCPU + | ULSEQ + | WPCB + +data OpID = + EQUIM + | MOVII + | MOVI + +data OpDI = + BRCOMP + +data OpDDD = + ADD + | AND + | CWR + | DIV + | DSPAGS + | ECWR + | FADD + | FDIV + | FMUL + | FNONBL + | FSLD + | FSUB + | GADDR + | GCADDR + | GETC + | GW + | IMULT + | INFOPW + | ITRPL + | ITSUB + | MOD + | MUL + | NXTDSP + | OR + | OUTFT + | POS + | PW + | REPLAC + | RTRPL + | RTSUB + | SUB + | SUBT1 + | SUBTF + | TRPCB + | TWPCB + | WAIT + | XOR + | XREAD + | XWRITE + +data OpDID = + SEL + +data OpIDD = + ALIAS + | MOVX + | MOVXX + +data OpIIDD = + GETW + | PUTW + +data OpDDDD = + CONTRL + | POSF + | PPCALL + | SEND + | SUBTFT + | SWCALL + +data OpDDDC = TPBEGIN + +data OpIIDDD = + SUBS + +data OpDDDDD = + BLKIN + | BLKOUT + | SENDFT + | POSFT + | POSIF + +data OpDDDDDDD = STRANL + +-- | Instruction encodings +-- | A single Instruction Opcode (Array Args) would be sufficient, but not +-- | type-safe. Thus they are grouped by encoding (i.e. argument count and type). +data Instruction = + Instruction0 Op0 + | InstructionI OpI Immediate + | InstructionC OpC CodeAddress + | InstructionM OpM Module + | InstructionD OpD DataAddress + | InstructionDD OpDD DataAddress DataAddress + | InstructionID OpID Immediate DataAddress + | InstructionDI OpDI DataAddress Immediate + | InstructionDDD OpDDD DataAddress DataAddress DataAddress + | InstructionDID OpDID DataAddress Immediate DataAddress + | InstructionIDD OpIDD Immediate DataAddress DataAddress + | InstructionIIDD OpIIDD Immediate Immediate DataAddress DataAddress + | InstructionDDDD OpDDDD DataAddress DataAddress DataAddress DataAddress + | InstructionDDDC OpDDDC DataAddress DataAddress DataAddress CodeAddress + | InstructionIIDDD OpIIDDD Immediate Immediate DataAddress DataAddress DataAddress + | InstructionDDDDD OpDDDDD DataAddress DataAddress DataAddress DataAddress DataAddress + | InstructionDDDDDDD OpDDDDDDD DataAddress DataAddress DataAddress DataAddress DataAddress DataAddress DataAddress + +-- Until we figure out how to do this without generics +derive instance genericOp0 :: Generic Op0 _ +instance showOp0 :: Show Op0 where + show = genericShow + +derive instance genericOpI :: Generic OpI _ +instance showOpI :: Show OpI where + show = genericShow + +derive instance genericOpC :: Generic OpC _ +instance showOpC :: Show OpC where + show = genericShow + +derive instance genericOpM :: Generic OpM _ +instance showOpM :: Show OpM where + show = genericShow + +derive instance genericOpD :: Generic OpD _ +instance showOpD :: Show OpD where + show = genericShow + +derive instance genericOpDD :: Generic OpDD _ +instance showOpDD :: Show OpDD where + show = genericShow + +derive instance genericOpID :: Generic OpID _ +instance showOpID :: Show OpID where + show = genericShow + +derive instance genericOpDI :: Generic OpDI _ +instance showOpDI :: Show OpDI where + show = genericShow + +derive instance genericOpDDD :: Generic OpDDD _ +instance showOpDDD :: Show OpDDD where + show = genericShow + +derive instance genericOpDID :: Generic OpDID _ +instance showOpDID :: Show OpDID where + show = genericShow + +derive instance genericOpIDD :: Generic OpIDD _ +instance showOpIDD :: Show OpIDD where + show = genericShow + +derive instance genericOpIIDD :: Generic OpIIDD _ +instance showOpIIDD :: Show OpIIDD where + show = genericShow + +derive instance genericOpDDDD :: Generic OpDDDD _ +instance showOpDDDD :: Show OpDDDD where + show = genericShow + +derive instance genericOpDDDC :: Generic OpDDDC _ +instance showOpDDDC :: Show OpDDDC where + show = genericShow + +derive instance genericOpDDDDD :: Generic OpDDDDD _ +instance showOpDDDDD :: Show OpDDDDD where + show = genericShow + +derive instance genericOpIIDDD :: Generic OpIIDDD _ +instance showOpIIDDD :: Show OpIIDDD where + show = genericShow + +derive instance genericOpDDDDDDD :: Generic OpDDDDDDD _ +instance showOpDDDDDDD :: Show OpDDDDDDD where + show = genericShow + diff --git a/src/Machine.purs b/src/Machine.purs new file mode 100644 index 0000000..7ee4a55 --- /dev/null +++ b/src/Machine.purs @@ -0,0 +1,27 @@ +-- Machine constants +module Machine where + +import Prelude + +-- | Machine’s word size +wordsize :: Int +wordsize = 2 + +-- | Code offset in default dataspace 4 in `wordsize` +codeOffset :: Int +codeOffset = 0x40000 `div` wordsize +-- | Code section size in `wordsize` +codeSize :: Int +codeSize = (128*1024) `div` wordsize + +-- | Pagesize in `wordsize`, documentation sometimes calls this `ps` +pagesize :: Int +pagesize = 512 `div` wordsize + +-- | Max size af a single code module, `cms`, in words +codeModuleSize :: Int +codeModuleSize = 4096 + +defaultDsId :: Int +defaultDsId = 4 + diff --git a/src/Main.purs b/src/Main.purs new file mode 100644 index 0000000..4906542 --- /dev/null +++ b/src/Main.purs @@ -0,0 +1,99 @@ +module Main (main, fileToArrayView) where + +import Prelude +import Effect (Effect, foreachE) +import Effect.Console (log) +import Data.List +import Data.List as L +import Data.Tuple +import Data.Int +import Data.String.Utils (length, repeat) +import Data.Maybe (fromJust, Maybe(..), maybe) +import Partial.Unsafe (unsafePartial) +import Node.FS.Sync (readFile) +import Node.Buffer (toArrayBuffer) +import Data.ArrayBuffer.Typed (whole, toArray, (!)) +import Data.ArrayBuffer.Types (Uint16, ArrayView) +import Data.UInt (toInt) +import Data.Either (either) +import Control.Monad.Rec.Class +import Node.Process (argv) +import Data.Array ((!!)) + +import Decode +import Asm +import InstructionSet +import Machine + +-- | Left-pad a string `s` to at least `num` characters with `pad` +lpad :: Int -> String -> String -> String +lpad num pad s = prefix <> s + where + prefix = case repeat (num-(length s)) pad of + Just x -> x + Nothing -> "" + +-- | Right-pad a string `s` to at least `num` characters with `pad` +rpad :: Int -> String -> String -> String +rpad num pad s = s <> prefix + where + prefix = case repeat (num-(length s)) pad of + Just x -> x + Nothing -> "" + +-- | Hex-format number `v`, adding padding to at least `len` characters +toHex :: Int -> Int -> String +toHex len v = lpad len "0" $ toStringAs hexadecimal v + +hexdump :: List Int -> String +hexdump words = intercalate " " $ map (toHex 4) words + +-- Format instruction with hexdump @width +formatInst :: Int -> Int -> Tuple (List Int) Instruction -> String +formatInst address width (Tuple words inst) = toHex 8 address <> " | " <> rpad width " " (hexdump words) <> " | " <> (rpad 10 " " mnemonic) <> " " <> (intercalate ", " args) + where + asmInst = toAsm inst + mnemonic = unsafePartial $ fromJust $ head asmInst + args = unsafePartial $ fromJust $ tail asmInst + +-- | Create ArrayView from a file `name`. +fileToArrayView :: String -> Effect (ArrayView Uint16) +fileToArrayView name = (readFile name) >>= toArrayBuffer >>= whole + +logInst address = (log <<< (formatInst address (hexWords*hexWordWidth+hexWords*spaceWidth))) + where + hexWordWidth = 4 + hexWords = maxInstructionLen + spaceWidth = 1 + +-- Take n words from an ArrayView starting at offset +takeView :: Int -> Int -> ArrayView Uint16 -> Effect (List Int) +takeView _ 0 _ = pure Nil +takeView offset n view = (view ! offset) >>= maybe (pure Nil) recurse + where + recurse v = ((:) (toInt v)) <$> (takeView (offset+1) (n-1) view) + +-- Disassemble memory in @view starting at @startAddress +disasm :: Int -> ArrayView Uint16 -> Effect Unit +disasm startAddress view = tailRecM go startAddress + where + go address = do + words <- takeView address maxInstructionLen view + either invalid (continue address words) $ decode words + invalid (InputTooShort x) = do + log $ "input too short " <> show x + pure $ Done unit + invalid (UnknownOpcode x) = do + log $ "unknown opcode " <> show x + pure $ Done unit + continue address words (Tuple inst len) = do + logInst (address*wordsize) (Tuple (take len words) inst) + pure $ Loop(address+len) + +main :: Effect Unit +main = secondArg >>= (maybe argumentMissing doDisasm) + where + secondArg = (flip (!!) $ 2) <$> argv + argumentMissing = log "Missing argument" + doDisasm file = fileToArrayView file >>= disasm codeOffset + -- cgit v1.2.3