From ef88eb173fc87bfe1b87be533e4f574209d40b1d Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun <lars@6xq.net>
Date: Tue, 26 Feb 2019 11:05:22 +0100
Subject: Initial import

Disassembler seems to be working.
---
 .gitignore              |   9 ++
 LICENSE                 |  19 +++
 README.rst              |  49 +++++++
 bower.json              |  23 ++++
 src/Asm.purs            |  46 +++++++
 src/Decode.purs         | 341 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/InstructionSet.purs | 326 +++++++++++++++++++++++++++++++++++++++++++++
 src/Machine.purs        |  27 ++++
 src/Main.purs           |  99 ++++++++++++++
 9 files changed, 939 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 LICENSE
 create mode 100644 README.rst
 create mode 100644 bower.json
 create mode 100644 src/Asm.purs
 create mode 100644 src/Decode.purs
 create mode 100644 src/InstructionSet.purs
 create mode 100644 src/Machine.purs
 create mode 100644 src/Main.purs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..20e090a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+/bower_components/
+/node_modules/
+/.pulp-cache/
+/output/
+/generated-docs/
+/.psc-package/
+/.psc*
+/.purs*
+/.psa*
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..cb69c84
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2019 EUMuLator contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..ab06b95
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,49 @@
+EUMuLator
+=========
+
+EUMuLator is a reimplementation of the EUMEL0 machine in PureScript_.
+Currently it is compatible with the EUMEL0 BIT-A-encoded instruction set as of
+version 1.8. For an introduction to the EUMEL operating system, see `this
+page <https://6xq.net/eumel/>`__.
+
+.. Yes, this is my first PureScript project.
+
+.. _PureScript: http://www.purescript.org/
+.. _EUMuLator: https://github.com/promyloph/eumulator
+
+Project status
+--------------
+
+Works:
+
+- Disassembler
+
+Work-in-progress:
+
+- EUMEL0 virtual machine implementation
+
+Future work:
+
+- Browser intergation
+
+Usage
+-----
+
+First `install PureScript`_. Then you’ll need a *Hintergrund* from EUMEL
+version 1.8. Currently only `the base disk set’s Hintergrund version 1.8.7
+<public/disks/grundpaket.zip>`__ is extractable by the script
+``extractHintergrund.py``, found in the `tools repository`_. Extract the
+dataspaces and run the disassembler:
+
+.. _tools repository: https://github.com/PromyLOPh/eumel-tools
+.. _install PureScript: https://github.com/purescript/documentation/blob/master/guides/Getting-Started.md
+
+.. code:: bash
+
+   linearizeDisk.py 03_eumel0.img 03_eumel0.img.linear
+   extractHintergrund.py 03_eumel0.img.linear
+   pulp run -- 0002_0004.ds
+
+Note that running the disassembler makes sense only for dataspace four, the
+default dataspace, which contains each task’s code section.
+
diff --git a/bower.json b/bower.json
new file mode 100644
index 0000000..154c223
--- /dev/null
+++ b/bower.json
@@ -0,0 +1,23 @@
+{
+  "name": "EUMuLator",
+  "ignore": [
+    "**/.*",
+    "node_modules",
+    "bower_components",
+    "output"
+  ],
+  "dependencies": {
+    "purescript-prelude": "^4.1.0",
+    "purescript-console": "^4.2.0",
+    "purescript-effect": "^2.0.1",
+    "purescript-node-process": "^6.0.0",
+    "purescript-node-fs": "^5.0.0",
+    "purescript-arrays": "^5.2.0",
+    "purescript-uint": "^5.1.0",
+    "purescript-arraybuffer": "^10.0.0",
+    "purescript-stringutils": "^0.0.8"
+  },
+  "devDependencies": {
+    "purescript-psci-support": "^4.0.0"
+  }
+}
diff --git a/src/Asm.purs b/src/Asm.purs
new file mode 100644
index 0000000..fcb60ab
--- /dev/null
+++ b/src/Asm.purs
@@ -0,0 +1,46 @@
+-- Assembly/disassembly
+
+module Asm (toAsm) where
+
+import Prelude ((<>), show)
+import Data.List
+import Data.Int
+
+import InstructionSet
+
+-- | Convert Instruction to assembly mnemonic string + arguments
+toAsm :: Instruction -> List String
+toAsm (Instruction0 op) = show op:Nil
+toAsm (InstructionI op imm) = show op:immediate imm:Nil
+toAsm (InstructionC op code) = show op:codeAddress code:Nil
+toAsm (InstructionM op m) = show op:modno m:Nil
+toAsm (InstructionD op d) = show op:dataAddress d:Nil
+toAsm (InstructionDD op d1 d2) = show op:dataAddress d1:dataAddress d2:Nil
+toAsm (InstructionID op imm d) = show op:immediate imm:dataAddress d:Nil
+toAsm (InstructionDI op d imm) = show op:dataAddress d:immediate imm:Nil
+toAsm (InstructionDDD op d1 d2 d3) = show op:dataAddress d1:dataAddress d2:dataAddress d3:Nil
+toAsm (InstructionDID op d1 imm d2) = show op:dataAddress d1:immediate imm:dataAddress d2:Nil
+toAsm (InstructionIDD op imm d1 d2) = show op:immediate imm:dataAddress d1:dataAddress d2:Nil
+toAsm (InstructionIIDD op i1 i2 d1 d2) = show op:immediate i1:immediate i2:dataAddress d1:dataAddress d2:Nil
+toAsm (InstructionDDDD op d1 d2 d3 d4) = show op:dataAddress d1:dataAddress d2:dataAddress d3:dataAddress d4:Nil
+toAsm (InstructionDDDC op d1 d2 d3 c) = show op:dataAddress d1:dataAddress d2:dataAddress d3:codeAddress c:Nil
+toAsm (InstructionIIDDD op i1 i2 d1 d2 d3) = show op:immediate i1:immediate i2:dataAddress d1:dataAddress d2:dataAddress d3:Nil
+toAsm (InstructionDDDDD op d1 d2 d3 d4 d5) = show op:dataAddress d1:dataAddress d2:dataAddress d3:dataAddress d4:dataAddress d5:Nil
+toAsm (InstructionDDDDDDD op d1 d2 d3 d4 d5 d6 d7) = show op:dataAddress d1:dataAddress d2:dataAddress d3:dataAddress d4:dataAddress d5:dataAddress d6:dataAddress d7:Nil
+
+dataAddress :: DataAddress -> String
+dataAddress (GlobalDataAddress a) = "*" <> (hex a)
+dataAddress (LocalDataAddress a) = "=" <> (hex a)
+dataAddress (LocalDataAddressRef a) = "<=" <> (hex a) <> ">"
+
+codeAddress a = "$" <> (hex a)
+
+immediate :: Immediate -> String
+immediate v = "#" <> hex v
+
+modno :: Module -> String
+modno v = "&" <> hex v
+
+hex :: Int -> String
+hex v = toStringAs hexadecimal v <> "h"
+
diff --git a/src/Decode.purs b/src/Decode.purs
new file mode 100644
index 0000000..c4198f8
--- /dev/null
+++ b/src/Decode.purs
@@ -0,0 +1,341 @@
+-- Decode BIT-A encoded EUMEL0 instuction set.
+--
+-- library/entwurf-systemdokumentation-1982.djvu documents the basics of this
+-- simple, word-based, variable-length encoding.
+--
+-- Some instructions are not documented in the system documentation or differ
+-- from the documentation there. Apparently the instruction set was changed
+-- starting with EUMEL 1.7.5.4 (see comment in
+-- src/devel/debugger/src/DEBUGGER.ELA). For the new/changed instructions see
+-- src/devel/misc/unknown/src/0DISASS.ELA and
+-- src/devel/debugger/doc/DEBUGGER.PRT
+
+module Decode (decode, DecodeError(..), Opcode, Word8) where
+
+import Prelude hiding (apply)
+import Data.Tuple
+import Data.Int.Bits
+import Data.List
+import Data.Either
+import Data.Function (apply)
+
+import InstructionSet
+
+-- There is a Data.Word module, but it is outdated, so we cannot use that
+type Word4 = Int
+type Word8 = Int
+type Word16 = Int
+type Opcode = Word8
+
+-- Number of words consumed by decoding
+type Consumed = Int
+type DecodeResult = Either DecodeError (Tuple Instruction Consumed)
+
+data DecodeError =
+	-- Expected number of words
+	  InputTooShort Int
+	| UnknownOpcode Opcode
+
+-- | Decode arbitrary EUMEL0 instruction from a list of Word16, returns instruction and number of word16 consumed
+-- XXX: would be nice to use generics here and automate argument parsing (since every argument can only be 16 bit)
+decode :: List Word16 -> DecodeResult
+decode (first:xs) = decode' high low xs
+	where
+		high = and (shr first 8) 0xff
+		low = and first 0xff
+decode xs = makeInputTooShort 0
+
+-- | Decode instruction with first Word16 split into two Word8. Helper for real decode.
+decode' :: Word8 -> Word8 -> List Word16 -> DecodeResult
+-- special instructions
+decode' 0x7c v xs = makeInstID EQUIM (v:xs)
+decode' 0xfc v xs = makeInstID MOVI (v:xs)
+decode' 0x7d v xs = makeInstIDD MOVX (v:xs)
+-- two args from opcode word + 2 additional args = 3 words, not 4 → decLen
+decode' 0xfd v xs = decLen <$> makeInstIIDD PUTW (high:low:xs) where Tuple high low = getNibbles v -- XXX: high:low or low:high?
+decode' 0x7e v xs = decLen <$> makeInstIIDD GETW (high:low:xs) where Tuple high low = getNibbles v
+decode' 0xfe v xs = makeInstI PENTER (v:xs)
+-- incLen, so escape code + opcode are consumed in addition to args
+decode' 0x7f opcode xs = incLen <$> decodeSecondary opcode xs
+-- long primary instruction with FFh prefix. this works iff every primary
+-- instruction has at least one argument (they do)
+decode' 0xff opcode xs = incLen <$> decodePrimary opcode xs
+decode' high low xs = decodePrimary opcode (arg1:xs)
+	where Tuple opcode arg1 = (decodePrimaryOpcode high low)
+
+-- | Modify the consumed length of a decoded instruction
+modifyLen :: (Consumed -> Consumed) -> Tuple Instruction Consumed -> Tuple Instruction Consumed
+modifyLen f (Tuple inst len) = Tuple inst (f len)
+
+incLen = modifyLen ((+) 1)
+decLen = modifyLen (flip (-) $ 1)
+
+-- | Get upper/lower nibble (4 bits) of the 8 bit word `v`
+getNibbles :: Word8 -> Tuple Word4 Word4
+getNibbles v = Tuple high low
+	where
+		high = (and (shr v 4) 0xf)
+		low = (and v 0xf)
+
+-- Decode first Word16, split into two Word8, of primary instruction into opcode and argument
+decodePrimaryOpcode :: Word8 -> Word8 -> Tuple Opcode Word16
+decodePrimaryOpcode high low = Tuple opcode arg1
+    where
+		-- Mask high/low word according to sysdoc section 2.3 page 1. i is
+		-- opcode bit, d is data address bit. Having one d bit at the high
+		-- byte’s msb is intentional, see sysdoc section 2.1 page 36 (“Trick
+		-- 1”). This way small global and local addresses can be encoded here.
+		--   high      low
+		-- diiiiidd dddddddd
+        opcode = high `and` 0x7c
+        arg1 = ((high `and` 0x83) `shl` 8) `or` low
+
+-- Decode primary instruction, based on decoded opcode. The first argument
+-- (part of opcode word16) must be the head of @xs
+decodePrimary :: Opcode -> List Word16 -> DecodeResult
+decodePrimary 0x00 = makeInstI LN
+decodePrimary 0x04 = makeInstI LN1
+decodePrimary 0x08 = makeInstDD MOV
+decodePrimary 0x0c = makeInstD INC1
+decodePrimary 0x10 = makeInstD DEC1
+decodePrimary 0x14 = makeInstDD INC
+decodePrimary 0x18 = makeInstDD DEC
+decodePrimary 0x1c = makeInstDDD ADD
+decodePrimary 0x20 = makeInstDDD SUB
+decodePrimary 0x24 = makeInstD CLEAR
+decodePrimary 0x28 = makeInstD TEST
+decodePrimary 0x2c = makeInstDD EQU
+decodePrimary 0x30 = makeInstDD LSEQ
+decodePrimary 0x34 = makeInstDD FMOV
+decodePrimary 0x38 = makeInstDDD FADD
+decodePrimary 0x3c = makeInstDDD FSUB
+decodePrimary 0x40 = makeInstDDD FMUL
+decodePrimary 0x44 = makeInstDDD FDIV
+decodePrimary 0x48 = makeInstDD FLSEQ
+decodePrimary 0x4c = makeInstDD TMOV
+decodePrimary 0x50 = makeInstDD TEQU
+decodePrimary 0x54 = makeInstDD ULSEQ
+decodePrimary 0x58 = makeInstDD DSACC
+decodePrimary 0x5c = makeInstDD REF
+decodePrimary 0x60 = makeInstIIDDD SUBS
+decodePrimary 0x64 = makeInstDID SEL
+decodePrimary 0x68 = makeInstD PPV
+decodePrimary 0x6c = makeInstD PP
+decodePrimary 0x70 = makeInstC B
+decodePrimary 0x74 = makeInstC B1
+decodePrimary 0x78 = makeInstM CALL
+decodePrimary x = const $ makeOpcodeUnknown x
+
+-- Decode secondary instruction, based on opcode (7fxxh) and tail
+decodeSecondary :: Opcode -> List Word16 -> DecodeResult
+decodeSecondary 0x00 = makeInst0 RTN
+decodeSecondary 0x01 = makeInst0 RTNT
+decodeSecondary 0x02 = makeInst0 RTNF
+--decodeSecondary 0x03 = makeInst* ? -- nonexistent in 1.8
+decodeSecondary 0x04 = makeInst0 STOP
+decodeSecondary 0x05 = makeInstC GOSUB
+decodeSecondary 0x06 = makeInst0 KE
+decodeSecondary 0x07 = makeInst0 GORET
+decodeSecondary 0x08 = makeInstDD BCRD
+decodeSecondary 0x09 = makeInstDD CRD
+--decodeSecondary 0x0a = makeInstDD ECWR -- original sysdoc
+decodeSecondary 0x0a = makeInstDDD ECWR
+decodeSecondary 0x0b = makeInstDDD CWR
+decodeSecondary 0x0c = makeInstDD CTT
+decodeSecondary 0x0d = makeInstDDD GETC
+decodeSecondary 0x0e = makeInstDDD FNONBL
+decodeSecondary 0x0f = makeInstDD DREM256
+decodeSecondary 0x10 = makeInstDD AMUL256
+--decodeSecondary 0x11 = makeInst* ?
+decodeSecondary 0x12 = makeInstD ISDIG
+decodeSecondary 0x13 = makeInstD ISLD
+decodeSecondary 0x14 = makeInstD ISLCAS
+decodeSecondary 0x15 = makeInstD ISUCAS
+decodeSecondary 0x16 = makeInstDDD GADDR
+decodeSecondary 0x17 = makeInstDDD GCADDR
+decodeSecondary 0x18 = makeInstD ISSHA
+decodeSecondary 0x19 = makeInst0 SYSG
+decodeSecondary 0x1a = makeInst0 GETTAB
+decodeSecondary 0x1b = makeInst0 PUTTAB
+decodeSecondary 0x1c = makeInst0 ERTAB
+decodeSecondary 0x1d = makeInstM EXEC
+decodeSecondary 0x1e = makeInstM PPROC
+decodeSecondary 0x1f = makeInstD PCALL
+decodeSecondary 0x20 = makeInstDI BRCOMP
+decodeSecondary 0x21 = makeInstIDD MOVXX
+decodeSecondary 0x22 = makeInstIDD ALIAS
+decodeSecondary 0x23 = makeInstID MOVII
+decodeSecondary 0x24 = makeInstDD FEQU
+decodeSecondary 0x25 = makeInstDD TLSEQ
+decodeSecondary 0x26 = makeInstDD FNEG
+decodeSecondary 0x27 = makeInstDD NEG
+decodeSecondary 0x28 = makeInstDDD IMULT
+decodeSecondary 0x29 = makeInstDDD MUL
+decodeSecondary 0x2a = makeInstDDD DIV
+decodeSecondary 0x2b = makeInstDDD MOD
+decodeSecondary 0x2c = makeInstDDD ITSUB
+decodeSecondary 0x2d = makeInstDDD ITRPL
+decodeSecondary 0x2e = makeInstDD DECOD
+decodeSecondary 0x2f = makeInstDD ENCOD
+decodeSecondary 0x30 = makeInstDDD SUBT1
+decodeSecondary 0x31 = makeInstDDDD SUBTFT
+decodeSecondary 0x32 = makeInstDDD SUBTF
+decodeSecondary 0x33 = makeInstDDD REPLAC
+decodeSecondary 0x34 = makeInstDD CAT
+decodeSecondary 0x35 = makeInstDD TLEN
+decodeSecondary 0x36 = makeInstDDD POS
+decodeSecondary 0x37 = makeInstDDDD POSF
+decodeSecondary 0x38 = makeInstDDDDD POSFT
+decodeSecondary 0x39 = makeInstDDDDDDD STRANL
+decodeSecondary 0x3a = makeInstDDDDD POSIF
+--decodeSecondary 0x3b = makeInst ?
+decodeSecondary 0x3c = makeInstD OUT
+decodeSecondary 0x3d = makeInstD COUT
+decodeSecondary 0x3e = makeInstDD OUTF
+decodeSecondary 0x3f = makeInstDDD OUTFT
+decodeSecondary 0x40 = makeInstD INCHAR
+decodeSecondary 0x41 = makeInstD INCETY
+decodeSecondary 0x42 = makeInstD PAUSE
+decodeSecondary 0x43 = makeInstDD GCPOS
+decodeSecondary 0x44 = makeInstDD CATINP
+decodeSecondary 0x45 = makeInstD NILDS
+decodeSecondary 0x46 = makeInstDD DSCOPY
+decodeSecondary 0x47 = makeInstD DSFORG
+decodeSecondary 0x48 = makeInstDD DSWTYP
+decodeSecondary 0x49 = makeInstDD DSRTYP
+decodeSecondary 0x4a = makeInstDD DSHEAP
+decodeSecondary 0x4b = makeInst0 ESTOP
+decodeSecondary 0x4c = makeInst0 DSTOP
+--decodeSecondary 0x4d = makeInst0 SETERR -- original sysdoc
+decodeSecondary 0x4d = makeInstD SETERR
+decodeSecondary 0x4e = makeInst0 ISERR
+decodeSecondary 0x4f = makeInst0 CLRERR
+decodeSecondary 0x50 = makeInstDD RPCB
+--decodeSecondary 0x51 = makeInstDD WPCB -- original sysdoc
+decodeSecondary 0x51 = makeInstDDD INFOPW
+decodeSecondary 0x52 = makeInstDD TWCPU
+decodeSecondary 0x53 = makeInstDD ROTATE
+--decodeSecondary 0x54 = makeInst0 XREW -- original sysdoc
+decodeSecondary 0x54 = makeInstDDDD CONTRL
+--decodeSecondary 0x55 = makeInstDDD XWRITE
+decodeSecondary 0x55 = makeInstDDDDD BLKOUT
+--decodeSecondary 0x56 = makeInstDDD XREAD
+decodeSecondary 0x56 = makeInstDDDDD BLKIN
+--decodeSecondary 0x57 = makeInstD XSKIP
+decodeSecondary 0x57 = makeInstDDD NXTDSP
+--decodeSecondary 0x58 = makeInstD XERR
+decodeSecondary 0x58 = makeInstDDD DSPAGS
+decodeSecondary 0x59 = makeInstDD STORAGE
+--decodeSecondary 0x5a = makeInst0 SHUTUP
+decodeSecondary 0x5a = makeInstD SYSOP
+decodeSecondary 0x5b = makeInst0 ARITS
+decodeSecondary 0x5c = makeInst0 ARITU
+decodeSecondary 0x5d = makeInstD HPSIZE
+decodeSecondary 0x5e = makeInst0 GARB
+--decodeSecondary 0x5f = makeInstD CHAEX
+decodeSecondary 0x5f = makeInstDDDC TPBEGIN -- XXX: not sure how many bytes the last arg has
+decodeSecondary 0x60 = makeInstDDD FSLD
+decodeSecondary 0x61 = makeInstDD GEXP
+decodeSecondary 0x62 = makeInstDD SEXP
+decodeSecondary 0x63 = makeInstDD FLOOR
+decodeSecondary 0x64 = makeInstDDD RTSUB
+decodeSecondary 0x65 = makeInstDDD RTRPL
+decodeSecondary 0x66 = makeInstDD CLOCK
+decodeSecondary 0x67 = makeInstD SETNOW
+decodeSecondary 0x68 = makeInstDDD TRPCB
+decodeSecondary 0x69 = makeInstDDD TWPCB
+decodeSecondary 0x6a = makeInstDD TCPU
+decodeSecondary 0x6b = makeInstDD TSTAT
+decodeSecondary 0x6c = makeInstD ACT
+decodeSecondary 0x6d = makeInstD DEACT
+decodeSecondary 0x6e = makeInstD THALT
+decodeSecondary 0x6f = makeInstDD TBEG
+decodeSecondary 0x70 = makeInstD TEND
+decodeSecondary 0x71 = makeInstDDDD SEND
+decodeSecondary 0x72 = makeInstDDD WAIT
+decodeSecondary 0x73 = makeInstDDDD SWCALL
+--decodeSecondary 0x74 = makeInstD CDBINT
+decodeSecondary 0x74 = makeInstDD CDBINT
+--decodeSecondary 0x75 = makeInstD CDBTXT
+decodeSecondary 0x75 = makeInstDD CDBTXT
+decodeSecondary 0x76 = makeInstD PNACT
+decodeSecondary 0x77 = makeInstDDD PW
+decodeSecondary 0x78 = makeInstDDD GW
+decodeSecondary 0x79 = makeInstDDD XOR
+decodeSecondary 0x7a = makeInstDDDD PPCALL
+decodeSecondary 0x7b = makeInstD EXTASK
+decodeSecondary 0x7c = makeInstDDD AND
+decodeSecondary 0x7d = makeInstDDD OR
+decodeSecondary 0x7e = makeInstD SESSION
+decodeSecondary 0x7f = makeInstDDDDD SENDFT
+decodeSecondary 0x80 = makeInstD DEFCOL
+decodeSecondary 0x81 = makeInstDD ID
+decodeSecondary x = const $ makeOpcodeUnknown x
+
+makeInputTooShort n = Left $ InputTooShort n
+makeOpcodeUnknown x = Left $ UnknownOpcode x
+
+-- Instruction without argument
+makeInst0 f _ = Right $ Tuple (Instruction0 f) 0
+
+makeInstI f (arg1:_) = Right $ Tuple (InstructionI f arg1) 1
+makeInstI _ _ = makeInputTooShort 1
+
+makeInstM f (arg1:_) = Right $ Tuple (InstructionM f arg1) 1
+makeInstM _ _ = makeInputTooShort 1
+
+makeInstC f (arg1:_) = Right $ Tuple (InstructionC f arg1) 1
+makeInstC _ _ = makeInputTooShort 1
+
+makeInstD f (arg1:_) = Right $ Tuple (InstructionD f (decodeDataAddress arg1)) 1
+makeInstD _ _ = makeInputTooShort 1
+
+makeInstID f (arg1:arg2:_) = Right $ Tuple (InstructionID f arg1 (decodeDataAddress arg2)) 2
+makeInstID _ _ = makeInputTooShort 2
+
+makeInstDI f (arg1:arg2:_) = Right $ Tuple (InstructionDI f (decodeDataAddress arg1) arg2) 2
+makeInstDI _ _ = makeInputTooShort 2
+
+makeInstDD f (arg1:arg2:_) = Right $ Tuple (InstructionDD f (decodeDataAddress arg1) (decodeDataAddress arg2)) 2
+makeInstDD _ _ = makeInputTooShort 2
+
+makeInstIDD f (arg1:arg2:arg3:_) = Right $ Tuple (InstructionIDD f arg1 (decodeDataAddress arg2) (decodeDataAddress arg3)) 3
+makeInstIDD _ _ = makeInputTooShort 3
+
+makeInstDID f (arg1:arg2:arg3:_) = Right $ Tuple (InstructionDID f (decodeDataAddress arg1) arg2 (decodeDataAddress arg3)) 3
+makeInstDID _ _ = makeInputTooShort 3
+
+makeInstDDD f (arg1:arg2:arg3:_) = Right $ Tuple (InstructionDDD f (decodeDataAddress arg1) (decodeDataAddress arg2) (decodeDataAddress arg3)) 3
+makeInstDDD _ _ = makeInputTooShort 3
+
+makeInstIIDD f (arg1:arg2:arg3:arg4:_) = Right $ Tuple (InstructionIIDD f arg1 arg2 (decodeDataAddress arg3) (decodeDataAddress arg4)) 4
+makeInstIIDD _ _ = makeInputTooShort 4
+
+makeInstDDDD f (arg1:arg2:arg3:arg4:_) = Right $ Tuple (InstructionDDDD f (decodeDataAddress arg1) (decodeDataAddress arg2) (decodeDataAddress arg3) (decodeDataAddress arg4)) 4
+makeInstDDDD _ _ = makeInputTooShort 4
+
+makeInstDDDC f (arg1:arg2:arg3:arg4:_) = Right $ Tuple (InstructionDDDC f (decodeDataAddress arg1) (decodeDataAddress arg2) (decodeDataAddress arg3) (decodeCodeAddress arg4)) 4
+makeInstDDDC _ _ = makeInputTooShort 4
+
+makeInstDDDDD f (arg1:arg2:arg3:arg4:arg5:_) = Right $ Tuple (InstructionDDDDD f (decodeDataAddress arg1) (decodeDataAddress arg2) (decodeDataAddress arg3) (decodeDataAddress arg4) (decodeDataAddress arg5)) 5
+makeInstDDDDD _ _ = makeInputTooShort 5
+
+makeInstDDDDDDD f (arg1:arg2:arg3:arg4:arg5:arg6:arg7:_) = Right $ Tuple (InstructionDDDDDDD f (decodeDataAddress arg1) (decodeDataAddress arg2) (decodeDataAddress arg3) (decodeDataAddress arg4) (decodeDataAddress arg5) (decodeDataAddress arg6) (decodeDataAddress arg7)) 7
+makeInstDDDDDDD _ _ = makeInputTooShort 7
+
+makeInstIIDDD f (arg1:arg2:arg3:arg4:arg5:_) = Right $ Tuple (InstructionIIDDD f arg1 arg2 (decodeDataAddress arg3) (decodeDataAddress arg4) (decodeDataAddress arg5)) 5
+makeInstIIDDD _ _ = makeInputTooShort 5
+
+decodeDataAddress :: Word16 -> DataAddress
+decodeDataAddress v | msb16 v == 0 = GlobalDataAddress v
+-- else: msb is one and lsb is zoro
+decodeDataAddress v | lsb v == 0 = LocalDataAddress ((v `and` 0x7fff) `div` 2)
+-- else: msb one lsb one:
+decodeDataAddress v | otherwise = LocalDataAddressRef ((v `and` 0x7fff) `div` 2)
+
+msb16 x = (x `shr` 15) `and` 1
+lsb x = x `and` 1
+
+decodeCodeAddress :: Word16 -> CodeAddress
+decodeCodeAddress v = v
+
diff --git a/src/InstructionSet.purs b/src/InstructionSet.purs
new file mode 100644
index 0000000..7e01b05
--- /dev/null
+++ b/src/InstructionSet.purs
@@ -0,0 +1,326 @@
+-- | EUMEL0 instruction set data types
+module InstructionSet where
+
+import Prelude
+import Data.Show
+import Data.Generic.Rep.Show
+import Data.Generic.Rep
+
+-- in `Machine.wordsize` bytes
+maxInstructionLen = 1+7
+
+-- Operands:
+-- Immediate, i.e. data encoded in instruction word
+type Immediate = Int
+-- Module number
+type Module = Int
+type CodeAddress = Int
+type Procedure = Int
+
+-- A data memory address; XXX: uses word-addressing?
+data DataAddress =
+	  GlobalDataAddress Int
+	| LocalDataAddress Int
+	| LocalDataAddressRef Int
+
+-- No operand
+data Op0 =
+	  ARITS
+	| ARITU
+	| CLRERR
+	| DSTOP
+	| ERTAB
+	| ESTOP
+	| GARB
+	| GETTAB
+	| GORET
+	| ISERR
+    | KE
+	| PUTTAB
+	| RTN
+	| RTNF
+	| RTNT
+	| SHUTUP
+    | STOP
+	| SYSG
+	| XREW
+
+-- One immediate Operand
+data OpI =
+	  LN
+	| LN1
+    | PENTER
+
+-- One CodeAddress operand
+data OpC =
+	  B
+	| B1
+	| GOSUB
+
+-- One Module operand
+data OpM =
+      CALL
+	| EXEC
+	| PPROC
+
+-- One DataAddress operand
+data OpD =
+	  ACT
+	| CHAEX
+	| CLEAR
+	| COUT
+	| DEACT
+	| DEC1
+	| DEFCOL
+	| DSFORG
+	| EXTASK
+	| HPSIZE
+	| INC1 
+	| INCETY
+	| INCHAR
+	| ISDIG
+	| ISLCAS
+	| ISLD
+	| ISSHA
+	| ISUCAS
+	| NILDS
+	| OUT
+	| PAUSE
+	| PCALL
+	| PNACT
+    | PP
+    | PPV
+	| SESSION
+	| SETERR
+	| SETNOW
+	| SYSOP
+	| TEND
+	| TEST
+	| THALT
+	| XERR -- XXX not sure about the first letter, IpagesDT in src/devel/misc/unknown/src/0DISASS.ELA
+	| XSKIP
+
+-- Two DataAddress operands
+data OpDD =
+	  AMUL256
+    | BCRD
+	| CAT
+	| CATINP
+	| CDBINT
+	| CDBTXT
+	| CLOCK
+    | CRD
+	| CTT
+	| DEC
+	| DECOD
+	| DREM256
+	| DSACC
+	| DSCOPY
+	| DSHEAP
+	| DSRTYP
+	| DSWTYP
+	| ENCOD
+	| EQU
+	| FEQU
+	| FLOOR
+	| FLSEQ
+	| FMOV
+	| FNEG
+	| GCPOS
+	| GEXP
+	| INC
+	| ID
+	| LSEQ
+	| MOV
+	| NEG
+	| OUTF
+	| REF
+	| ROTATE
+	| RPCB
+	| SEXP
+	| STORAGE
+	| TBEG
+	| TCPU
+	| TEQU
+	| TLEN
+	| TLSEQ
+	| TMOV
+	| TSTAT
+	| TWCPU
+	| ULSEQ
+	| WPCB
+
+data OpID =
+      EQUIM
+	| MOVII
+    | MOVI
+
+data OpDI =
+	  BRCOMP
+
+data OpDDD =
+	  ADD
+	| AND
+	| CWR
+	| DIV
+	| DSPAGS
+    | ECWR
+	| FADD
+	| FDIV
+	| FMUL
+	| FNONBL
+	| FSLD
+	| FSUB
+	| GADDR
+	| GCADDR
+	| GETC
+	| GW
+	| IMULT
+	| INFOPW
+	| ITRPL
+	| ITSUB
+	| MOD
+	| MUL
+	| NXTDSP
+	| OR
+	| OUTFT
+	| POS
+	| PW
+	| REPLAC
+	| RTRPL
+	| RTSUB
+	| SUB
+	| SUBT1
+	| SUBTF
+	| TRPCB
+	| TWPCB
+	| WAIT
+	| XOR
+	| XREAD
+	| XWRITE
+
+data OpDID =
+	  SEL
+
+data OpIDD =
+	  ALIAS
+    | MOVX
+	| MOVXX
+
+data OpIIDD =
+      GETW
+    | PUTW
+
+data OpDDDD =
+	  CONTRL
+	| POSF
+	| PPCALL
+	| SEND
+	| SUBTFT
+	| SWCALL
+
+data OpDDDC = TPBEGIN
+
+data OpIIDDD =
+	  SUBS
+
+data OpDDDDD =
+	  BLKIN
+	| BLKOUT
+	| SENDFT
+	| POSFT
+	| POSIF
+
+data OpDDDDDDD = STRANL
+
+-- | Instruction encodings
+-- | A single Instruction Opcode (Array Args) would be sufficient, but not
+-- | type-safe. Thus they are grouped by encoding (i.e. argument count and type).
+data Instruction =
+	  Instruction0 Op0
+	| InstructionI OpI Immediate
+	| InstructionC OpC CodeAddress
+	| InstructionM OpM Module
+	| InstructionD OpD DataAddress
+	| InstructionDD OpDD DataAddress DataAddress
+	| InstructionID OpID Immediate DataAddress
+	| InstructionDI OpDI DataAddress Immediate 
+	| InstructionDDD OpDDD DataAddress DataAddress DataAddress
+	| InstructionDID OpDID DataAddress Immediate DataAddress
+	| InstructionIDD OpIDD Immediate DataAddress DataAddress
+	| InstructionIIDD OpIIDD Immediate Immediate DataAddress DataAddress
+	| InstructionDDDD OpDDDD DataAddress DataAddress DataAddress DataAddress
+	| InstructionDDDC OpDDDC DataAddress DataAddress DataAddress CodeAddress
+	| InstructionIIDDD OpIIDDD Immediate Immediate DataAddress DataAddress DataAddress
+	| InstructionDDDDD OpDDDDD DataAddress DataAddress DataAddress DataAddress DataAddress
+	| InstructionDDDDDDD OpDDDDDDD DataAddress DataAddress DataAddress DataAddress DataAddress DataAddress DataAddress
+
+-- Until we figure out how to do this without generics
+derive instance genericOp0 :: Generic Op0 _
+instance showOp0 :: Show Op0 where
+    show = genericShow
+
+derive instance genericOpI :: Generic OpI _
+instance showOpI :: Show OpI where
+    show = genericShow
+
+derive instance genericOpC :: Generic OpC _
+instance showOpC :: Show OpC where
+    show = genericShow
+
+derive instance genericOpM :: Generic OpM _
+instance showOpM :: Show OpM where
+    show = genericShow
+
+derive instance genericOpD :: Generic OpD _
+instance showOpD :: Show OpD where
+    show = genericShow
+
+derive instance genericOpDD :: Generic OpDD _
+instance showOpDD :: Show OpDD where
+    show = genericShow
+
+derive instance genericOpID :: Generic OpID _
+instance showOpID :: Show OpID where
+    show = genericShow
+
+derive instance genericOpDI :: Generic OpDI _
+instance showOpDI :: Show OpDI where
+    show = genericShow
+
+derive instance genericOpDDD :: Generic OpDDD _
+instance showOpDDD :: Show OpDDD where
+    show = genericShow
+
+derive instance genericOpDID :: Generic OpDID _
+instance showOpDID :: Show OpDID where
+    show = genericShow
+
+derive instance genericOpIDD :: Generic OpIDD _
+instance showOpIDD :: Show OpIDD where
+    show = genericShow
+
+derive instance genericOpIIDD :: Generic OpIIDD _
+instance showOpIIDD :: Show OpIIDD where
+    show = genericShow
+
+derive instance genericOpDDDD :: Generic OpDDDD _
+instance showOpDDDD :: Show OpDDDD where
+    show = genericShow
+
+derive instance genericOpDDDC :: Generic OpDDDC _
+instance showOpDDDC :: Show OpDDDC where
+    show = genericShow
+
+derive instance genericOpDDDDD :: Generic OpDDDDD _
+instance showOpDDDDD :: Show OpDDDDD where
+    show = genericShow
+
+derive instance genericOpIIDDD :: Generic OpIIDDD _
+instance showOpIIDDD :: Show OpIIDDD where
+    show = genericShow
+
+derive instance genericOpDDDDDDD :: Generic OpDDDDDDD _
+instance showOpDDDDDDD :: Show OpDDDDDDD where
+    show = genericShow
+
diff --git a/src/Machine.purs b/src/Machine.purs
new file mode 100644
index 0000000..7ee4a55
--- /dev/null
+++ b/src/Machine.purs
@@ -0,0 +1,27 @@
+-- Machine constants
+module Machine where
+
+import Prelude
+
+-- | Machine’s word size
+wordsize :: Int
+wordsize = 2
+
+-- | Code offset in default dataspace 4 in `wordsize`
+codeOffset :: Int
+codeOffset = 0x40000 `div` wordsize
+-- | Code section size in `wordsize`
+codeSize :: Int
+codeSize = (128*1024) `div` wordsize
+
+-- | Pagesize in `wordsize`, documentation sometimes calls this `ps`
+pagesize :: Int
+pagesize = 512 `div` wordsize
+
+-- | Max size af a single code module, `cms`, in words
+codeModuleSize :: Int
+codeModuleSize = 4096
+
+defaultDsId :: Int
+defaultDsId = 4
+
diff --git a/src/Main.purs b/src/Main.purs
new file mode 100644
index 0000000..4906542
--- /dev/null
+++ b/src/Main.purs
@@ -0,0 +1,99 @@
+module Main (main, fileToArrayView) where
+
+import Prelude
+import Effect (Effect, foreachE)
+import Effect.Console (log)
+import Data.List
+import Data.List as L
+import Data.Tuple
+import Data.Int
+import Data.String.Utils (length, repeat)
+import Data.Maybe (fromJust, Maybe(..), maybe)
+import Partial.Unsafe (unsafePartial)
+import Node.FS.Sync (readFile)
+import Node.Buffer (toArrayBuffer)
+import Data.ArrayBuffer.Typed (whole, toArray, (!))
+import Data.ArrayBuffer.Types (Uint16, ArrayView)
+import Data.UInt (toInt)
+import Data.Either (either)
+import Control.Monad.Rec.Class
+import Node.Process (argv)
+import Data.Array ((!!))
+
+import Decode
+import Asm
+import InstructionSet
+import Machine
+
+-- | Left-pad a string `s` to at least `num` characters with `pad`
+lpad :: Int -> String -> String -> String
+lpad num pad s = prefix <> s
+	where
+		prefix = case repeat (num-(length s)) pad of
+			Just x -> x
+			Nothing -> ""
+
+-- | Right-pad a string `s` to at least `num` characters with `pad`
+rpad :: Int -> String -> String -> String
+rpad num pad s = s <> prefix
+	where
+		prefix = case repeat (num-(length s)) pad of
+			Just x -> x
+			Nothing -> ""
+
+-- | Hex-format number `v`, adding padding to at least `len` characters
+toHex :: Int -> Int -> String
+toHex len v = lpad len "0" $ toStringAs hexadecimal v
+
+hexdump :: List Int -> String
+hexdump words = intercalate " " $ map (toHex 4) words
+
+-- Format instruction with hexdump @width
+formatInst :: Int -> Int -> Tuple (List Int) Instruction -> String
+formatInst address width (Tuple words inst) = toHex 8 address <> " | " <> rpad width " " (hexdump words) <> " | " <> (rpad 10 " " mnemonic) <> "  " <> (intercalate ", " args)
+	where
+		asmInst = toAsm inst
+		mnemonic = unsafePartial $ fromJust $ head asmInst
+		args = unsafePartial $ fromJust $ tail asmInst
+
+-- | Create ArrayView from a file `name`.
+fileToArrayView :: String -> Effect (ArrayView Uint16)
+fileToArrayView name = (readFile name) >>= toArrayBuffer >>= whole
+
+logInst address = (log <<< (formatInst address (hexWords*hexWordWidth+hexWords*spaceWidth)))
+	where
+		hexWordWidth = 4
+		hexWords = maxInstructionLen
+		spaceWidth = 1
+
+-- Take n words from an ArrayView starting at offset
+takeView :: Int -> Int -> ArrayView Uint16 -> Effect (List Int)
+takeView _ 0 _ = pure Nil
+takeView offset n view = (view ! offset) >>= maybe (pure Nil) recurse
+	where
+		recurse v = ((:) (toInt v)) <$> (takeView (offset+1) (n-1) view)
+
+-- Disassemble memory in @view starting at @startAddress
+disasm :: Int -> ArrayView Uint16 -> Effect Unit
+disasm startAddress view = tailRecM go startAddress
+	where
+		go address = do
+			words <- takeView address maxInstructionLen view
+			either invalid (continue address words) $ decode words
+		invalid (InputTooShort x) = do
+			log $ "input too short " <> show x
+			pure $ Done unit
+		invalid (UnknownOpcode x) = do
+			log $ "unknown opcode " <> show x
+			pure $ Done unit
+		continue address words (Tuple inst len) = do
+			logInst (address*wordsize) (Tuple (take len words) inst)
+			pure $ Loop(address+len)
+			
+main :: Effect Unit
+main = secondArg >>= (maybe argumentMissing doDisasm)
+	where
+		secondArg = (flip (!!) $ 2) <$> argv
+		argumentMissing = log "Missing argument"
+		doDisasm file = fileToArrayView file >>= disasm codeOffset
+
-- 
cgit v1.2.3