A => README.md +25 -0
@@ 0,0 1,25 @@
+# What is this
+This is an attempt to write a compiler from Lua 5.4 bytecode to WebAssembly.
+Why? As the name implies, to have something to do.
+# Building/running
+Runs with Fennel 1.2.0 or maybe something else.
+This uses integer math and bitops for opcodes, so requires Lua >= 5.3 to
+run. Probably assumes it's running on a 64-bit machine, which is
+hilarious since 64-bit webassembly kinda doesn't exist yet.
+# References
+ * <https://www.lua.org/ftp/lua-5.4.4.tar.gz>
+ * <https://the-ravi-programming-language.readthedocs.io/en/latest/lua_bytecode_reference.html>
+ (outdated)
+# License
+Mozilla Public License 2.0
A => src/main.fnl +226 -0
@@ 0,0 1,226 @@
+; From lopcodes.h:
+; /*===========================================================================
+; We assume that instructions are unsigned 32-bit integers.
+; All instructions have an opcode in the first 7 bits.
+; Instructions can have the following formats:
+; 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
+; 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+; iABC C(8) | B(8) |k| A(8) | Op(7) |
+; iABx Bx(17) | A(8) | Op(7) |
+; iAsBx sBx (signed)(17) | A(8) | Op(7) |
+; iAx Ax(25) | Op(7) |
+; isJ sJ(25) | Op(7) |
+; A signed argument is represented in excess K: the represented value is
+; the written unsigned value minus K, where K is half the maximum for the
+; corresponding unsigned argument.
+; ===========================================================================*/
+; enum OpMode {iABC, iABx, iAsBx, iAx, isJ}; /* basic instruction formats */
+; /*
+; ** size and position of opcode arguments.
+; */
+; #define SIZE_C 8
+; #define SIZE_B 8
+; #define SIZE_Bx (SIZE_C + SIZE_B + 1)
+; #define SIZE_A 8
+; #define SIZE_Ax (SIZE_Bx + SIZE_A)
+; #define SIZE_sJ (SIZE_Bx + SIZE_A)
+; #define SIZE_OP 7
+; #define POS_OP 0
+; #define POS_A (POS_OP + SIZE_OP)
+; #define POS_k (POS_A + SIZE_A)
+; #define POS_B (POS_k + 1)
+; #define POS_C (POS_B + SIZE_B)
+; #define POS_Bx POS_k
+; #define POS_Ax POS_A
+; #define POS_sJ POS_A
+; (end of excerpt from lopcodes.h)
+;; Opcodes with metadata, in the same order as the OpCode enum in
+;; Lua 5.4's lopcodes.h.
+;; The opcode number is NOT stored in each entry: it is implied by the
+;; entry's position. Lua opcode numbers are 0-based while this table is
+;; 1-based, so opcode N lives at index N+1. Inserting, removing or
+;; reordering entries therefore renumbers every opcode after the change.
+;; :name is the printable name of the opcode.
+;; :format is the instruction format, one of the formats described above:
+;;   :iABC :iABx :iAsBx :iAx :isJ
+;; (values taken from the luaP_opmodes table in lopcodes.c).
+;; Many instructions use the :iABC format without using every operand.
+(local Opcodes [
+  {:name :OpMove :format :iABC}
+  {:name :OpLoadI :format :iAsBx}
+  {:name :OpLoadF :format :iAsBx}
+  {:name :OpLoadK :format :iABx}
+  {:name :OpLoadKX :format :iABx}
+  {:name :OpLoadFalse :format :iABC}
+  {:name :OpLFalseSkip :format :iABC}
+  {:name :OpLoadTrue :format :iABC}
+  {:name :OpLoadNil :format :iABC}
+  {:name :OpGetUpval :format :iABC}
+  {:name :OpSetUpval :format :iABC}
+  {:name :OpGetTabUp :format :iABC}
+  {:name :OpGetTable :format :iABC}
+  {:name :OpGetI :format :iABC}
+  {:name :OpGetField :format :iABC}
+  {:name :OpSetTabUp :format :iABC}
+  {:name :OpSetTable :format :iABC}
+  {:name :OpSetI :format :iABC}
+  {:name :OpSetField :format :iABC}
+  {:name :OpNewTable :format :iABC}
+  {:name :OpSelf :format :iABC}
+  {:name :OpAddI :format :iABC}
+  {:name :OpAddK :format :iABC}
+  {:name :OpSubK :format :iABC}
+  {:name :OpMulK :format :iABC}
+  {:name :OpModK :format :iABC}
+  {:name :OpPowK :format :iABC}
+  {:name :OpDivK :format :iABC}
+  {:name :OpIDivK :format :iABC}
+  {:name :OpBAndK :format :iABC}
+  {:name :OpBOrK :format :iABC}
+  {:name :OpBXorK :format :iABC}
+  {:name :OpShrI :format :iABC}
+  {:name :OpShlI :format :iABC}
+  {:name :OpAdd :format :iABC}
+  {:name :OpSub :format :iABC}
+  {:name :OpMul :format :iABC}
+  {:name :OpMod :format :iABC}
+  {:name :OpPow :format :iABC}
+  {:name :OpDiv :format :iABC}
+  {:name :OpIDiv :format :iABC}
+  {:name :OpBand :format :iABC}
+  {:name :OpBor :format :iABC}
+  {:name :OpBXor :format :iABC}
+  {:name :OpShl :format :iABC}
+  {:name :OpShr :format :iABC}
+  {:name :OpMMBin :format :iABC}
+  {:name :OpMMBinI :format :iABC}
+  {:name :OpMMBinK :format :iABC}
+  {:name :OpUnm :format :iABC} ; unary minus
+  {:name :OpBNot :format :iABC}
+  {:name :OpNot :format :iABC}
+  {:name :OpLen :format :iABC}
+  {:name :OpConcat :format :iABC}
+  {:name :OpClose :format :iABC}
+  {:name :OpTbc :format :iABC} ; "to be closed"
+  {:name :OpJmp :format :isJ}
+  {:name :OpEq :format :iABC}
+  {:name :OpLt :format :iABC}
+  {:name :OpLe :format :iABC}
+  {:name :OpEqK :format :iABC}
+  {:name :OpEqI :format :iABC}
+  {:name :OpLtI :format :iABC}
+  {:name :OpLeI :format :iABC}
+  {:name :OpGtI :format :iABC}
+  {:name :OpGeI :format :iABC}
+  {:name :OpTest :format :iABC}
+  {:name :OpTestSet :format :iABC}
+  {:name :OpCall :format :iABC}
+  {:name :OpTailCall :format :iABC}
+  {:name :OpReturn :format :iABC}
+  {:name :OpReturn0 :format :iABC}
+  {:name :OpReturn1 :format :iABC}
+  {:name :OpForLoop :format :iABx}
+  {:name :OpForPrep :format :iABx}
+  {:name :OpTForPrep :format :iABx}
+  {:name :OpTForCall :format :iABC}
+  {:name :OpTForLoop :format :iABx}
+  {:name :OpSetList :format :iABC}
+  {:name :OpClosure :format :iABx}
+  {:name :OpVarArg :format :iABC}
+  {:name :OpVarArgPrep :format :iABC}
+  {:name :OpExtraArg :format :iAx}])
+;; Reverse lookup table: opcode name -> 1-based position in `Opcodes`.
+(local OpcodeNames
+  (let [positions {}]
+    (each [position op (ipairs Opcodes)]
+      (tset positions op.name position))
+    positions))
+(fn opcode-idx [op]
+  "Look up the opcode name `op` and return its 0-based opcode number.
+  `OpcodeNames` stores 1-based table positions, hence the subtraction."
+  (let [position (. OpcodeNames op)]
+    (- position 1)))
+(fn idx-opcode [i]
+  "Return the `Opcodes` entry for the 0-based opcode number `i`.
+  Yields nil when the table has no entry at that position."
+  (let [position (+ i 1)] ; the table is 1-based, opcode numbers are 0-based
+    (. Opcodes position)))
+(fn extract [i offset len]
+  "Return the `len`-bit field of integer `i` whose lowest bit is bit `offset`.
+  Intended for 32-bit instruction words; callers should not pass values
+  with bits above bit 31 set."
+  ; lopcodes.h builds a mask of n one-bits as ~((~(Instruction)0) << n).
+  ; Here the field is shifted down to bit 0 first and then masked, which
+  ; selects the same bits as masking in place and shifting afterwards.
+  (let [low-bits (bnot (lshift (bnot 0) len))
+        shifted (rshift i offset)]
+    (band shifted low-bits)))
+(fn decode-abc [i]
+  "Placeholder decoder for instructions in the :iABC format.
+  Not implemented yet: ignores `i` and always returns 0."
+  0)
+(fn decode-abx [i]
+  "Placeholder decoder for instructions in the :iABx format.
+  Not implemented yet: ignores `i` and always returns 0."
+  0)
+(fn decode-absbx [i]
+  "Placeholder decoder for instructions in the :iAsBx format.
+  Not implemented yet: ignores `i` and always returns 0."
+  0)
+(fn decode-ax [i]
+  "Placeholder decoder for instructions in the :iAx format.
+  Not implemented yet: ignores `i` and always returns 0."
+  0)
+(fn decode-asj [i]
+  "Placeholder decoder for instructions in the :isJ format.
+  Not implemented yet: ignores `i` and always returns 0."
+  0)
+(fn decode [i]
+  "Identify the opcode of the 32-bit instruction word `i` and print it.
+  The opcode number is read from the low 7 bits of `i` and looked up with
+  `idx-opcode`; a one-line summary (instruction, opcode number, name and
+  format) is printed. Operand fields are not decoded yet and nothing is
+  returned.
+  Raises an error when the opcode number has no entry in the opcode table."
+  (let [OpSize 7 ; Size of the opcode field, in bits
+        OpPos 0 ; Starting bit number of the opcode field
+        opcode (extract i OpPos OpSize)
+        op (idx-opcode opcode)]
+    ; A 7-bit field can hold numbers the table has no entry for; fail with
+    ; a readable message instead of an "attempt to index a nil value".
+    (when (= nil op)
+      (error (string.format "Instruction %d has unknown opcode %d" i opcode)))
+    (print (string.format "Instruction %d is opcode %i named %s with format %s"
+                          i
+                          opcode
+                          op.name
+                          op.format))))
+; Smoke test run at load time: look an opcode up by number, look one up by
+; name, then decode a sample instruction word.
+(let [sample-number 3
+      sample (idx-opcode sample-number)]
+  (print "Opcode" sample-number "is" sample.name))
+(print "Opcode :OpGetI is" (opcode-idx :OpGetI))
+(decode 0x03)