asm: rewrite all; keep the old code still in place to merge changes incrementally
3 files changed, 106 insertions(+), 17 deletions(-)

M asm.fox
M malloc.fox
M mandelbrot-asm.fox
M asm.fox +105 -15
@@ 25,6 25,9 @@ mov %eax,   -100(%eax)
 
 \ addressing modes:
 \ https://stackoverflow.com/questions/34058101/referencing-the-contents-of-a-memory-location-x86-addressing-modes/34058400#34058400
+
+\ https://gist.github.com/mikesmullin/6259449
+
 (
 [base_reg + index_reg*scale + displacement] 
 [ base_reg + index_reg *scale + displacement ] 

          
@@ 75,22 78,107 @@ mov %eax,   -100(%eax)
 \ REX.W +	05 id		ADD RAX, imm32		Add imm32 sign-extended to 64-bits to RAX
 \ REX.W +	83 /0 ib	ADD r/m64, imm8		Add sign-extended imm8 to r/m64
 
-\ encoding, try this:
-(
-mov eax, [rcx+rbx*2+2346784]
-mov rax, [rcx+rbx*2+2346784]
-mov eax, [ecx+ebx*2+2346784]
-mov rax, [ecx+ebx*2+2346784]
-)
+\ addressing modes (acting also as registers)
+\ can be assigned to the first operand of an instruction by using the
+\ mod and r/m field of the modrm byte.
+\ the reg field is the second operand (a register) or an extension
+\ to the opcode, if only one operand is required.
+\
+\ bit layout of an operand; the lowest byte mirrors the modr/m byte, bits 8-9 feed the rex prefix:
+\ 0-2:	reg# as taken from intel doc; modr/m.r/m field
+\ 3-5:	empty; to be used to store modr/m.reg(.opcode) field
+\ 6-7:	modr/m.mod field
+\ 8:	operand size (only valid for registers, mod=11b)
+\		the idea is to use this for the rex.w field
+\		0: 32 bit, 1: 64 bit
+\ 9:	1: extended register; 0: x86-based register; used for
+\		rex.[rxb] field
+
+\ no support for [eax] etc., because 32 bit addressing would require a 0x67 prefix
+
+\ modr/m.mod = 00b: register-indirect operands; bit 9 marks the extended registers r8-r15
+00 const [rax]	01 const [rcx]	02 const [rdx]	03 const [rbx]
+\ 04 (would be [rsp]) and 05 (would be [rbp]) are special: in 64 bit mode r/m=100b means a sib byte follows, and mod=00b with r/m=101b means rip+disp32
+06 const [rsi]	07 const [rdi]
+200 const [r8]	201 const [r9]	202 const [r10]	203 const [r11]
+204 const [r12]	205 const [r13]	206 const [r14]	207 const [r15]	\ NOTE(review): [r12]/[r13] carry the same r/m bits (100b/101b) as the special cases above - confirm they encode as intended
+
+\ modr/m.mod = 11b, bit 8 clear: 32 bit registers; bit 9 marks r8d-r15d
+c0 const eax	c1 const ecx	c2 const edx	c3 const ebx
+c4 const esp	c5 const ebp	c6 const esi	c7 const edi
+2c0 const r8d	2c1 const r9d	2c2 const r10d	2c3 const r11d
+2c4 const r12d	2c5 const r13d	2c6 const r14d	2c7 const r15d
+
+\ modr/m.mod = 11b, bit 8 set: 64 bit registers; bit 9 marks r8-r15
+1c0 const rax	1c1 const rcx	1c2 const rdx	1c3 const rbx
+1c4 const rsp	1c5 const rbp	1c6 const rsi	1c7 const rdi
+3c0 const  r8	3c1 const  r9	3c2 const r10	3c3 const r11
+3c4 const r12	3c5 const r13	3c6 const r14	3c7 const r15
+
+			( r1 -- regid )	\ keep only bits 0-2, the 3 bit register number
+: r.reg		7 and ;
+
+			( r1 -- modrm-byte )	\ keep only the low byte: mod and r/m fields, reg field still empty
+: r.modrm	ff and ;
+
+			( r1 -- mod )	\ mask out the modr/m.mod bits; they stay in bits 6-7 (not shifted down)
+: r.mod		c0 and ;
 
-\ registers:
+			\ fold 2 operands into a modrm byte: r/m64 supplies mod and r/m, the reg# of r is shifted into bits 3-5
+			( r r/m64  -- modrm-byte )
+: fuse	 	r.modrm swap r.reg 2* 2* 2* or ;
+			
+			\ zf==1: operand is a memory address (e.g. [reg]), i.e. its mod bits are 00b
+			( r/m64 -- r/m64 [zf] )	\ the operand stays on the stack; assumes drop leaves the flags from r.mod untouched - confirm
+: m64?		dup r.mod drop ;
+
+			( r1 -- n )	\ operand size in bytes: 4, or 8 when bit 8 (64 bit) is set
+: regsize 	100 and 40 / 4 + ;
+
+			( r1 -- n )	\ rex prefix bits of one operand: w=8 from bit 8, r=4 or b=1 from bit 9; r.rex.b does not mask, so it assumes no bits above bit 9
+: r.rex.w	100 and 20 / ;
+: r.rex.r	200 and 80 / ;
+: r.rex.b	200 / ;
+
+			( r r/m64 -- n )	\ rex.r of the reg operand or'ed with rex.b of the r/m operand
+: rex.rb	r.rex.b swap r.rex.r or ;
+
+			( r r/m64 -- n )	\ like rex.rb, plus rex.w; the w bit is taken from the reg operand r only
+: rex.wrb	over r.rex.w rpush rex.rb rpop or ;
+
+			( rex.[wrxb] -- )	\ presumably emits 40 or'ed with the bits when any bit is set, and otherwise only drops them - confirm the polarity of 0=? if
+: rex,		0=? if 40 or ,1 ;; then drop ;
+
+			( r/m64 r/m64 opcode -- r r/m64 opcode )	\ if the first operand is memory: or 2 into the opcode and swap the operands; the copy made by uber is dropped before the test so 2 or hits the opcode
+: sort		uber m64? drop 0if 2 or ( reverse ) rpush swap rpop then ;
+
+			( r/m64 r/m64 opcode -- )	\ after sort: rex prefix via rex, then the opcode byte, then the fused modr/m byte
+: alu		sort uber uber rex.wrb rex, ,1 fuse ,1 ;
+
+			( r/m64 opcode digit -- )	\ digit goes into modr/m.reg; rex.w and rex.b both come from the r/m operand, so 64 bit registers get their rex.w prefix
+: /digit	swap rpush over fuse swap dup r.rex.w swap r.rex.b or rex, rpop ,1 ,1 ;
+
+			( r/m64 opcode -- )	\ shorthands: /n hands n to /digit as the opcode extension
+: /0 0 /digit ;   : /1 1 /digit ;   : /2 2 /digit ;   : /3 3 /digit ;
+: /4 4 /digit ;   : /5 5 /digit ;   : /6 6 /digit ;   : /7 7 /digit ;
+
+			( r/m64 opcode -- )	\ rex.b prefix via rex, then the opcode with the reg# or'ed into its low 3 bits; dup keeps the operand for r.reg because r.rex.b consumes it
+: +ro		swap dup r.rex.b rex, r.reg or ,1 ;
+
+			\ encode immediate operands; names follow the intel opcode notation (ib, iw, id, io)
+			( n -- )	\ NOTE(review): io uses ,4 like id, although intel's io denotes an 8 byte immediate - confirm
+: ib ,1 ;	: iw ,2 ;	: id ,4 ;	: io ,4 ;
+
+\ registers including addressing mode:
 \ 0-3: reg# as taken from intel doc
-\ 4-5: byte size /4; 32 bit: 01b, 64 bit: 10b
+\ 4-5: operand size code (byte size / 4); 32 bit: 01b, 64 bit: 10b
 \ 6-7: modrm.mod bytes
 
 \ modrm.mod = 11b
 d0 const eax	d1 const ecx	d2 const edx	d3 const ebx
 d4 const esp	d5 const ebp	d6 const esi	d7 const edi
+d8 const r8d	d9 const r9d	da const r10d	db const r11d
+dc const r12d	dd const r13d	de const r14d	df const r15d
 
 \ modrm.mod = 11b
 e0 const rax	e1 const rcx	e2 const rdx	e3 const rbx

          
@@ 99,11 187,13 @@ e8 const  r8	e9 const  r9	ea const r10	e
 ec const r12	ed const r13	ee const r14	ef const r15
 
 \ modrm.mod = 00b
+\ no support for [eax] ..., because this would require a 0x67 prefix			
 20 const [rax]	21 const [rcx]	22 const [rdx]	23 const [rbx]
-( [RSP] and [EBP] special )		26 const [rsi]	27 const [rdi]
+\ 24 (would be [RSP]) and 25 (would be [EBP]) are special
+26 const [rsi]	27 const [rdi]
 28 const  [r8]	29 const  [r9]	2a const [r10]	2b const [r11]
 2c const [r12]	2d const [r13]	2e const [r14]	2f const [r15]
-			
+
 			( r1 -- regid )
 : reg		7 and ;
 

          
@@ 111,7 201,7 @@ 2c const [r12]	2d const [r13]	2e const [
 : r.mod		c0 and ; 
 			
 			\ zf==1: operand is a memory address like [reg]
-			( r1 -- r1 [zf] )
+			( r/m64 -- r/m64 [zf] )
 : m64?		dup r.mod drop ;
 
 			( r1 -- n )

          
@@ 141,7 231,7 @@ 2c const [r12]	2d const [r13]	2e const [
 			( r1 opcode -- )
 : +ro		swap rex.b, reg or ,1 ;
 	
-			\ the modrm.mod field.
+			\ leave a modrm byte on the stack with only the modrm.mod field set (the and of both operands' mod bits).
 			( r1 r2 -- modrm.mod )
 : modrm.mod	r.mod swap r.mod and ;
 

          
@@ 163,7 253,7 @@ 2c const [r12]	2d const [r13]	2e const [
 			\ are specified by following the opcode with a slash (/)
 			\ and a digit 0-7."
 			\ encode /digit
-			( r1 instruction_opcode modrm.opcode -- )
+			( r1 instruction-opcode modrm-opcode -- )
 : /digit	swap ,1 over r.mod swap modrm.code swap modrm.r/m ,1 ;
 
 			( r1 r2 modrm.opcode -- )

          
@@ 178,7 268,7 @@ 2c const [r12]	2d const [r13]	2e const [
 		( r1 r2 -- )
 : /r2 	over over modrm.mod swap modrm.r/m swap modrm.reg ,1 ;
 
-			( r1 modrm -- )
+			( r1 instruction-opcode -- )
 : /0 0 /digit ;   : /1 1 /digit ;   : /2 2 /digit ;   : /3 3 /digit ;
 : /4 4 /digit ;   : /5 5 /digit ;   : /6 6 /digit ;   : /7 7 /digit ;
 

          
M malloc.fox +1 -1
@@ 6,7 6,7 @@ hex
 			3			\ prot: PROT_READ | PROT_WRITE
 			62			\ flags: MAP_PRIVATE |  MAP_ANONYMOUS | MAP_32BI
 			9			\ mmap syscall
-			syscall4 ;	\ fd, offset ignored for MAP_ANONYMOUS
+			syscall4 ;	\ fd and offset params are ignored for MAP_ANONYMOUS
 
 \ cleaned up automatically when process exits, see man (2) mmap
 \ : umalloc ;

          
M mandelbrot-asm.fox +0 -1
@@ 147,7 147,6 @@ image 	\ address, where we store the ima
 
 : setup 		() setdelta adjustx adjusty alloc-space ;
 
-
 : create-file	() " apple.pgm" ( oct 644 ) 1a4 creat fh ! ;
 
 : write-header	() ff  rows @  cols @  pgmheader ;