M Makefile +8 -3
@@ 2,7 2,7 @@ AS = nasm
ASFLAGS = -f elf64
bins = fox swapwm avx foxx \
- mandelbrot mandelbrot-mt mandelbrot-asm
+ mandelbrot mandelbrot-mt mandelbrot-asm asmtest
all: $(bins)
@@ 37,13 37,13 @@ srcs-mandelbrot = system.fox fileio.fox
mandelbrot.o: fox $(srcs-mandelbrot)
cat $(srcs-mandelbrot) | ./fox
-srcs-mandelbrot-asm = system.fox fileio.fox elf64.fox asm.fox \
+srcs-mandelbrot-asm = system.fox fileio.fox asm.fox elf64.fox \
bootstrap2.fox fileio.fox malloc.fox \
mandelbrot-asm.fox bye.fox
mandelbrot-asm.o: fox $(srcs-mandelbrot-asm)
cat $(srcs-mandelbrot-asm) | ./fox
-srcs-mandelbrot-mt = system.fox fileio.fox elf64.fox asm.fox \
+srcs-mandelbrot-mt = system.fox fileio.fox asm.fox elf64.fox \
bootstrap2.fox fileio.fox malloc.fox \
thread.fox mandelbrot-mt.fox bye.fox
mandelbrot-mt.o: fox $(srcs-mandelbrot-mt)
@@ 59,6 59,11 @@ srcs-foxx = system.fox asm.fox fileio.fo
foxx.o: fox $(srcs-foxx)
cat $(srcs-foxx) | ./fox
+srcs-asmtest = system.fox asm.fox fileio.fox elf64.fox \
+ bootstrap2.fox asmtest.fox bye.fox
+asmtest.o: fox $(srcs-asmtest)
+ cat $(srcs-asmtest) | ./fox
+
clean:
rm -f *.o $(bins) bootstrap2
M asm.fox +104 -168
@@ 88,9 88,13 @@ mov %eax, -100(%eax)
\ 0-2: reg# as taken from intel doc; modr/m.r/m field
\ 3-5: empty; to be used to store modr/m.reg(.opcode) field
\ 6-7: modr/m.mod field
-\ 8: operand size (only valid for registers, mod=11b)
-\ the idea is to use this for the rex.w field
+\ 8: operand size, used for setting the rex.w field
\ 0: 32 bit, 1: 64 bit
+\ per default this flag is set for 64 bit registers only
+\ but the qword word will set this no matter what,
+\ we need this to enforce 64 bit mode when there is no
+\ register, like imul [rcx]. without qword this operation
+\ can not be assembled for rax, because [rcx] is 32 bit.
\ 9: 1: extended register; 0: x86-based register; used for
\ rex.[rxb] field
@@ 115,6 119,9 @@ 1c4 const rsp 1c5 const rbp 1c6 const rs
3c0 const r8 3c1 const r9 3c2 const r10 3c3 const r11
3c4 const r12 3c5 const r13 3c6 const r14 3c7 const r15
+ ( r/m32|64 -- r/m64 )
+: qword 100 or ; \ 100: bit 8, the operand size flag
+
( r1 -- regid )
: r.reg 7 and ;
@@ 140,176 147,131 @@ 3c4 const r12 3c5 const r13 3c6 const r1
: r.rex.r 200 and 80 / ;
: r.rex.b 200 / ;
+ \ apply rex.w to both operands so we can use reg.opcode as r
+ \ argument in /digit addressing mode
+ ( r r/m64 -- n )
+: rex.w r.rex.w swap r.rex.w or ;
+
( r r/m64 -- n )
: rex.rb r.rex.b swap r.rex.r or ;
( r r/m64 -- n )
-: rex.wrb over r.rex.w rpush rex.rb rpop or ;
+: rex.wrb over over rex.rb rpush rex.w rpop or ;
- ( rex.[wrxb] -- )
-: rex, 0=? if 40 or ,1 ;; then drop ;
+ \ compile a rex prefix if necessary
+ ( rex.[wrxb] -- )
+: rex, 0=? if 40 or ,1 ;; then drop ;
+
+ \ todo: 2 byte ops
+ ( n -- )
+: ,op 100 =? drop -if ,1 ;; then ,2 ;
- ( r/m64 r/m64 opcode -- r r/m64 opcode )
-: sort uber m64? 0if 2 or ( reverse ) rpush swap rpop then ;
+ \ intel manual 2, 3.1.1.1:
+ \ "/r indicates that the modrm byte of the instruction
+ \ contains a register operand and a r/m operand."
+ ( r r/m64 opcode -- )
+: /r uber uber rex.wrb rex, ,op fuse ,1 ;
- ( r/m64 r/m64 opcode -- )
-: alu sort uber uber rex.wrb rex, ,1 fuse ,1 ;
-
+ \ intel manual 2, 3.1.1.1:
+ \ "/digit: Some instructions cannot make use of the
+ \ REG portion of the ModR/M byte. Many of these
+ \ instructions are "multiplexed" using this field,
+ \ where a single opcode can refer to multiple instructions,
+ \ and the REG field determines the instruction.
+ \ In opcode listings, these are specified by following
+ \ the opcode with a slash (/) and a digit 0-7."
( r/m64 opcode digit -- )
-: /digit swap rpush over fuse swap r.rex.b rex, rpop ,1 ,1 ;
+: /digit swap rpush swap rpop /r ;
( r/m64 opcode -- )
: /0 0 /digit ; : /1 1 /digit ; : /2 2 /digit ; : /3 3 /digit ;
: /4 4 /digit ; : /5 5 /digit ; : /6 6 /digit ; : /7 7 /digit ;
- ( r/m64 opcode -- )
-: +ro swap r.rex.b rex, r.reg or ,1 ;
-
- \ encode immediate operands
- ( n -- )
-: ib ,1 ; : iw ,2 ; : id ,4 ; : io ,4 ;
-
-\ registers including addressing mode:
-\ 0-3: reg# as taken from intel doc
-\ 4-5: byte size; 32 bit: 01b, 64 bit: 10b
-\ 6-7: modrm.mod bytes
-
-\ modrm.mod = 11b
-d0 const eax d1 const ecx d2 const edx d3 const ebx
-d4 const esp d5 const ebp d6 const esi d7 const edi
-d8 const r8d d9 const r9d da const r10d db const r11d
-dc const r12d dd const r13d de const r14d df const r15d
-
-\ modrm.mod = 11b
-e0 const rax e1 const rcx e2 const rdx e3 const rbx
-e4 const rsp e5 const rbp e6 const rsi e7 const rdi
-e8 const r8 e9 const r9 ea const r10 eb const r11
-ec const r12 ed const r13 ee const r14 ef const r15
-
-\ modrm.mod = 00b
-\ no support for [eax] ..., because this would require a 0x67 prefix
-20 const [rax] 21 const [rcx] 22 const [rdx] 23 const [rbx]
-\ 24 (would be [RSP]) and 25 (would be [EBP]) are special
-26 const [rsi] 27 const [rdi]
-28 const [r8] 29 const [r9] 2a const [r10] 2b const [r11]
-2c const [r12] 2d const [r13] 2e const [r14] 2f const [r15]
-
- ( r1 -- regid )
-: reg 7 and ;
-
- ( r1 -- mod )
-: r.mod c0 and ;
-
- \ zf==1: operand is a memory address like [reg]
- ( r/m64 -- r/m64 [zf] )
-: m64? dup r.mod drop ;
-
- ( r1 -- n )
-: regsize 30 and 2/ 2/ ;
-
-\ Intel Manual II, 2.2.1.7 points out ops with default operand size = 64:
-\ - near branches
-\ - all ops, except far branches, that implicitely reference rsp ( push .e.g.)
- \ sets zf
- ( r1 -- rex.? )
-: rex.r 8 and 2 / ;
-: rex.b 8 and 8 / ;
-: rex.w 20 and 2/ 2/ ;
-
-: rex 40 ;
+ \ intel manual 2, 3.1.1.1:
+ \ +rb, +rw, +rd, +ro: Indicated the lower 3 bits of the
+ \ opcode byte is used to encode the register operand
+ \ without a modR/M byte. The instruction lists the
+ \ corresponding hexadecimal value of the opcode byte with
+ \ low 3 bits as 000b. ... In 64-bit mode, indicates the four
+ \ bit field of REX.b and opcode[2:0] field encodes the
+ \ register operand of the instruction. “+ro” is applicable only
+ \ in 64-bit mode. See Table 3-1 for the codes.
+ ( r/m64 opcode -- )
+: +ro over dup r.rex.b swap r.rex.w or rex, swap r.reg or ,1 ;
-: rex.w, rex 8 or ,1 ;
+ \ encode immediate operands
+ ( n -- )
+: ib ,1 ; : iw ,2 ; : id ,4 ; : io ,8 ;
- ( r1 r2 -- )
-: rex2, over over or rex.w 0=? if rpush rex.b swap rex.r + rpop + 40 + ,1 ;; then drop drop drop ;
- ( r1 -- )
-: rex, dup rex.w 0=? if swap rex.b + 40 + ,1 ;; then drop drop ;
-
- ( r1 -- r1 )
-: rex.b, dup rex.b if dup rex or ,1 then drop ;
-
- ( r1 opcode -- )
-: +ro swap rex.b, reg or ,1 ;
-
- \ push a modrm byte and set the modrm.mod field.
- ( r1 r2 -- modrm.mod )
-: modrm.mod r.mod swap r.mod and ;
- \ set the modrm.reg/opcode field to register/opcode n
- ( modrm r1 -- )
-: modrm.reg reg # \ fall through
- ( modrm opcode -- )
-: modrm.code 8 * or ;
-
- \ set the modrm.r/m field to register
- ( modrm r1 -- )
-: modrm.r/m reg or ;
+: inc, ( r/m64 -- ) ff /0 ;
+: dec, ( r/m64 -- ) ff /1 ;
- \ "Some instructions cannot make use of the REG portion of
- \ the ModR/M byte. Many of these instructions are
- \ "multiplexed" using this field, where a single opcode
- \ can refer to multiple instructions, and the REG field
- \ determines the instruction. In opcode listings, these
- \ are specified by following the opcode with a slash (/)
- \ and a digit 0-7."
- \ encode /digit
- ( r1 instruction-opcode modrm-opcode -- )
-: /digit swap ,1 over r.mod swap modrm.code swap modrm.r/m ,1 ;
-
- ( r1 r2 modrm.opcode -- )
-: /r reg /digit ;
+ \ use qword for m64
+: shl, ( n r/m64 -- ) c1 /4 ib ;
+: sar, ( n r/m64 -- ) c1 /7 ib ;
- \ "indicates that the modrm byte of the instruction contains
- \ a register operand and a r/m operand."
- \ todo: only register-register supported
- \ todo: check r1 and r2 if we have a mem and encode accordingly
- \ we need to change the direction bit of the opcode eventually,
- \ so put that on the stack as well: ( r1 r2 opcode -- )
- ( r1 r2 -- )
-: /r2 over over modrm.mod swap modrm.r/m swap modrm.reg ,1 ;
-
- ( r1 instruction-opcode -- )
-: /0 0 /digit ; : /1 1 /digit ; : /2 2 /digit ; : /3 3 /digit ;
-: /4 4 /digit ; : /5 5 /digit ; : /6 6 /digit ; : /7 7 /digit ;
-
-\ encode immediate operands
-: ib ,1 ; : iw ,2 ; : id ,4 ; : io ,4 ;
+ \ remove rex.w flag for ops with default operand size of 64 bit.
+ ( r64 -- r/m32 )
+: no-rex.w # -1 100 xor # lit and ;
\ push r/m64
- ( r1 -- )
-: push m64? if ( push r64 ) 50 +ro ;; then ( push m64 ) ff /6 ;
+ ( r/m64 -- )
+: push, m64? if ( push r64 ) no-rex.w 50 +ro ;; then ( push m64 ) ff /6 ;
\ pop r/m64
- ( r1 -- )
-: pop m64? if ( pop r64 ) 58 +ro ;; then ( pop m64 ) 8f /0 ;
+ ( r/m64 -- ) \ pop r64 defaults to 64 bit: strip rex.w as in push,
+: pop, m64? if ( pop r64 ) no-rex.w 58 +ro ;; then ( pop m64 ) 8f /0 ;
+
+ \ stack sort - sort arguments for alu/mov instruction.
+ \ we normalize the operands and eventually have to clear
+ \ the direction bit of the opcode.
+ ( r/m64 r/m64 opcode -- r r/m64 opcode )
+: ssort uber m64? drop 0if 2 - ( reverse ) rpush swap rpop then ;
+
+ ( r/m64 r/m64 opcode -- )
+: alu ssort /r ;
+
+ \ copy/add/sub... tos: src, nos: dst
+ ( r/m64 r/m64 -- )
+: mov, 8b alu ;
+: add, 03 alu ;
+: sub, 2b alu ;
+: cmp, 3b alu ;
+
+: rex.w, () rax r.rex.w rex, ;
+
+ ( imm32 opcode -- )
+: aluraxi rex.w, #
+: alueaxi ,1 id ;
( n -- )
-: cmpraxi rex.w, #
-: cmpeaxi 3d ,1 id ;
-: andraxi rex.w, #
-: andeaxi 25 ,1 id ;
+: cmpraxi, 3d aluraxi ;
+: cmpeaxi, 3d alueaxi ;
+: andraxi, 25 aluraxi ;
+: andeaxi, 25 alueaxi ;
-: 3dup uber uber uber ;
-
- \ set direction bit according to the r/m64 values
- ( r/m64 r/m64 opcode -- opcode )
-: dir rpush nip r.mod c0 / rpop or ;
+ ( r64 r/m64 -- )
+: imul, af0f /r ;
- \ compile "opcode r1, r2"
- ( r1 r2 opcode -- )
-: oprr, rpush swap over over rex2, over over rpop dir ,1 /r2 ;
+ \ /digit takes care of rex prefix
+ \ todo: does not work: [rcx] imulax, chooses r32,
+ \ no way to address rax
+ ( r/m64/r/m32 -- r/m64*[e|r]ax )
+: imulrax, qword #
+: imuleax, f7 /5 ;
- \ compile "opcode(2bytes) r1, r2"
- ( r1 r2 opcode -- )
-: oprr,2 rpush swap over over rex2, rpop ,2 /r2 ;
+ ( imm8 r/m64(32) r64(32) -- )
+: shrd, swap ac0f /r ib ;
- \ copy register r1 into register r2
- ( r1 r2 -- )
-: movrr, 89 oprr, ;
+ \ todo: if n is imm32, then "MOV r/m64, imm32"
+ ( n r32/r64 -- )
+: movi, dup b8 +ro regsize cp (,) ;
+
( r1 -- )
: reg@ mcreate # mfind dup # lit call, rax ^ lit ^ lit
- # ffind movrr, # lit call, ret, ;
+ # ffind mov, # lit call, ret, ;
( -- n )
rcx reg@ rcx@ rdx reg@ rdx@ rbx reg@ rbx@ rsp reg@ rsp@
rbp reg@ rbp@ rsi reg@ rsi@ rdi reg@ rdi@ r8 reg@ r8@
@@ 322,7 284,7 @@ r13 reg@ r13@ r14 reg@ r14@ r15 reg@ r15
: r13@ r13@ ; : r14@ r14@ ; : r15@ r15@ ;
( r1 -- )
-: reg! mcreate ^ lit rax ^ lit # ffind movrr, # lit call,
+: reg! mcreate ^ lit rax ^ lit # ffind mov, # lit call,
# mfind drop # lit call, ret, ;
( n -- )
@@ 336,29 298,3 @@ r13 reg! r13! r14 reg! r14! r15 reg! r15
: r9! r9! ; : r10! r10! ; : r11! r11! ; : r12! r12! ;
: r13! r13! ; : r14! r14! ; : r15! r15! ;
- \ r1 op r2, store result in r2
- ( r1 r2 -- )
-: addrr, 01 oprr, ;
-: subrr, 29 oprr, ;
-: cmprr, 39 oprr, ;
-
- \ in imul r1 and r2 are swapped when encoding.
-: imulrr, af0f oprr,2 ;
-
- ( r1 n -- )
-: movir, swap dup rex, dup reg b8 + ,1 regsize cp (,) ;
-
- ( r1 -- )
-: decr, dup rex, ff /1 ;
-: incr, dup rex, ff /0 ;
-
- ( r1 n -- )
-: sarr, swap dup rex, c1 /7 ib ;
-: shlr, swap dup rex, c1 /4 ib ;
-
- ( n r2 r1 -- )
-: shrd, swap over over rex2, 0f ,1 ac ,1 /r2 ib ;
-
- ( r1 -- r1*[e|r]ax )
-: imulrax, dup rex, #
-: imuleax, f7 /5 ;
R asm.sh => +0 -6
@@ 1,6 0,0 @@
-#!/bin/sh
-# don't source this file, execute it!
-sp=$(dirname "$(readlink -f "$0")")
-
-cat system.fox fileio.fox elf64.fox asm.fox - | ./fox
-
M foxx.sh => asmtest.sh +1 -1
@@ 1,2 1,2 @@
make fox
-cat system.fox asm.fox fileio.fox elf64.fox bootstrap2.fox foxx.fox - | ./fox
+cat system.fox asm.fox fileio.fox elf64.fox bootstrap2.fox asmtest.fox - | ./fox
M foxx.fox +5 -10
@@ 56,11 56,11 @@ s1
: nos xstack @2 100 / ;
( n -- )
-: xpush grow find-reg dup xstack !1 regid movir, ;
+: xpush grow find-reg dup xstack !1 regid movi, ;
-: xdup tos regid grow find-reg dup xstack !1 regid movrr, ;
+: xdup tos regid grow find-reg dup xstack !1 regid mov, ;
-: xover nos regid grow find-reg dup xstack !1 regid movrr, ;
+: xover nos regid grow find-reg dup xstack !1 regid mov, ;
: xdrop tos free-reg shrink ;
@@ 75,13 75,8 @@ s1
\ compile subtraction
( rt: n1 n2 -- n1-n2 )
-: x- tos regid nos regid subrr, xdrop ;
-: x+ tos regid nos regid addrr, xdrop ;
-
-: test # rcx push rdi push rdi pop rcx pop [rax] push [rax] pop
- rax [rax] movrr,
- [rcx] rdx addrr, # ;
-
+: x- tos regid nos regid sub, xdrop ;
+: x+ tos regid nos regid add, xdrop ;
\ 1 xpush 2 xpush x- xdup xswap x- xdup x- dup xdup xdup xdup xdup xdrop xdup
1 xpush 2 xpush xover xnip x+
: run initmain bye ;
M mandelbrot-asm.fox +17 -18
@@ 11,7 11,7 @@ hex
1d ( deci 29 ) const scalef
-: scale # rax scalef shlr, # ;
+: scale # scalef rax shl, # ;
\ svar creates a scaled variable
( n -- )
@@ 88,34 88,34 @@ r15 const boundary
\ compile r1^2 scaled
( r1 -- r1^2 )
-: sq dup dup imulrr, scalef sarr, ;
+: sq dup dup imul, scalef swap sar, ;
( y0 x0 -- )
: plot
x0! y0! \ start with z = x0 + iy0
- # x x0 movrr, y y0 movrr,
- ecx ff movir, \ counter=0xff
- boundary 4 scale movir, #
+ # x x0 mov, y y0 mov,
+ ff ecx movi, \ counter=0xff
+ 4 scale boundary movi, #
begin #
- x^2 x movrr,
+ x^2 x mov,
x^2 sq
- y^2 y movrr,
+ y^2 y mov,
y^2 sq
- r14 y^2 movrr,
- r14 x^2 addrr,
- r14 boundary cmprr, #
+ r14 y^2 mov,
+ r14 x^2 add,
+ r14 boundary cmp, #
+if
rcx@ wplot1 ;;
then
\ y = 2xy + y0
- # y x imulrr,
- y scalef 1- sarr, \ unscale, -1 because 2*xy
- y y0 addrr,
+ # y x imul,
+ scalef 1- y sar, \ unscale, -1 because 2*xy
+ y y0 add,
\ x = x^2 - y^2 + x0
- x x^2 movrr,
- x y^2 subrr,
- x x0 addrr,
- ecx decr, # \ counter--
+ x x^2 mov,
+ x y^2 sub,
+ x x0 add,
+ ecx dec, # \ counter--
until
rcx@ wplot ;
@@ 161,4 161,3 @@ image \ address, where we store the ima
end-app start-with mb " mandelbrot-asm.o" write-obj
-
M mandelbrot-mt.fox +20 -20
@@ 18,7 18,7 @@ max-iter
3a ( deci 58 ) const
scalef \ scale factor for fp arithmetic
-: scale # rax scalef shlr, # ;
+: scale # scalef rax shl, # ;
\ convert float to fp:
\ https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Converting_decimal_to_binary32
@@ 63,11 63,11 @@ hex
\ choose color for iteration n
( n -- addr )
-: color # max-iter cmpeaxi #
+: color # max-iter cmpeaxi, #
0if
- # black eax over movir, here 4 - relo,v # ;;
+ # black dup eax movi, here 4 - relo,v # ;;
then
- # f andeaxi
+ # f andeaxi,
\ todo: displacement "colors" needs sign-extended, not zero-extended relocate
\ lea eax, [eax*4+colors]
85048d67 ,4 colors here over ,4 relo,v # ;
@@ 139,29 139,29 @@ r14 const boundary
: plot ( y0 x0 -- )
x0! y0! \ start with z = x0 + iy0
0 \ make room for rax used below
- # x x0 movrr, y y0 movrr,
- ecx max-iter movir, \ counter
+ # x x0 mov, y y0 mov,
+ max-iter ecx movi, \ counter
# begin #
- rax x movrr, sq x^2 rax movrr,
- rax y movrr, sq y^2 rax movrr,
+ rax x mov, sq x^2 rax mov,
+ rax y mov, sq y^2 rax mov,
\ x^2 + y^2 < boundary?
\ rax has y^2
- rax x^2 addrr,
- rax boundary cmprr, #
+ rax x^2 add,
+ rax boundary cmp, #
+if
drop rcx@ color color, ;;
then
\ y = 2xy + y0
- # rax y movrr,
+ # rax y mov,
x imulrax,
scalef 1- rax rdx shrd, \ unscale, -1 because 2*xy
- y rax movrr,
- y y0 addrr,
+ y rax mov,
+ y y0 add,
\ x = x^2 - y^2 + x0
- x x^2 movrr,
- x y^2 subrr,
- x x0 addrr,
- ecx decr, # \ counter--
+ x x^2 mov,
+ x y^2 sub,
+ x x0 add,
+ ecx dec, # \ counter--
until
drop black color, ;
@@ 181,9 181,9 @@ r14 const boundary
counter
: down ( -- n ) -1 counter
- # rex.w, 168b ,2 \ mov rdx, [rsi]
+ # rdx [rsi] mov, \ mov rdx, [rsi]
c10f48f0 ,4 10 ,1 \ lock xadd QWORD PTR [rax],rdx
- rex.w, d089 ,2 # \ mov rax, rdx; the counter value
+ rax rdx mov, #
nip ;
: dy y2 @ y1 @ - #tasks / ;
@@ 225,7 225,7 @@ image \ address, where we store the ima
: setup () setdelta adjustx adjusty alloc-space
\ boundary for calculation, is fixed
- # boundary 4 scale movir, # ;
+ # 4 scale boundary movi, # ;
( xt #threads -- )
: spawn-all begin over spawn 1- until drop drop ;
R test-asm.fox => +0 -12
@@ 1,12 0,0 @@
-hex
-
-: run initmain
- 5 rbp! 10 rcx! # rbp rcx subrr, # rcx@ .
- # r8 r10 movrr,
- 1234 rbx movir,
- 1234 r15 movir,
- 1234 ecx movir, #
- bye ;
-
-end-app start-with run " asm.o" write-obj
-