asm: swap places of src, dst: src dst op is now dst src op, this is more in line with nasm
5 files changed, 49 insertions(+), 50 deletions(-)

M asm.fox
M malloc.fox
M mandelbrot-asm.fox
M mandelbrot-mt.fox
M thread.fox
M asm.fox +13 -12
@@ 207,18 207,18 @@ 2c const [r12]	2d const [r13]	2e const [
 
 		\ compile "opcode r1, r2"
 		( r1 r2 opcode -- )
-: oprr, rpush over over rex2, over over rpop dir ,1 /r2 ;
+: oprr, rpush swap over over rex2, over over rpop dir ,1 /r2 ;
 
 		\ compile "opcode(2bytes) r1, r2"
 		( r1 r2 opcode -- )
-: oprr,2 rpush over over rex2, rpop ,2 /r2 ;
+: oprr,2 rpush swap over over rex2, rpop ,2 /r2 ;
 
 		\ copy register r2 into register r1 (destination first, as in nasm: mov r1, r2)
 		( r1 r2 -- )
 : movrr, 89 oprr,  ;
 
 		( r1 -- )
-: reg@	mcreate # mfind dup # lit call, ^ lit rax ^ lit 
+: reg@	mcreate # mfind dup # lit call, rax ^ lit  ^ lit 
 			# ffind movrr, # lit call, ret, ;
 ( -- n )
 rcx reg@ rcx@	rdx reg@ rdx@	rbx reg@ rbx@	rsp reg@ rsp@ 

          
@@ 231,7 231,8 @@ r13 reg@ r13@	r14 reg@ r14@	r15 reg@ r15
 :  r9@  r9@ ;	: r10@ r10@ ;	: r11@ r11@ ;	: r12@ r12@ ;	
 : r13@ r13@ ;	: r14@ r14@ ;	: r15@ r15@ ;	
 
-: reg! mcreate rax ^ lit  ^ lit  # ffind movrr, # lit call,
+		( r1 -- )
+: reg! mcreate  ^ lit  rax ^ lit  # ffind movrr, # lit call,
 		# mfind drop # lit call, ret, ;
 		
 ( n -- )

          
@@ 252,21 253,21 @@ r13 reg! r13!	r14 reg! r14!	r15 reg! r15
 : cmprr, 39 oprr, ;
 
 			\ in imul r1 and r2 are swapped when encoding.
-: imulrr,	swap af0f oprr,2 ;
+: imulrr,	swap af0f oprr,2 ;	\ ( r1 r2 -- ) r1 = r1 * r2; swap undoes the one in oprr,2 so r1 stays the destination
 
-			( n r1 -- )
-: movir,	dup rex, dup reg b8 + ,1 regsize cp (,) ;
+			( r1 n -- )
+: movir,	swap dup rex, dup reg b8 + ,1 regsize cp (,) ;
 
 			( r1 -- )
 : decr,		dup rex,  ff /1 ;
 : incr,		dup rex,  ff /0 ;
 
-			( n r1 -- )
-: sarr,		dup rex, c1 /7 ib ;
-: shlr,		dup rex, c1 /4 ib ;
+			( r1 n -- )
+: sarr,		swap dup rex, c1 /7 ib ;
+: shlr,		swap dup rex, c1 /4 ib ;
 
-			( n r1 r2 -- )
-: shrd,		over over rex2, 0f ,1 ac ,1 /r2 ib ;
+			( n r2 r1 -- )	\ opcode 0f ac, "shrd r2, r1, n": r2 is shifted, r1 supplies the bits; count n comes first, unlike sarr,/shlr,
+: shrd,		swap over over rex2, 0f ,1 ac ,1 /r2 ib ;
 
 			( r1 -- r1*[e|r]ax )
 : imulrax,	dup rex,  #

          
M malloc.fox +1 -0
@@ 10,3 10,4 @@ hex
 
 \ cleaned up automatically when process exits, see man (2) mmap
 \ : umalloc ;
+

          
M mandelbrot-asm.fox +16 -18
@@ 11,7 11,7 @@ hex
 
 1d ( deci 29 ) const scalef
 
-: scale # scalef rax shlr, # ;
+: scale # rax scalef shlr, # ;
 
 		\ svar creates a scaled variable
 		( n -- )

          
@@ 88,33 88,33 @@ r15 const boundary
 
 		\ compile r1^2 scaled
 		( r1 -- r1^2 )
-: sq	dup dup imulrr,  scalef swap sarr, ;
+: sq	dup dup imulrr,  scalef sarr, ;
 
 ( y0 x0 -- )
 : plot
 	x0! y0!				\ start with z = x0 + iy0
-	# x0 x movrr, y0 y movrr,
-	ff ecx movir, 			\ counter=0xff
-	4 scale boundary movir,  #
+	# x x0 movrr,  y y0 movrr,
+	ecx ff movir, 			\ counter=0xff
+	boundary 4 scale  movir,  #
 	begin #
-		x x^2 movrr,
+		x^2 x movrr,
 		x^2 sq
-		y y^2 movrr,
+		y^2 y movrr,
 		y^2 sq
-		y^2 r14 movrr, 
-		x^2 r14 addrr,
-		boundary r14 cmprr, #
+		r14 y^2  movrr, 
+		r14 x^2 addrr,
+		r14 boundary cmprr, #
 		+if
 			rcx@ wplot1 ;;
 		then
 		\ y = 2xy + y0
-		# x y imulrr, 
-		scalef 1- y sarr, \ unscale, -1 because 2*xy
-		y0 y addrr,
+		# y x imulrr, 
+		y scalef 1- sarr, \ unscale, -1 because 2*xy
+		y y0 addrr,
 		\ x = x^2 - y^2 + x0
-		x^2 x movrr,
-		y^2 x subrr, 
-		x0 x addrr,
+		x x^2 movrr,
+		x y^2 subrr, 
+		x x0 addrr,
 		ecx decr, # 	\ counter--
 	until
 	rcx@ wplot ;

          
@@ 158,8 158,6 @@ image 	\ address, where we store the ima
 
 : save-image	() create-file write-header write-image close-file ;
 
-
-
 : mb			() initmain setup render save-image ." done, stack: " .s bye ;
 
 end-app   start-with mb   " mandelbrot-asm.o" write-obj

          
M mandelbrot-mt.fox +17 -18
@@ 1,4 1,3 @@ 
-
 hex
 
 			28 const

          
@@ 19,7 18,7 @@ max-iter
 		3a ( deci 58 ) const
 scalef	\ scale factor for fp arithmetic
 
-: scale # scalef rax shlr, # ;
+: scale # rax scalef shlr, # ;
 
 \ convert float to fp:
 \ https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Converting_decimal_to_binary32

          
@@ 66,7 65,7 @@ hex
 		( n -- addr )
 : color	# max-iter cmpeaxi #
 		0if
-			# black dup eax  movir,  here 4 - relo,v # ;;
+			# black eax over movir,  here 4 - relo,v # ;;
 		then 
 		# f andeaxi
 		\ todo: displacement "colors" needs sign-extended, not zero-extended relocate

          
@@ 134,34 133,34 @@ r14 const boundary
 
 		\ compile rax^2 scaled
 		( n -- n^2 )
-: sq	rax imulrax,  scalef rdx rax shrd, ;
+: sq	rax imulrax,  scalef rax rdx shrd, ;
 
 
 : plot	( y0 x0 -- )
 	x0! y0!						\ start with z = x0 + iy0
 	0 \ make room for rax used below
-	# x0 x movrr, y0 y movrr,
-	max-iter ecx movir, 			\ counter
+	# x x0 movrr, y y0 movrr,
+	ecx max-iter movir, 			\ counter
 	# begin #
-		x rax movrr,   sq   rax x^2 movrr,
-		y rax movrr,   sq   rax y^2 movrr,
+		rax x movrr,   sq   x^2 rax movrr,
+		rax y movrr,   sq   y^2 rax movrr,
 		\ x^2 + y^2 < boundary?
 		\ rax has y^2
-		x^2 rax addrr,
-		boundary rax cmprr, #
+		rax x^2 addrr,
+		rax boundary cmprr, #
 		+if
 			drop rcx@ color	color, ;;
 		then
 		\ y = 2xy + y0
-		# y rax movrr,
+		# rax y movrr,
 		x imulrax,
-		scalef 1- rdx rax shrd,	 \ unscale, -1 because 2*xy
-		rax y movrr,
-		y0 y addrr,
+		scalef 1- rax rdx shrd,	 \ unscale, -1 because 2*xy
+		y rax movrr,
+		y y0 addrr,
 		\ x = x^2 - y^2 + x0
-		x^2 x movrr,
-		y^2 x subrr, 
-		x0 x addrr,
+		x x^2 movrr,
+		x y^2 subrr, 
+		x x0 addrr,
 		ecx decr, # 	\ counter--
 	until
 	drop black color, ;

          
@@ 226,7 225,7 @@ image 	\ address, where we store the ima
 
 : setup 		() setdelta adjustx adjusty alloc-space 
 					\ boundary for calculation, is fixed
-					# 4 scale boundary movir, #  ;
+					# boundary  4 scale  movir, #  ;
 
 				( xt #threads -- )
 : spawn-all		begin over spawn 1- until  drop drop ;

          
M thread.fox +2 -2
@@ 69,10 69,10 @@ m: init-tls	5b ,1 ;		\ pop rbx, load tls
 				0 100 milliseconds sleep
 			next ;
 
-: run		initmain
+: test		initmain
 			# f' threadfn # cpush #threads spawn-all
 			wait ." main task done: stack:" .s \n
 				." counter: " counter @ . \n bye ;
 
-\ end-app  start-with run  " thread.o" write-obj
+\ end-app  start-with test  " thread.o" write-obj