M lib/scopes/compiler/pilot/SX.sc => lib/scopes/compiler/arcmem/Atom.sc +439 -9
@@ 1,16 1,446 @@
""""SX
==
- A two-level lexer for Scoped eXpressions (SX) with yield flow.
+ arcmem-based S-expressions
+
+#
+ notes from masto:
+
+ on the parser level, also some interesting developments: all values are now
+ either lists or symbols (a random byte array). subtypes are encoded as
+ constructor calls, e.g. an int becomes `("type:s32" <4 byte LE encoded
+ integer>)`. a string becomes `("type:str" <blob>)`. a symbol is just
+ "symbol".this allows to wrap typed values late and lazily.
+
+ besides its many other advantages to my work, arcmem also made it possible
+ to "decons" arrays (truncate at the front), which made me realize that
+ scheme lists are broken: you append *and* remove in the front, which means
+ you often need to reverse a list after building it, to traverse it in the
+ same order. but arrays can be back-appended to *and* truncated in the front,
+ which creates queue instead of stack logic, which is ideal for S-expr
+ processing.
+
+ so, arcmem allocations can be dynamic arrays, and they can be S-expr lists,
+ which means we could use only libarcmem features to encode it all: lists are
+ void*[], symbols are byte[]. the contents are already distinguishable by the
+ fact that we can use arcmem to check if an address points to a pointer; a
+ void* that starts with a pointer is a void** (a list). otherwise it's a
+ symbol. type-foreign pointers may pretend to be a symbol, i.e. encoded as
+ 8 byte tag + pointer.
+
+#
+ lists and symbols differentiate from pointer arrays and strings in the
+ following ways:
+
+ 1) guaranteed to be immutable
+ 2) not zero-terminated
+
+Symbol_ := Symbol
+unlet list Symbol
+
+using import compiler.basics
+
+fn print-atom
+
+type Atom :: (arc@ void)
+ inline __storageprinter (self print)
+ print-atom (imply (deref self) this-type) print
+
+ __copy := share
+
+type AtomImpl < Atom
+ inline __countof (self)
+ rtell (storagecast (view self))
+
+ inline __@ (self index)
+ self := storagecast (view self)
+ size := rtell self
+ if (index >= size)
+ raise;
+ @ (getelementptr self index)
+
+ fn __hash (self)
+ viewing self
+ T := typeof self
+ ptr sz := 'data self
+ sz := sz * (sizeof (elementof T))
+ hash.from-bytes (ptr as rawstring) sz
+
+ fn __uhash (self)
+ viewing self
+ T := typeof self
+ ptr sz := 'data self
+ sz := sz * (sizeof (elementof T))
+ uhash.from-bytes (ptr as rawstring) sz
+
+ @@ memo
+ inline __== (cls T)
+ static-if (cls == T)
+ inline (self other)
+ aptr asz := 'data (view self)
+ bptr bsz := 'data (view other)
+ sz := asz * (sizeof (elementof cls))
+ and
+ asz == bsz
+ (memcmp (aptr as rawstring) (bptr as rawstring) sz) == 0
+
+ @@ memo
+ inline __imply (cls T)
+ static-if (T == Atom)
+ inline (self)
+ bitcast self T
+ elseif (T == Generator)
+ inline (self)
+ count := countof self
+ Generator
+ inline () 0:usize
+ inline (i) (icmp<u i count)
+ inline (i) (@ (getelementptr (view self) i))
+ inline (i) (add i 1:usize)
+
+ inline... reverse (self, offset : usize = 0:usize)
+ Generator
+ inline () (countof self)
+ inline (i) (icmp>u i offset)
+ inline (i) (@ (getelementptr (view self) (sub i 1:usize)))
+ inline (i) (sub i 1:usize)
- The first level properly segments whitespace and textual tokens and is
- appropriate to use in the context of documentation generation and
- basic syntax highlighting in editors.
+ inline rdecons (self count)
+ let T = (typeof self)
+ count := static-if (none? count) 1
+ else count
+ if (count > (countof self))
+ raise;
+ src := storagecast (view self)
+ mapf := static-if (view? self)
+ inline (i)
+ @ (getelementptr src i)
+ else
+ inline (i)
+ copy (@ (getelementptr src i))
+ result... := va-map mapf
+ va-range count
+ ptr := ptrtoint (deref (storagecast self)) uniqueintptr
+ ptr := add ptr
+ bitcast (mul (sizeof (elementof T)) (imply count usize)) uniqueintptr
+ self := inttoptr ptr T
+ pass self result...
+
+ inline decons (self count)
+ self ... := rdecons self count
+ (va-join ...) self
+
+ inline pop (self)
+ sz := countof self
+ if (icmp== sz 0:usize)
+ raise;
+ sz := sub sz 1:usize
+ src := storagecast (view self)
+ arg := copy (@ (getelementptr src sz))
+ resize self sz
+ arg
+
+ inline last (self)
+ sz := countof self
+ if (icmp== sz 0:usize)
+ raise;
+ sz := sub sz 1:usize
+ src := storagecast (view self)
+ @ (getelementptr src sz)
+
+ inline data (self)
+ sz := rtell (storagecast (view self))
+ pass (storagecast self) sz
+
+ inline... __rslice (self, start : usize)
+ trim self start (countof self)
+
+ inline... __lslice (self, end : usize)
+ trim self 0:usize end
+
+MutableListPtr := ~arc@ Atom
+
+type List < AtomImpl :: (arc@ Atom)
+ Kind := 1
+
+ inline __typecall (cls ...)
+ static-if ((va-countof ...) == 0)
+ bitcast ((storageof cls)) cls
+ else
+ argnum := va-countof ...
+ ptr := MutableListPtr argnum
+ va-map
+ inline (i)
+ arg := va@ i ...
+ (@ (getelementptr (view ptr) i)) = autocopy (imply arg Atom)
+ va-range argnum
+ bitcast ptr cls
+
+ inline append (self ...)
+ count := rtell (storagecast (view self))
+ argnum := va-countof ...
+ resize self (count + argnum)
+ ptr := bitcast (storagecast (view self)) MutableListPtr
+ ptr := getelementptr ptr count
+ va-map
+ inline (i)
+ arg := va@ i ...
+ (@ (getelementptr ptr i)) = autocopy (imply arg Atom)
+ va-range argnum
+
+ inline from-arrayptr (cls ptr sz)
+ dst := (storageof cls) sz
+ arcmemcopyn dst (view ptr) sz
+ bitcast dst cls
+
+ inline append-from-arrayptr (self ptr sz)
+ count := rtell (storagecast (view self))
+ resize self (count + sz)
+ dest := bitcast (storagecast (view self)) MutableListPtr
+ dest := getelementptr dest count
+ arcmemcopyn
+ dest
+ ptr
+ sz
+ ;
+
+ inline... __trim (self, start : usize, end : usize)
+ let T = (typeof self)
+ if (start == end) (T)
+ else
+ sz := countof self
+ ptr := getelementptr (view self) start
+ if (end == sz)
+ share (bitcast ptr T)
+ else
+ 'from-arrayptr T ptr (end - start)
+
+MutableSymbolPtr := ~arc@ char
+
+type Symbol < AtomImpl :: (arc@ char)
+ Kind := 0
+
+ inline __typecall (cls value)
+ imply value cls
- The second level, which wraps the first, maintains an indentation stack to
- encode additional events for entering and exiting indentation, validates
- indentation format and handles `\` correctly.
+ inline from-literal (cls value)
+ T := typeof value
+ sz := sizeof T
+ ptr := (storageof cls) sz
+ @ (bitcast (view ptr) ~@T) = value
+ bitcast ptr cls
+
+ inline from-rawstring (cls ptr sz)
+ ET := elementof (typeof ptr)
+ sz := sz * (sizeof ET)
+ dst := (storageof cls) sz
+ arcmemcopyn (dst as ~rawstring) ((view ptr) as rawstring) sz
+ bitcast dst cls
+
+ inline append-from-arrayptr (self ptr sz)
+ ET := elementof (typeof ptr)
+ sz := sz * (sizeof ET)
+ count := rtell (storagecast (view self))
+ resize self (count + sz)
+ dest := bitcast (storagecast (view self)) MutableSymbolPtr
+ dest := getelementptr dest count
+ arcmemcopyn
+ dest as ~rawstring
+ ptr as rawstring
+ sz
+ ;
+
+ inline append-memset (self num sz)
+ sz := imply sz usize
+ count := rtell (storagecast (view self))
+ resize self (count + sz)
+ dest := bitcast (storagecast (view self)) MutableSymbolPtr
+ dest := getelementptr dest count
+ arcmemsetn
+ dest as ~rawstring
+ num
+ sz
+ ;
+
+ inline align (self al)
+ al := imply al usize
+ sz1 := rtell (storagecast (view self))
+ sz2 := (sz1 + (al - 1:usize)) & -al
+ if (sz2 > sz1)
+ append-memset self 0 (sub sz2 sz1)
+
+ fn append-from-literal (self value)
+ ET := typeof value
+ sz := sizeof ET
+ align self (alignof ET)
+ count := rtell (storagecast (view self))
+ resize self (count + sz)
+ dest := getelementptr (storagecast (view self)) count
+ @ (bitcast (view dest) ~@ET) = value
+ ;
+
+ inline... append (self, value : (typematch &chararray? T))
+ static-assert (constant? value)
+ append-from-arrayptr self ('data value)
+ case (self, value : Symbol_)
+ static-assert (constant? value)
+ append-from-arrayptr self ('data (value as zarray))
+ case (self, value : (typematch T < integer))
+ static-assert (constant? value)
+ append-from-literal self value
+
+
+ @@ memo
+ inline __rimply (T cls)
+ static-if (T < integer)
+ inline (value)
+ from-literal cls value
+
+ @@ memo
+ inline __static-rimply (T cls)
+ static-if (T == Nothing)
+ inline (value)
+ bitcast ((storageof cls)) cls
+ elseif (T == Symbol_)
+ inline (value)
+ 'from-rawstring cls ('data (value as zarray))
+ elseif (&chararray? T)
+ inline (value)
+ 'from-rawstring cls ('data value)
+
+ inline... __trim (self, start : usize, end : usize)
+ let T = (typeof self)
+ if (start == end) (T)
+ else
+ sz := countof self
+ ptr := getelementptr (view self) start
+ if (end == sz)
+ share (bitcast ptr T)
+ else
+ 'from-rawstring T ptr (end - start)
- Additional functions are available for unescaping symbols, strings and
- block strings.
+fn print-atom (atom print)
+ returning void
+ print-atom := this-function
+ downcast... atom
+ case List (l)
+ print
+ /p
+ /do
+ for arg in l
+ print-atom arg print
+ case Symbol (s)
+ print
+ /prettydata ('data (view s))
+ default
+ unreachable;
+
+type+ Atom :: (arc@ void)
+ @@ memo
+ inline __rimply (T cls)
+ conv := do
+ conv := static-imply-converter T Symbol false
+ static-if (none? conv)
+ static-imply-converter T List false
+ else conv
+ static-if (not none? conv)
+ inline (value)
+ bitcast (conv value) cls
+
+ # must return an integer
+ fn __tagof (self)
+ viewing self
+ pptr := bitcast (storagecast self) @intptr
+ (arcptrptr? pptr) List.Kind Symbol.Kind
+
+ @@ memo
+ inline __downcast (cls T)
+ inline (self)
+ bitcast self T
+
+ @@ memo
+ inline __typetagof (cls T)
+ static-if ((unqualified T) < (unqualified cls)) T.Kind
+
+ fn __hash (self)
+ viewing self
+ H := downcast... (view self)
+ case List (l)
+ hash l
+ case Symbol (s)
+ hash s
+ default
+ unreachable;
+ hash ('__tagof self) H
+ fn __uhash (self)
+ viewing self
+ H := downcast... (view self)
+ case List (l)
+ uhash l
+ case Symbol (s)
+ uhash s
+ default
+ unreachable;
+ uhash.join-from-integer
+ uhash.from-integer ('__tagof self)
+ storagecast H
+
+ @@ memo
+ inline __== (cls T)
+ static-if (cls == T)
+ fn (self other)
+ viewing self other
+ TA := '__tagof self
+ TB := '__tagof other
+ and
+ TA == TB
+ do
+ downcast... self
+ case List (l)
+ l == (bitcast other List)
+ case Symbol (s)
+ s == (bitcast other Symbol)
+ default
+ unreachable;
+
+@if main-module?
+
+fn test1 ()
+ local s := Symbol 1:u8
+ 'append s "test"
+ 'append s 'foobarfoo
+ 'append s 123:u64
+ print s
+ local l := (List)
+ 'append l 1 2
+ 'append l 3
+ print
+ lslice l 2
+ print l
+ print
+ try! 'pop l
+ print (uhash l)
+ print
+ try! 'pop l
+ print (uhash l)
+ local m := (List 1 (List 2 3) 4)
+ n := copy m
+ 'append-from-arrayptr m ('data (view m))
+ print m n
+ k := imply m Atom
+ s := imply s Atom
+ print (k == k)
+ print
+ try! downcast k List
+ print
+ try! downcast s Symbol
+
+
+test1;
+
+@endif
+
+do
+ let Atom List Symbol
+ locals;
No newline at end of file
M lib/scopes/compiler/arcmem/init.sc +2 -2
@@ 793,7 793,7 @@ let new =
inline share (value)
value := view value
cls := typeof value
- static-assert ((storageof cls) < arc@)
+ static-assert ((storageof cls) <= arc@)
.. "cannot share value of type " (nameof (storageof cls))
bitcast (copy (storagecast value)) cls
@@ 828,6 828,7 @@ do
let arcmem-check
let arcpointer-offsets arcintptr
let hard soft soft?
+ let arcptrptr? = ptrptr?
locals;
#do
let arcmalloc arcshare arcfree archangle arcinit arcresize arctrunc
@@ 840,7 841,6 @@ do
let arcsoften archarden ptr-harden
let arcwrite arcread ArcWriter ArcReader
let arcpointer-offsets arcintptr
- let arccat arccat1
let share share&
let assign
let store memcpy memset
M lib/scopes/compiler/basics.sc +2 -1
@@ 1,5 1,5 @@
-import struct enum hash print downcast bitset
+import struct enum hash print downcast bitset slice
compiler.arcmem
compiler.arcmem.Map
compiler.arcmem.Array
@@ 13,6 13,7 @@ do
using print
using downcast
using bitset
+ using slice
using compiler.arcmem
using compiler.arcmem.Map
using compiler.arcmem.Array
M lib/scopes/compiler/noir/Module.sc +10 -0
@@ 179,6 179,16 @@ struct Module
line = u32
column = u32
+ # TODO: debuginfo can reference anchors or anchor paths (traces), which
+ relate a value or instruction to the parent statement that caused its
+ inlining, with the main module as root; e.g. a long anchor path could
+ be: import statement -> import statement -> APPLY -> APPLY -> value
+ # trace : RowIndexedTable
+ id = Trace
+ desc = TraceDesc # Const, TraceKind, ...?
+ inlined_by = Trace
+ at = Anchor
+
# we distinguish between procedures (no return value) and functions.
procedures are closed form update flow graphs.
functions are open form and C ABI compliant.