f48ffd7ef858 — Leonard Ritter 7 months ago
* added `arcmem.Atom`, which implements S-expression lists and symbols
4 files changed, 453 insertions(+), 12 deletions(-)

M lib/scopes/compiler/pilot/SX.sc => lib/scopes/compiler/arcmem/Atom.sc
M lib/scopes/compiler/arcmem/init.sc
M lib/scopes/compiler/basics.sc
M lib/scopes/compiler/noir/Module.sc
M lib/scopes/compiler/pilot/SX.sc => lib/scopes/compiler/arcmem/Atom.sc +439 -9
@@ 1,16 1,446 @@ 
 """"SX
     ==
 
-    A two-level lexer for Scoped eXpressions (SX) with yield flow.
+    arcmem-based S-expressions
+
+#
+    notes from masto:
+
+    on the parser level, also some interesting developments: all values are now
+    either lists or symbols (a random byte array). subtypes are encoded as
+    constructor calls, e.g. an int becomes `("type:s32" <4 byte LE encoded
+    integer>)`. a string becomes `("type:str" <blob>)`. a symbol is just
+    "symbol".this allows to wrap typed values late and lazily.
+
+    besides its many other advantages to my work, arcmem also made it possible
+    to "decons" arrays (truncate at the front), which made me realize that
+    scheme lists are broken: you append *and* remove in the front, which means
+    you often need to reverse a list after building it, to traverse it in the
+    same order. but arrays can be back-appended to *and* truncated in the front,
+    which creates queue instead of stack logic, which is ideal for S-expr
+    processing.
+
+    so, arcmem allocations can be dynamic arrays, and they can be S-expr lists,
+    which means we could use only libarcmem features to encode it all: lists are
+    void*[], symbols are byte[]. the contents are already distinguishable by the
+    fact that we can use arcmem to check if an address points to a pointer; a
+    void* that starts with a pointer is a void** (a list). otherwise it's a
+    symbol. type-foreign pointers may pretend to be a symbol, i.e. encoded as
+    8 byte tag + pointer.
+
+#
+    lists and symbols differentiate from pointer arrays and strings in the
+    following ways:
+
+    1) guaranteed to be immutable
+    2) not zero-terminated
+
+Symbol_ := Symbol
+unlet list Symbol
+
+using import compiler.basics
+
+fn print-atom
+
+type Atom :: (arc@ void)
+    inline __storageprinter (self print)
+        print-atom (imply (deref self) this-type) print
+
+    __copy := share
+
+type AtomImpl < Atom
+    inline __countof (self)
+        rtell (storagecast (view self))
+
+    inline __@ (self index)
+        self := storagecast (view self)
+        size := rtell self
+        if (index >= size)
+            raise;
+        @ (getelementptr self index)
+
+    fn __hash (self)
+        viewing self
+        T := typeof self
+        ptr sz := 'data self
+        sz := sz * (sizeof (elementof T))
+        hash.from-bytes (ptr as rawstring) sz
+
+    fn __uhash (self)
+        viewing self
+        T := typeof self
+        ptr sz := 'data self
+        sz := sz * (sizeof (elementof T))
+        uhash.from-bytes (ptr as rawstring) sz
+
+    @@ memo
+    inline __== (cls T)
+        static-if (cls == T)
+            inline (self other)
+                aptr asz := 'data (view self)
+                bptr bsz := 'data (view other)
+                sz := asz * (sizeof (elementof cls))
+                and
+                    asz == bsz
+                    (memcmp (aptr as rawstring) (bptr as rawstring) sz) == 0
+
+    @@ memo
+    inline __imply (cls T)
+        static-if (T == Atom)
+            inline (self)
+                bitcast self T
+        elseif (T == Generator)
+            inline (self)
+                count := countof self
+                Generator
+                    inline () 0:usize
+                    inline (i) (icmp<u i count)
+                    inline (i) (@ (getelementptr (view self) i))
+                    inline (i) (add i 1:usize)
+
+    inline... reverse (self, offset : usize = 0:usize)
+        Generator
+            inline () (countof self)
+            inline (i) (icmp>u i offset)
+            inline (i) (@ (getelementptr (view self) (sub i 1:usize)))
+            inline (i) (sub i 1:usize)
 
-    The first level properly segments whitespace and textual tokens and is
-    appropriate to use in the context of documentation generation and
-    basic syntax highlighting in editors.
+    inline rdecons (self count)
+        let T = (typeof self)
+        count := static-if (none? count) 1
+        else count
+        if (count > (countof self))
+            raise;
+        src := storagecast (view self)
+        mapf := static-if (view? self)
+            inline (i)
+                @ (getelementptr src i)
+        else
+            inline (i)
+                copy (@ (getelementptr src i))
+        result... := va-map mapf
+            va-range count
+        ptr := ptrtoint (deref (storagecast self)) uniqueintptr
+        ptr := add ptr
+            bitcast (mul (sizeof (elementof T)) (imply count usize)) uniqueintptr
+        self := inttoptr ptr T
+        pass self result...
+
+    inline decons (self count)
+        self ... := rdecons self count
+        (va-join ...) self
+
+    inline pop (self)
+        sz := countof self
+        if (icmp== sz 0:usize)
+            raise;
+        sz := sub sz 1:usize
+        src := storagecast (view self)
+        arg := copy (@ (getelementptr src sz))
+        resize self sz
+        arg
+
+    inline last (self)
+        sz := countof self
+        if (icmp== sz 0:usize)
+            raise;
+        sz := sub sz 1:usize
+        src := storagecast (view self)
+        @ (getelementptr src sz)
+
+    inline data (self)
+        sz := rtell (storagecast (view self))
+        pass (storagecast self) sz
+
+    inline... __rslice (self, start : usize)
+        trim self start (countof self)
+
+    inline... __lslice (self, end : usize)
+        trim self 0:usize end
+
+MutableListPtr := ~arc@ Atom
+
+type List < AtomImpl :: (arc@ Atom)
+    Kind := 1
+
+    inline __typecall (cls ...)
+        static-if ((va-countof ...) == 0)
+            bitcast ((storageof cls)) cls
+        else
+            argnum := va-countof ...
+            ptr := MutableListPtr argnum
+            va-map
+                inline (i)
+                    arg := va@ i ...
+                    (@ (getelementptr (view ptr) i)) = autocopy (imply arg Atom)
+                va-range argnum
+            bitcast ptr cls
+
+    inline append (self ...)
+        count := rtell (storagecast (view self))
+        argnum := va-countof ...
+        resize self (count + argnum)
+        ptr := bitcast (storagecast (view self)) MutableListPtr
+        ptr := getelementptr ptr count
+        va-map
+            inline (i)
+                arg := va@ i ...
+                (@ (getelementptr ptr i)) = autocopy (imply arg Atom)
+            va-range argnum
+
+    inline from-arrayptr (cls ptr sz)
+        dst := (storageof cls) sz
+        arcmemcopyn dst (view ptr) sz
+        bitcast dst cls
+
+    inline append-from-arrayptr (self ptr sz)
+        count := rtell (storagecast (view self))
+        resize self (count + sz)
+        dest := bitcast (storagecast (view self)) MutableListPtr
+        dest := getelementptr dest count
+        arcmemcopyn
+            dest
+            ptr
+            sz
+        ;
+
+    inline... __trim (self, start : usize, end : usize)
+        let T = (typeof self)
+        if (start == end) (T)
+        else
+            sz := countof self
+            ptr := getelementptr (view self) start
+            if (end == sz)
+                share (bitcast ptr T)
+            else
+                'from-arrayptr T ptr (end - start)
+
+MutableSymbolPtr := ~arc@ char
+
+type Symbol < AtomImpl :: (arc@ char)
+    Kind := 0
+
+    inline __typecall (cls value)
+        imply value cls
 
-    The second level, which wraps the first, maintains an indentation stack to
-    encode additional events for entering and exiting indentation, validates
-    indentation format and handles `\` correctly.
+    inline from-literal (cls value)
+        T := typeof value
+        sz := sizeof T
+        ptr := (storageof cls) sz
+        @ (bitcast (view ptr) ~@T) = value
+        bitcast ptr cls
+
+    inline from-rawstring (cls ptr sz)
+        ET := elementof (typeof ptr)
+        sz := sz * (sizeof ET)
+        dst := (storageof cls) sz
+        arcmemcopyn (dst as ~rawstring) ((view ptr) as rawstring) sz
+        bitcast dst cls
+
+    inline append-from-arrayptr (self ptr sz)
+        ET := elementof (typeof ptr)
+        sz := sz * (sizeof ET)
+        count := rtell (storagecast (view self))
+        resize self (count + sz)
+        dest := bitcast (storagecast (view self)) MutableSymbolPtr
+        dest := getelementptr dest count
+        arcmemcopyn
+            dest as ~rawstring
+            ptr as rawstring
+            sz
+        ;
+
+    inline append-memset (self num sz)
+        sz := imply sz usize
+        count := rtell (storagecast (view self))
+        resize self (count + sz)
+        dest := bitcast (storagecast (view self)) MutableSymbolPtr
+        dest := getelementptr dest count
+        arcmemsetn
+            dest as ~rawstring
+            num
+            sz
+        ;
+
+    inline align (self al)
+        al := imply al usize
+        sz1 := rtell (storagecast (view self))
+        sz2 := (sz1 + (al - 1:usize)) & -al
+        if (sz2 > sz1)
+            append-memset self 0 (sub sz2 sz1)
+
+    fn append-from-literal (self value)
+        ET := typeof value
+        sz := sizeof ET
+        align self (alignof ET)
+        count := rtell (storagecast (view self))
+        resize self (count + sz)
+        dest := getelementptr (storagecast (view self)) count
+        @ (bitcast (view dest) ~@ET) = value
+        ;
+
+    inline... append (self, value : (typematch &chararray? T))
+        static-assert (constant? value)
+        append-from-arrayptr self ('data value)
+    case (self, value : Symbol_)
+        static-assert (constant? value)
+        append-from-arrayptr self ('data (value as zarray))
+    case (self, value : (typematch T < integer))
+        static-assert (constant? value)
+        append-from-literal self value
+
+
+    @@ memo
+    inline __rimply (T cls)
+        static-if (T < integer)
+            inline (value)
+                from-literal cls value
+
+    @@ memo
+    inline __static-rimply (T cls)
+        static-if (T == Nothing)
+            inline (value)
+                bitcast ((storageof cls)) cls
+        elseif (T == Symbol_)
+            inline (value)
+                'from-rawstring cls ('data (value as zarray))
+        elseif (&chararray? T)
+            inline (value)
+                'from-rawstring cls ('data value)
+
+    inline... __trim (self, start : usize, end : usize)
+        let T = (typeof self)
+        if (start == end) (T)
+        else
+            sz := countof self
+            ptr := getelementptr (view self) start
+            if (end == sz)
+                share (bitcast ptr T)
+            else
+                'from-rawstring T ptr (end - start)
 
-    Additional functions are available for unescaping symbols, strings and
-    block strings.
+fn print-atom (atom print)
+    returning void
+    print-atom := this-function
+    downcast... atom
+    case List (l)
+        print
+            /p
+                /do
+                    for arg in l
+                        print-atom arg print
+    case Symbol (s)
+        print
+            /prettydata ('data (view s))
+    default
+        unreachable;
+
+type+ Atom :: (arc@ void)
+    @@ memo
+    inline __rimply (T cls)
+        conv := do
+            conv := static-imply-converter T Symbol false
+            static-if (none? conv)
+                static-imply-converter T List false
+            else conv
+        static-if (not none? conv)
+            inline (value)
+                bitcast (conv value) cls
+
+    # must return an integer
+    fn __tagof (self)
+        viewing self
+        pptr := bitcast (storagecast self) @intptr
+        (arcptrptr? pptr) List.Kind Symbol.Kind
+
+    @@ memo
+    inline __downcast (cls T)
+        inline (self)
+            bitcast self T
+
+    @@ memo
+    inline __typetagof (cls T)
+        static-if ((unqualified T) < (unqualified cls)) T.Kind
+
+    fn __hash (self)
+        viewing self
+        H := downcast... (view self)
+        case List (l)
+            hash l
+        case Symbol (s)
+            hash s
+        default
+            unreachable;
+        hash ('__tagof self) H
 
+    fn __uhash (self)
+        viewing self
+        H := downcast... (view self)
+        case List (l)
+            uhash l
+        case Symbol (s)
+            uhash s
+        default
+            unreachable;
+        uhash.join-from-integer
+            uhash.from-integer ('__tagof self)
+            storagecast H
+
+    @@ memo
+    inline __== (cls T)
+        static-if (cls == T)
+            fn (self other)
+                viewing self other
+                TA := '__tagof self
+                TB := '__tagof other
+                and
+                    TA == TB
+                    do
+                        downcast... self
+                        case List (l)
+                            l == (bitcast other List)
+                        case Symbol (s)
+                            s == (bitcast other Symbol)
+                        default
+                            unreachable;
+
+@if main-module?
+
+fn test1 ()
+    local s := Symbol 1:u8
+    'append s "test"
+    'append s 'foobarfoo
+    'append s 123:u64
+    print s
+    local l := (List)
+    'append l 1 2
+    'append l 3
+    print
+        lslice l 2
+    print l
+    print
+        try! 'pop l
+    print (uhash l)
+    print
+        try! 'pop l
+    print (uhash l)
+    local m := (List 1 (List 2 3) 4)
+    n := copy m
+    'append-from-arrayptr m ('data (view m))
+    print m n
+    k := imply m Atom
+    s := imply s Atom
+    print (k == k)
+    print
+        try! downcast k List
+    print
+        try! downcast s Symbol
+
+
+test1;
+
+@endif
+
+do
+    let Atom List Symbol
+    locals;
  No newline at end of file

          
M lib/scopes/compiler/arcmem/init.sc +2 -2
@@ 793,7 793,7 @@ let new =
 inline share (value)
     value := view value
     cls := typeof value
-    static-assert ((storageof cls) < arc@)
+    static-assert ((storageof cls) <= arc@)
         .. "cannot share value of type " (nameof (storageof cls))
     bitcast (copy (storagecast value)) cls
 

          
@@ 828,6 828,7 @@ do
     let arcmem-check
     let arcpointer-offsets arcintptr
     let hard soft soft?
+    let arcptrptr? = ptrptr?
     locals;
 #do
     let arcmalloc arcshare arcfree archangle arcinit arcresize arctrunc

          
@@ 840,7 841,6 @@ do
     let arcsoften archarden ptr-harden
     let arcwrite arcread ArcWriter ArcReader
     let arcpointer-offsets arcintptr
-    let arccat arccat1
     let share share&
     let assign
     let store memcpy memset

          
M lib/scopes/compiler/basics.sc +2 -1
@@ 1,5 1,5 @@ 
 
-import struct enum hash print downcast bitset
+import struct enum hash print downcast bitset slice
     compiler.arcmem
     compiler.arcmem.Map
     compiler.arcmem.Array

          
@@ 13,6 13,7 @@ do
     using print
     using downcast
     using bitset
+    using slice
     using compiler.arcmem
     using compiler.arcmem.Map
     using compiler.arcmem.Array

          
M lib/scopes/compiler/noir/Module.sc +10 -0
@@ 179,6 179,16 @@ struct Module
         line = u32
         column = u32
 
+    # TODO: debuginfo can reference anchors or anchor paths (traces), which
+        relate a value or instruction to the parent statement that caused its
+        inlining, with the main module as root; e.g. a long anchor path could
+        be: import statement -> import statement -> APPLY -> APPLY -> value
+    # trace : RowIndexedTable
+        id = Trace
+        desc = TraceDesc # Const, TraceKind, ...?
+        inlined_by = Trace
+        at = Anchor
+
     # we distinguish between procedures (no return value) and functions.
         procedures are closed form update flow graphs.
         functions are open form and C ABI compliant.