5ec34f19e916 — Leonard Ritter tip 5 hours ago
* compiler.pilot.bootstrap: more work on the new parser
M lib/scopes/compiler/Lexer.sc +7 -0
@@ 110,6 110,10 @@ struct Lexer
 
     temp_tokens : (Array TokenLocation)
 
+    inline valid? (self) true
+
+    inline finalize (self ...) ...
+
     inline insert (self send ch)
         tokens := self.temp_tokens
         assert (empty? tokens)

          
@@ 395,6 399,9 @@ fn test2 ()
                 b : TokenLocation
                 c : TokenLocation
 
+                inline valid? (self) true
+                inline finalize (self ...) ...
+
                 inline insert (self send value)
                     self.a = self.b
                     self.b = self.c

          
M lib/scopes/compiler/NumberParser.sc +32 -1
@@ 354,10 354,34 @@ struct NumberParser < AnyNumberParser
     inline as-integer (self T)
         local exp : T = 1
         local result : T = 0
+        local wrapped? = false
         for i in (rrange self.dot)
+            oldresult := copy result
             result += (self.digits @ i) as T * exp
+            wrapped? = wrapped? | (result < oldresult)
             exp *= self.base as T
-        ? ('negative? self) -result (copy result)
+        ? ('negative? self) -result (copy result), wrapped?
+
+    fn integer-bitcount (self)
+        # returns number of bits required (without sign bit),
+            whether the sign bit is set, and the sign-extended
+            u128 value.
+            if required bits is -1, the literal doesn't fit
+            128 bits.
+        value wrapped? := if ('signed? self)
+            value wrapped? := 'as-integer self i128
+            pass (value as u128) wrapped?
+        else
+            'as-integer self u128
+        if wrapped?
+            pass -1 ('negative? self) value
+        elseif (value == 0)
+            pass 0 false value
+        else
+            neg? := 'negative? self
+            bvalue := neg? ~value value
+            bc := (findmsb bvalue) as i32
+            pass (bc + 1) neg? value
 
     fn as-i64 (self)
         'as-integer self i64

          
@@ 394,6 418,13 @@ static-if main-module?
     static-if parser?
         print ('as-integer np i32)
         print ('as-f64 np)
+    'clear np
+    'parsedata np ('data "128")
+    print ('as-integer np i8)
+    print ('as-integer np i16)
+    print ('as-integer np u16)
+    print ('as-integer np i32)
+    print ('integer-bitcount np)
 
     ;
 

          
M lib/scopes/compiler/pilot/arcmem.sc +1 -1
@@ 97,7 97,7 @@ from (import itertools) let iterbits
 
 using import .bitmem
 
-USE_LOG := true
+USE_LOG := false
 
 @if USE_LOG
 arclog := report

          
A => lib/scopes/compiler/pilot/bootstrap.sc +598 -0
@@ 0,0 1,598 @@ 
+
+""""compiler.pilot.bootstrap
+    ========================
+
+    A "Low Level Scopes Machine" based on arcmem. The idea here is to devise
+    an extremely minimalist interpreter kernel that can be used to bootstrap
+    a more fully fledged compiler. The "language" of the interpreter is modeled
+    after Scopes List Notation.
+
+using import struct enum print itertools format Array String ..MappedFile
+using import .arcmem ..Lexer ..NumberParser .static-map
+
+from Lexer let Location Token
+
+fn... parse_hexchar (c : char)
+    if ((c >= char"0") and (c <= char"9"))
+        c - char"0"
+    elseif ((c >= char"a") and (c <= char"f"))
+        c - char"a" + 10:char
+    elseif ((c >= char"A") and (c <= char"F"))
+        c - char"A" + 10:char
+    else
+        -1:char
+
+enum Class : u16
+    U = 0x20
+    S = 0x30
+    F = 0x40
+    P = 0x50
+
+enum Kind : u16
+    inline K (class bitwidth array?)
+        |
+            getattr Class class
+            bitwidth
+            array? 0x80 0x00
+
+    inline N (C bitwidth)
+        num := static-tostring (1 << bitwidth)
+        tag
+            Symbol (.. (C as zarray) num)
+            Nothing
+            K C bitwidth false
+
+    inline I (bitwidth)
+        N 'U bitwidth
+        N 'S bitwidth
+
+    inline F (bitwidth)
+        N 'F bitwidth
+
+    # any kind of arcmem memory
+    Unknown = 0x00
+
+    # define all integer types (8 .. 128 bits)
+    va-map I (va-range 3 8)
+    # define all float types (16 .. 64 bits)
+    va-map F (va-range 4 7)
+
+    String = K 'S 3 true
+    Symbol = K 'U 3 true
+
+    List = K 'P 7 true
+
+    # in bits
+    inline __countof (self)
+        1:usize << (((storagecast self) & 0xf) as usize)
+
+inline ptrbytecount (start end)
+    end as intptr - start as intptr
+
+numbersuffix->kind := do
+    using Kind
+    static-map Unknown
+        shift = 10
+        width = 6
+        \ ":i8" S8
+        \ ":i16" S16
+        \ ":i32" S32
+        \ ":i64" S64
+        \ ":i128" S128
+        \ ":u8" U8
+        \ ":u16" U16
+        \ ":u32" U32
+        \ ":u64" U64
+        \ ":u128" U128
+        \ ":char" S8
+        \ ":usize" U64
+        \ ":f32" F32
+        \ ":f64" F64
+
+fn parsesuffix (start end)
+    numbersuffix->kind start (ptrbytecount start end)
+
+struct Parser
+    struct Level plain
+        # Close, SquareClose, CurlyClose, EOF
+        terminator : Token
+        loc : Location
+        nude? : bool = true
+        subcolumn? : bool = false
+        escape? : bool
+    levels : Array Level = typeinit
+        Level Token.EOF
+    last_token : Token
+    last_loc : Location
+    np : NumberParser
+    number_value : u128
+    number_kind : Kind
+
+    inline valid? (self) true
+
+    inline finalize (self ...) ...
+
+    fn unescape-append (self start end)
+        local src = start
+        inline at ()
+            copy @src
+        inline src++ ()
+            src = dupe (& (src @ 1))
+        inline append (ch)
+            'string-char self ch
+        loop ()
+            if (src == end)
+                break;
+            *src := (at)
+            if (*src == char"\\")
+                src++;
+                if (src == end)
+                    break;
+                *src := (at)
+                if (*src == char"\n")
+                    src++;
+                    # skip until next non whitespace character
+                    loop ()
+                        if (src == end)
+                            break;
+                        c := (at)
+                        if ((c == char" ") or (c == char"\t"))
+                            src++;
+                            repeat;
+                        else
+                            break;
+                    repeat;
+                else
+                    switch *src
+                    case char"n"
+                        src++;
+                        append char"\n"
+                        repeat;
+                    case char"t"
+                        src++;
+                        append char"\t"
+                        repeat;
+                    case char"r"
+                        src++;
+                        append char"\r"
+                        repeat;
+                    case char"x"
+                        src++;
+                        if (src == end)
+                            repeat;
+                        c0 := parse_hexchar (at)
+                        src++;
+                        c1 := parse_hexchar (at)
+                        src++;
+                        append ((c0 << 4) | c1)
+                        repeat;
+                    pass char"\""
+                    pass char"\\"
+                    do
+                        src++;
+                        append *src
+                        repeat;
+                    default
+                        # translate as-is
+                        append char"\\"
+                        src++;
+                        append *src
+                        repeat;
+            else
+                src++;
+                append *src
+                repeat;
+
+    fn unescape-raw-append (self start end)
+        local src = start
+        inline at ()
+            copy @src
+        inline src++ ()
+            src = dupe (& (src @ 1))
+        inline append (ch)
+            'string-char self ch
+        loop ()
+            if (src == end)
+                break;
+            *src := (at)
+            if (*src == char"\\")
+                src++;
+                if (src == end)
+                    break;
+                *src := (at)
+                if (*src == char"\"")
+                    src++;
+                else
+                    # translate as-is
+                    append char"\\"
+                    src++;
+                append *src
+                repeat;
+            src++;
+            append *src
+            repeat;
+
+    inline append-symbol (self symbol)
+        ptr sz := 'data symbol
+        'begin-string self Kind.Symbol sz
+        'string-data self ptr sz
+        'end-string self
+
+    fn append-number (self kind value)
+        numbytes := ((countof kind) + 7) // 8
+        local value = value
+        ptr := &value as rawstring
+        'begin-string self kind numbytes
+        'string-data self ptr numbytes
+        'end-string self
+
+    inline insert (self send token start end x0 x1)
+        #report
+            countof self.levels
+            start.line
+            start.column
+            token
+        inline error (...)
+            'error self start end ...
+        inline error@ (start end ...)
+            'error self start end ...
+        size := ptrbytecount x0 x1
+        switch token
+        pass 'Whitespace
+        pass 'Comment
+        do
+            for i in (range size)
+                if ((x0 @ i) == c"\t")
+                    error "format: please use spaces instead of tabs"
+            return;
+        default;
+        # manage the current level we're on
+        loop ()
+            level := 'last self.levels
+            if level.nude?
+                toplevel? := (countof self.levels) == 1
+                if level.escape?
+                    if ((start.line == level.loc.line) or toplevel?)
+                        pos := level.loc
+                        error@ pos pos
+                            "format: list continuation character must be at beginning or end of sublist line"
+                    level.escape? = false
+                    level.loc.line = start.line
+                if (token == level.terminator)
+                    'end-list self
+                    report "POP block" token
+                    'pop self.levels
+                    return;
+                elseif (token == 'Escape)
+                    level.escape? = true
+                    return;
+                elseif (start.line > level.loc.line)
+                    if toplevel?
+                        if (start.column != 1)
+                            error "format: indentation mismatch"
+                    elseif (start.column <= level.loc.column)
+                        'end-list self
+                        report "POP block"
+                        'pop self.levels
+                        repeat;
+                    else # indented into the block
+                        if ((level.loc.column + 4) != start.column)
+                            error "format: indentations must nest by 4 spaces"
+                        # escape = false
+                    level.loc.line = start.line
+                    report "PUSH block"
+                    'append self.levels
+                        Level Token.EOF start
+                    'begin-list self
+                    repeat;
+                elseif (token == 'ExprSep)
+                    if toplevel?
+                        error "format: unexpected list separation character"
+                    else;
+                        # todo: turn off unwrap_single
+                    # todo: only break if non-empty?
+                    'end-list self
+                    report "POP expr sep"
+                    'pop self.levels
+                    return;
+                else;
+            else
+                if (token == level.terminator)
+                    'end-list self
+                    report "POP" token
+                    'pop self.levels
+                    return;
+                # elseif escape token
+                elseif (token == 'EOF)
+                    error@ start start "format: list never closed"
+                # elseif statement token
+            break;
+        switch token
+        case 'Open
+            report "PUSH" token
+            'append self.levels
+                Level Token.Close start
+                    nude? = false
+            'begin-list self
+            return;
+        case 'SquareOpen
+            report "PUSH" token
+            'append self.levels
+                Level Token.SquareClose start
+                    nude? = false
+            'begin-list self
+            return;
+        case 'CurlyOpen
+            report "PUSH" token
+            'append self.levels
+                Level Token.CurlyClose start
+                    nude? = false
+            'begin-list self
+            return;
+        default;
+        'insert-any self send token start end x0 x1
+        ;
+
+    inline insert-any (self send token start end x0 x1)
+        cls := typeof self
+        inline error (...)
+            'error self start end ...
+        size := ptrbytecount x0 x1
+        #report
+            countof self.levels
+            token
+            /string
+                /data x0 size
+        prefixed? := self.last_token == 'StringPrefix
+        self.last_token = token
+        switch token
+        case 'CommaSep
+            'append-symbol self ","
+        case 'Symbol
+            'begin-string self Kind.Symbol size
+            'unescape-append self x0 x1
+            'end-string self
+        case 'StringPrefix
+            'begin-list self
+            prefix := "prefix:"
+            'begin-string self Kind.Symbol
+                size + (countof prefix)
+            'string-data self ('data prefix)
+            'unescape-append self x0 x1
+            'end-string self
+        case 'String
+            'begin-string self Kind.String size
+            if prefixed?
+                'unescape-raw-append self x0 x1
+            else
+                'unescape-append self x0 x1
+            'end-string self
+        pass 'Number
+        pass 'SuffixedNumber
+        do
+            suffixed? := token == 'SuffixedNumber
+            'parsedata self.np x0 size
+            np := self.np
+            kind value := if ('real? np)
+                value := 'as-f64 np
+                if suffixed?
+                    pass Kind.F64
+                        zext (bitcast value u64) u128
+                else
+                    pass Kind.F32
+                        zext (bitcast (value as f32) u32) u128
+            else
+                count neg? value := 'integer-bitcount np
+                kind := if (count == -1)
+                    Kind.Unknown
+                elseif ('signed? np)
+                    if (count < 32)
+                        Kind.S32
+                    elseif (count < 64)
+                        Kind.S64
+                    elseif (count < 128)
+                        Kind.S128
+                    else
+                        Kind.Unknown
+                else
+                    if (count < 32)
+                        Kind.S32
+                    elseif (count == 32)
+                        Kind.U32
+                    elseif (count < 64)
+                        Kind.S64
+                    elseif (count == 64)
+                        Kind.U64
+                    elseif (count < 128)
+                        Kind.S128
+                    else
+                        Kind.U128
+                pass kind value
+            if (kind == Kind.Unknown)
+                error "format: integer literal is too large for any type"
+            if suffixed?
+                self.number_kind = kind
+                self.number_value = value
+            else
+                'append-number self kind value
+        case 'NumberSuffix
+            req_kind := parsesuffix x0 x1
+            value := copy self.number_value
+            value := switch self.number_kind
+            case 'F64
+                switch req_kind
+                case 'F32
+                    value := bitcast (itrunc value u64) f64
+                    value := value as f32
+                    zext (bitcast value u32) u128
+                case 'F64 value
+                default
+                    error "format: invalid suffix for real literal"
+            default
+                if (req_kind == Kind.Unknown)
+                    error "format: invalid suffix for integer literal"
+                value
+            'append-number self req_kind value
+        case 'SugarQuote
+            #'begin-list self
+            #'append-symbol self "sugar-quote"
+            # todo: pop sugar quote
+        case 'BlockString
+            strip_col := start.column + 4
+            x0 := & (x0 @ 4)
+            'begin-string self Kind.String (size - 4)
+            loop (x0)
+                if (x0 == x1)
+                    break;
+                c := copy @x0
+                x0 := & (x0 @ 1)
+                'string-char self c
+                if (c == c"\n")
+                    # strip leftside column
+                    fold (x0) for i in (range 1 strip_col)
+                        if (x0 == x1)
+                            break x0
+                        if ((@x0 != c" ") and (@x0 != "\t"))
+                            break x0
+                        & (x0 @ 1)
+                else x0
+            'end-string self
+        default
+            error "don't know what to do with lexer token: " token
+        if prefixed?
+            'end-list self
+
+        ;
+
+inline encode-kind (kind)
+    ((storagecast kind) as u64) << 48
+
+struct Atom_ plain
+    inline element-size (self)
+        sizeof (elementof (typeof self.data))
+
+    inline __countof (self)
+        (self.size_kind & (-1:u64 >> 16)) // ('element-size self)
+
+    inline kindof (self)
+        (self.size_kind >> 48) as u16 as Kind
+
+    inline set-kind (self kind)
+        self.size_kind = (self.size_kind & (-1:u64 >> 16)) | (encode-kind kind)
+
+    inline append (self value)
+        count := countof self
+        ET := 'element-size self
+        if ((rtell self.data) <= count)
+            archold self.data ((count + 1) * ET)
+        assert ((rtell (& (self.data @ count))) != 0)
+        arcassign value (self.data @ count)
+        self.size_kind += ET
+
+struct Atom < Atom_
+    data : voidstar
+    size_kind : usize
+
+struct List < Atom_
+    data : mutable @Atom
+    size_kind : usize = encode-kind Kind.List
+    fn __printer (self print)
+        returning void
+        print
+            /p
+                @@ printer
+                inline (print)
+                    for i in (range (countof self))
+                        ptr := & (self.data @ i)
+                        print (self.data @ i)
+
+struct Blob < Atom_
+    data : mutable rawstring
+    size_kind : usize
+
+    fn __printer (self print)
+        returning void
+        print
+            /string
+                /data self.data (countof self)
+            /..
+            Styled.Operator ":"
+            /..
+            '__tostring ('kindof self)
+
+type+ Atom
+    fn __printer (self print)
+        returning void
+        switch ('kindof self)
+        case 'List
+            print (@ (&self as @List))
+        default # blob
+            print (@ (&self as @Blob))
+
+struct AtomParser < Parser
+    filepath : String
+    str : Blob
+    stack : Array List = typeinit (List)
+
+    inline __typecall (cls filepath)
+        super-type.__typecall cls
+            filepath = filepath
+
+    inline finalize (self)
+        while ((countof self.stack) > 1)
+            'end-list self
+        deref ('pop self.stack)
+
+    inline error (self start end ...)
+        print2
+            /.. self.filepath ":" (dec start.line) ":" (dec start.column) ":"
+            (repr ...) as string
+        abort;
+
+    inline begin-string (self kind capacity)
+        self.str = Blob
+            (arcmalloc capacity) as (mutable rawstring)
+            encode-kind kind
+    inline string-data (self ptr size)
+        for i in (range size)
+            'append self.str (ptr @ i)
+    inline string-char (self ch)
+        'append self.str ch
+    inline end-string (self kind)
+        local str := popswap self.str (Blob)
+        'append
+            'last self.stack
+            @ (&str as @Atom)
+    inline begin-list (self)
+        'append self.stack (List)
+    inline end-list (self)
+        local item := 'pop self.stack
+        'append
+            'last self.stack
+            @ (&item as @Atom)
+
+inline parse (ParserType)
+    finite-state-machine ParserType
+
+do
+    from Lexer let TokenLocation
+    f := try! MappedFile.open module-path
+    ptr sz := 'data f
+    print2
+        ->>
+            range (sz + 1)
+            map
+                inline (i)
+                    if (i == sz)
+                        0:char
+                    else
+                        deref (ptr @ i)
+            tokenize
+            map
+                inline (token begin end)
+                    pass token begin end
+                        & (ptr @ begin.offset)
+                        & (ptr @ end.offset)
+            parse
+                inline ()
+                    AtomParser module-path

          
M lib/scopes/itertools.sc +4 -2
@@ 740,9 740,11 @@ inline finite-state-machine (T coll)
             inline ()
                 pass (T) (start)
             inline "valid?" (self it...)
-                valid? it...
+                ('valid? self) and (valid? it...)
             inline "finalize" (self it...)
-                finalize it...
+                local self = self
+                'finalize self
+                    finalize it...
             inline "insert" (src self it...)
                 local self = self
                 it... := va-map