71640b4066b7 — Leonard Ritter tip 19 hours ago
* arcmem: moved bitwise memory ops to compiler.pilot.bitmem
M lib/scopes/compiler/pilot/arcmem.sc +40 -82
@@ 13,6 13,8 @@ 
 using import struct print format C.stdio C.mman C.errno
 from (import itertools) let iterbits
 
+using import .bitmem
+
 USE_LOG := true
 
 @if USE_LOG

          
@@ 31,6 33,7 @@ pool-min-size := 1:usize << pool-min-siz
 pool-count := pool-size-bits - pool-min-size-bits
 
 pointer-size := sizeof voidstar
+pointer-bitcount := bitcountof intptr
 
 # round S to the next power of 2 and return the log2 of it.
 inline... logsize (S : usize)

          
@@ 47,7 50,7 @@ pool-size := 1:u64 << pool-size-bits
 pool-index-mask := (1:u64 << pool-index-bits) - 1
 pointer-bits-needed := pool-size-bits + pool-index-bits
 pool-capacity := pool-size * pool-count
-bitmap-capacity := pool-capacity // (bitcountof intptr)
+bitmap-capacity := pool-capacity // pointer-bitcount
 bitmap-offset := pool-capacity
 mmap-size := pool-capacity + bitmap-capacity
 # we pick an address above the bits we need for the mapping; pointer. if we fail

          
@@ 64,6 67,8 @@ bitmap-bitaddress-end := bitmap-bitaddre
 # conversion offset for converting pointers to bitpointers
 bitmap-conv-offset := bitmap-bitaddress - mmap-address // 8
 
+bit63 := 0x8000000000000000:u64
+
 weak-bit := 0x8000000000000000:u64
 both-by-one := 0x0000000100000001:u64
 strong-minus-weak-plus := 0xffffffff:u64

          
@@ 242,60 247,8 @@ inline findlsbofs (x i)
     x := rotr x i
     ((findlsb x) + i) % 64
 
-inline bitptr-split8 (x)
-    pass
-        x >> 3
-        (x & 7) as u8
-
-inline bitptr-split64 (x)
-    pass
-        (x >> 3) & -64:u64
-        x & 63
-
-@@ typed intptr intptr
-fn bitmemclear (beginbitptr endbitptr)
-    assert (beginbitptr >= bitmap-bitaddress)
-    assert (endbitptr < bitmap-bitaddress-end)
-    assert (beginbitptr <= endbitptr)
-    beginptr beginbit := bitptr-split8 beginbitptr
-    endptr endbit := bitptr-split8 endbitptr
-    beginmask := ~ (-1:u8 << beginbit)
-    endmask := -1:u8 << endbit
-    if (beginptr == endptr) # all within single byte
-        atomicrmw band (inttoptr beginptr (mutable@ u8)) (beginmask | endmask)
-        return;
-    beginptr := if (beginbit != 0)
-        atomicrmw band (inttoptr beginptr (mutable@ u8)) beginmask
-        beginptr + 1
-    else beginptr
-    ptr := inttoptr beginptr (mutable rawstring)
-    size := endptr - beginptr
-    memset ptr 0 size
-    if (endbit != 0)
-        atomicrmw band (inttoptr endptr (mutable@ u8)) endmask
-    ;
-@@ typed intptr intptr
-fn bitmemset (beginbitptr endbitptr)
-    assert (beginbitptr >= bitmap-bitaddress)
-    assert (endbitptr < bitmap-bitaddress-end)
-    assert (beginbitptr <= endbitptr)
-    beginptr beginbit := bitptr-split8 beginbitptr
-    endptr endbit := bitptr-split8 endbitptr
-    beginmask := -1:u8 << beginbit
-    endmask := ~ (-1:u8 << endbit)
-    if (beginptr == endptr) # all within single byte
-        atomicrmw bor (inttoptr beginptr (mutable@ u8)) (beginmask & endmask)
-        return;
-    beginptr := if (beginbit != 0)
-        atomicrmw bor (inttoptr beginptr (mutable@ u8)) beginmask
-        beginptr + 1
-    else beginptr
-    ptr := inttoptr beginptr (mutable rawstring)
-    size := endptr - beginptr
-    memset ptr -1 size
-    if (endbit != 0)
-        atomicrmw bor (inttoptr endptr (mutable@ u8)) endmask
-    ;
+inline valid-bitmap-bitptr? (bitptr)
+    ((bitptr >= bitmap-bitaddress) and (bitptr < bitmap-bitaddress-end))
 
 @@ typed usize
 fn arcmalloc (size)

          
@@ 350,7 303,7 @@ fn arcmalloc (size)
     h.weak = 0
     h.next = null
     # init bitmap; this also includes the header bits
-    bitmemclear
+    bitmemclearrange
         ptr-bitptr-floor ptr
         ptr-bitptr-floor
             inttoptr ((ptrtoint ptr intptr) + capacity) voidstar

          
@@ 367,15 320,15 @@ fn arcshare (ptr)
             atomicrmw add (& h.strong) 1
     ptr
 
-# equivalent to free(); do not call arcfree directly, but use arcdrop instead.
+# equivalent to free(); do not call arcvacate directly, but use arcfree instead.
 @@ typed voidstar
-fn arcfree (ptr)
+fn arcvacate (ptr)
     if (arcptr? ptr)
         assert (ptr == (ptr-base ptr))
-            "arcfree: pointer does not point at valid object"
+            "arcvacate: pointer does not point at valid object"
         h := ptr-aligned-freelist ptr
         assert (h.magic != free-list-magic)
-            "arcfree: double free detected"
+            "arcvacate: double free detected"
         h.magic = free-list-magic
         poolid := ptr-poolid ptr
         assert (poolid < pool-count)

          
@@ 391,7 344,7 @@ fn arcfree (ptr)
             volatile-store (ptrtoint ptr intptr) pfirst
             break;
     else
-        arclog "arcfree attempted on non-arcptr"
+        arclog "arcvacate attempted on non-arcptr"
         # fallback to regular free
         #free ptr
 

          
@@ 403,7 356,7 @@ inline softdrop (name ptr h)
             will not be touched again, so we can rely on this zero
         if ((volatile-load &h.strong) == 0)
             arclog name "free" ptr
-            arcfree ptr
+            arcvacate ptr
 
 @@ typed voidstar (mutable &voidstar)
 fn arcdrop1 (ptr stack)

          
@@ 442,23 395,27 @@ inline pointer-iterator (f)
         beginmask := -1:u64 << beginbit
         endmask := ~ (-1:u64 << endbit)
         if (beginptr == endptr)
-            word := load (inttoptr beginptr @u64)
-            word := word & (beginmask | endmask)
+            word := load beginptr
+            word := word & (beginmask & endmask)
             arcdropscanword beginbitptr word ...
         else
             beginptr := if (beginbit != 0)
-                word := load (inttoptr beginptr @u64)
+                word := load beginptr
                 word := word & beginmask
-                arcdropscanword (beginptr * 8) word ...
-                beginptr + (sizeof u64)
+                arcdropscanword beginbitptr word ...
+                & (beginptr @ 1)
             else beginptr
-            for addr in (range beginptr endptr (sizeof u64))
+            for addr in
+                range
+                    ptrtoint beginptr intptr
+                    ptrtoint endptr intptr
+                    sizeof u64
                 word := load (inttoptr addr @u64)
                 arcdropscanword (addr * 8) word ...
             if (endbit != 0)
-                word := load (inttoptr endptr @u64)
+                word := load endptr
                 word := word & endmask
-                arcdropscanword (endptr * 8) word ...
+                arcdropscanword endbitptr word ...
 
 @@ typed intptr intptr (mutable& voidstar)
 let bitptr-droptostack =

          
@@ 492,7 449,7 @@ fn arcmemdropstack (stack)
         repeat;
 
 @@ typed voidstar
-fn arcdrop (ptr)
+fn arcfree (ptr)
     local stack : voidstar = null
     arcdrop1 ptr stack
     arcmemdropstack stack

          
@@ 508,11 465,11 @@ inline ptraligned? (p)
 # true if pointer points to pointer
 inline ptrptr? (p)
     ptr bit := bitptr-split8 (ptr-bitptr-floor p)
-    ((load (ptr as @u8)) & (1:u8 << bit)) != 0
+    ((load ptr) & (1:u8 << bit)) != 0
 # set as pointer
 inline setptrptr (p)
     ptr bit := bitptr-split8 (ptr-bitptr-floor p)
-    atomicrmw bor (inttoptr ptr (mutable@ u8)) (1:u8 << bit)
+    atomicrmw bor ptr (1:u8 << bit)
 # pointer only partially points to pointer
 inline badptrptr? (p)
     (not ptraligned? p) and (ptrptr? p)

          
@@ 530,7 487,7 @@ fn ptr-memdrop (ptr size)
     local stack : voidstar = null
     bitptr-dropandcleartostack bd0 bd1 stack
     # clear all pointer bits
-    bitmemclear bd0 bd1
+    bitmemclearrange bd0 bd1
     # cleanup dependencies
     arcmemdropstack stack
 

          
@@ 557,10 514,11 @@ let bitptr-memcopy =
             srcptr := (bitptr-ptr bitptr) as (mutable@ voidstar)
             offset := (ptrtoint srcptr intptr) - (ptrtoint src intptr)
             destptr := bitptr-memcopy-data offset dest src copied
-            # todo: we can move this branch further up
+            # todo: we could move this branch further up
             if (ptraligned? destptr)
                 copied += pointer-size
                 @ (destptr as (mutable@ voidstar)) = arcshare (deref (@ srcptr))
+                # todo: copy bits separately, in one pass
                 if (arcptr? destptr)
                     setptrptr destptr
             else

          
@@ 574,7 532,7 @@ let bitptr-memcopy =
     to unaligned offsets, pointers will also be copied as null.
 
     all pointers fully or partially overwritten at `dest` will be untagged,
-    dropped and nulled as if arcdrop() were called on them.
+    dropped and nulled as if arcfree() were called on them.
 
     pointers duplicated from `src` will be copied as if arcshare() were called
     on them, and the destination address will be tagged as pointer.

          
@@ 629,7 587,7 @@ fn archold (dest)
     newptr := arcmalloc size
     arcmemcpy newptr ptr size
     dest = inttoptr ((ptrtoint newptr intptr) + offset) voidstar
-    arcdrop ptr
+    arcfree ptr
     true
 
 getheap;

          
@@ 654,10 612,10 @@ do
 
     p3b := arcshare p3
 
-    arcdrop p1
-    arcdrop p2
-    arcdrop p3
-    arcdrop p4
+    arcfree p1
+    arcfree p2
+    arcfree p3
+    arcfree p4
     print "-"
     q1 := arcmalloc 1
     q2 := arcmalloc 1

          
@@ 665,7 623,7 @@ do
     q4 := arcmalloc 1
     q5 := arcmalloc 1
     print q1 q2 q3 q4 q5
-    arcdrop p3b
+    arcfree p3b
 
 run-stage;
 

          
A => lib/scopes/compiler/pilot/bitmem.sc +146 -0
@@ 0,0 1,146 @@ 
+
+""""compiler.pilot.bitmem
+    =====================
+
+    Support for bitwise memory operations that use bitpointers, pointers that
+    have been left-shifted by 3 bits to permit addressing sub-byte bits
+    individually.
+
+inline... tobitptr (ptr : voidstar)
+    (ptrtoint ptr intptr) << 3
+
+inline... bitptr-split8 (x : intptr)
+    pass
+        inttoptr (x >> 3) (mutable @u8)
+        (x & 7) as u8
+inline... bitptr-split16 (x : intptr)
+    pass
+        inttoptr ((x >> 3) & -2:u64) (mutable @u16)
+        (x & 15) as u16
+inline... bitptr-split32 (x : intptr)
+    pass
+        inttoptr ((x >> 3) & -4:u64) (mutable @u32)
+        (x & 31) as u32
+inline... bitptr-split64 (x : intptr)
+    pass
+        inttoptr ((x >> 3) & -8:u64) (mutable @u64)
+        x & 63
+
+
+@@ typed intptr intptr
+fn bitmemclearrange (beginbitptr endbitptr)
+    beginptr beginbit := bitptr-split8 beginbitptr
+    endptr endbit := bitptr-split8 endbitptr
+    beginmask := ~ (-1:u8 << beginbit)
+    endmask := -1:u8 << endbit
+    if (beginptr == endptr) # all within single byte
+        atomicrmw band beginptr (beginmask | endmask)
+        return;
+    beginptr := if (beginbit != 0)
+        atomicrmw band beginptr beginmask
+        & (beginptr @ 1)
+    else beginptr
+    ptr := beginptr as (mutable rawstring)
+    size := (ptrtoint endptr intptr) - (ptrtoint beginptr intptr)
+    memset ptr 0 size
+    if (endbit != 0)
+        atomicrmw band endptr endmask
+    ;
+
+inline... bitmemclear (bitptr : intptr, size : usize)
+    bitmemclearrange bitptr (bitptr + size)
+
+@@ typed intptr intptr
+fn bitmemsetrange (beginbitptr endbitptr)
+    beginptr beginbit := bitptr-split8 beginbitptr
+    endptr endbit := bitptr-split8 endbitptr
+    beginmask := -1:u8 << beginbit
+    endmask := ~ (-1:u8 << endbit)
+    if (beginptr == endptr) # all within single byte
+        atomicrmw bor beginptr (beginmask & endmask)
+        return;
+    beginptr := if (beginbit != 0)
+        atomicrmw bor beginptr beginmask
+        & (beginptr @ 1)
+    else beginptr
+    ptr := beginptr as (mutable rawstring)
+    size := (ptrtoint endptr intptr) - (ptrtoint beginptr intptr)
+    memset ptr -1 size
+    if (endbit != 0)
+        atomicrmw bor endptr endmask
+    ;
+
+inline... bitmemset (bitptr : intptr, size : usize)
+    bitmemsetrange bitptr (bitptr + size)
+
+@@ typed intptr intptr usize
+fn bitmemcpy (destbitptr srcbitptr bitsize)
+    if (bitsize == 0)
+        return;
+    inline readbits (buf bufsize bitptr)
+        ptr bit := bitptr-split64 bitptr
+        cap := max bufsize bit
+        bitsread := 64:u64 - cap
+        data := load ptr
+        # truncate unused back bits
+        data := data << (cap - bit)
+        # truncate unused front bits
+        data := data >> cap
+        pass
+            # append to buffer
+            buf | (data << bufsize)
+            bufsize + bitsread
+            bitptr + bitsread
+    endbitptr := destbitptr + bitsize
+    loop (destbitptr buf bufsize srcbitptr = destbitptr 0:u64 0:u64 srcbitptr)
+        if (destbitptr == endbitptr)
+            break;
+        dstptr dstbit := bitptr-split64 destbitptr
+        toread := min (endbitptr - destbitptr) (64:u64 - dstbit)
+        # read into buffer until satisfied
+        buf bufsize srcbitptr := loop (it... = buf bufsize srcbitptr)
+            buf bufsize := it...
+            if (bufsize < toread)
+                repeat (readbits it...)
+            break it...
+        destbitptr := destbitptr + toread
+        if (toread == 64:u64)
+            assert (dstbit == 0)
+            store buf dstptr
+            repeat destbitptr 0:u64 0:u64 srcbitptr
+        else
+            bits := buf << dstbit
+            # pop from buffer
+            buf := buf >> toread
+            bufsize := bufsize - toread
+            # this is not an atomic operation: we only use atomics to not
+                destroy unrelated bits. only "our" bits from prevval are
+                used to make comparisons. as with regular memcpy, concurrent
+                writes are permitted to be out of order.
+            # masking untouched bits
+            lr := -1:u64 << dstbit
+            negmask := (lr << toread) | ~lr
+            # masking touched bits
+            mask := ~negmask
+            # insert all set bits
+            prevval := atomicrmw bor dstptr (bits & mask)
+            bitsmask := bits | negmask
+            # also update unset bits?
+            if ((prevval & bitsmask) != prevval)
+                # keep all other bits as they are and remove the unset bits
+                atomicrmw band dstptr bitsmask
+            repeat destbitptr buf bufsize srcbitptr
+
+do
+    let
+        bitptr-split8
+        bitptr-split16
+        bitptr-split32
+        bitptr-split64
+        bitmemclearrange
+        bitmemsetrange
+        bitmemclear
+        bitmemset
+        bitmemcpy
+        tobitptr
+    locals;

          
M testing/test_all.sc +1 -0
@@ 13,6 13,7 @@ test-modules
     .test_ast_quote
     #.test_atom
     .test_atomic
+    .test_bitmem
     .test_bitset
     .test_bool
     .test_box

          
A => testing/test_bitmem.sc +49 -0
@@ 0,0 1,49 @@ 
+
+using import testing compiler.pilot.bitmem print
+
+bufsize := 20:u64
+bitbufsize := bufsize * 8
+H := bitbufsize // 2
+
+buf := alloca-array i8 bufsize
+for i in (range bufsize)
+    #buf @ i = i as i8
+    buf @ i = 0xcd as i8
+bitptr := tobitptr buf
+inline dumpbuf ()
+    for i in (range bitbufsize)
+        if ((i > 0) and ((i % 7) == 0))
+            print " " /..
+        ptr ofs := bitptr-split8 (bitptr + i)
+        if ((((load ptr) >> ofs) & 1) == 0)
+            print "0" /..
+        else
+            print "1" /..
+    print ""
+dumpbuf;
+#do
+    bitmemclear bitptr H
+    dumpbuf;
+    bitmemset (bitptr + H) H
+    dumpbuf;
+
+#bitmemcpy (bitptr + 62) (bitptr + H - 2) 4
+#dumpbuf;
+
+inline... shufflepack (width : u64)
+    tempbuf := alloca-array i8 bufsize
+    for i in (range bufsize)
+        tempbuf @ i = 0xa0:i8
+    tempbitptr := tobitptr tempbuf
+    for i in (range 0:u64 (bitbufsize - 2 * width + 1) (2 * width))
+        bitmemcpy tempbitptr (bitptr + i) width
+        bitmemcpy (bitptr + i) (bitptr + i + width) width
+        bitmemcpy (bitptr + i + width) tempbitptr width
+
+shufflepack 7
+shufflepack 7
+dumpbuf;
+
+#print bitptr
+
+;
  No newline at end of file