implement chunked download
2 files changed, 124 insertions(+), 46 deletions(-)

M run-wispserve.w
M wispserve/serve.w
M run-wispserve.w +1 -2
@@ 47,7 47,6 @@ define : main args
                lazy : member "--lazy" arguments
              serve (second arguments) (second ip-opt) (string->number (second port-opt)) (not lazy)
        else
-         write : download-file : car arguments
-         newline
+         download-file : car arguments
 
 

          
M wispserve/serve.w +123 -44
@@ 122,14 122,41 @@ define : declare-download-mesh-headers!
 define-record-type <range>
     range start-end data
     . range?
-    start-end range-start-end ;; cons start end
+    start-end range-start-end ;; cons start end, start included, end excluded
     data range-data
 
+define 16B : expt 2 4
+define 4KiB : expt 2 12
+define 32KiB : expt 2 15
+define 64KiB : expt 2 16
+define 128KiB : expt 2 17
+define 256KiB : expt 2 18
+define 2MiB : expt 2 21
+define 32MiB : expt 2 25
+define 1GiB : expt 2 30
+define 64GiB : expt 2 36
+define 8TiB : expt 2 43
+
 define : range-start-> a b
      . "order the ranges, range with larger start first."
      > : car : range-start-end a
          car : range-start-end b
 
+define : range-start-< a b
+     . "order the ranges, range with larger start first."
+     < : car : range-start-end a
+         car : range-start-end b
+
+define : range-end-> a b
+     . "order the ranges, range with larger start first."
+     > : cdr : range-start-end a
+         cdr : range-start-end b
+
+define : range-end-< a b
+     . "order the ranges, range with larger start first."
+     < : cdr : range-start-end a
+         cdr : range-start-end b
+
 define : merge-ranges received-ranges
     . "merge ranges in RECEIVED-RANGES into a new list, ordered with the range with the highest start first."
     let loop : (merged '()) (original (sort received-ranges range-start->))

          
@@ 162,7 189,7 @@ define : merge-ranges received-ranges
 define : content-range->start-end-size content-range
     match content-range
       : 'content-range 'bytes (start . end) size
-        values start end : if (equal? size '*) #f size
+        values start end : if (equal? size '* ) #f size
       : range
         format #t "content-range not in canonical format: ~A\n" range
         let : : range-string : string-drop (cdr content-range) (string-length "bytes ")

          
@@ 174,59 201,118 @@ define : content-range->start-end-size c
                   . #f
                   string->number size
 
+define : initial-ranges-bytes-full start size
+         . "generate initial ranges as a single range for the whole size"
+         list : cons start size
+
+define : initial-ranges-bytes-fixed-size start size chunksize
+         . "Generate initial ranges wit ha fixed chunksize"
+         let loop : (ranges '()) (start 0) (remaining size)
+             if : <= remaining chunksize
+                reverse
+                    cons : cons start : + start (- remaining 1)
+                         . ranges
+                loop 
+                    cons : cons start : + start (- chunksize 1)
+                         . ranges
+                    + start chunksize
+                    - remaining chunksize
+
+define : initial-ranges-32k start size
+         initial-ranges-bytes-fixed-size start size 32KiB
+
+define : initial-ranges-tiered start size
+         . "Ranges that balance chunksize vs. chunk-count. The goal is to have a number of ranges equal to the square root of the 32KiB blocks."
+         initial-ranges-bytes-fixed-size start size
+           cond
+             : < size 64KiB 
+               . 32KiB ;; up to 2 chunks
+             : < size 256KiB
+               . 64KiB ;; up to 4 chunks
+             : < size 2MiB
+               . 256KiB ;; up to 8 chunks
+             : < size 32MiB
+               . 2MiB ;; up to 16 chunks
+             : < size 1GiB
+               . 32MiB ;; up to 32 chunks
+             : < size 64GiB
+               . 1GiB ;; up to 64 chunks
+             : < size 8TiB
+               . 64GiB ;; up to 128 chunks
+             else
+               . 8TiB ;; far in the future
+               
+          
 define : missing-ranges-bytes size received-ranges
+    define required : initial-ranges-tiered 0 size
     if : null? received-ranges
-       list : cons 0 size
-       let : :  ranges : map range-start-end : sort received-ranges range-start->
-           define missing-at-end
-               > {size - 1}
-                 cdr : first ranges
-           define missing-initial
-               if : not missing-at-end
-                    list
-                    list
-                        cons : + 1 : cdr : first ranges
-                             . {size - 1}
-           let loop : (missing missing-initial) (seen '()) (unseen ranges)
+       . required
+       let 
+           :  ranges-by-start : map range-start-end : sort received-ranges range-start-<
+           let loop : (missing '()) (to-check required) (received-by-start ranges-by-start)
+               ;; pretty-print
+               ;;     ` : missing ,missing
+               ;;         to-check ,to-check
+               ;;         received-by-start ,received-by-start                       
                cond
-                 : null? unseen
+                 : null? to-check
                    . missing
-                 : null? seen
-                   loop missing
-                        cons (car unseen) seen
-                        cdr unseen
+                 : null? received-by-start
+                   append missing to-check
                  else
-                   let : (seen-start (car (first seen))) (unseen-end (cdr (first unseen)))
-                         loop
-                             if {seen-start > unseen-end}
-                                cons (cons unseen-end seen-start) missing
-                                . missing
-                             cons (car unseen) seen
-                             cdr seen
-
+                   let : (next-range (first to-check)) (received (first received-by-start))
+                       define next-start : car next-range
+                       define next-end : cdr next-range
+                       define received-start : car received
+                       define received-end : cdr received
+                       cond
+                           ;; if all received are after the current chunk to check, the chunk is missing
+                           {received-start > next-end}
+                             loop : cons next-range missing
+                                    cdr to-check
+                                    . received-by-start
+                           ;; if a received is completely before the current to check, skip it
+                           {received-end < next-start}
+                             loop missing to-check (cdr received-by-start)
+                           ;; if a received contains the to-check, skip the to-check
+                           : and {received-start <= next-start} {received-end >= next-end}
+                             loop missing (cdr to-check) received-by-start
+                           ;; if a received is overlapping with the start, adjust the range to-check to start after the received
+                           : and {received-start <= next-start} {received-end > next-start}
+                             loop missing (cons (cons {received-end + 1} next-end) (cdr to-check)) received-by-start
+                           ;; if a received is overlapping with the end, add a reduced range to the missing
+                           : and {received-end >= next-end} {received-start < next-end}
+                             loop (cons (cons next-start received-start) missing) (cdr to-check) received-by-start
+                           ;; if a received is contained in the to-check, split the to-check, adding the first to missing and keeping the second as to-check
+                           : and {received-end < next-end} {received-start > next-start}
+                             loop (cons (cons next-start received-start) missing) (cons (cons received-end next-end) (cdr to-check)) received-by-start
 
 define : download-file url
     let loop : (size #f) (received-ranges '())
         define missing-ranges
             if : not size
-                 list : cons 0 1
+                 list : cons 0 #f ;; minimal range header content so that the server can serve a content range
                  missing-ranges-bytes size received-ranges
+        ;; pretty-print : list 'received-ranges-start-end : map range-start-end received-ranges
         if : null? missing-ranges
-             range-data (car received-ranges)
+             range-data : car received-ranges
              let*
                  : uri : string->uri-reference url
-                   range-to-request : list-ref missing-ranges : random-integer : length missing-ranges
-                   headers `((range bytes ,range-to-request)) ;; minimal range header so that the server can serve a content range
+                   range-to-request : list-ref missing-ranges 0 ;; : random-integer : length missing-ranges
+                   headers `((range bytes ,range-to-request))
                  display "Downloading file "
-                 display uri
-                 ;; TODO: parse content range response headers, assemble the file from chunks
+                 display url
                  newline
+                 pretty-print : cons 'length-received-ranges : length received-ranges
+                 when : not : null? received-ranges
+                     pretty-print : cons 'received-start-end-car : car : map range-start-end received-ranges
+                 pretty-print headers
                  let-values : : (resp body) : http-get uri #:headers headers
-                    define headers : response-headers resp
-                    define content-range : assoc 'content-range headers
+                    define resp-headers : response-headers resp
+                    define content-range : assoc 'content-range resp-headers
                     ;; pretty-print size
                     ;; pretty-print resp
-                    ;; pretty-print headers
+                    pretty-print resp-headers
                     ;; pretty-print content-range
                     ;; pretty-print : if (string? body) body : bytevector->string body "ISO-8859-1"
                     cond

          
@@ 238,6 324,7 @@ define : download-file url
                           cdr content-range
                       else
                         let-values : : (start end newsize) : content-range->start-end-size content-range
+                            ;; pretty-print : list 'start-end start end
                             loop : if size size newsize
                                 merge-ranges
                                     cons : range (cons start end) body

          
@@ 345,14 432,6 @@ define : hash-and-mime-if-unknown! serve
                 cons (car served-file) new-served
 
 define : server-serve-file range-requested begin-end served-file
-   define 16B : expt 2 4
-   define 4KiB : expt 2 12
-   define 32KiB : expt 2 15
-   define 64KiB : expt 2 16
-   define 128KiB : expt 2 17
-   define 256KiB : expt 2 18
-   define 2MiB : expt 2 21
-   define 32MiB : expt 2 25
    define range-begin : car begin-end
    define filesize
        or : and=> (and=> (and=> (and=> served-file cdr) served-accesspath) stat) stat:size