# HG changeset patch # User Arne Babenhauserheide # Date 1597092570 -7200 # Mon Aug 10 22:49:30 2020 +0200 # Node ID bb2584da0e480c7b038ed79f31d10a78d34d4655 # Parent 363a584d27eb7224672be6e56a509f95a2c7579f implement chunked download diff --git a/run-wispserve.w b/run-wispserve.w --- a/run-wispserve.w +++ b/run-wispserve.w @@ -47,7 +47,6 @@ lazy : member "--lazy" arguments serve (second arguments) (second ip-opt) (string->number (second port-opt)) (not lazy) else - write : download-file : car arguments - newline + download-file : car arguments diff --git a/wispserve/serve.w b/wispserve/serve.w --- a/wispserve/serve.w +++ b/wispserve/serve.w @@ -122,14 +122,41 @@ define-record-type range start-end data . range? - start-end range-start-end ;; cons start end + start-end range-start-end ;; cons start end, start included, end excluded data range-data +define 16B : expt 2 4 +define 4KiB : expt 2 12 +define 32KiB : expt 2 15 +define 64KiB : expt 2 16 +define 128KiB : expt 2 17 +define 256KiB : expt 2 18 +define 2MiB : expt 2 21 +define 32MiB : expt 2 25 +define 1GiB : expt 2 30 +define 64GiB : expt 2 36 +define 8TiB : expt 2 43 + define : range-start-> a b . "order the ranges, range with larger start first." > : car : range-start-end a car : range-start-end b +define : range-start-< a b + . "order the ranges, range with larger start first." + < : car : range-start-end a + car : range-start-end b + +define : range-end-> a b + . "order the ranges, range with larger start first." + > : cdr : range-start-end a + cdr : range-start-end b + +define : range-end-< a b + . "order the ranges, range with larger start first." + < : cdr : range-start-end a + cdr : range-start-end b + define : merge-ranges received-ranges . "merge ranges in RECEIVED-RANGES into a new list, ordered with the range with the highest start first." let loop : (merged '()) (original (sort received-ranges range-start->)) @@ -162,7 +189,7 @@ define : content-range->start-end-size content-range match content-range : 'content-range 'bytes (start . end) size - values start end : if (equal? size '*) #f size + values start end : if (equal? size '* ) #f size : range format #t "content-range not in canonical format: ~A\n" range let : : range-string : string-drop (cdr content-range) (string-length "bytes ") @@ -174,59 +201,118 @@ . #f string->number size +define : initial-ranges-bytes-full start size + . "generate initial ranges as a single range for the whole size" + list : cons start size + +define : initial-ranges-bytes-fixed-size start size chunksize + . "Generate initial ranges wit ha fixed chunksize" + let loop : (ranges '()) (start 0) (remaining size) + if : <= remaining chunksize + reverse + cons : cons start : + start (- remaining 1) + . ranges + loop + cons : cons start : + start (- chunksize 1) + . ranges + + start chunksize + - remaining chunksize + +define : initial-ranges-32k start size + initial-ranges-bytes-fixed-size start size 32KiB + +define : initial-ranges-tiered start size + . "Ranges that balance chunksize vs. chunk-count. The goal is to have a number of ranges equal to the square root of the 32KiB blocks." + initial-ranges-bytes-fixed-size start size + cond + : < size 64KiB + . 32KiB ;; up to 2 chunks + : < size 256KiB + . 64KiB ;; up to 4 chunks + : < size 2MiB + . 256KiB ;; up to 8 chunks + : < size 32MiB + . 2MiB ;; up to 16 chunks + : < size 1GiB + . 32MiB ;; up to 32 chunks + : < size 64GiB + . 1GiB ;; up to 64 chunks + : < size 8TiB + . 64GiB ;; up to 128 chunks + else + . 8TiB ;; far in the future + + define : missing-ranges-bytes size received-ranges + define required : initial-ranges-tiered 0 size if : null? received-ranges - list : cons 0 size - let : : ranges : map range-start-end : sort received-ranges range-start-> - define missing-at-end - > {size - 1} - cdr : first ranges - define missing-initial - if : not missing-at-end - list - list - cons : + 1 : cdr : first ranges - . {size - 1} - let loop : (missing missing-initial) (seen '()) (unseen ranges) + . required + let + : ranges-by-start : map range-start-end : sort received-ranges range-start-< + let loop : (missing '()) (to-check required) (received-by-start ranges-by-start) + ;; pretty-print + ;; ` : missing ,missing + ;; to-check ,to-check + ;; received-by-start ,received-by-start cond - : null? unseen + : null? to-check . missing - : null? seen - loop missing - cons (car unseen) seen - cdr unseen + : null? received-by-start + append missing to-check else - let : (seen-start (car (first seen))) (unseen-end (cdr (first unseen))) - loop - if {seen-start > unseen-end} - cons (cons unseen-end seen-start) missing - . missing - cons (car unseen) seen - cdr seen - + let : (next-range (first to-check)) (received (first received-by-start)) + define next-start : car next-range + define next-end : cdr next-range + define received-start : car received + define received-end : cdr received + cond + ;; if all received are after the current chunk to check, the chunk is missing + {received-start > next-end} + loop : cons next-range missing + cdr to-check + . received-by-start + ;; if a received is completely before the current to check, skip it + {received-end < next-start} + loop missing to-check (cdr received-by-start) + ;; if a received contains the to-check, skip the to-check + : and {received-start <= next-start} {received-end >= next-end} + loop missing (cdr to-check) received-by-start + ;; if a received is overlapping with the start, adjust the range to-check to start after the received + : and {received-start <= next-start} {received-end > next-start} + loop missing (cons (cons {received-end + 1} next-end) (cdr to-check)) received-by-start + ;; if a received is overlapping with the end, add a reduced range to the missing + : and {received-end >= next-end} {received-start < next-end} + loop (cons (cons next-start received-start) missing) (cdr to-check) received-by-start + ;; if a received is contained in the to-check, split the to-check, adding the first to missing and keeping the second as to-check + : and {received-end < next-end} {received-start > next-start} + loop (cons (cons next-start received-start) missing) (cons (cons received-end next-end) (cdr to-check)) received-by-start define : download-file url let loop : (size #f) (received-ranges '()) define missing-ranges if : not size - list : cons 0 1 + list : cons 0 #f ;; minimal range header content so that the server can serve a content range missing-ranges-bytes size received-ranges + ;; pretty-print : list 'received-ranges-start-end : map range-start-end received-ranges if : null? missing-ranges - range-data (car received-ranges) + range-data : car received-ranges let* : uri : string->uri-reference url - range-to-request : list-ref missing-ranges : random-integer : length missing-ranges - headers `((range bytes ,range-to-request)) ;; minimal range header so that the server can serve a content range + range-to-request : list-ref missing-ranges 0 ;; : random-integer : length missing-ranges + headers `((range bytes ,range-to-request)) display "Downloading file " - display uri - ;; TODO: parse content range response headers, assemble the file from chunks + display url newline + pretty-print : cons 'length-received-ranges : length received-ranges + when : not : null? received-ranges + pretty-print : cons 'received-start-end-car : car : map range-start-end received-ranges + pretty-print headers let-values : : (resp body) : http-get uri #:headers headers - define headers : response-headers resp - define content-range : assoc 'content-range headers + define resp-headers : response-headers resp + define content-range : assoc 'content-range resp-headers ;; pretty-print size ;; pretty-print resp - ;; pretty-print headers + pretty-print resp-headers ;; pretty-print content-range ;; pretty-print : if (string? body) body : bytevector->string body "ISO-8859-1" cond @@ -238,6 +324,7 @@ cdr content-range else let-values : : (start end newsize) : content-range->start-end-size content-range + ;; pretty-print : list 'start-end start end loop : if size size newsize merge-ranges cons : range (cons start end) body @@ -345,14 +432,6 @@ cons (car served-file) new-served define : server-serve-file range-requested begin-end served-file - define 16B : expt 2 4 - define 4KiB : expt 2 12 - define 32KiB : expt 2 15 - define 64KiB : expt 2 16 - define 128KiB : expt 2 17 - define 256KiB : expt 2 18 - define 2MiB : expt 2 21 - define 32MiB : expt 2 25 define range-begin : car begin-end define filesize or : and=> (and=> (and=> (and=> served-file cdr) served-accesspath) stat) stat:size