wisp 1.0.9: unlimited underscores and clean UTF-8
5 files changed, 56 insertions(+), 79 deletions(-)

M NEWS
M configure.ac
M examples/doctests.scm
M wisp-reader.w
M wisp-scheme.w
M NEWS +4 -0
@@ 1,3 1,7 @@ 
+wisp 1.0.9
+- remove the limit on the number of prefix underscores (_). Thanks to Maxime Devos for a much cleaner algorithm!
+- only set the *port* encoding to UTF-8; do not change the encoding for the application. Thanks to Maxime Devos!
+
 wisp 1.0.8
 - wisp2lisp can now process stdin when called with - as filename.
   And it has help output.

          
M configure.ac +1 -1
@@ 1,7 1,7 @@ 
 dnl run `autoreconf -i` to generate a configure script. 
 dnl Then run ./configure to generate a Makefile.
 dnl Finally run make to generate the project.
-AC_INIT([wisp], [1.0.8],
+AC_INIT([wisp], [1.0.9],
         [arne_bab@web.de])
 # Add macros in m4/ to ensure that wisp builds without having Guile in the aclocal path
 AC_CONFIG_MACRO_DIR([m4])

          
M examples/doctests.scm +9 -1
@@ 38,7 38,15 @@ exec guile -L $(dirname $(dirname $(real
 ;;     #((tests (test-eqv 'A (A))))
 ;;     #f)
 
-;; With wisp, you currently need to use the literal
+;; To run the tests when (main args) is called:
+
+;; (import (examples doctests))
+;; (define %this-module (current-module))
+;; (define (main args)
+;;          (doctests-testmod %this-module))
+
+
+;; With wisp, you need to use the literal
 ;; ##
 ;;    tests
 ;;        test-equal ...

          
M wisp-reader.w +23 -42
@@ 4,8 4,9 @@ 
 
 ;;; adapted from guile-sweet: https://gitorious.org/nacre/guile-sweet/source/ae306867e371cb4b56e00bb60a50d9a0b8353109:sweet/common.scm
 
-;;; Copyright (C) 2005-2014 by David A. Wheeler and Alan Manuel K. Gloria
-;;; Copyright (C) Arne Babenhauserheide (2014--2021). All Rights Reserved.
+;;; Copyright (C) 2005--2014 by David A. Wheeler and Alan Manuel K. Gloria
+;;; Copyright (C) 2014--2023 Arne Babenhauserheide.
+;;; Copyright (C) 2023 Maxime Devos <maximedevos@telenet.be>
 
 ;;; Permission is hereby granted, free of charge, to any person
 ;;; obtaining a copy of this software and associated documentation

          
@@ 37,50 38,30 @@ define-module : language wisp spec
   . #:use-module : language scheme decompile-tree-il
   . #:export : wisp
 
-; Set locale to something which supports unicode. Required to avoid using fluids.
-catch #t
-      lambda :
-        setlocale LC_ALL ""
-      lambda : key . parameters
-        let : : locale-fallback "en_US.UTF-8"
-          format (current-error-port)
-              string-join
-                  list ";;; Warning: setlocale LC_ALL \"\" failed with ~A: ~A"
-                     . "switching to explicit ~A locale. Please setup your locale."
-                     . "If this fails, you might need glibc support for unicode locales.\n"
-                  .  "\n;;;          "
-              . key parameters locale-fallback
-          catch #t
-            lambda :
-              setlocale LC_ALL locale-fallback
-            lambda : key . parameters
-              format (current-error-port)
-                  string-join
-                      list ";;; Warning: fallback setlocale LC_ALL ~A failed with ~A: ~A"
-                         . "Not switching to Unicode."
-                         . "You might need glibc support for unicode locales.\n"
-                      .  "\n;;;          "
-                  . locale-fallback key parameters
-
 ;;;
 ;;; Language definition
 ;;;
 
-define wisp-pending-sexps : list
-
-define : read-one-wisp-sexp port env
-         ;; allow using "# foo" as #(foo).
-         read-hash-extend #\# : λ (chr port) #\#
-         cond
-            : eof-object? : peek-char port
-              read-char port ; return eof: we’re done
-            else
-              let : : chunk : wisp-scheme-read-chunk port
-                cond
-                  : not : null? chunk
-                    car chunk
-                  else
-                    . #f
+define (read-one-wisp-sexp port env)
+  ;; Read one expression from PORT (ENV is not used). Allow "# foo" as #(foo).
+  ;; The globally-acting read-hash-extend doesn't make much sense in
+  ;; parenthesized (non-Wisp) Scheme, so the extension is also added
+  ;; temporarily through a fluid. NOTE(review): the global call below remains.
+  read-hash-extend #\# : lambda (chr port) #\#
+  define %read-hash-procedures/parameter
+    fluid->parameter %read-hash-procedures
+  parameterize ((%read-hash-procedures/parameter
+                 `((#\# ,(lambda (chr port) #\# ))
+                   ,@(%read-hash-procedures/parameter))))
+    ;; Read Wisp files as UTF-8, to support non-ASCII characters.
+    ;; TODO: would be nice to support ';; coding: whatever' lines
+    ;; like in parenthesized Scheme.
+    set-port-encoding! port "UTF-8"
+    if (eof-object? (peek-char port))
+        read-char port ; return eof: we’re done
+        let ((chunk (wisp-scheme-read-chunk port)))
+          and (not (null? chunk)) ; <---- XXX: maybe (pair? chunk)
+               car chunk ; first expression of the chunk; #f if the chunk is empty
 
 define-language wisp
   . #:title "Wisp Scheme Syntax. See SRFI-119 for details. THIS IS EXPERIMENTAL, USE AT YOUR OWN RISK"

          
M wisp-scheme.w +19 -35
@@ 8,11 8,11 @@ exec guile -L . --language=wisp -s "$0" 
 ;; preprocessed file.
 
 ;; Limitations:
-;; - only unescapes up to 12 leading underscores at line start (\____________)
 ;; - in some cases the source line information is missing in backtraces.
 ;;   check for set-source-property!
 
-;; Copyright (C) Arne Babenhauserheide (2014--2021). All Rights Reserved.
+;; Copyright (C) 2014--2023 Arne Babenhauserheide. All Rights Reserved.
+;; Copyright (C) 2023 Maxime Devos <maximedevos@telenet.be>
 
 ;; Permission is hereby granted, free of charge, to any person
 ;; obtaining a copy of this software and associated documentation

          
@@ 616,38 616,22 @@ define : wisp-scheme-strip-indentation-m
                   cdr unprocessed
 
 define : wisp-unescape-underscore-and-colon code
-         . "replace \\_ and \\: by _ and :"
-         match code
-             : a ...
-               map wisp-unescape-underscore-and-colon a
-             '\_
-               . '_
-             '\__
-               . '__
-             '\___
-               . '___
-             '\____
-               . '____
-             '\_____
-               . '_____
-             '\______
-               . '______
-             '\_______
-               . '_______
-             '\________
-               . '________
-             '\_________
-               . '_________
-             '\__________
-               . '__________
-             '\___________
-               . '___________
-             '\____________
-               . '____________
-             '\:
-               . ':
-             a
-               . a
+  . "replace \\_ and \\: by _ and :"
+  cond
+      : list? code
+        map wisp-unescape-underscore-and-colon code ; recurse into every element of a list
+      : eq? code '\:
+        . ':
+      ;; A symbol that is a backslash followed only by underscores (like \____): drop the backslash.
+      :  symbol? code
+         let : : as-string : symbol->string code
+           if
+               and : >= (string-length as-string) 2 ; a backslash plus at least one underscore
+                     char=? (string-ref as-string 0) #\\ 
+                     string-every #\_ : substring as-string 1
+               string->symbol : substring as-string 1
+               . code
+      #t code ; anything else is returned unchanged
 
 
 define : wisp-replace-empty-eof code

          
@@ 698,7 682,7 @@ define : wisp-replace-paren-quotation-re
                with-input-from-string ;; hack to defer to read
                    string-append "#"
                        with-output-to-string
-                           λ :
+                           lambda :
                              write : map wisp-replace-paren-quotation-repr a
                                      current-output-port
                    . read