revamp tokenizing
1 files changed, 96 insertions(+), 112 deletions(-)

M ttc.lisp
M ttc.lisp +96 -112
@@ 1,9 1,6 @@ 
-;;; a teeny tiny compiler:
-;;; https://web.eecs.utk.edu/~azh/blog/teenytinycompiler1.html
-
 ;;; lexer
 
-(defstruct (token) text kind)
+(defstruct (token) text kind (value nil))
 
 (defclass lexer ()
   ((source :accessor source :initarg :source)

          
@@ 36,94 33,99 @@ 
       (loop while (member (current-char l) '(#\Return #\Tab #\Space nil) :test #'equal)
             do (next-char l))))
 
-(defgeneric skip-comment (lexer)
-  (:method ((l lexer))
-    (if (equal (current-char l) #\#)
-        (loop until (or (char= (current-char l) #\Nul)
-                        (char= (current-char l) #\Newline))
-              do (next-char l)))))
+(defmacro string-switch (arg (&rest key-values))
+  `(cond
+     ,@(loop for (k v) in key-values
+             collect
+             `((string= ,arg ,k) ,v))))
+
+(defun keyword? (str)
+  (string-switch (string-upcase str)
+      (
+       ;; constants
+       ("HR"                   (make-token :text str :kind :constant :value 0))
+       ("ENGINEERING"          (make-token :text str :kind :constant :value 1))
+       ("LEGAL"                (make-token :text str :kind :constant :value 2))
+       ("PR"                   (make-token :text str :kind :constant :value 3))
+       ("FINANCE"              (make-token :text str :kind :constant :value 4))
+       ("MARKETING"            (make-token :text str :kind :constant :value 5))
+       ("R&D"                  (make-token :text str :kind :constant :value 6))
+       ("SALES"                (make-token :text str :kind :constant :value 7))
+       ("MANUFACTURING"        (make-token :text str :kind :constant :value 8))
+       ("EXECUTIVE MANAGEMENT" (make-token :text str :kind :constant :value 9))
 
-(defun is-keyword (s)
-  (cadar (member (string-upcase s) '(("LABEL"    :LABEL)
-                                     ("GOTO"     :GOTO)
-                                     ("PRINT"    :PRINT)
-                                     ("INPUT"    :INPUT)
-                                     ("LET"      :LET)
-                                     ("IF"       :IF)
-                                     ("THEN"     :THEN)
-                                     ("ENDIF"    :ENDIF)
-                                     ("WHILE"    :WHILE)
-                                     ("REPEAT"   :REPEAT)
-                                     ("ENDWHILE" :ENDWHILE))
-                 :key #'car :test #'equal)))
+       ;; registers
+       ("CUSTOMER EXPERIENCE"        (make-token :text str :kind :register :value "r0"))
+       ("REVENUE STREAMS"            (make-token :text str :kind :register :value "r1"))
+       ("CORE COMPETENCIES"          (make-token :text str :kind :register :value "r2"))
+       ("BEST PRACTICES"             (make-token :text str :kind :register :value "r3"))
+       ("STAKEHOLDER ENGAGEMENT"     (make-token :text str :kind :register :value "r4"))
+       ("KEY PERFORMANCE INDICATORS" (make-token :text str :kind :register :value "r5"))
+       ("RETURN ON INVESTMENT"       (make-token :text str :kind :register :value "r6"))
+       ("ASSETS"                     (make-token :text str :kind :register :value "r7"))
+
+       ;; operations
+       ("INNOVATE"       (make-token :text str :kind :operation :value :increment))
+       ("VALUE-ADD"      (make-token :text str :kind :operation :value :increment))
+       ("STREAMLINE"     (make-token :text str :kind :operation :value :decrement))
+       ("OPTIMIZE"       (make-token :text str :kind :operation :value :decrement))
+       ("REVAMP"         (make-token :text str :kind :operation :value :negate))
+       ("OVERHAUL"       (make-token :text str :kind :operation :value :negate))
+       ("AMPLIFY"        (make-token :text str :kind :operation :value :double))
+       ("INCENTIVIZE"    (make-token :text str :kind :operation :value :double))
+       ("BACKBURNER"     (make-token :text str :kind :operation :value :halve))
+       ("PARADIGM SHIFT" (make-token :text str :kind :operation :value :random))
+       ("ALIGN"          (make-token :text str :kind :operation :value :assignment))
+       ("WITH"           (make-token :text str :kind :operation :value :designator))
+       ("SYNERGIZE"      (make-token :text str :kind :operation :value :add))
+       ("INTEGRATE"      (make-token :text str :kind :operation :value :add))
+       ("AND"            (make-token :text str :kind :operation :value :NOOP))
+       ("DIFFERENTIATE"  (make-token :text str :kind :operation :value :subtract))
+       ("CROWDSOURCE"    (make-token :text str :kind :operation :value :input))
+       ("DELIVER"        (make-token :text str :kind :operation :value :output))
+       ("PRODUCE"        (make-token :text str :kind :operation :value :output))
+       ("MOVING FORWARD" (make-token :text str :kind :operation :value :label))
+       ("GOING FORWARD"  (make-token :text str :kind :operation :value :label))
+       ("CIRCLE BACK TO" (make-token :text str :kind :operation :value :jump))
+       ("REVISIT"        (make-token :text str :kind :operation :value :jump))
+       ("PIVOT"          (make-token :text str :kind :operation :value :jz))
+       ("TO"             (make-token :text str :kind :operation :value :target))
+       ("RESTRUCTURE"    (make-token :text str :kind :operation :value :jneg)))))
 
 (defgeneric get-token (lexer)
   (:method ((l lexer))
     (skip-whitespace l)
-    (skip-comment l)
     (let* ((c (current-char l))
            (token (case c
-                    (#\+       (make-token :text c :kind :PLUS))
-                    (#\-       (make-token :text c :kind :MINUS))
-                    (#\*       (make-token :text c :kind :ASTERISK))
-                    (#\/       (make-token :text c :kind :SLASH))
+                    (#\,       (make-token :text c :kind :NOOP))
                     (#\Newline (make-token :text c :kind :NEWLINE))
                     (#\Nul     (make-token :text c :kind :EOF))
-
-                    (#\= (if (equal (peek l) #\=)
-                             (progn (next-char l) (make-token :text "==" :kind :EQEQ))
-                             (make-token :text "="  :kind :EQ)))
-
-                    (#\> (if (equal (peek l) #\=)
-                             (progn (next-char l) (make-token :text ">="  :kind :GTEQ))
-                             (make-token :text ">"  :kind :GT)))
-
-                    (#\< (if (equal (peek l) #\=)
-                             (progn (next-char l) (make-token :text "<="  :kind :LTEQ))
-                             (make-token :text "<"  :kind :LT)))
-
-                    (#\! (if (equal (peek l) #\=)
-                             (progn (next-char l) (make-token :text "!="  :kind :NOTEQ))
-                             (error 'lexing-error
-                                    :text (format nil "Expected != instead got: ~a" c))))
-                    ;; strings
-                    (#\" (progn (next-char l)
-                                (let ((start-position (current-position l)))
-                                  (loop until (equal #\" (current-char l))
-                                        if (member (current-char l) '(#\Return #\Newline #\Tab #\\ #\%) :test #'equal)
-                                          do (error 'lexing-error
-                                                    :text (format nil "illegal character in string: ~a" (current-char l)))
-                                        do (next-char l))
-                                  (make-token :text (subseq (source l) start-position (current-position l))
-                                              :kind :STRING))))
-
-                    ;; numbers
-                    ((#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0)
-                     (let ((start-position (current-position l)))
-                       (loop while (digit-char-p (peek l))
-                             do (next-char l))
-                       (if (equal (peek l) #\.)
-                           (progn
-                             (next-char l)
-                             (if (digit-char-p (peek l))
-                                 (loop while (digit-char-p (peek l))
-                                       do (next-char l))
-                                 (error 'lexing-error
-                                        :text "illegal character in number"))))
-                       (make-token :text (subseq (source l) start-position (1+ (current-position l))) :kind :NUMBER)))
-
-                    ;; identifiers and keywords
-                    (otherwise (cond ((alpha-char-p c)
-                                      (let ((start-position (current-position l)))
-                                        (loop while (alphanumericp (peek l))
-                                              do (next-char l))
-                                        (let* ((str (subseq (source l) start-position (1+ (current-position l))))
-                                               (keyword (is-keyword str)))
-                                          (if keyword
-                                              (make-token :text str :kind keyword)
-                                              (make-token :text str :kind :IDENTIFIER)))))
-                                     (t (error 'lexing-error
-                                               :text (format nil "unknown token: ~a" c))))))))
+                    (otherwise
+                     (cond ((alpha-char-p c)
+                            (let ((start-position (current-position l)))
+                              (loop while (or (alpha-char-p (peek l))
+                                              (member (peek l) '(#\& #\-) :test #'equal)
+                                              (and (member (peek l) '(#\Space) :test #'equal)
+                                                   ;; this *looks* horrible but seems to work:
+                                                   ;; next char is a space but the preceding string is part of a multi-word token
+                                                   (member (string-upcase (subseq (source l) start-position (1+ (current-position l))))
+                                                           '("EXECUTIVE" "PARADIGM" "MOVING" "GOING" "CIRCLE"
+                                                             "CIRCLE BACK" "CUSTOMER" "REVENUE" "CORE" "BEST"
+                                                             "STAKEHOLDER" "KEY" "KEY PERFORMANCE" "RETURN" "RETURN ON")
+                                                           :test #'equal)))
+                                    do (next-char l))
+                              (let* ((str (subseq (source l) start-position (1+ (current-position l))))
+                                     (keyword (keyword? str)))
+                                (if keyword
+                                    keyword
+                                    (progn
+                                      (loop until (equal #\Newline (current-char l))
+                                            do (next-char l))
+                                      ;; FIXME: I think there's a condition here that says strings should disallow keywords
+                                      (make-token :text (subseq (source l) start-position (current-position l))
+                                                  :kind :STRING))))))
+                           (t (error 'lexing-error
+                                     :text (format nil "unknown token: ~a" c))))))))
       (next-char l)
       token)))
 

          
@@ 137,14 139,13 @@ 
 
 (defclass parser ()
   ((lexer :accessor lexer :initarg :lexer)
-   (emitter :accessor emitter :initarg :emitter)
    (current-token :accessor current-token :initform nil)
    (peek-token :accessor peek-token :initform nil)
    (symbols :accessor symbols :initform '())
    (labels-declared :accessor labels-declared :initform '())
    (labels-gotoed :accessor labels-gotoed :initform '())))
 
-(defmethod initialize-instance :after ((p parser) &rest args)
+(defmethod initialize-instance :after ((p parser) &rest args)  ; how do you suppress unused arguments warnings?
   (next-token p)
   (next-token p))
 

          
@@ 353,32 354,15 @@ 
   (:method ((e emitter) new-code)
     (setf (header e) (format nil "~a~a~%" (header e) new-code))))
 
-;;; sample program
+;;; testing
 
 (let* ((l (make-instance 'lexer :source "
-
-# Compute average of given values.
-
-LET a = 0
-WHILE a < 1 REPEAT
-    PRINT \"Enter number of scores: \"
-    INPUT a
-ENDWHILE
-
-LET b = 0
-LET s = 0
-PRINT \"Enter one value at a time: \"
-WHILE b < a REPEAT
-    INPUT c
-    LET s = s + c
-    LET b = b + 1
-ENDWHILE
-
-PRINT \"Average: \"
-PRINT s / a
-
-"))
-       (e (make-instance 'emitter))
-       (p (make-instance 'parser :lexer l :emitter e)))
-  (program p)
-  (format t "~a~a~%" (header (emitter p)) (code (emitter p))))
+Engineering, Marketing, and HR
+Executive Management stakeholder engagement
+align R&D Engineering and HR with stakeholder engagement
+")))
+  (let ((ct nil) (list nil))
+    (loop until (and ct (eq (token-kind ct) :EOF))
+          do (setf ct (get-token l))
+             (push ct list) 
+             (format t "~a~%" (if (token-value ct) (token-value ct) (token-kind ct))))))