add part one

lexing
1 files changed, 119 insertions(+), 0 deletions(-)

A => ttc.lisp
A => ttc.lisp +119 -0
@@ 0,0 1,119 @@ 
+(defstruct (token) text kind)
+
+(defclass lexer ()
+  ((source :accessor source :initarg :source)
+   (current-position :accessor current-position :initarg :current-position))
+  (:default-initargs
+   :current-position 0))
+
+(define-condition lexing-error (error)
+  ((text :initarg :text :reader text)))
+
+(defmethod current-char ((l lexer))
+  (if (< (current-position l) (length (source l)))
+      (char (source l) (current-position l))
+      #\Nul))
+
+(defmethod next-char ((l lexer))
+  (incf (current-position l)))
+
+(defmethod peek ((l lexer))
+  (if (>= (1+ (current-position l)) (length (source l)))
+      #\Nul
+      (char (source l) (1+ (current-position l)))))
+
+(defun lex-error (msg)
+  (error 'lexing-error :text (format nil "Lexing error: ~a~%" msg)))
+
+(defmethod skip-whitespace ((l lexer))
+  ;; nil primes the lexer
+  (loop while (member (current-char l) '(#\Return #\Tab #\Space nil) :test #'equal)
+        do (next-char l)))
+
+(defmethod skip-comment ((l lexer))
+  (if (equal (current-char l) #\#)
+      (loop until (or (char= (current-char l) #\Nul)
+                      (char= (current-char l) #\Newline))
+            do (next-char l))))
+
+(defun is-keyword (s)
+  (cadar (member (string-upcase s) '(("LABEL"    :LABEL)
+                                     ("GOTO"     :GOTO)
+                                     ("PRINT"    :PRINT)
+                                     ("INPUT"    :INPUT)
+                                     ("LET"      :LET)
+                                     ("IF"       :IF)
+                                     ("THEN"     :THEN)
+                                     ("ENDIF"    :ENDIF)
+                                     ("WHILE"    :WHILE)
+                                     ("REPEAT"   :REPEAT)
+                                     ("ENDWHILE" :ENDWHILE))
+                 :key #'car :test #'string=)))
+
+(defmethod get-token ((l lexer))
+  (skip-whitespace l)
+  (skip-comment l)
+  (let* ((c (current-char l))
+         (token (case c
+                  (#\+       (make-token :text c :kind :PLUS))
+                  (#\-       (make-token :text c :kind :MINUS))
+                  (#\*       (make-token :text c :kind :ASTERISK))
+                  (#\/       (make-token :text c :kind :SLASH))
+                  (#\Newline (make-token :text c :kind :NEWLINE))
+                  (#\Nul     (make-token :text c :kind :EOF))
+                  (#\= (if (equal (peek l) #\=)
+                           (progn (next-char l) (make-token :text "==" :kind :EQEQ))
+                           (make-token :text "="  :kind :EQ)))
+                  (#\> (if (equal (peek l) #\=)
+                           (progn (next-char l) (make-token :text ">="  :kind :GTEQ))
+                           (make-token :text ">"  :kind :GT)))
+                  (#\< (if (equal (peek l) #\=)
+                           (progn (next-char l) (make-token :text "<="  :kind :LTEQ))
+                           (make-token :text "<"  :kind :LT)))
+                  (#\! (if (equal (peek l) #\=)
+                           (progn (next-char l) (make-token :text "!="  :kind :NOTEQ))
+                           (lex-error (format nil "Expected != instead got: ~a" c))))
+                  ;; strings
+                  (#\" (progn (next-char l)
+                              (let ((start-position (current-position l)))
+                                (loop until (equal #\" (current-char l))
+                                      if (member (current-char l) '(#\Return #\Newline #\Tab #\\ #\%) :test #'equal)
+                                        do (lex-error (format nil "illegal character in string: ~a" (current-char l)))
+                                      do (next-char l))
+                                (make-token :text (subseq (source l) start-position (1+ (current-position l)))
+                                            :kind :STRING))))
+
+                  ;; numbers
+                  ((#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0)
+                   (let ((start-position (current-position l)))
+                     (loop while (digit-char-p (peek l))
+                           do (next-char l))
+                     (if (equal (peek l) #\.)
+                         (progn
+                           (next-char l)
+                           (if (digit-char-p (peek l))
+                               (loop while (digit-char-p (peek l))
+                                     do (next-char l))
+                               (lex-error "illegal character in number"))))
+                     (make-token :text (subseq (source l) start-position (1+ (current-position l))) :kind :NUMBER)))
+
+                  ;; identifiers and keywords
+                  (otherwise (cond ((alpha-char-p c)
+                                    (let ((start-position (current-position l)))
+                                      (loop while (alphanumericp (peek l))
+                                            do (next-char l))
+                                      (let* ((str (subseq (source l) start-position (1+ (current-position l))))
+                                             (keyword (is-keyword str)))
+                                        (if keyword
+                                            (make-token :text str :kind keyword)
+                                            (make-token :text str :kind :IDENTIFIER)))))
+                                   (t (lex-error (format nil "unknown token: ~a" c))))))))
+    (next-char l)
+    token))
+
+;;; pfft tests
+
+(let* ((l (make-instance 'lexer :source "IF+-023 foo*THEN/"))
+       (tok nil))
+  (loop until (and tok (eq (token-kind tok) :eof))
+        collect (setf tok (get-token l))))