# HG changeset patch # User Nolan Prescott # Date 1617509327 14400 # Sun Apr 04 00:08:47 2021 -0400 # Node ID ede9fca45949f0cc228c0402d8d2a2d89f4463a8 # Parent 0000000000000000000000000000000000000000 add part one lexing diff --git a/ttc.lisp b/ttc.lisp new file mode 100644 --- /dev/null +++ b/ttc.lisp @@ -0,0 +1,119 @@ +(defstruct (token) text kind) + +(defclass lexer () + ((source :accessor source :initarg :source) + (current-position :accessor current-position :initarg :current-position)) + (:default-initargs + :current-position 0)) + +(define-condition lexing-error (error) + ((text :initarg :text :reader text))) + +(defmethod current-char ((l lexer)) + (if (< (current-position l) (length (source l))) + (char (source l) (current-position l)) + #\Nul)) + +(defmethod next-char ((l lexer)) + (incf (current-position l))) + +(defmethod peek ((l lexer)) + (if (>= (1+ (current-position l)) (length (source l))) + #\Nul + (char (source l) (1+ (current-position l))))) + +(defun lex-error (msg) + (error 'lexing-error :text (format nil "Lexing error: ~a~%" msg))) + +(defmethod skip-whitespace ((l lexer)) + ;; nil primes the lexer + (loop while (member (current-char l) '(#\Return #\Tab #\Space nil) :test #'equal) + do (next-char l))) + +(defmethod skip-comment ((l lexer)) + (if (equal (current-char l) #\#) + (loop until (or (char= (current-char l) #\Nul) + (char= (current-char l) #\Newline)) + do (next-char l)))) + +(defun is-keyword (s) + (cadar (member (string-upcase s) '(("LABEL" :LABEL) + ("GOTO" :GOTO) + ("PRINT" :PRINT) + ("INPUT" :INPUT) + ("LET" :LET) + ("IF" :IF) + ("THEN" :THEN) + ("ENDIF" :ENDIF) + ("WHILE" :WHILE) + ("REPEAT" :REPEAT) + ("ENDWHILE" :ENDWHILE)) + :key #'car :test #'string=))) + +(defmethod get-token ((l lexer)) + (skip-whitespace l) + (skip-comment l) + (let* ((c (current-char l)) + (token (case c + (#\+ (make-token :text c :kind :PLUS)) + (#\- (make-token :text c :kind :MINUS)) + (#\* (make-token :text c :kind :ASTERISK)) + (#\/ (make-token :text c :kind :SLASH)) + (#\Newline (make-token :text c :kind :NEWLINE)) + (#\Nul (make-token :text c :kind :EOF)) + (#\= (if (equal (peek l) #\=) + (progn (next-char l) (make-token :text "==" :kind :EQEQ)) + (make-token :text "=" :kind :EQ))) + (#\> (if (equal (peek l) #\=) + (progn (next-char l) (make-token :text ">=" :kind :GTEQ)) + (make-token :text ">" :kind :GT))) + (#\< (if (equal (peek l) #\=) + (progn (next-char l) (make-token :text "<=" :kind :LTEQ)) + (make-token :text "<" :kind :LT))) + (#\! (if (equal (peek l) #\=) + (progn (next-char l) (make-token :text "!=" :kind :NOTEQ)) + (lex-error (format nil "Expected != instead got: ~a" c)))) + ;; strings + (#\" (progn (next-char l) + (let ((start-position (current-position l))) + (loop until (equal #\" (current-char l)) + if (member (current-char l) '(#\Return #\Newline #\Tab #\\ #\%) :test #'equal) + do (lex-error (format nil "illegal character in string: ~a" (current-char l))) + do (next-char l)) + (make-token :text (subseq (source l) start-position (1+ (current-position l))) + :kind :STRING)))) + + ;; numbers + ((#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0) + (let ((start-position (current-position l))) + (loop while (digit-char-p (peek l)) + do (next-char l)) + (if (equal (peek l) #\.) + (progn + (next-char l) + (if (digit-char-p (peek l)) + (loop while (digit-char-p (peek l)) + do (next-char l)) + (lex-error "illegal character in number")))) + (make-token :text (subseq (source l) start-position (1+ (current-position l))) :kind :NUMBER))) + + ;; identifiers and keywords + (otherwise (cond ((alpha-char-p c) + (let ((start-position (current-position l))) + (loop while (alphanumericp (peek l)) + do (next-char l)) + (let* ((str (subseq (source l) start-position (1+ (current-position l)))) + (keyword (is-keyword str))) + (if keyword + (make-token :text str :kind keyword) + (make-token :text str :kind :IDENTIFIER))))) + (t (lex-error (format nil "unknown token: ~a" c)))))))) + (next-char l) + token)) + +;;; pfft tests + +(let* ((l (make-instance 'lexer :source "IF+-023 foo*THEN/")) + (tok nil)) + (loop until (and tok (eq (token-kind tok) :eof)) + collect (setf tok (get-token l))))