@@ 0,0 1,119 @@
+(defstruct (token) text kind)
+
+(defclass lexer ()
+ ((source :accessor source :initarg :source)
+ (current-position :accessor current-position :initarg :current-position))
+ (:default-initargs
+ :current-position 0))
+
+(define-condition lexing-error (error)
+ ((text :initarg :text :reader text)))
+
+(defmethod current-char ((l lexer))
+ (if (< (current-position l) (length (source l)))
+ (char (source l) (current-position l))
+ #\Nul))
+
+(defmethod next-char ((l lexer))
+ (incf (current-position l)))
+
+(defmethod peek ((l lexer))
+ (if (>= (1+ (current-position l)) (length (source l)))
+ #\Nul
+ (char (source l) (1+ (current-position l)))))
+
+(defun lex-error (msg)
+ (error 'lexing-error :text (format nil "Lexing error: ~a~%" msg)))
+
+(defmethod skip-whitespace ((l lexer))
+ ;; nil primes the lexer
+ (loop while (member (current-char l) '(#\Return #\Tab #\Space nil) :test #'equal)
+ do (next-char l)))
+
+(defmethod skip-comment ((l lexer))
+ (if (equal (current-char l) #\#)
+ (loop until (or (char= (current-char l) #\Nul)
+ (char= (current-char l) #\Newline))
+ do (next-char l))))
+
+(defun is-keyword (s)
+ (cadar (member (string-upcase s) '(("LABEL" :LABEL)
+ ("GOTO" :GOTO)
+ ("PRINT" :PRINT)
+ ("INPUT" :INPUT)
+ ("LET" :LET)
+ ("IF" :IF)
+ ("THEN" :THEN)
+ ("ENDIF" :ENDIF)
+ ("WHILE" :WHILE)
+ ("REPEAT" :REPEAT)
+ ("ENDWHILE" :ENDWHILE))
+ :key #'car :test #'string=)))
+
+(defmethod get-token ((l lexer))
+ (skip-whitespace l)
+ (skip-comment l)
+ (let* ((c (current-char l))
+ (token (case c
+ (#\+ (make-token :text c :kind :PLUS))
+ (#\- (make-token :text c :kind :MINUS))
+ (#\* (make-token :text c :kind :ASTERISK))
+ (#\/ (make-token :text c :kind :SLASH))
+ (#\Newline (make-token :text c :kind :NEWLINE))
+ (#\Nul (make-token :text c :kind :EOF))
+ (#\= (if (equal (peek l) #\=)
+ (progn (next-char l) (make-token :text "==" :kind :EQEQ))
+ (make-token :text "=" :kind :EQ)))
+ (#\> (if (equal (peek l) #\=)
+ (progn (next-char l) (make-token :text ">=" :kind :GTEQ))
+ (make-token :text ">" :kind :GT)))
+ (#\< (if (equal (peek l) #\=)
+ (progn (next-char l) (make-token :text "<=" :kind :LTEQ))
+ (make-token :text "<" :kind :LT)))
+ (#\! (if (equal (peek l) #\=)
+ (progn (next-char l) (make-token :text "!=" :kind :NOTEQ))
+ (lex-error (format nil "Expected != instead got: ~a" c))))
+ ;; strings
+ (#\" (progn (next-char l)
+ (let ((start-position (current-position l)))
+ (loop until (equal #\" (current-char l))
+ if (member (current-char l) '(#\Return #\Newline #\Tab #\\ #\%) :test #'equal)
+ do (lex-error (format nil "illegal character in string: ~a" (current-char l)))
+ do (next-char l))
+ (make-token :text (subseq (source l) start-position (1+ (current-position l)))
+ :kind :STRING))))
+
+ ;; numbers
+ ((#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0)
+ (let ((start-position (current-position l)))
+ (loop while (digit-char-p (peek l))
+ do (next-char l))
+ (if (equal (peek l) #\.)
+ (progn
+ (next-char l)
+ (if (digit-char-p (peek l))
+ (loop while (digit-char-p (peek l))
+ do (next-char l))
+ (lex-error "illegal character in number"))))
+ (make-token :text (subseq (source l) start-position (1+ (current-position l))) :kind :NUMBER)))
+
+ ;; identifiers and keywords
+ (otherwise (cond ((alpha-char-p c)
+ (let ((start-position (current-position l)))
+ (loop while (alphanumericp (peek l))
+ do (next-char l))
+ (let* ((str (subseq (source l) start-position (1+ (current-position l))))
+ (keyword (is-keyword str)))
+ (if keyword
+ (make-token :text str :kind keyword)
+ (make-token :text str :kind :IDENTIFIER)))))
+ (t (lex-error (format nil "unknown token: ~a" c))))))))
+ (next-char l)
+ token))
+
+;;; pfft tests
+
+(let* ((l (make-instance 'lexer :source "IF+-023 foo*THEN/"))
+ (tok nil))
+ (loop until (and tok (eq (token-kind tok) :eof))
+ collect (setf tok (get-token l))))