@@ 1,40 1,47 @@
+;;; lexer
+
(defstruct (token) text kind)
(defclass lexer ()
((source :accessor source :initarg :source)
- (current-position :accessor current-position :initarg :current-position))
- (:default-initargs
- :current-position 0))
+ (current-position :accessor current-position
+ :initarg :current-position
+ :initform 0)))
(define-condition lexing-error (error)
((text :initarg :text :reader text)))
-(defmethod current-char ((l lexer))
- (if (< (current-position l) (length (source l)))
- (char (source l) (current-position l))
- #\Nul))
+(defgeneric current-char (lexer)
+ (:method ((l lexer))
+ (if (< (current-position l) (length (source l)))
+ (char (source l) (current-position l))
+ #\Nul)))
-(defmethod next-char ((l lexer))
- (incf (current-position l)))
+(defgeneric next-char (lexer)
+ (:method ((l lexer))
+ (incf (current-position l))))
-(defmethod peek ((l lexer))
- (if (>= (1+ (current-position l)) (length (source l)))
- #\Nul
- (char (source l) (1+ (current-position l)))))
+(defgeneric peek (lexer)
+ (:method ((l lexer))
+ (if (>= (1+ (current-position l)) (length (source l)))
+ #\Nul
+ (char (source l) (1+ (current-position l))))))
(defun lex-error (msg)
(error 'lexing-error :text (format nil "Lexing error: ~a~%" msg)))
-(defmethod skip-whitespace ((l lexer))
- ;; nil primes the lexer
- (loop while (member (current-char l) '(#\Return #\Tab #\Space nil) :test #'equal)
- do (next-char l)))
+(defgeneric skip-whitespace (lexer)
+ (:method ((l lexer))
+ ;; nil primes the lexer; FIXME make this an :after method
+ (loop while (member (current-char l) '(#\Return #\Tab #\Space nil) :test #'equal)
+ do (next-char l))))
-(defmethod skip-comment ((l lexer))
- (if (equal (current-char l) #\#)
- (loop until (or (char= (current-char l) #\Nul)
- (char= (current-char l) #\Newline))
- do (next-char l))))
+(defgeneric skip-comment (lexer)
+ (:method ((l lexer))
+ (if (equal (current-char l) #\#)
+ (loop until (or (char= (current-char l) #\Nul)
+ (char= (current-char l) #\Newline))
+ do (next-char l)))))
(defun is-keyword (s)
(cadar (member (string-upcase s) '(("LABEL" :LABEL)
@@ 48,72 55,244 @@
("WHILE" :WHILE)
("REPEAT" :REPEAT)
("ENDWHILE" :ENDWHILE))
- :key #'car :test #'string=)))
+ :key #'car :test #'equal)))
+
+(defgeneric get-token (lexer)
+ (:method ((l lexer))
+ (skip-whitespace l)
+ (skip-comment l)
+ (let* ((c (current-char l))
+ (token (case c
+ (#\+ (make-token :text c :kind :PLUS))
+ (#\- (make-token :text c :kind :MINUS))
+ (#\* (make-token :text c :kind :ASTERISK))
+ (#\/ (make-token :text c :kind :SLASH))
+ (#\Newline (make-token :text c :kind :NEWLINE))
+ (#\Nul (make-token :text c :kind :EOF))
+ (#\= (if (equal (peek l) #\=)
+ (progn (next-char l) (make-token :text "==" :kind :EQEQ))
+ (make-token :text "=" :kind :EQ)))
+ (#\> (if (equal (peek l) #\=)
+ (progn (next-char l) (make-token :text ">=" :kind :GTEQ))
+ (make-token :text ">" :kind :GT)))
+ (#\< (if (equal (peek l) #\=)
+ (progn (next-char l) (make-token :text "<=" :kind :LTEQ))
+ (make-token :text "<" :kind :LT)))
+ (#\! (if (equal (peek l) #\=)
+ (progn (next-char l) (make-token :text "!=" :kind :NOTEQ))
+ (lex-error (format nil "Expected != instead got: ~a" c))))
+ ;; strings
+ (#\" (progn (next-char l)
+ (let ((start-position (current-position l)))
+ (loop until (equal #\" (current-char l))
+ if (member (current-char l) '(#\Return #\Newline #\Tab #\\ #\%) :test #'equal)
+ do (lex-error (format nil "illegal character in string: ~a" (current-char l)))
+ do (next-char l))
+ (make-token :text (subseq (source l) start-position (1+ (current-position l)))
+ :kind :STRING))))
+
+ ;; numbers
+ ((#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0)
+ (let ((start-position (current-position l)))
+ (loop while (digit-char-p (peek l))
+ do (next-char l))
+ (if (equal (peek l) #\.)
+ (progn
+ (next-char l)
+ (if (digit-char-p (peek l))
+ (loop while (digit-char-p (peek l))
+ do (next-char l))
+ (lex-error "illegal character in number"))))
+ (make-token :text (subseq (source l) start-position (1+ (current-position l))) :kind :NUMBER)))
-(defmethod get-token ((l lexer))
- (skip-whitespace l)
- (skip-comment l)
- (let* ((c (current-char l))
- (token (case c
- (#\+ (make-token :text c :kind :PLUS))
- (#\- (make-token :text c :kind :MINUS))
- (#\* (make-token :text c :kind :ASTERISK))
- (#\/ (make-token :text c :kind :SLASH))
- (#\Newline (make-token :text c :kind :NEWLINE))
- (#\Nul (make-token :text c :kind :EOF))
- (#\= (if (equal (peek l) #\=)
- (progn (next-char l) (make-token :text "==" :kind :EQEQ))
- (make-token :text "=" :kind :EQ)))
- (#\> (if (equal (peek l) #\=)
- (progn (next-char l) (make-token :text ">=" :kind :GTEQ))
- (make-token :text ">" :kind :GT)))
- (#\< (if (equal (peek l) #\=)
- (progn (next-char l) (make-token :text "<=" :kind :LTEQ))
- (make-token :text "<" :kind :LT)))
- (#\! (if (equal (peek l) #\=)
- (progn (next-char l) (make-token :text "!=" :kind :NOTEQ))
- (lex-error (format nil "Expected != instead got: ~a" c))))
- ;; strings
- (#\" (progn (next-char l)
- (let ((start-position (current-position l)))
- (loop until (equal #\" (current-char l))
- if (member (current-char l) '(#\Return #\Newline #\Tab #\\ #\%) :test #'equal)
- do (lex-error (format nil "illegal character in string: ~a" (current-char l)))
- do (next-char l))
- (make-token :text (subseq (source l) start-position (1+ (current-position l)))
- :kind :STRING))))
+ ;; identifiers and keywords
+ (otherwise (cond ((alpha-char-p c)
+ (let ((start-position (current-position l)))
+ (loop while (alphanumericp (peek l))
+ do (next-char l))
+ (let* ((str (subseq (source l) start-position (1+ (current-position l))))
+ (keyword (is-keyword str)))
+ (if keyword
+ (make-token :text str :kind keyword)
+ (make-token :text str :kind :IDENTIFIER)))))
+ (t (lex-error (format nil "unknown token: ~a" c))))))))
+ (next-char l)
+ token)))
+
+;;; parser
+
+(define-condition parsing-error (error)
+ ((text :initarg :text :reader text)))
+
+;; Parser object keeps track of current token and checks if the code
+;; matches the grammar.
+
+(defclass parser ()
+ ((lexer :accessor lexer :initarg :lexer)
+ (current-token :accessor current-token :initform nil)
+ (peek-token :accessor peek-token :initform nil)
+ (symbols :accessor symbols :initform '())
+ (labels-declared :accessor labels-declared :initform '())
+ (labels-gotoed :accessor labels-gotoed :initform '())))
+
+(defmethod initialize-instance :after ((p parser) &rest args)
+ (next-token p)
+ (next-token p))
+
+(defgeneric check-token (parser kind)
+ (:method ((p parser) kind)
+ (eq kind (token-kind (current-token p)))))
+
+(defgeneric check-peek (parser kind)
+ (:method ((p parser) kind)
+ (eq kind (token-kind (peek-token p)))))
+
+(defgeneric match (parser kind)
+ (:method ((p parser) kind)
+ (if (check-token p kind)
+ (next-token p)
+ (error 'parsing-error :text (format nil "Expected ~a, got ~a" kind (current-token p))))))
+
+(defgeneric next-token (parser)
+ (:method ((p parser))
+ (setf (current-token p) (peek-token p)
+ (peek-token p) (get-token (lexer p)))))
+
+(defgeneric program (parser)
+ (:method ((p parser))
+ (format t "program~%")
+ (loop while (check-token p :NEWLINE)
+ do (next-token p))
+ (loop until (check-token p :EOF)
+ do (statement p))
+ (loop for str in (labels-gotoed p)
+ unless (member str (labels-declared p) :test #'equal)
+ do (error 'parsing-error :text (format nil "GOTO bad label: ~a" str)))))
- ;; numbers
- ((#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0)
- (let ((start-position (current-position l)))
- (loop while (digit-char-p (peek l))
- do (next-char l))
- (if (equal (peek l) #\.)
- (progn
- (next-char l)
- (if (digit-char-p (peek l))
- (loop while (digit-char-p (peek l))
- do (next-char l))
- (lex-error "illegal character in number"))))
- (make-token :text (subseq (source l) start-position (1+ (current-position l))) :kind :NUMBER)))
+(defgeneric statement (parser)
+ (:method ((p parser))
+ (format t "statement~%")
+ (cond ((check-token p :PRINT) (progn
+ (next-token p)
+ (if (check-token p :STRING)
+ (next-token p)
+ (expression p))))
+ ((check-token p :IF) (progn
+ (next-token p)
+ (comparison p)
+ (match p :THEN)
+ (newline p)
+ (loop until (check-token p :ENDIF)
+ do (statement p))
+ (match p :ENDIF)))
+ ((check-token p :WHILE) (progn
+ (next-token p)
+ (comparison p)
+ (match p :REPEAT)
+ (newline p)
+ (loop until (check-token p :ENDWHILE)
+ do (statement p))
+ (match p :ENDWHILE)))
+ ((check-token p :LABEL) (progn
+ (next-token p)
+ (if (member (token-text (current-token p)) (labels-declared p) :test #'equal)
+ (error 'parsing-error
+ :text (format nil "label already exists: ~a" (current-token p))))
+ (pushnew (token-text (current-token p)) (labels-declared p) :test #'equal)
+ (match p :IDENTIFIER)))
+ ((check-token p :GOTO) (progn
+ (next-token p)
+ (pushnew (token-text (current-token p)) (labels-gotoed p) :test #'equal)
+ (match p :IDENTIFIER)))
+ ((check-token p :LET) (progn
+ (next-token p)
+ (pushnew (token-text (current-token p)) (symbols p) :test #'equal)
+ (match p :IDENTIFIER)
+ (match p :EQ)
+ (expression p)))
+ ((check-token p :INPUT) (progn
+ (next-token p)
+ (pushnew (token-text (current-token p)) (symbols p) :test #'equal)
+ (match p :IDENTIFIER)))
+ (t (error 'parsing-error :text (format nil "Unknown token: ~a" (current-token p)))))
+ (newline p)))
+
+(defgeneric newline (parser)
+ (:method ((p parser))
+ (format t "newline~%")
+ (match p :NEWLINE)
+ (loop while (check-token p :NEWLINE)
+ do (next-token p))))
- ;; identifiers and keywords
- (otherwise (cond ((alpha-char-p c)
- (let ((start-position (current-position l)))
- (loop while (alphanumericp (peek l))
- do (next-char l))
- (let* ((str (subseq (source l) start-position (1+ (current-position l))))
- (keyword (is-keyword str)))
- (if keyword
- (make-token :text str :kind keyword)
- (make-token :text str :kind :IDENTIFIER)))))
- (t (lex-error (format nil "unknown token: ~a" c))))))))
- (next-char l)
- token))
+(defgeneric comparison (parser)
+ (:method ((p parser))
+ (format t "comparison~%")
+ (labels ((comparison-operator? ()
+ (some (lambda (kind) (check-token p kind)) '(:GT :GTEQ :LT :LTEQ :EQEQ :NOTEQ))))
+ (expression p)
+ (if (comparison-operator?)
+ (progn
+ (next-token p)
+ (expression p))
+ (error 'parsing-error :text (format nil "Expected comparison, instead: ~a" (current-token p))))
+ (loop while (comparison-operator?)
+ do (next-token p)
+ (expression p)))))
+
+(defgeneric expression (parser)
+ (:method ((p parser))
+ (format t "expression~%")
+ (term p)
+ (loop while (or (check-token p :PLUS)
+ (check-token p :MINUS))
+ do (next-token p)
+ (term p))))
+
+(defgeneric term (parser)
+ (:method ((p parser))
+ (format t "term~%")
+ (unary p)
+ (loop while (or (check-token p :ASTERISK)
+ (check-token p :SLASH))
+ do (next-token p)
+ (unary p))))
-;;; pfft tests
+(defgeneric unary (parser)
+ (:method ((p parser))
+ (format t "unary~%")
+ (loop while (or (check-token p :PLUS)
+ (check-token p :MINUS))
+ do (next-token p))
+ (primary p)))
+
+(defgeneric primary (parser)
+ (:method ((p parser))
+ (format t "primary~%")
+ (cond ((check-token p :NUMBER) (next-token p))
+ ((check-token p :IDENTIFIER) (progn
+ (unless (member (token-text (current-token p)) (symbols p) :test #'equal)
+ (error 'parsing-error :text (format nil "referencing variable before assignment: ~a" (current-token p))))
+ (next-token p)))
+ (t (error 'parsing-error :text (format nil "Unexpected token: ~a" (current-token p)))))))
-(let* ((l (make-instance 'lexer :source "IF+-023 foo*THEN/"))
- (tok nil))
- (loop until (and tok (eq (token-kind tok) :eof))
- collect (setf tok (get-token l))))
+;;; "tests"
+(let* ((l (make-instance 'lexer :source "
+
+PRINT \"How many fibonacci numbers do you want?\"
+INPUT nums
+PRINT \"\"
+
+LET a = 0
+LET b = 1
+WHILE nums > 0 REPEAT
+ PRINT a
+ LET c = a + b
+ LET a = b
+ LET b = c
+ LET nums = nums - 1
+ENDWHILE
+
+"))
+ (p (make-instance 'parser :lexer l)))
+ (program p))
+