marginally better style

trying out lisp-critic, I don't hate _all_ the advice
1 files changed, 169 insertions(+), 173 deletions(-)

M stemmer.lisp
M stemmer.lisp +169 -173
@@ 3,70 3,65 @@ 
 
 (defclass stemword ()
   ((str     :accessor str     :initarg :str)
-   (R1start :accessor R1start :initarg :R1start)
-   (R2start :accessor R2start :initarg :R2start))
-  (:default-initargs :R1start 0 :R2start 0))
+   (R1start :accessor R1start :initarg :R1start :initform 0)
+   (R2start :accessor R2start :initarg :R2start :initform 0)))
 
-(defgeneric replace-suffix (word suffix replacement)
-  (:documentation "Replace a suffix and adjust R1start and R2start as needed."))
-
-(defgeneric strip-suffix (word n)
-  (:documentation "Remove suffix `n` from the right-hand side of a word."))
+(defgeneric has-suffix-p (word suffix)
+  (:documentation "Predicate for whether `word` ends in `suffix`")
+  (:method ((word stemword) suffix)
+    (and (>= (length (str word)) (length suffix))
+         (search suffix (str word) :test #'string=
+                                   :start2 (- (length (str word)) (length suffix))))))
 
 (defgeneric reset-R1R2 (word)
-  (:documentation "Resets R1start and R2start to ensure they are within bounds of the current string."))
+  (:documentation "Resets R1start and R2start to ensure they are within bounds of the current string.")
+  (:method ((word stemword))
+    (let ((word-length (length (str word))))
+      (when (> (R1start word) word-length)
+        (setf (R1start word) word-length))
+      (when (> (R2start word) word-length)
+        (setf (R2start word) word-length)))))
+
+(defgeneric replace-suffix (word suffix replacement)
+  (:documentation "Replace a suffix and adjust R1start and R2start as needed.")
+  (:method ((word stemword) suffix replacement-str)
+    (when (has-suffix-p word suffix)
+      (let* ((length-no-suffix (- (length (str word)) (length suffix)))
+             (chopped (subseq (str word) 0 length-no-suffix)))
+        (setf (str word) (strcat chopped replacement-str))
+        (reset-R1R2 word)))))
+
+(defgeneric strip-suffix (word n)
+  (:documentation "Remove suffix `n` from the right-hand side of a word.")
+  (:method ((word stemword) (s string))
+    (setf (str word) (subseq (str word) 0 (- (length (str word)) (length s))))
+    (reset-R1R2 word)))
 
 (defgeneric first-prefix (word prefixes)
-  (:documentation "Return the first prefix found or nil"))
+  (:documentation "Return the first prefix found or nil")
+  (:method ((word stemword) prefixes)
+    (loop for prefix in prefixes
+          if (search prefix (str word) :test #'string= :end2 (min (length (str word))
+                                                                  (length prefix)))
+            return prefix)))
 
 (defgeneric first-suffix (word suffixes)
-  (:documentation "Return the first suffix found or nil"))
-
-(defgeneric has-suffix? (word suffix)
-  (:documentation "Predicate for whether `word` ends in `suffix`"))
+  (:documentation "Return the first suffix found or nil")
+  (:method ((word stemword) suffixes)
+    (loop for suffix in suffixes
+          if (has-suffix-p word suffix)
+            return suffix)))
 
 (defgeneric strip-n-chars (word n)
-  (:documentation "Remove `n` characters from word"))
-
-(defmethod strip-n-chars ((word stemword) (n number))
-  (setf (str word) (subseq (str word) 0 (- (length (str word)) n)))
-  (reset-R1R2 word))
-
-(defmethod strip-suffix ((word stemword) (s string))
-  (setf (str word) (subseq (str word) 0 (- (length (str word)) (length s))))
-  (reset-R1R2 word))
-
-(defmethod reset-R1R2 ((word stemword))
-  (let ((word-length (length (str word))))
-    (if (> (R1start word) word-length)
-        (setf (R1start word) word-length))
-    (if (> (R2start word) word-length)
-        (setf (R2start word) word-length))))
+  (:documentation "Remove `n` characters from word")
+  (:method ((word stemword) (n number))
+    (setf (str word) (subseq (str word) 0 (- (length (str word)) n)))
+    (reset-R1R2 word)))
 
-(defmethod replace-suffix ((word stemword) suffix replacement-str)
-  (if (has-suffix? word suffix)
-      (let* ((length-no-suffix (- (length (str word)) (length suffix)))
-             (chopped (subseq (str word) 0 length-no-suffix)))
-        (setf (str word) (concatenate 'string chopped replacement-str))
-        (reset-R1R2 word))))
+(defun strcat (s1 s2)
+  (concatenate 'string s1 s2))
 
-(defmethod first-prefix ((word stemword) prefixes)
-  (loop for prefix in prefixes
-     if (search prefix (str word) :test #'string= :end2 (min (length (str word))
-                                                             (length prefix)))
-     return prefix))
-
-(defmethod has-suffix? ((word stemword) suffix)
-  (and (>= (length (str word)) (length suffix))
-       (search suffix (str word) :test #'string=
-               :start2 (- (length (str word)) (length suffix)))))
-
-(defmethod first-suffix ((word stemword) suffixes)
-  (loop for suffix in suffixes
-     if (has-suffix? word suffix)
-     return suffix))
-
-(defun lower-vowel? (char)
+(defun lower-vowel-p (char)
   (member char (list #\a #\e #\i #\o #\u #\y)))
 
 ;; Finds the region after the first non-vowel following a vowel, or a

          
@@ 76,39 71,40 @@ 
 (defun vnv-suffix (word offset)
   (let ((word (str word)))
     (loop for char across (subseq word offset)
-       and j from (1+ offset)
-       if (and (lower-vowel? char)
-               (< j (length word))
-               (not (lower-vowel? (char word j))))
-       return (1+ j)
-       finally (return (length word)))))
+          for j from (1+ offset)
+          if (and (lower-vowel-p char)
+                  (< j (length word))
+                  (not (lower-vowel-p (char word j))))
+            return (1+ j)
+          finally (return (length word)))))
 
 ;;; common (english)
 
 (defun normalize-apostrophes (word)
   (loop for c across (str word) and i from 0
-     if (member c (list #\RIGHT_SINGLE_QUOTATION_MARK
-                        #\LEFT_SINGLE_QUOTATION_MARK
-                        #\SINGLE_HIGH-REVERSED-9_QUOTATION_MARK))
-     do (setf (char (str word) i) #\')))
+        if (member c (list #\RIGHT_SINGLE_QUOTATION_MARK
+                           #\LEFT_SINGLE_QUOTATION_MARK
+                           #\SINGLE_HIGH-REVERSED-9_QUOTATION_MARK))
+          do (setf (char (str word) i) #\')))
 
 (defun trim-left-apostrophes (word)
-  (if (char= (char (str word) 0) #\')
-      (setf (str word) (subseq (str word) 1)
-            (R1start word) (1- (R1start word))
-            (R2start word) (1- (R2start word)))))
+  (when (char= (char (str word) 0) #\')
+    (setf (str word) (subseq (str word) 1)
+          (R1start word) (1- (R1start word))
+          (R2start word) (1- (R2start word)))))
 
 (defun capitalize-Ys (word)
-  (loop for c across (str word) and i from 0
-     if (and (char= c #\y)
-             (or (= i 0)
-                 (lower-vowel? (char (str word) (1- i)))))
-     do (setf (char (str word) i) #\Y)))
+  (loop for c across (str word)
+        for i from 0
+        if (and (char= c #\y)
+                (or (= i 0)
+                    (lower-vowel-p (char (str word) (1- i)))))
+          do (setf (char (str word) i) #\Y)))
 
 (defun uncapitalize-Ys (word)
   (loop for c across (str word) and i from 0
-     if (char= c #\Y)
-     do (setf (char (str word) i) #\y)))
+        if (char= c #\Y)
+          do (setf (char (str word) i) #\y)))
 
 ;; R1 is the region after the first non-vowel following a vowel,
 ;; or is the null region at the end of the word if there is no

          
@@ 131,11 127,11 @@ 
 (defmacro string-switch (arg (&rest key-values))
   `(cond
      ,@(loop for (k v) in key-values
-          collect
-            `(,(etypecase k
-                 (CONS `(loop for s in ',k thereis (string= ,arg s)))
-                 (STRING `(string= ,arg ,k))
-                 (BOOLEAN `,k))
+             collect
+             `(,(etypecase k
+                  (CONS `(loop for s in ',k thereis (string= ,arg s)))
+                  (STRING `(string= ,arg ,k))
+                  (BOOLEAN `,k))
                ,v))))
 
 (defun stem-special-string (str)

          
@@ 174,8 170,8 @@ 
 ;; (b) a vowel at the beginning of the word followed by a non-vowel.
 (defun ends-short-syllable (word i)
   (cond ((= i 2)
-         (and (lower-vowel? (char (str word) 0))
-              (not (lower-vowel? (char (str word) 1)))))
+         (and (lower-vowel-p (char (str word) 0))
+              (not (lower-vowel-p (char (str word) 1)))))
 
         ((>= i 3)
          (let ((s1 (char (str word) (- i 1)))

          
@@ 183,10 179,11 @@ 
                (s3 (char (str word) (- i 3))))
            ;; Check for a vowel followed by a non-vowel other than w, x or Y
            ;; and preceded by a non-vowel.
-           (and (not (lower-vowel? s1))
+           (and (not (lower-vowel-p s1))
                 (not (member s1 (list #\w #\x #\Y)))
-                (lower-vowel? s2)
-                (not (lower-vowel? s3)))))))
+                (lower-vowel-p s2)
+                (not (lower-vowel-p s3)))))
+        (t nil)))
 
 (defun is-short-word (word)
   (unless (< (R1start word) (length (str word)))

          
@@ 195,8 192,8 @@ 
 ;; Step 0 is to strip off apostrophes and "s".
 (defun step-0 (word)
   (let ((suffix (first-suffix word (list "'s'" "'s" "'"))))
-    (if suffix
-        (strip-suffix word suffix))))
+    (when suffix
+      (strip-suffix word suffix))))
 
 ;; Step 1a is normalization of various special "s"-endings.
 (defun step-1a (word)

          
@@ 215,51 212,52 @@ 
                     ;; immediately before the s (so gas and this retain the s,
                     ;; gaps and kiwis lose it)
                     ("s" (loop for char across (subseq (str word) 0 (max 0 (- word-len 2)))
-                            if (lower-vowel? char)
-                            return (strip-suffix word suffix)))))))
+                               if (lower-vowel-p char)
+                                 return (strip-suffix word suffix)))))))
 
 ;; Step 1b is the normalization of various "ly" and "ed" sufficies.
 (defun step-1b (word)
   (let ((suffix (first-suffix word (list "eedly" "ingly" "edly" "ing" "eed" "ed"))))
     (string-switch suffix
                    ((("eed" "eedly")
-                     (if (<= (length suffix) (- (length (str word)) (R1start word)))
-                         (replace-suffix word suffix "ee")))
+                     (when (<= (length suffix) (- (length (str word)) (R1start word)))
+                       (replace-suffix word suffix "ee")))
 
                     (("ed" "edly" "ing" "ingly")
                      (let* ((root-len (- (length (str word)) (length suffix)))
                             (has-lower-vowel (loop for char across (subseq (str word) 0 root-len)
-                                                thereis (lower-vowel? char))))
-                       (if has-lower-vowel
-                           (let ((original-R1start (R1start word))
-                                 (original-R2start (R2start word)))
-                             (strip-suffix word suffix)
-                             (let ((new-suffix (first-suffix word (list "at" "bl" "iz" "bb"
-                                                                        "dd" "ff" "gg" "mm"
-                                                                        "nn" "pp" "rr" "tt"))))
-                               (if new-suffix
-                                   (string-switch new-suffix
-                                                  ((("at" "bl" "iz")
-                                                    ;; If the word ends "at", "bl" or "iz" add "e"
-                                                    (replace-suffix word new-suffix (concatenate 'string new-suffix "e")))
+                                                     thereis (lower-vowel-p char))))
+                       (when has-lower-vowel
+                         (let ((original-R1start (R1start word))
+                               (original-R2start (R2start word)))
+                           (strip-suffix word suffix)
+                           (let ((new-suffix (first-suffix word (list "at" "bl" "iz" "bb"
+                                                                      "dd" "ff" "gg" "mm"
+                                                                      "nn" "pp" "rr" "tt"))))
+                             (cond (new-suffix
+                                    (string-switch new-suffix
+                                                   ((("at" "bl" "iz")
+                                                     ;; If the word ends "at", "bl" or "iz" add "e"
+                                                     (replace-suffix word new-suffix (strcat new-suffix "e")))
 
-                                                   (("bb" "dd" "ff" "gg" "mm" "nn" "pp" "rr" "tt")
-                                                    ;; If the word ends with a double remove the last letter.
-                                                    (strip-n-chars word 1))))
+                                                    (("bb" "dd" "ff" "gg" "mm" "nn" "pp" "rr" "tt")
+                                                     ;; If the word ends with a double remove the last letter.
+                                                     (strip-n-chars word 1)))))
+                                   
                                    ;; otherwise, if the word is short, add "e"
-                                   (if (is-short-word word)
-                                       (setf (str word) (concatenate 'string (str word) "e")
-                                             (R1start word) (length (str word))
-                                             (R2start word) (length (str word)))))
+                                   ((is-short-word word)
+                                    (setf (str word) (strcat (str word) "e")
+                                          (R1start word) (length (str word))
+                                          (R2start word) (length (str word)))))
 
-                               ;; Because we did a double replacement, we need
-                               ;; to fix R1 and R2 manually.
-                               (if (< original-R1start (length (str word)))
-                                   (setf (R1start word) original-R1start)
-                                   (setf (R1start word) (length (str word))))
-                               (if (< original-R2start (length (str word)))
-                                   (setf (R2start word) original-R2start)
-                                   (setf (R2start word) (length (str word)))))))))))))
+                             ;; Because we did a double replacement, we need
+                             ;; to fix R1 and R2 manually.
+                             (if (< original-R1start (length (str word)))
+                                 (setf (R1start word) original-R1start)
+                                 (setf (R1start word) (length (str word))))
+                             (if (< original-R2start (length (str word)))
+                                 (setf (R2start word) original-R2start)
+                                 (setf (R2start word) (length (str word)))))))))))))
 
 ;; Step 1c is the normalization of various "y" endings.
 (defun step-1c (word)

          
@@ 267,11 265,11 @@ 
     ;; Replace suffix y or Y by i if preceded by a non-vowel which is
     ;; not the first letter of the word:
     ;; cry -> cri, by -> by, say -> say
-    (if (and (> rs-length 2)
-             (or (char= #\y (char (str word) (- rs-length 1)))
-                 (char= #\Y (char (str word) (- rs-length 1))))
-             (not (lower-vowel? (char (str word) (- rs-length 2)))))
-        (setf (char (str word) (- rs-length 1)) #\i))))
+    (when (and (> rs-length 2)
+               (or (char= #\y (char (str word) (1- rs-length)))
+                   (char= #\Y (char (str word) (1- rs-length))))
+               (not (lower-vowel-p (char (str word) (- rs-length 2)))))
+      (setf (char (str word) (1- rs-length)) #\i))))
 
 ;; Step 2 is the stemming of various endings found in R1 including
 ;; "al", "ness", and "li"

          
@@ 283,32 281,32 @@ 
                                          "izer"    "bli"     "ogi"     "li")))
         (word-len (length (str word)))
         (cdeghkmnrt (loop for c across "cdeghkmnrt" collect c)))
-    (if (and suffix
-             (<= (length suffix) (- word-len (R1start word))))
-        (string-switch suffix
-                       (("li" (if (and (>= word-len 3)
+    (when (and suffix
+               (<= (length suffix) (- word-len (R1start word))))
+      (string-switch suffix
+                     (("li" (when (and (>= word-len 3)
                                        (member (char (str word) (- word-len 3)) cdeghkmnrt))
-                                  (strip-suffix word suffix)))
+                              (strip-suffix word suffix)))
 
-                        ("ogi" (if (and (>= word-len 4)
+                      ("ogi" (when (and (>= word-len 4)
                                         (char= (char (str word) (- word-len 4)) #\l))
-                                   (replace-suffix word suffix "og")))
+                               (replace-suffix word suffix "og")))
 
-                        (t (replace-suffix word suffix (string-switch suffix
-                                                                      (("tional"  "tion")
-                                                                       ("enci"    "ence")
-                                                                       ("anci"    "ance")
-                                                                       ("abli"    "able")
-                                                                       ("entli"   "ent")
-                                                                       ("fulness" "ful")
-                                                                       ("fulli"   "ful")
-                                                                       ("lessli"  "less")
-                                                                       (("izer"    "ization") "ize")
-                                                                       (("ousli"   "ousness") "ous")
-                                                                       (("biliti"  "bli")     "ble")
-                                                                       (("iveness" "iviti")   "ive")
-                                                                       (("alism"   "aliti" "alli") "al")
-                                                                       (("ational" "ation" "ator") "ate"))))))))))
+                      (t (replace-suffix word suffix (string-switch suffix
+                                                                    (("tional"  "tion")
+                                                                     ("enci"    "ence")
+                                                                     ("anci"    "ance")
+                                                                     ("abli"    "able")
+                                                                     ("entli"   "ent")
+                                                                     ("fulness" "ful")
+                                                                     ("fulli"   "ful")
+                                                                     ("lessli"  "less")
+                                                                     (("izer"    "ization") "ize")
+                                                                     (("ousli"   "ousness") "ous")
+                                                                     (("biliti"  "bli")     "ble")
+                                                                     (("iveness" "iviti")   "ive")
+                                                                     (("alism"   "aliti" "alli") "al")
+                                                                     (("ational" "ation" "ator") "ate"))))))))))
 
 ;; Step 3 is the stemming of various longer sufficies
 (defun step-3 (word)

          
@@ 318,16 316,16 @@ 
         (word-len (length (str word))))
     (unless (or (> (length suffix) (- word-len (R1start word)))
                 (not suffix))
-        (string-switch suffix
-                       (("ative" (if (>= (- word-len (R2start word)) 5)
-                                     (strip-suffix word suffix)))
+      (string-switch suffix
+                     (("ative" (when (>= (- word-len (R2start word)) 5)
+                                 (strip-suffix word suffix)))
 
-                        (t (replace-suffix word suffix (string-switch suffix
-                                                                      (("alize"   "al")
-                                                                       ("tional"  "tion")
-                                                                       ("ational" "ate")
-                                                                       (("ful" "ness") "")
-                                                                       (("icate" "iciti" "ical") "ic"))))))))))
+                      (t (replace-suffix word suffix (string-switch suffix
+                                                                    (("alize"   "al")
+                                                                     ("tional"  "tion")
+                                                                     ("ational" "ate")
+                                                                     (("ful" "ness") "")
+                                                                     (("icate" "iciti" "ical") "ic"))))))))))
 
 ;; Step 4 search for the longest of the following and if found in R2, act
 (defun step-4 (word)

          
@@ 337,12 335,12 @@ 
                                          "al"    "er"   "ic")))
         (word-len (length (str word))))
     (unless (or (> (length suffix) (- word-len (R2start word)))
-            (not suffix))
-        (string-switch suffix
-                       (("ion" (if (and (>= word-len 4) (or (char= (char (str word) (- word-len 4)) #\s)
+                (not suffix))
+      (string-switch suffix
+                     (("ion" (when (and (>= word-len 4) (or (char= (char (str word) (- word-len 4)) #\s)
                                                             (char= (char (str word) (- word-len 4)) #\t)))
-                                (strip-suffix word suffix)))
-                        (t (strip-suffix word suffix)))))))
+                               (strip-suffix word suffix)))
+                      (t (strip-suffix word suffix)))))))
 
 ;; Step 5 is the stemming of "e" and "l" sufficies found in R2
 (defun step-5 (word)

          
@@ 375,17 373,15 @@ 
          (downcased (string-downcase trimmed))
          (w (make-instance 'stemword :str downcased))
          (special-string (stem-special-string str)))
-    (if special-string
-        special-string
-        (progn
-          (preprocess w)
-          (step-0 w)
-          (step-1a w)
-          (step-1b w)
-          (step-1c w)
-          (step-2 w)
-          (step-3 w)
-          (step-4 w)
-          (step-5 w)
-          (postprocess w)
-          (str w)))))
+    (cond (special-string special-string)
+          (t (preprocess w)
+             (step-0 w)
+             (step-1a w)
+             (step-1b w)
+             (step-1c w)
+             (step-2 w)
+             (step-3 w)
+             (step-4 w)
+             (step-5 w)
+             (postprocess w)
+             (str w)))))