db870a62221a — Leonard Ritter 2 months ago
* for prefixed strings, only `"` can be escaped, all other characters are taken as-is.
5 files changed, 51 insertions(+), 7 deletions(-)

M src/lexerparser.cpp
M src/lexerparser.hpp
M src/string.cpp
M src/string.hpp
M testing/test_parser.sc
M src/lexerparser.cpp +15 -5
@@ 684,6 684,14 @@ const String *LexerParser::get_string() 
     auto size = unescape_string(dest);
     return String::from(dest, size);
 }
+const String *LexerParser::get_unescaped_string() { // builds a String from the current token text, applying only the light unescape pass
+    auto len = string_len - 2; // payload length without the first and last character (presumably the surrounding quotes)
+    char dest[len + 1]; // scratch buffer sized at run time; +1 leaves room for the terminator
+    memcpy(dest, string + 1, len); // copy the payload, skipping the first character of the token
+    dest[len] = 0; // unescape_string_light expects a NUL-terminated buffer
+    auto size = unescape_string_light(dest); // rewrites dest in place and returns the resulting length
+    return String::from(dest, size);
+}
 const String *LexerParser::get_block_string() {
     int strip_col = column() + 4;
     auto len = string_len - 4;

          
@@ 800,13 808,13 @@ SCOPES_RESULT(const List *) LexerParser:
     return builder.get_result();
 }
 
-SCOPES_RESULT(ValueRef) LexerParser::parse_string() {
+SCOPES_RESULT(ValueRef) LexerParser::parse_prefixed_string() {
     SCOPES_RESULT_TYPE(ValueRef);
     assert(this->token != tok_eof);
     const Anchor *anchor = this->anchor();
     switch (this->token) {
     case tok_string: {
-        return ValueRef(anchor, ConstPointer::string_from(get_string()));
+        return ValueRef(anchor, ConstPointer::string_from(get_unescaped_string()));
     } break;
     case tok_block_string: {
         return ValueRef(anchor, ConstPointer::string_from(get_block_string()));

          
@@ 847,9 855,11 @@ SCOPES_RESULT(ValueRef) LexerParser::par
         SCOPES_TRACE_PARSER(this->anchor());
         SCOPES_ERROR(ParserStrayClosingBracket);
     } break;
-    case tok_string:
+    case tok_string: {
+        return ValueRef(anchor, ConstPointer::string_from(get_string()));
+    } break;
     case tok_block_string: {
-        return parse_string();
+        return ValueRef(anchor, ConstPointer::string_from(get_block_string()));
     } break;
     case tok_symbol: {
         return ValueRef(anchor, ConstInt::symbol_from(get_symbol()));

          
@@ 868,7 878,7 @@ SCOPES_RESULT(ValueRef) LexerParser::par
             wrapped = wrappedsym;
         }
         SCOPES_CHECK_RESULT(this->read_token());
-        ValueRef str = SCOPES_GET_RESULT(parse_string());
+        ValueRef str = SCOPES_GET_RESULT(parse_prefixed_string());
         return ValueRef(anchor, ConstPointer::list_from(
             List::from(ref(anchor, wrapped), str)));
     } break;

          
M src/lexerparser.hpp +2 -1
@@ 133,6 133,7 @@ struct LexerParser {
 
     Symbol get_symbol();
     const String *get_string();
+    const String *get_unescaped_string();
     const String *get_block_string();
     ValueRef get_number();
     //Const *get();

          
@@ 143,7 144,7 @@ struct LexerParser {
     // parses the next sequence and returns it wrapped in a cell that points
     // to prev
     SCOPES_RESULT(ValueRef) parse_any();
-    SCOPES_RESULT(ValueRef) parse_string();
+    SCOPES_RESULT(ValueRef) parse_prefixed_string();
 
     SCOPES_RESULT(ValueRef) parse_naked(int column, Token end_token);
 

          
M src/string.cpp +26 -1
@@ 44,7 44,7 @@ int unescape_string(char *buf) {
             src++;
             if (*src == 0) {
                 break;
-            } if (*src == 'n') {
+            } else if (*src == 'n') {
                 *dst = '\n';
             } else if (*src == 't') {
                 *dst = '\t';

          
@@ 74,6 74,31 @@ int unescape_string(char *buf) {
     return dst - buf;
 }
 
+int unescape_string_light(char *buf) { // in-place pass over a NUL-terminated buffer: a backslash followed by a double quote becomes just the quote, everything else is kept; returns the new length
+    char *dst = buf; // write cursor
+    char *src = buf; // read cursor; advances at least as fast as dst
+    while (*src) {
+        if (*src == '\\') {
+            auto c = *(src + 1); // peek at the character following the backslash
+            if (c == 0) {
+                *dst++ = *src; // a backslash at the very end of the input is kept as-is
+                break;
+            } else if (c == '"') {
+                *dst = *(++src); // skip the backslash and emit only the quote
+            } else {
+                *dst = *src; // keep the backslash; the following character is copied by the next iteration. NOTE(review): a backslash cannot escape itself, so in backslash-backslash-quote the second backslash still consumes the quote
+            }
+        } else {
+            *dst = *src; // ordinary character, copied unchanged
+        }
+        src++;
+        dst++;
+    }
+    // terminate
+    *dst = 0;
+    return dst - buf; // number of characters written, not counting the terminator
+}
+
 #define B_SNFORMAT 512 // how many characters per callback
 typedef char *(*vsformatcb_t)(const char *buf, void *user, int len);
 

          
M src/string.hpp +1 -0
@@ 84,6 84,7 @@ const String *format( const char *fmt, .
 size_t distance(const String *_s, const String *_t);
 
 int unescape_string(char *buf);
+int unescape_string_light(char *buf);
 int escape_string(char *buf, const char *str, int strcount, const char *quote_chars);
 
 } // namespace scopes

          
M testing/test_parser.sc +7 -0
@@ 24,4 24,11 @@ do
               line3
 
     test (S == "<line1\nline2\nline3\n>")
+
+    # prefixed string literals keep backslash sequences as-is; only an escaped double quote is unescaped
+    inline prefix:r (s) s
+
+    test (r"\\test\n\"test\"" == "\\\\test\\n\"test\"")
+
+
 ;