M jasm/exceptions/error_codes.h +2 -2
@@ 23,10 23,10 @@ enum class AssemblyErrorCodes
TooLongCharacterConstant,
MultilineCommentWasNotTerminated,
MissingClosingStringQuote,
- Unused1, // not used!
+ UnexpectedEndOfStringOrLiteral,
IllegalCharacterInBinaryConstant,
IllegalCharacterInHexConstant,
- Unused2, // not used!
+ UnexpectedEndOfNumeric,
UnmatchedProcessorPop,
InvalidProcessorName,
ExpectedProcessorNameOrPop,
M jasm/tokenize/tokenizer.cpp +39 -13
@@ 74,6 74,7 @@ void Tokenizer::tokenize(uint32_t file_i
PositionTracker tracker(wide_contents, char_masks, _row_locations);
uint64_t hash; // used as out parameter for finder::match
while (!tracker.at_end()) {
+ size_t characters_left = tracker.characters_left();
if (tracker.is_space()) {
// matched whitespace
Token &t = token_chain.reserve<Token>();
@@ 94,12 95,12 @@ void Tokenizer::tokenize(uint32_t file_i
tracker.consume();
}
- } else if (tracker.characters_left() >= 2 && tracker.peek_char() == L'/' && tracker.peek_char(1) == L'/') {
+ } else if (characters_left >= 2 && tracker.peek_char() == L'/' && tracker.peek_char(1) == L'/') {
// matched a single line comment
while (!tracker.at_end() && !tracker.is_newline())
tracker.consume();
- } else if (tracker.characters_left() >= 2 && tracker.peek_char() == L'/' && tracker.peek_char(1) == L'*') {
+ } else if (characters_left >= 2 && tracker.peek_char() == L'/' && tracker.peek_char(1) == L'*') {
// matched a multi line comment
uint32_t row = tracker.row;
uint32_t column = tracker.column;
@@ 132,16 133,16 @@ void Tokenizer::tokenize(uint32_t file_i
_temp_wstring.clear();
tokenize_int_or_float(tracker, token_chain);
- } else if (tracker.characters_left() >= 2 && tracker.peek_char() == L'.' && tracker.is_digit(1)) {
+ } else if (characters_left >= 2 && tracker.peek_char() == L'.' && tracker.is_digit(1)) {
// matched a floating point number
_temp_wstring.clear();
tokenize_float(tracker, token_chain, tracker.column, tracker.row);
- } else if (tracker.peek_char() == L'$') {
+ } else if (characters_left >= 1 && tracker.peek_char() == L'$') {
// matched a hex number
tokenize_hex(tracker, token_chain);
- } else if (tracker.characters_left() >= 2 && tracker.peek_char() == L'%' && tracker.is_digit(1)) {
+ } else if (characters_left >= 1 && tracker.peek_char() == L'%') {
// matched a binary number
tokenize_binary(tracker, token_chain);
@@ 162,11 163,11 @@ void Tokenizer::tokenize(uint32_t file_i
constexpr bool allow_keyword = true;
tokenize_symbol_or_keyword(tracker, token_chain, strings, allow_keyword);
- } else if (tracker.peek_char() == L'\'') {
+ } else if (characters_left >= 1 && tracker.peek_char() == L'\'') {
// char literal
tokenize_char(tracker, token_chain);
- } else if (tracker.peek_char() == L'\"') {
+ } else if (characters_left >= 1 && tracker.peek_char() == L'\"') {
// string literal
tokenize_string(tracker, token_chain, strings);
@@ 202,10 203,16 @@ std::string Tokenizer::to_front_slashes(
wchar_t Tokenizer::parse_next_string_character(PositionTracker &tracker)
{
+ if (tracker.at_end()) {
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedEndOfStringOrLiteral, "Unexpected end of string or literal");
+ }
wchar_t value = tracker.peek_char();
tracker.consume();
if (value == L'\\') {
+ if (tracker.at_end()) {
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedEndOfStringOrLiteral, "Unexpected end of string or literal");
+ }
wchar_t c = tracker.peek_char();
if (c != L't' && c != L'n' && c != L'r' && c != L'0' && c != L'\\' && c != L'\'' && c != L'\"')
// Unknown escape code. This used to be an error but this needs to be allowed because
@@ 240,7 247,7 @@ void Tokenizer::tokenize_int_or_float(Po
_temp_wstring.push_back(tracker.peek_char());
tracker.consume();
}
- if (tracker.peek_char() == L'.' || tracker.peek_char() == L'e') {
+ if (!tracker.at_end() && (tracker.peek_char() == L'.' || tracker.peek_char() == L'e')) {
tokenize_float(tracker, token_chain, column, row);
return;
}
@@ 286,7 293,7 @@ void Tokenizer::tokenize_float(PositionT
tracker.consume();
}
// exponent may follow decimal digits
- parse_exp = tracker.peek_char() == L'e';
+ parse_exp = !tracker.at_end() && tracker.peek_char() == L'e';
}
if (parse_exp) {
@@ 295,7 302,7 @@ void Tokenizer::tokenize_float(PositionT
tracker.consume();
// parse optional + or -
- if (tracker.peek_char() == L'-' || tracker.peek_char() == L'+') {
+ if (!tracker.at_end() && (tracker.peek_char() == L'-' || tracker.peek_char() == L'+')) {
_temp_wstring.push_back(tracker.peek_char());
tracker.consume();
}
@@ 347,6 354,12 @@ void Tokenizer::tokenize_binary(Position
// consume percent character
tracker.consume();
+ if (tracker.at_end()) {
+ std::stringstream ss;
+ ss << "Unexpected end of numeric constant";
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedEndOfNumeric, ss.str());
+ }
+
int32_t value = 0;
while (tracker.is_alpha_numeric()) {
wchar_t c = tracker.peek_char();
@@ 382,6 395,12 @@ void Tokenizer::tokenize_hex(PositionTra
// consume dollar character
tracker.consume();
+ if (tracker.at_end()) {
+ std::stringstream ss;
+ ss << "Unexpected end of numeric constant";
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedEndOfNumeric, ss.str());
+ }
+
int32_t value = 0;
while (tracker.is_alpha_numeric()) {
wchar_t c = tracker.peek_char();
@@ 425,6 444,10 @@ void Tokenizer::tokenize_char(PositionTr
wchar_t value = parse_next_string_character(tracker);
+ if (tracker.at_end()) {
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedEndOfStringOrLiteral, "Unexpected end of string or literal");
+ }
+
if (tracker.peek_char() != L'\'')
throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::TooLongCharacterConstant, "Character constant longer than one character.");
tracker.consume();
@@ 558,11 581,11 @@ void Tokenizer::parse_symbol_name_to_tem
void Tokenizer::parse_processor(PositionTracker &tracker, TokenChain &token_chain)
{
// we are past the processor keyword
- while (tracker.peek_char() != 0 && tracker.is_space()) {
+ while (tracker.is_space()) {
tracker.consume();
}
- if (tracker.peek_char() == L'\"') {
+ if (!tracker.at_end() && tracker.peek_char() == L'\"') {
uint32_t row = tracker.row;
uint32_t col = tracker.column;
@@ 632,9 655,12 @@ void Tokenizer::parse_quoted_string(Posi
// consume quote
tracker.consume();
- while (tracker.peek_char() != L'\"') {
+ while (true) {
if (tracker.at_end() || tracker.is_newline())
throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::MissingClosingStringQuote, "Missing closing string quote.");
+ if (tracker.peek_char() == L'\"') {
+ break;
+ }
wchar_t c = parse_next_string_character(tracker);
_temp_wstring.push_back(c);
A => jasm/unit_tests/results/test_eof_in_character_literal.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_eof_in_character_literal.asm(3,3) : Error 1008 : Unexpected end of string or literal
A => jasm/unit_tests/results/test_eof_start_of_binary_literal.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_eof_start_of_binary_literal.asm(3,12) : Error 1011 : Unexpected end of numeric constant
A => jasm/unit_tests/results/test_eof_start_of_character_literal.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_eof_start_of_character_literal.asm(3,2) : Error 1004 : Missing character in character constant
A => jasm/unit_tests/results/test_eof_start_of_hex_literal.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_eof_start_of_hex_literal.asm(3,2) : Error 1011 : Unexpected end of numeric constant
A => jasm/unit_tests/test_eof_in_character_literal.asm +3 -0
@@ 0,0 1,3 @@
+// assembler command line arguments: 6502 [-v0 -hla]
+
+'a
No newline at end of file
A => jasm/unit_tests/test_eof_start_of_binary_literal.asm +3 -0
@@ 0,0 1,3 @@
+// assembler command line arguments: 6502 [-v0 -hla]
+
+const a = %
No newline at end of file
A => jasm/unit_tests/test_eof_start_of_character_literal.asm +3 -0
@@ 0,0 1,3 @@
+// assembler command line arguments: 6502 [-v0 -hla]
+
+'
No newline at end of file
A => jasm/unit_tests/test_eof_start_of_hex_literal.asm +3 -0
@@ 0,0 1,3 @@
+// assembler command line arguments: 6502 [-v0 -hla]
+
+$
No newline at end of file