Fixed bugs where the tokenizer parsed past the end of the input text.
M jasm/exceptions/error_codes.h +2 -2
@@ 23,10 23,10 @@ enum class AssemblyErrorCodes
 	TooLongCharacterConstant,
 	MultilineCommentWasNotTerminated,
 	MissingClosingStringQuote,
-	Unused1, // not used!
+	UnexpectedEndOfStringOrLiteral,
 	IllegalCharacterInBinaryConstant,
 	IllegalCharacterInHexConstant,
-	Unused2, // not used!
+	UnexpectedEndOfNumeric,
 	UnmatchedProcessorPop,
 	InvalidProcessorName,
 	ExpectedProcessorNameOrPop,

          
M jasm/tokenize/tokenizer.cpp +39 -13
@@ 74,6 74,7 @@ void Tokenizer::tokenize(uint32_t file_i
 	PositionTracker tracker(wide_contents, char_masks, _row_locations);
 	uint64_t hash; // used as out parameter for finder::match
 	while (!tracker.at_end()) {
+		size_t characters_left = tracker.characters_left();
 		if (tracker.is_space()) {
 			// matched whitespace
 			Token &t = token_chain.reserve<Token>();

          
@@ 94,12 95,12 @@ void Tokenizer::tokenize(uint32_t file_i
 					tracker.consume();
 			}
 
-		} else if (tracker.characters_left() >= 2 && tracker.peek_char() == L'/' && tracker.peek_char(1) == L'/') {
+		} else if (characters_left >= 2 && tracker.peek_char() == L'/' && tracker.peek_char(1) == L'/') {
 			// matched a single line comment
 			while (!tracker.at_end() && !tracker.is_newline())
 				tracker.consume();
 
-		} else if (tracker.characters_left() >= 2 && tracker.peek_char() == L'/' && tracker.peek_char(1) == L'*') {
+		} else if (characters_left >= 2 && tracker.peek_char() == L'/' && tracker.peek_char(1) == L'*') {
 			// matched a multi line comment
 			uint32_t row = tracker.row;
 			uint32_t column = tracker.column;

          
@@ 132,16 133,16 @@ void Tokenizer::tokenize(uint32_t file_i
 			_temp_wstring.clear();
 			tokenize_int_or_float(tracker, token_chain);
 
-		} else if (tracker.characters_left() >= 2 && tracker.peek_char() == L'.' && tracker.is_digit(1)) {
+		} else if (characters_left >= 2 && tracker.peek_char() == L'.' && tracker.is_digit(1)) {
 			// matched a floating point number
 			_temp_wstring.clear();
 			tokenize_float(tracker, token_chain, tracker.column, tracker.row);
 
-		} else if (tracker.peek_char() == L'$') {
+		} else if (characters_left >= 1 && tracker.peek_char() == L'$') {
 			// matched a hex number
 			tokenize_hex(tracker, token_chain);
 
-		} else if (tracker.characters_left() >= 2 && tracker.peek_char() == L'%' && tracker.is_digit(1)) {
+		} else if (characters_left >= 1 && tracker.peek_char() == L'%') {
 			// matched a binary number
 			tokenize_binary(tracker, token_chain);
 

          
@@ 162,11 163,11 @@ void Tokenizer::tokenize(uint32_t file_i
 			constexpr bool allow_keyword = true;
 			tokenize_symbol_or_keyword(tracker, token_chain, strings, allow_keyword);
 
-		} else if (tracker.peek_char() == L'\'') {
+		} else if (characters_left >= 1 && tracker.peek_char() == L'\'') {
 			// char literal
 			tokenize_char(tracker, token_chain);
 
-		} else if (tracker.peek_char() == L'\"') {
+		} else if (characters_left >= 1 && tracker.peek_char() == L'\"') {
 			// string literal
 			tokenize_string(tracker, token_chain, strings);
 

          
@@ 202,10 203,16 @@ std::string Tokenizer::to_front_slashes(
 
 wchar_t Tokenizer::parse_next_string_character(PositionTracker &tracker)
 {
+	if (tracker.at_end()) {
+		throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedEndOfStringOrLiteral, "Unexpected end of string or literal");
+	}
 	wchar_t value = tracker.peek_char();
 	tracker.consume();
 
 	if (value == L'\\') {
+		if (tracker.at_end()) {
+			throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedEndOfStringOrLiteral, "Unexpected end of string or literal");
+		}
 		wchar_t c = tracker.peek_char();
 		if (c != L't' && c != L'n' && c != L'r' && c != L'0' && c != L'\\' && c != L'\'' && c != L'\"')
 			// Unknown escape code. This used to be an error but this needs to be allowed because

          
@@ 240,7 247,7 @@ void Tokenizer::tokenize_int_or_float(Po
 		_temp_wstring.push_back(tracker.peek_char());
 		tracker.consume();
 	}
-	if (tracker.peek_char() == L'.' || tracker.peek_char() == L'e') {
+	if (!tracker.at_end() && (tracker.peek_char() == L'.' || tracker.peek_char() == L'e')) {
 		tokenize_float(tracker, token_chain, column, row);
 		return;
 	}

          
@@ 286,7 293,7 @@ void Tokenizer::tokenize_float(PositionT
 			tracker.consume();
 		}
 		// exponent may follow decimal digits
-		parse_exp = tracker.peek_char() == L'e';
+		parse_exp = !tracker.at_end() && tracker.peek_char() == L'e';
 	}
 
 	if (parse_exp) {

          
@@ 295,7 302,7 @@ void Tokenizer::tokenize_float(PositionT
 		tracker.consume();
 
 		// parse optional + or -
-		if (tracker.peek_char() == L'-' || tracker.peek_char() == L'+') {
+		if (!tracker.at_end() && (tracker.peek_char() == L'-' || tracker.peek_char() == L'+')) {
 			_temp_wstring.push_back(tracker.peek_char());
 			tracker.consume();
 		}

          
@@ 347,6 354,12 @@ void Tokenizer::tokenize_binary(Position
 	// consume percent character
 	tracker.consume();
 
+	if (tracker.at_end()) {
+		std::stringstream ss;
+		ss << "Unexpected end of numeric constant";
+		throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedEndOfNumeric, ss.str());
+	}
+
 	int32_t value = 0;
 	while (tracker.is_alpha_numeric()) {
 		wchar_t c = tracker.peek_char();

          
@@ 382,6 395,12 @@ void Tokenizer::tokenize_hex(PositionTra
 	// consume dollar character
 	tracker.consume();
 
+	if (tracker.at_end()) {
+		std::stringstream ss;
+		ss << "Unexpected end of numeric constant";
+		throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedEndOfNumeric, ss.str());
+	}
+
 	int32_t value = 0;
 	while (tracker.is_alpha_numeric()) {
 		wchar_t c = tracker.peek_char();

          
@@ 425,6 444,10 @@ void Tokenizer::tokenize_char(PositionTr
 
 	wchar_t value = parse_next_string_character(tracker);
 
+	if (tracker.at_end()) {
+		throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedEndOfStringOrLiteral, "Unexpected end of string or literal");
+	}
+
 	if (tracker.peek_char() != L'\'')
 		throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::TooLongCharacterConstant, "Character constant longer than one character.");
 	tracker.consume();

          
@@ 558,11 581,11 @@ void Tokenizer::parse_symbol_name_to_tem
 void Tokenizer::parse_processor(PositionTracker &tracker, TokenChain &token_chain)
 {
 	// we are past the processor keyword
-	while (tracker.peek_char() != 0 && tracker.is_space()) {
+	while (tracker.is_space()) {
 		tracker.consume();
 	}
 
-	if (tracker.peek_char() == L'\"') {
+	if (!tracker.at_end() && tracker.peek_char() == L'\"') {
 		uint32_t row = tracker.row;
 		uint32_t col = tracker.column;
 

          
@@ 632,9 655,12 @@ void Tokenizer::parse_quoted_string(Posi
 	// consume quote
 	tracker.consume();
 
-	while (tracker.peek_char() != L'\"') {
+	while (true) {
 		if (tracker.at_end() || tracker.is_newline())
 			throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::MissingClosingStringQuote, "Missing closing string quote.");
+		if (tracker.peek_char() == L'\"') {
+			break;
+		}
 
 		wchar_t c = parse_next_string_character(tracker);
 		_temp_wstring.push_back(c);

          
A => jasm/unit_tests/results/test_eof_in_character_literal.stdout +1 -0
@@ 0,0 1,1 @@ 
+unit_tests/test_eof_in_character_literal.asm(3,3) : Error 1008 : Unexpected end of string or literal

          
A => jasm/unit_tests/results/test_eof_start_of_binary_literal.stdout +1 -0
@@ 0,0 1,1 @@ 
+unit_tests/test_eof_start_of_binary_literal.asm(3,12) : Error 1011 : Unexpected end of numeric constant

          
A => jasm/unit_tests/results/test_eof_start_of_character_literal.stdout +1 -0
@@ 0,0 1,1 @@ 
+unit_tests/test_eof_start_of_character_literal.asm(3,2) : Error 1004 : Missing character in character constant

          
A => jasm/unit_tests/results/test_eof_start_of_hex_literal.stdout +1 -0
@@ 0,0 1,1 @@ 
+unit_tests/test_eof_start_of_hex_literal.asm(3,2) : Error 1011 : Unexpected end of numeric constant

          
A => jasm/unit_tests/test_eof_in_character_literal.asm +3 -0
@@ 0,0 1,3 @@ 
+// assembler command line arguments: 6502 [-v0 -hla]
+
+'a
  No newline at end of file

          
A => jasm/unit_tests/test_eof_start_of_binary_literal.asm +3 -0
@@ 0,0 1,3 @@ 
+// assembler command line arguments: 6502 [-v0 -hla]
+
+const a = %
  No newline at end of file

          
A => jasm/unit_tests/test_eof_start_of_character_literal.asm +3 -0
@@ 0,0 1,3 @@ 
+// assembler command line arguments: 6502 [-v0 -hla]
+
+'
  No newline at end of file

          
A => jasm/unit_tests/test_eof_start_of_hex_literal.asm +3 -0
@@ 0,0 1,3 @@ 
+// assembler command line arguments: 6502 [-v0 -hla]
+
+$
  No newline at end of file