162 files changed, 5201 insertions(+), 2475 deletions(-)

M core/core.vcxproj
M core/core.vcxproj.filters
M core/core/debug/timer.cpp
M core/core/debug/timer.h
M core/core/environment/log.cpp
M core/core/environment/log.h
M core/core/exceptions/exception.h
M core/core/exceptions/file_exception.h
M core/core/io/file_helpers.cpp
M core/core/io/file_helpers.h
M core/core/io/file_helpers_linux.cpp
M core/core/io/file_helpers_win.cpp
M core/core/io/file_id.h
M core/core/io/file_id_linux.cpp
M core/core/io/file_id_win.cpp
M core/core/io/file_writer.cpp
M core/core/io/file_writer.h
M core/core/io/text_reader.cpp
M core/core/io/text_reader.h
A => core/core/math/sign.h
M core/core/strings/murmur_hash.h
M core/core/strings/string_helpers.cpp
M core/core/strings/string_helpers.h
M core/core/strings/utf8.cpp
M core/core/strings/utf8.h
M hasher/main.cpp
A => jasm-6502/convert_6502_keyword_case.py
M jasm/assembling/assembler.cpp
M jasm/assembling/assembler.h
M jasm/assembling/assembler_impl/assembler_impl.cpp
M jasm/assembling/assembler_impl/assembler_impl.h
M jasm/assembling/assembler_impl/expressions_impl.cpp
M jasm/assembling/assembler_impl/functions_impl.cpp
M jasm/assembling/assembler_impl/methods_impl.cpp
M jasm/assembling/assembler_impl/operators_impl.cpp
M jasm/assembling/assembler_impl/symbols_impl.cpp
M jasm/assembling/assembler_impl/syntax_impl.cpp
M jasm/assembling/functions.cpp
M jasm/assembling/functions.h
M jasm/assembling/instructions_6502.cpp
M jasm/assembling/instructions_6502.h
A => jasm/assembling/instructions_common.h
M jasm/assembling/instructions_z80.cpp
M jasm/assembling/instructions_z80.h
M jasm/assembling/methods.cpp
M jasm/assembling/methods.h
M jasm/assembling/symbol_environment.cpp
M jasm/assembling/value.cpp
M jasm/assembling/value.h
M jasm/docs/jasm.md
M jasm/docs/syntax_highlight.py
M jasm/environment/command_line_args.cpp
M jasm/environment/command_line_args.h
M jasm/exceptions/assembly_exception.h
M jasm/exceptions/error_codes.h
M jasm/io/data_reader.cpp
M jasm/io/data_reader.h
M jasm/jasm.cbp
M jasm/jasm.vcxproj
M jasm/jasm.vcxproj.filters
M jasm/main.cpp
M jasm/parsing/keyword_finder.cpp
M jasm/parsing/keyword_finder.h
M jasm/parsing/keywords.cpp
M jasm/parsing/keywords.h
M jasm/parsing/operators.cpp
M jasm/parsing/operators.h
M jasm/parsing/processor_keywords_6502.cpp
M jasm/parsing/processor_keywords_6502.h
M jasm/parsing/processor_keywords_z80.cpp
M jasm/parsing/processor_keywords_z80.h
M jasm/parsing/syntax_parser.cpp
M jasm/parsing/syntax_parser.h
M jasm/parsing/syntax_tokens.cpp
M jasm/parsing/syntax_tokens.h
M jasm/parsing/token_print.cpp
M jasm/parsing/token_print.h
M jasm/parsing/tokenizer.cpp
M jasm/parsing/tokenizer.h
M jasm/parsing/types.cpp
M jasm/parsing/types.h
M jasm/revision_hash.h
M jasm/strings/string_conversions.cpp
M jasm/strings/string_conversions.h
M jasm/strings/string_hasher.h
A => jasm/strings/string_locale.cpp
A => jasm/strings/string_locale.h
M jasm/strings/string_repository.cpp
M jasm/strings/string_repository.h
M jasm/unit_tests/results/test_define_array_follows_references.bin
A => jasm/unit_tests/results/test_function_lowercase_default.bin
A => jasm/unit_tests/results/test_function_lowercase_english.bin
A => jasm/unit_tests/results/test_function_lowercase_invalid_type.stdout
A => jasm/unit_tests/results/test_function_lowercase_swedish.bin
A => jasm/unit_tests/results/test_function_uppercase_default.bin
A => jasm/unit_tests/results/test_function_uppercase_english.bin
A => jasm/unit_tests/results/test_function_uppercase_invalid_type.stdout
A => jasm/unit_tests/results/test_function_uppercase_swedish.bin
A => jasm/unit_tests/results/test_instruction_data_label_has_lo_hi_properties_6502.bin
A => jasm/unit_tests/results/test_instruction_data_label_has_lo_hi_properties_z80.bin
A => jasm/unit_tests/results/test_instruction_data_label_sizes_z80.bin
A => jasm/unit_tests/results/test_lowercase_too_many_arguments.stdout
M jasm/unit_tests/results/test_map_range_for_with_local_loop_variables.bin
A => jasm/unit_tests/results/test_pseudo_instructions_16_bit_register_load_z80.bin
A => jasm/unit_tests/results/test_pseudo_instructions_for_branching_6502.bin
A => jasm/unit_tests/results/test_pseudo_instructions_in_standard_mode_6502.stdout
A => jasm/unit_tests/results/test_pseudo_instructions_in_standard_mode_z80.stdout
A => jasm/unit_tests/results/test_pseudo_instructions_use_names_in_standard_mode_6502.bin
A => jasm/unit_tests/results/test_section_child_exceeds_its_size.stdout
A => jasm/unit_tests/results/test_subroutine_call_6502.bin
A => jasm/unit_tests/results/test_subroutine_call_must_be_in_code_section_6502.stdout
A => jasm/unit_tests/results/test_subroutine_call_must_be_in_code_section_z80.stdout
A => jasm/unit_tests/results/test_subroutine_call_negative_argument_6502.stdout
A => jasm/unit_tests/results/test_subroutine_call_recursive_data_generation_6502.stdout
A => jasm/unit_tests/results/test_subroutine_call_recursive_data_generation_z80.stdout
A => jasm/unit_tests/results/test_subroutine_call_too_large_argument_6502.stdout
A => jasm/unit_tests/results/test_subroutine_call_too_large_argument_z80.stdout
A => jasm/unit_tests/results/test_subroutine_call_with_arguments_6502.stdout
A => jasm/unit_tests/results/test_subroutine_call_with_arguments_z80.stdout
A => jasm/unit_tests/results/test_subroutine_call_z80.bin
A => jasm/unit_tests/results/test_uppercase_too_many_arguments.stdout
A => jasm/unit_tests/test_function_logn.asm
A => jasm/unit_tests/test_function_lowercase_default.asm
A => jasm/unit_tests/test_function_lowercase_english.asm
A => jasm/unit_tests/test_function_lowercase_invalid_type.asm
A => jasm/unit_tests/test_function_lowercase_swedish.asm
A => jasm/unit_tests/test_function_uppercase_default.asm
A => jasm/unit_tests/test_function_uppercase_english.asm
A => jasm/unit_tests/test_function_uppercase_invalid_type.asm
A => jasm/unit_tests/test_function_uppercase_swedish.asm
A => jasm/unit_tests/test_instruction_data_label_has_lo_hi_properties_6502.asm
A => jasm/unit_tests/test_instruction_data_label_has_lo_hi_properties_z80.asm
A => jasm/unit_tests/test_instruction_data_label_offsets_6502.asm
A => jasm/unit_tests/test_instruction_data_label_sizes_6502.asm
A => jasm/unit_tests/test_instruction_data_label_sizes_z80.asm
A => jasm/unit_tests/test_lowercase_too_many_arguments.asm
A => jasm/unit_tests/test_offset_word_has_lo_hi_property.asm
A => jasm/unit_tests/test_pseudo_instructions_16_bit_register_load_z80.asm
A => jasm/unit_tests/test_pseudo_instructions_for_branching_6502.asm
A => jasm/unit_tests/test_pseudo_instructions_in_standard_mode_6502.asm
A => jasm/unit_tests/test_pseudo_instructions_in_standard_mode_z80.asm
A => jasm/unit_tests/test_pseudo_instructions_use_names_in_standard_mode_6502.asm
A => jasm/unit_tests/test_section_child_exceeds_its_size.asm
A => jasm/unit_tests/test_subroutine_call_6502.asm
A => jasm/unit_tests/test_subroutine_call_must_be_in_code_section_6502.asm
A => jasm/unit_tests/test_subroutine_call_must_be_in_code_section_z80.asm
A => jasm/unit_tests/test_subroutine_call_negative_argument_6502.asm
A => jasm/unit_tests/test_subroutine_call_recursive_data_generation_6502.asm
A => jasm/unit_tests/test_subroutine_call_recursive_data_generation_z80.asm
A => jasm/unit_tests/test_subroutine_call_too_large_argument_6502.asm
A => jasm/unit_tests/test_subroutine_call_too_large_argument_z80.asm
A => jasm/unit_tests/test_subroutine_call_with_arguments_6502.asm
A => jasm/unit_tests/test_subroutine_call_with_arguments_z80.asm
A => jasm/unit_tests/test_subroutine_call_z80.asm
A => jasm/unit_tests/test_uppercase_too_many_arguments.asm
M jasm/version.h
M jasm/version.py
M jasm/website/site/docs/index.html
M jasm/website/site/index.html
M release.py
M sublime/m6502/jAsm.sublime-syntax
M sublime/z80/jAsm.sublime-syntax
M core/core.vcxproj +1 -0
@@ 221,6 221,7 @@ 
     <ClInclude Include="core\io\file_writer.h" />
     <ClInclude Include="core\io\text_reader.h" />
     <ClInclude Include="core\math\algorithm.h" />
+    <ClInclude Include="core\math\sign.h" />
     <ClInclude Include="core\ownership\destruct_call.h" />
     <ClInclude Include="core\strings\murmur_hash.h" />
     <ClInclude Include="core\strings\string_helpers.h" />

          
M core/core.vcxproj.filters +3 -0
@@ 73,6 73,9 @@ 
     <ClInclude Include="core\math\algorithm.h">
       <Filter>math</Filter>
     </ClInclude>
+    <ClInclude Include="core\math\sign.h">
+      <Filter>math</Filter>
+    </ClInclude>
     <ClInclude Include="core\ownership\destruct_call.h">
       <Filter>ownership</Filter>
     </ClInclude>

          
M core/core/debug/timer.cpp +2 -2
@@ 5,7 5,7 @@ 
 
 namespace core {
 
-TimerScope::TimerScope(const wchar_t *name)
+TimerScope::TimerScope(const char *name)
 	: _name(name)
 {
 	_start_time = std::chrono::system_clock::now();

          
@@ 16,7 16,7 @@ TimerScope::~TimerScope()
 	auto now = std::chrono::system_clock::now();
 	auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(now - _start_time);
 
-	debug() << _name << L" took " << duration.count() / 1000.0 << L'\n';
+	debug() << _name << " took " << duration.count() / 1000.0 << '\n';
 }
 
 } // namespace core

          
M core/core/debug/timer.h +2 -2
@@ 12,11 12,11 @@ class TimerScope
 {
 public:
 	/// @param name A pointer to a string to display at the end of the scope. The string will be copied.
-	explicit TimerScope(const wchar_t *name);
+	explicit TimerScope(const char *name);
 	~TimerScope();
 
 private:
-	std::wstring _name;
+	std::string _name;
 	std::chrono::time_point<std::chrono::system_clock> _start_time;
 };
 

          
M core/core/environment/log.cpp +10 -10
@@ 8,12 8,12 @@ namespace core
 namespace
 {
 	ErrorLevel __error_level = ErrorLevel::Errors;
-	std::wostream *__null_ostream = nullptr; ///< This is a stream that will get a bad state and not output anything.
+	std::ostream *__null_ostream = nullptr; ///< This is a stream that will get a bad state and not output anything.
 }
 
 LogScope::LogScope()
 {
-	__null_ostream = new std::wostream(nullptr);
+	__null_ostream = new std::ostream(nullptr);
 }
 
 LogScope::~LogScope()

          
@@ 27,30 27,30 @@ void set_log_level(ErrorLevel level)
 	__error_level = level;
 }
 
-std::wostream &error()
+std::ostream &error()
 {
-	return std::wcout;
+	return std::cout;
 }
 
-std::wostream &warning()
+std::ostream &warning()
 {
 	if (__error_level < ErrorLevel::Warnings)
 		return *__null_ostream;
-	return std::wcout;
+	return std::cout;
 }
 
-std::wostream &info()
+std::ostream &info()
 {
 	if (__error_level < ErrorLevel::Info)
 		return *__null_ostream;
-	return std::wcout;
+	return std::cout;
 }
 
-std::wostream &debug()
+std::ostream &debug()
 {
 	if (__error_level < ErrorLevel::Debug)
 		return *__null_ostream;
-	return std::wcout;
+	return std::cout;
 }
 
 } // namespace core

          
M core/core/environment/log.h +4 -4
@@ 19,10 19,10 @@ enum class ErrorLevel
 /// Logging with this level and lower will be printed.
 void set_log_level(ErrorLevel level);
 
-std::wostream &error();
-std::wostream &warning();
-std::wostream &info();
-std::wostream &debug();
+std::ostream &error();
+std::ostream &warning();
+std::ostream &info();
+std::ostream &debug();
 
 /// Let this live as long as printouts can happen.
 class LogScope

          
M core/core/exceptions/exception.h +2 -2
@@ 11,8 11,8 @@ namespace core
 /// It is more tricky since the std::runtime_error isn't wide string compatible so that can't be the base class.
 struct Exception
 {
-	Exception(const std::wstring &msg) : message(msg) {}
-	std::wstring message;
+	Exception(const std::string &msg) : message(msg) {}
+	std::string message;
 };
 
 /// @}

          
M core/core/exceptions/file_exception.h +1 -1
@@ 11,7 11,7 @@ namespace core
 /// Exception object for file errors.
 struct FileException : public Exception
 {
-	FileException(const std::wstring &msg) : Exception(msg) {}
+	FileException(const std::string &msg) : Exception(msg) {}
 };
 
 /// @}

          
M core/core/io/file_helpers.cpp +14 -14
@@ 6,17 6,17 @@ 
 namespace core
 {
 
-bool match_include_dir_and_file(const std::wstring &file, const std::vector<std::wstring> &include_dirs, std::wstring &result)
+bool match_include_dir_and_file(const std::string &file, const std::vector<std::string> &include_dirs, std::string &result)
 {
-	std::vector<wchar_t> temp_string;
+	std::vector<char> temp_string;
 	for (auto &dir : include_dirs) {
 		temp_string.clear();
 		// add dir
 		temp_string.insert(temp_string.end(), dir.begin(), dir.end());
 
 		// add slash if not already there
-		if (temp_string.empty() || (temp_string.back() != L'/' && temp_string.back() != L'\\'))
-			temp_string.push_back(L'/');
+		if (temp_string.empty() || (temp_string.back() != '/' && temp_string.back() != '\\'))
+			temp_string.push_back('/');
 
 		// add file
 		temp_string.insert(temp_string.end(), file.begin(), file.end());

          
@@ 25,32 25,32 @@ bool match_include_dir_and_file(const st
 		temp_string.push_back(0);
 
 		// check if file exists
-		if (file_exists(&temp_string[0])) {
-			result = &temp_string[0];
+		if (file_exists(temp_string.data())) {
+			result = temp_string.data();
 			return true;
 		}
 	}
 	return false;
 }
 
-std::wstring base_name(const std::wstring &filename)
+std::string base_name(const std::string &filename)
 {
-	size_t pos = filename.rfind(L".");
+	size_t pos = filename.rfind(".");
 	// early out if no punctual character was found
-	if (pos == std::wstring::npos)
+	if (pos == std::string::npos)
 		return filename;
 
 	return filename.substr(0, pos);
 }
 
-std::wstring file_extension(const std::wstring &filename)
+std::string file_extension(const std::string &filename)
 {
-	size_t pos = filename.rfind(L".");
+	size_t pos = filename.rfind(".");
 	// early out if no punctual character was found
-	if (pos == std::wstring::npos)
-		return std::wstring();
+	if (pos == std::string::npos)
+		return std::string();
 
-	return filename.substr(pos, std::wstring::npos);
+	return filename.substr(pos, std::string::npos);
 }
 
 } // namespace core

          
M core/core/io/file_helpers.h +4 -4
@@ 8,17 8,17 @@ namespace core
 
 /// Determines if there is an include directory that matches a file part.
 /// @return True if a file part matches an include directory. @a result is updated in that case.
-bool match_include_dir_and_file(const std::wstring &file, const std::vector<std::wstring> &include_dirs, std::wstring &result);
+bool match_include_dir_and_file(const std::string &file, const std::vector<std::string> &include_dirs, std::string &result);
 
 /// Check if a file exists.
 /// @return True if the file exists.
-bool file_exists(const wchar_t *file);
+bool file_exists(const char *file);
 
 /// Returns the filename without extension.
-std::wstring base_name(const std::wstring &filename);
+std::string base_name(const std::string &filename);
 
 /// Returns the file extension including the period character, or an empty string if the filename has none.
-std::wstring file_extension(const std::wstring &filename);
+std::string file_extension(const std::string &filename);
 
 /// @}
 

          
M core/core/io/file_helpers_linux.cpp +2 -3
@@ 3,7 3,6 @@ 
 #if defined(__linux) || defined(__APPLE__)
 
 #include <core/io/file_helpers.h>
-#include <core/strings/utf8.h>
 #include <cstring>
 #include <sys/stat.h>
 #include <sys/types.h>

          
@@ 12,11 11,11 @@ 
 namespace core
 {
 
-bool file_exists(const wchar_t *file)
+bool file_exists(const char *file)
 {
 	struct stat s;
 	memset(&s, 0, sizeof(s));
-	int success = stat(convert_wide_to_utf8(file).c_str(), &s);
+	int success = stat(file, &s);
 	if (success != 0)
 		return false;
 

          
M core/core/io/file_helpers_win.cpp +4 -2
@@ 3,13 3,15 @@ 
 #if defined(_WIN32)
 
 #include <core/io/file_helpers.h>
+#include <core/strings/utf8.h>
 
 namespace core
 {
 
-bool file_exists(const wchar_t *file)
+bool file_exists(const char *file)
 {
-	DWORD attributes = GetFileAttributesW(file);
+	std::wstring wide_file = utf8_to_wide(file);
+	DWORD attributes = GetFileAttributesW(wide_file.c_str());
 	return (attributes != INVALID_FILE_ATTRIBUTES && !(attributes & FILE_ATTRIBUTE_DIRECTORY));
 }
 

          
M core/core/io/file_id.h +1 -1
@@ 17,7 17,7 @@ namespace core
 /// Get the file id from a path. This can be used to determine if
 /// two files are the exact same.
 /// @return False if the file can't be opened.
-bool file_id(const std::wstring &file, FileId &id);
+bool file_id(const std::string &file, FileId &id);
 
 /// @}
 

          
M core/core/io/file_id_linux.cpp +2 -3
@@ 3,7 3,6 @@ 
 #if defined(__linux) || defined(__APPLE__)
 
 #include <core/io/file_id.h>
-#include <core/strings/utf8.h>
 #include <cstring>
 #include <sys/stat.h>
 #include <unistd.h>

          
@@ 11,11 10,11 @@ 
 namespace core
 {
 
-bool file_id(const std::wstring &file, FileId &id)
+bool file_id(const std::string &file, FileId &id)
 {
 	struct stat s;
 	memset(&s, 0, sizeof(s));
-	int success = stat(convert_wide_to_utf8(file).c_str(), &s);
+	int success = stat(file.c_str(), &s);
 	if (success != 0)
 		return false;
 

          
M core/core/io/file_id_win.cpp +3 -2
@@ 3,13 3,14 @@ 
 #if defined(_WIN32)
 
 #include <core/io/file_id.h>
+#include <core/strings/utf8.h>
 
 namespace core
 {
 
-bool file_id(const std::wstring &file, FileId &id)
+bool file_id(const std::string &file, FileId &id)
 {
-	HANDLE h = CreateFileW(file.c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+	HANDLE h = CreateFileW(utf8_to_wide(file).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
 	if (h == INVALID_HANDLE_VALUE)
 		return false;
 	BY_HANDLE_FILE_INFORMATION hfi;

          
M core/core/io/file_writer.cpp +12 -11
@@ 2,27 2,28 @@ 
 
 #include <core/exceptions/file_exception.h>
 #include <core/io/file_writer.h>
+#include <core/strings/utf8.h>
 
 namespace core {
 
-void FileWriter::open(const std::wstring &filename)
+void FileWriter::open(const std::string &filename)
 {
 	#if defined(_MSC_VER)
-		_file.open(filename, std::ios::out | std::ios::trunc | std::ios::binary);
+		std::wstring wide_filename;
+		try {
+			wide_filename = convert_utf8_to_wide(filename);
+		} catch (Exception &e) {
+			throw FileException("Path cannot be converted to wide byte format: " + filename);
+		}
+		_file.open(wide_filename, std::ios::out | std::ios::trunc | std::ios::binary);
 	#elif defined(__GNUC__)
-		char name_buffer[1024];
-		size_t result = wcstombs(name_buffer, filename.c_str(), 1024);
-		if (result == sizeof(name_buffer))
-			throw FileException(L"Too long path: " + filename);
-		if (result == static_cast<size_t>(-1))
-			throw FileException(L"Path cannot be converted to utf8: " + filename);
-		_file.open(name_buffer, std::ios::out | std::ios::trunc | std::ios::binary);
+		_file.open(filename, std::ios::out | std::ios::trunc | std::ios::binary);
 	#else
 		#error "Platform not supported"
 	#endif
 
 	if (!_file.is_open())
-		throw FileException(L"Failed to open " + filename);
+		throw FileException("Failed to open " + filename);
 }
 
 void FileWriter::write(const uint8_t *data, uint32_t size)

          
@@ 32,7 33,7 @@ void FileWriter::write(const uint8_t *da
 
 	_file.write(reinterpret_cast<const char *>(data), size);
 	if (_file.fail())
-		throw FileException(L"Error when writing file");
+		throw FileException("Error when writing file");
 }
 
 } // namespace core

          
M core/core/io/file_writer.h +1 -1
@@ 11,7 11,7 @@ namespace core {
 class FileWriter
 {
 public:
-	void open(const std::wstring &filename);
+	void open(const std::string &filename);
 	void write(const uint8_t *data, uint32_t size);
 
 private:

          
M core/core/io/text_reader.cpp +10 -16
@@ 10,26 10,26 @@ 
 namespace core
 {
 
-std::string load_char_file(const std::wstring &filename)
+std::string load_file(const std::string &filename)
 {
 	// load the file contents
 	std::ifstream file;
 	// open file and place read offset at end to measure size
 	#if defined(_MSC_VER)
-		file.open(filename, std::ios::in | std::ios::ate | std::ios::binary);
+		std::wstring wide_filename;
+		try {
+			wide_filename = convert_utf8_to_wide(filename);
+		} catch (Exception &e) {
+			throw FileException("Path cannot be converted to wide byte format: " + filename);
+		}
+		file.open(wide_filename, std::ios::in | std::ios::ate | std::ios::binary);
 	#elif defined(__GNUC__)
-		char name_buffer[1024];
-		size_t result = wcstombs(name_buffer, filename.c_str(), 1024);
-		if (result == sizeof(name_buffer))
-			throw FileException(L"Too long path: " + filename);
-		if (result == static_cast<size_t>(-1))
-			throw FileException(L"Path cannot be converted to utf8: " + filename);
-		file.open(name_buffer, std::ios::in | std::ios::ate | std::ios::binary);
+		file.open(filename, std::ios::in | std::ios::ate | std::ios::binary);
 	#else
 		#error "Platform not supported"
 	#endif
 	if (!file.is_open())
-		throw FileException(L"Failed to open " + filename);
+		throw FileException("Failed to open " + filename);
 
 	// get file size
 	uint64_t size = static_cast<uint64_t>(file.tellg());

          
@@ 44,10 44,4 @@ std::string load_char_file(const std::ws
 	return std::string(data.get());
 }
 
-std::wstring load_file(const std::wstring &filename)
-{
-	auto utf8_data = load_char_file(filename);
-	return convert_utf8_to_wide(utf8_data);
-}
-
 } // namespace core

          
M core/core/io/text_reader.h +2 -2
@@ 6,8 6,8 @@ namespace core
 /// @addtogroup io
 /// @{
 
-/// Read an utf8 encoded file and return the contents as a wide character array.
-std::wstring load_file(const std::wstring &filename);
+/// Read a UTF-8 encoded file and return its contents as a UTF-8 encoded string.
+std::string load_file(const std::string &filename);
 
 /// @}
 

          
A => core/core/math/sign.h +34 -0
@@ 0,0 1,34 @@ 
+#pragma once
+
+#include <assert.h>
+#include <limits>
+#include <type_traits>
+
+namespace core
+{
+
+/// Converts an unsigned integer to its corresponding signed integer type.
+/// Instantiation fails to compile when T is already signed, since the cast would do nothing.
+/// In builds with assertions enabled, the value is checked to fit in the signed type.
+/// @param t The unsigned value to convert.
+/// @return The value of @a t as the corresponding signed type.
+template<typename T>
+typename std::make_signed<T>::type sign_cast(T t)
+{
+	using to_type = typename std::make_signed<T>::type;
+	static_assert(!std::is_same<T, to_type>::value, "cast is ineffective and can be removed");
+
+	// range check; the limit is converted to T so both operands have the same signedness
+	assert(t <= static_cast<T>(std::numeric_limits<to_type>::max()));
+
+	return static_cast<to_type>(t);
+}
+
+/// Converts a signed integer to its corresponding unsigned integer type.
+/// Instantiation fails to compile when T is already unsigned, since the cast would do nothing.
+/// In builds with assertions enabled, the value is checked to be non-negative.
+/// @param t The signed value to convert.
+/// @return The value of @a t as the corresponding unsigned type.
+template<typename T>
+typename std::make_unsigned<T>::type unsign_cast(T t)
+{
+	using unsigned_type = typename std::make_unsigned<T>::type;
+	static_assert(!std::is_same<T, unsigned_type>::value, "cast is ineffective and can be removed");
+
+	// a negative value cannot be represented by the unsigned type
+	assert(!(t < 0));
+
+	return static_cast<unsigned_type>(t);
+}
+
+}

          
M core/core/strings/murmur_hash.h +9 -31
@@ 1,5 1,6 @@ 
 #pragma once
 
+#include <cstring>
 #include <string>
 
 namespace core

          
@@ 11,53 12,30 @@ namespace core
 uint64_t murmur_hash3_x64_64(const void *key, const int len, const uint64_t seed = 0);
 
 /// Hash a string with optional seed.
-/// @param len Length of string in chars.
-inline uint64_t murmur_hash3_string_x64_64(const wchar_t *str, const int len, const uint64_t seed = 0)
+inline uint64_t murmur_hash3_string_x64_64(const std::string &str, const uint64_t seed = 0)
 {
-	size_t len_size = static_cast<size_t>(len);
-	static_assert(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4, "unsupported char size");
-
-	if (sizeof(wchar_t) == 2) {
-		return murmur_hash3_x64_64(str, static_cast<int>(len_size * sizeof(wchar_t)), seed);
-	} else if (sizeof(wchar_t) == 4) {
-		// To make hashes the same between 4 byte char compilers and 2 byte char compilers,
-		// a two byte variant of the string is constructed on the fly. Not perfect for strings
-		// with lots of strange characters but enough.
-
-		std::vector<uint16_t> wide_str;
-		wide_str.resize(len_size + 1); // add one to be sure to be able to get a pointer
-		for(size_t i = 0; i < len_size; ++i)
-			wide_str[i] = static_cast<uint16_t>(str[i]);
-
-		return murmur_hash3_x64_64(&wide_str[0], static_cast<int>(len_size * sizeof(uint16_t)), seed);
-	}
+	return murmur_hash3_x64_64(str.data(), static_cast<int>(str.size()), seed);
 }
 
 /// Hash a string with optional seed.
-inline uint64_t murmur_hash3_string_x64_64(const std::wstring &str, const uint64_t seed = 0)
+inline uint64_t murmur_hash3_string_x64_64(const std::string_view &str, const uint64_t seed = 0)
 {
-	return murmur_hash3_string_x64_64(str.c_str(), static_cast<int>(str.size()), seed);
-}
-
-/// Hash a string with optional seed.
-inline uint64_t murmur_hash3_string_x64_64(const std::wstring_view &str, const uint64_t seed = 0)
-{
-	return murmur_hash3_string_x64_64(str.data(), static_cast<int>(str.size()), seed);
+	return murmur_hash3_x64_64(str.data(), static_cast<int>(str.size()), seed);
 }
 
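 /// Hash a static string without specifying its size.
 /// All N array elements are hashed, which for a string literal includes the terminating null character.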
 template<int N>
-uint64_t murmur_hash3_string_x64_64(const wchar_t (&str)[N], const uint64_t seed = 0)
+uint64_t murmur_hash3_string_x64_64(const char (&str)[N], const uint64_t seed = 0)
 {
-	return murmur_hash3_string_x64_64(str, N, seed);
+	return murmur_hash3_x64_64(str, N, seed);
 }
 
 /// Return the hash constant that was sent in.
 /// In a debug build, the hash constant is verified against the string.
-inline uint64_t hash_constant(uint64_t hash, const wchar_t *str)
+inline uint64_t hash_constant(uint64_t hash, const char *str)
 {
 	MARK_USE(str);
-	assert(murmur_hash3_string_x64_64(str, static_cast<int>(wcslen(str))) == hash);
+	assert(murmur_hash3_x64_64(str, static_cast<int>(std::strlen(str))) == hash);
 	return hash;
 }
 

          
M core/core/strings/string_helpers.cpp +3 -3
@@ 6,7 6,7 @@ 
 namespace core
 {
 
-std::wstring to_hex_string(uint32_t value)
+std::string to_hex_string(uint32_t value)
 {
 	// select length of string
 	int length = 2;

          
@@ 16,8 16,8 @@ std::wstring to_hex_string(uint32_t valu
 	length += 4;
 
 	// convert number to hex string
-	std::wstringstream ss;
-	ss << std::hex << std::setw(length) << std::setfill(L'0') << value;
+	std::stringstream ss;
+	ss << std::hex << std::setw(length) << std::setfill('0') << value;
 	return ss.str();
 }
 

          
M core/core/strings/string_helpers.h +1 -1
@@ 6,7 6,7 @@ namespace core
 /// @addtogroup strings
 /// @{
 
-std::wstring to_hex_string(uint32_t value);
+std::string to_hex_string(uint32_t value);
 
 /// @}
 

          
M core/core/strings/utf8.cpp +427 -65
@@ 2,89 2,451 @@ 
 
 #include <core/exceptions/exception.h>
 #include <core/strings/utf8.h>
-#if defined(_MSC_VER)
-	#include <codecvt>
-#endif
 #include <cstring>
 #include <locale>
+#include <sstream>
 
 namespace core
 {
 
-std::string convert_wide_to_utf8(const std::wstring &wide)
+/// Converts a wide string to UTF-8, one wide character at a time.
+/// @param wide The wide characters to encode.
+/// @return The bytes written by the per-character wide_to_utf8 overload.
+/// @throws Exception if the per-character overload reports failure for any character.
+std::string wide_to_utf8(std::wstring_view wide)
+{
+	std::stringstream encoded;
+	for (size_t i = 0; i < wide.size(); ++i) {
+		wchar_t current = wide[i];
+		const bool ok = wide_to_utf8(current, encoded);
+		if (ok) {
+			continue;
+		}
+		throw Exception("Wide to UTF8 string conversion failed.");
+	}
+	return encoded.str();
+}
+
+/// Converts a UTF-8 encoded string to a wide string.
+/// @param utf8 The UTF-8 encoded bytes to decode.
+/// @return The decoded characters, one wchar_t per decoded UTF-8 sequence.
+/// @throws Exception if the three argument utf8_to_wide overload fails to decode a sequence.
+std::wstring utf8_to_wide(std::string_view utf8)
 {
-	// early out for empty string because it isn't supported by the conversion code
-	if (wide.size() == 0)
-		return std::string();
+	// Each decoded character consumes at least one byte, so the byte count is an upper bound.
+	std::wstring result;
+	result.reserve(utf8.size());
+
+	const char *source = utf8.data();
+	size_t source_size = utf8.size();
+	wchar_t target;
+	while (source_size != 0) {
+		if (!utf8_to_wide(source, source_size, target)) {
+			throw Exception("UTF8 to wide string conversion failed.");
+		}
+		result.push_back(target);
+	}
+	return result;
+}
 
-	#if defined(_MSC_VER)
-		// this may be faster than the gcc version - test and keep/delete
-		// setup converter
-		try {
-			using convert_type = std::codecvt_utf8<wchar_t>;
-			std::wstring_convert<convert_type, wchar_t> converter;
-			std::string converted_str = converter.to_bytes(wide.c_str(), wide.c_str() + wide.size());
-			return converted_str;
-		} catch (std::range_error &) {
-			throw Exception(L"Wide to utf8 string conversion failed.");
+/*
+std::u16string wide_to_utf16le(std::u32string_view wide)
+{
+	u16stringstream ss;
+	for(char32_t c : wide) {
+		if (!wide_to_utf16le(c, ss)) {
+			throw Exception("UTF16 to UTF8 string conversion failed.");
+		}
+	}
+	return ss.str();
+}
+
+std::u32string utf16le_to_wide(std::u16string_view utf16)
+{
+	u32stringstream ss;
+	const char16_t *source = utf16.data();
+	size_t source_size = utf16.size();
+	char32_t c;
+	while(source_size != 0) {
+		if (!utf16le_to_wide(source, source_size, c)) {
+			throw Exception("UTF16 to wide string conversion failed.");
 		}
-	#elif defined(__GNUC__)
+		ss.put(c);
+	}
+	return ss.str();
+}
+
+std::string utf16le_to_utf8(std::u16string_view utf16)
+{
+	std::stringstream ss;
+	const char16_t *source = utf16.data();
+	size_t source_size = utf16.size();
+	char32_t c;
+	while(source_size != 0) {
+		if (!utf16le_to_wide(source, source_size, c)) {
+			throw Exception("UTF16 to UTF8 string conversion failed.");
+		}
+		if (!wide_to_utf8(c, ss)) {
+			throw Exception("UTF16 to UTF8 string conversion failed.");
+		}
+	}
+	return ss.str();
+}
 
-		// determine size of converted string in narrow characters
-		std::mbstate_t state;
-		memset(&state, 0, sizeof(state)); // there seems to be no other way without warnings
-		const wchar_t *char_ptr = wide.c_str();
-		size_t narrow_chars = std::wcsrtombs(nullptr, &char_ptr, 0, &state);
-		if (narrow_chars == static_cast<size_t>(-1))
-			throw Exception(L"Wide to utf8 string conversion failed.");
+std::u16string utf8_to_utf16le(std::string_view utf8)
+{
+	u16stringstream ss;
+	const char *source = utf8.data();
+	size_t source_size = utf8.size();
+	char32_t target;
+	while(source_size != 0)  {
+		if (!utf8_to_wide(source, source_size, target)) {
+			throw Exception("UTF8 to UTF16 string conversion failed.");
+		}
+		if (!wide_to_utf16le(target, ss)) {
+			throw Exception("UTF8 to UTF16 string conversion failed.");
+		}
+	}
+	return ss.str();
+}
+*/
+
+/// Converts a UTF-8 encoded string to a wide string.
+/// This is an alias of utf8_to_wide(std::string_view) and forwards to it so that
+/// there is a single implementation of the decoding loop.
+/// @param utf8 The UTF-8 encoded bytes to decode.
+/// @return The decoded wide string.
+/// @throws Exception if a sequence cannot be decoded.
+std::wstring utf8_to_wstring(std::string_view utf8)
+{
+	return utf8_to_wide(utf8);
+}
+
+/// Converts a wide string to UTF-8.
+/// This is an alias of wide_to_utf8(std::wstring_view) and forwards to it. That also makes
+/// a failure report "Wide to UTF8" rather than the reversed "UTF8 to wide" direction.
+/// @param wide The wide characters to encode.
+/// @return The UTF-8 encoded string.
+/// @throws Exception if a character cannot be encoded.
+std::string wstring_to_utf8(std::wstring_view wide)
+{
+	return wide_to_utf8(wide);
+}
+
 
-		// do the conversion
-		std::string converted_str(narrow_chars + 1, L'\0');
-		char_ptr = wide.c_str();
-		std::wcsrtombs(&converted_str[0], &char_ptr, converted_str.size(), &state);
-		converted_str.pop_back();
+/// Determines the length of a UTF-8 sequence from its lead byte.
+/// Only the byte at @a ptr is examined; continuation bytes are not checked.
+/// @param ptr Points at the lead byte of a sequence.
+/// @return The sequence length in bytes (1 to 4), or 0 if the byte is not a valid lead byte.
+uint8_t utf8_size(const char *ptr)
+{
+	const uint8_t lead = static_cast<uint8_t>(*ptr);
+
+	// 0xxxxxxx: single byte
+	if ((lead >> 7) == 0b0) {
+		return 1;
+	}
+	// 110xxxxx: two bytes
+	if ((lead >> 5) == 0b110) {
+		return 2;
+	}
+	// 1110xxxx: three bytes
+	if ((lead >> 4) == 0b1110) {
+		return 3;
+	}
+	// 11110xxx: four bytes
+	if ((lead >> 3) == 0b11110) {
+		return 4;
+	}
+	// continuation byte or unsupported lead byte
+	return 0;
+}
 
-		return converted_str;
-	#else
-		#error "Compiler not supported"
-	#endif
+/// Advances past one UTF-8 encoded character.
+/// @param ptr Points at the first byte of the character; moved past it on success.
+/// @param size Number of bytes available at @a ptr; reduced by the character size on success.
+/// @return False if there is no data, the lead byte is invalid, the sequence is cut short or
+/// a continuation byte is malformed. @a ptr and @a size are left untouched in that case.
+bool step_utf8(const char *&ptr, size_t &size)
+{
+	// Never read the lead byte from an empty range.
+	if (size == 0) {
+		return false;
+	}
+	uint8_t char_size = utf8_size(ptr);
+	if (char_size == 0) {
+		return false;
+	}
+	// The whole sequence must be available before the continuation bytes are read.
+	if (char_size > size) {
+		return false;
+	}
+	for(uint8_t i = 1; i < char_size; ++i) {
+		// continuation bytes have the form 10xxxxxx
+		if ((ptr[i] & 0b1100'0000) != 0b1000'0000) {
+			return false;
+		}
+	}
+	ptr += char_size;
+	size -= char_size;
+	return true;
+}
+
+/// Checks that a string consists solely of sequences accepted by step_utf8.
+/// @param utf8 The bytes to check.
+/// @return True if every sequence was stepped over successfully. An empty string returns true.
+bool is_utf8(const std::string &utf8)
+{
+	size_t remaining = utf8.size();
+	for (const char *cursor = utf8.data(); remaining > 0; ) {
+		const bool stepped = step_utf8(cursor, remaining);
+		if (!stepped) {
+			return false;
+		}
+	}
+	return true;
 }
 
-std::wstring convert_utf8_to_wide(const std::string &utf8)
+/// Decodes one UTF-8 sequence into a single wide character.
+/// @param source Points at the lead byte of the sequence; moved past the sequence on success.
+/// @param source_size Number of bytes available at @a source; reduced by the sequence length on success.
+/// @param target Receives the decoded character on success.
+/// @return False if there is no data, the lead byte is invalid, the sequence is cut short,
+/// a continuation byte is malformed or the decoded value does not fit in a wchar_t.
+/// @a source, @a source_size and @a target are left untouched in that case.
+bool utf8_to_wide(const char *&source, size_t &source_size, wchar_t &target)
 {
-	// early out for empty string because it isn't supported by the conversion code
-	if (utf8.size() == 0)
-		return std::wstring();
+	if (source_size == 0) {
+		return false;
+	}
+
+	// The lead byte gives the sequence length and the most significant payload bits.
+	const uint8_t lead = static_cast<uint8_t>(*source);
+	size_t length;
+	uint32_t wc;
+	if ((lead & 0b1000'0000) == 0) {
+		length = 1;
+		wc = lead & 0b0111'1111U;
+	} else if ((lead & 0b1110'0000) == 0b1100'0000) {
+		length = 2;
+		wc = lead & 0b0001'1111U;
+	} else if ((lead & 0b1111'0000) == 0b1110'0000) {
+		length = 3;
+		wc = lead & 0b0000'1111U;
+	} else if ((lead & 0b1111'1000) == 0b1111'0000) {
+		length = 4;
+		wc = lead & 0b0000'0111U;
+	} else {
+		return false;
+	}
+
+	// The whole sequence must be available before the continuation bytes are read.
+	if (source_size < length) {
+		return false;
+	}
+
+	// Each continuation byte has the form 10xxxxxx and contributes six payload bits.
+	for (size_t i = 1; i < length; ++i) {
+		const uint8_t continuation = static_cast<uint8_t>(source[i]);
+		if ((continuation & 0b1100'0000) != 0b1000'0000) {
+			return false;
+		}
+		wc = (wc << 6) | (continuation & 0b0011'1111U);
+	}
+
+	// Where wchar_t is narrower than the decoded value, the cast would silently drop
+	// the high bits and yield a different character. Report that as a failure instead.
+	const wchar_t converted = static_cast<wchar_t>(wc);
+	if (static_cast<uint32_t>(converted) != wc) {
+		return false;
+	}
+
+	target = converted;
+	source += length;
+	source_size -= length;
+	return true;
+}
 
-	#if defined(_MSC_VER)
-		// this may be faster than the gcc version - test and keep/delete
-		// setup converter
-		try {
-			using convert_type = std::codecvt_utf8<wchar_t>;
-			std::wstring_convert<convert_type, wchar_t> converter;
-			std::wstring converted_str = converter.from_bytes(utf8.c_str(), utf8.c_str() + utf8.size());
-			return converted_str;
-		} catch (std::range_error &) {
-			throw Exception(L"Utf8 to wide string conversion failed. ");
+/*
+bool utf16le_to_wide(const char16_t *&source, size_t &source_size, char32_t &target)
+{
+	if (source_size < 1) {
+		return false;
+	}
+	
+	uint16_t c1 = static_cast<uint16_t>(source[0]);
+	if ((c1 & 0b1111'1100'0000'0000) == 0b1101'1000'0000'0000) {
+		// start of multiword
+		if (source_size < 2) {
+			return false;
+		}
+		uint16_t c2 = static_cast<uint16_t>(source[1]);
+		if ((c2 & 0b1111'1100'0000'0000) != 0b1101'1100'0000'0000) {
+			return false;
 		}
-	#elif defined(__GNUC__)
-		// determine size of converted string in wide characters
-		std::mbstate_t state;
-		memset(&state, 0, sizeof(state)); // there seems to be no other way without warnings
-		const char *char_ptr = utf8.c_str();
-		size_t wide_chars = std::mbsrtowcs(nullptr, &char_ptr, 0, &state);
-		if (wide_chars == static_cast<size_t>(-1))
-			throw Exception(L"Utf8 to wide string conversion failed.");
+		
+		target = 0x10000U + (((c2 & 0b0000'0011'1111'1111U) << 10) | (c1 & 0b0000'0011'1111'1111U));
+		source += 2;
+		source_size -= 2;
+		return true;
+		
+	} else if ((c1 & 0b1111'1100'0000'0000) == 0b1101'1100'0000'0000) {
+		// end of multiword
+		return false;
+
+	} else {
+		// single word
+		target = c1;
+		source += 1;
+		source_size -= 1;
+		return true;
+	}
+}
+*/
+
+bool wide_to_utf8(wchar_t source, std::stringstream &target)
+{
+	const uint32_t code_point = static_cast<uint32_t>(source);
+
+	// the magnitude of the value selects the lead byte marker and the number of continuation bytes
+	uint32_t lead_marker;
+	int continuation_count;
+	if (code_point < 0x80U) {
+		lead_marker = 0;
+		continuation_count = 0;
+	} else if (code_point < 0x800U) {
+		lead_marker = 0b1100'0000;
+		continuation_count = 1;
+	} else if (code_point < 0x10000U) {
+		lead_marker = 0b1110'0000;
+		continuation_count = 2;
+	} else if (code_point < 0x200000U) {
+		lead_marker = 0b1111'0000;
+		continuation_count = 3;
+	} else {
+		// values that need more than 21 bits cannot be encoded
+		return false;
+	}
+
+	// the range checks above guarantee that the shifted value fits in the payload bits of the lead byte
+	target.put(static_cast<char>(lead_marker | (code_point >> (6 * continuation_count))));
+	// remaining payload goes out six bits at a time, most significant group first
+	for(int shift = 6 * (continuation_count - 1); shift >= 0; shift -= 6) {
+		target.put(static_cast<char>(0b1000'0000 | ((code_point >> shift) & 0b0011'1111)));
+	}
+	return true;
+}
+
+bool wide_to_utf8(wchar_t source, std::vector<char> &target)
+{
+	const uint32_t value = static_cast<uint32_t>(source);
+	// anything that needs more than 21 bits has no encoding
+	if (value >= (1U << 21)) {
+		return false;
+	}
+
+	// number of bytes the encoding takes
+	const int length = value < (1U << 7) ? 1 : value < (1U << 11) ? 2 : value < (1U << 16) ? 3 : 4;
+	// marker bits of the first byte, indexed by length (index 0 is unused)
+	static const uint8_t lead_markers[] = { 0, 0b0000'0000, 0b1100'0000, 0b1110'0000, 0b1111'0000 };
+
+	// first byte carries the most significant bits, the rest follow in groups of six
+	int shift = 6 * (length - 1);
+	target.push_back(static_cast<char>(lead_markers[length] | (value >> shift)));
+	while(shift > 0) {
+		shift -= 6;
+		target.push_back(static_cast<char>(0b1000'0000 | ((value >> shift) & 0b0011'1111)));
+	}
+	return true;
+}
 
-		// do the conversion
-		std::wstring converted_str(wide_chars + 1, L'\0');
-		char_ptr = utf8.c_str();
-		std::mbsrtowcs(&converted_str[0], &char_ptr, converted_str.size(), &state);
-		converted_str.pop_back();
-		return converted_str;
-	#else
-		#error "Compiler not supported"
-	#endif
+bool wide_to_utf8(wchar_t source, std::string &target)
+{
+	const uint32_t cp = static_cast<uint32_t>(source);
+	// builds a continuation byte (10xxxxxx) from the six payload bits starting at bit 'shift'
+	const auto continuation = [cp](int shift) {
+		return static_cast<char>(0b1000'0000 | ((cp >> shift) & 0b0011'1111));
+	};
+
+	// up to 7 bits: the value is its own encoding
+	if (cp < 0x80U) {
+		target.push_back(static_cast<char>(cp));
+		return true;
+	}
+	// up to 11 bits: two bytes
+	if (cp < 0x800U) {
+		target.push_back(static_cast<char>(0b1100'0000 | ((cp >> 6) & 0b0001'1111)));
+		target.push_back(continuation(0));
+		return true;
+	}
+	// up to 16 bits: three bytes
+	if (cp < 0x10000U) {
+		target.push_back(static_cast<char>(0b1110'0000 | ((cp >> 12) & 0b0000'1111)));
+		target.push_back(continuation(6));
+		target.push_back(continuation(0));
+		return true;
+	}
+	// up to 21 bits: four bytes
+	if (cp < 0x200000U) {
+		target.push_back(static_cast<char>(0b1111'0000 | ((cp >> 18) & 0b0000'0111)));
+		target.push_back(continuation(12));
+		target.push_back(continuation(6));
+		target.push_back(continuation(0));
+		return true;
+	}
+	// too large to encode, target is left untouched
+	return false;
+}
+
+/*
+bool wide_to_utf16le(char32_t source, u16stringstream &target)
+{
+	uint32_t usource = static_cast<uint32_t>(source);
+	if (usource < 0x10000) {
+		if ((usource & 0b1111'1000'0000'0000) == 0b1101'1000'0000'0000) {
+			// invalid code point
+			return false;
+		}
+		target.put(static_cast<char16_t>(usource & 0xffff));
+		return true;
+	} else {
+		usource -= 0x10000;
+		target.put(static_cast<char16_t>(0b1101'1000'0000'0000 | ((usource >> 10) & 0b0000'0011'1111'1111)));
+		target.put(static_cast<char16_t>(0b1101'1100'0000'0000 | (usource & 0b0000'0011'1111'1111)));
+		return true;
+	}
+}
+
+
+bool wide_to_utf16le(char32_t source, std::vector<char16_t> &target)
+{
+	uint32_t usource = static_cast<uint32_t>(source);
+	if (usource < 0x10000) {
+		if ((usource & 0b1111'1000'0000'0000) == 0b1101'1000'0000'0000) {
+			// invalid code point
+			return false;
+		}
+		target.push_back(static_cast<char16_t>(usource & 0xffff));
+		return true;
+	} else {
+		usource -= 0x10000;
+		target.push_back(static_cast<char16_t>(0b1101'1000'0000'0000 | ((usource >> 10) & 0b0000'0011'1111'1111)));
+		target.push_back(static_cast<char16_t>(0b1101'1100'0000'0000 | (usource & 0b0000'0011'1111'1111)));
+		return true;
+	}
+}
+
+
+bool wide_to_utf16le(char32_t source, std::u16string &target)
+{
+	uint32_t usource = static_cast<uint32_t>(source);
+	if (usource < 0x10000) {
+		if ((usource & 0b1111'1000'0000'0000) == 0b1101'1000'0000'0000) {
+			// invalid code point
+			return false;
+		}
+		target.push_back(static_cast<char16_t>(usource & 0xffff));
+		return true;
+	} else {
+		usource -= 0x10000;
+		target.push_back(static_cast<char16_t>(0b1101'1000'0000'0000 | ((usource >> 10) & 0b0000'0011'1111'1111)));
+		target.push_back(static_cast<char16_t>(0b1101'1100'0000'0000 | (usource & 0b0000'0011'1111'1111)));
+		return true;
+	}
+}
+*/
+
+size_t num_utf8_characters(const std::string_view &string)
+{
+	const char *cursor = string.data();
+	size_t bytes_left = string.size();
+	size_t count = 0;
+
+	// keep counting until the data runs out or a character that can't be stepped over stops the scan
+	while(bytes_left > 0 && step_utf8(cursor, bytes_left)) {
+		++count;
+	}
+
+	return count;
+}
+
+/// Find the start of the n'th (zero based) utf8 character in @a string.
+/// @return Pointer to the first byte of the character, or nullptr if an invalid character
+///         is hit on the way or the string has @a n characters or fewer.
+const char *utf8_offset(const std::string_view &string, size_t n)
+{
+	const char *ptr = string.data();
+	size_t source_size = string.size();
+	for(size_t i = 0; i < n; ++i) {
+		// stop before stepping when nothing is left; a string_view gives no guarantee
+		// that the byte at data() + size() is readable
+		if (source_size == 0 || !step_utf8(ptr, source_size)) {
+			return nullptr;
+		}
+	}
+	if (source_size == 0) {
+		// at exact end of string
+		return nullptr;
+	}
+	return ptr;
 }
 
 } // namespace core

          
M core/core/strings/utf8.h +37 -2
@@ 6,9 6,44 @@ namespace core
 /// @addtogroup strings
 /// @{
 
-std::string convert_wide_to_utf8(const std::wstring &wide);
+/// Convert a whole wide string to an utf8 string.
+/// NOTE(review): the string conversion visible in the .cpp diff is defined under the name
+/// wstring_to_utf8 - confirm that a definition matching this declaration exists.
+std::string wide_to_utf8(std::wstring_view wide);
+/// Convert a whole utf8 string to a wide string.
+/// NOTE(review): presumably fails with an exception on invalid utf8 - confirm against the implementation.
+std::wstring utf8_to_wide(std::string_view utf8);
+
+/// @return Size of the utf8 character pointed to by @a ptr, or 0 if the character is invalid.
+uint8_t utf8_size(const char *ptr);
+
+/// @param ptr Pointer to utf8 string data. This will be modified to point to next character if the function returns true.
+/// @param size Size in bytes of string data. This will be reduced by the size of the character if the function returns true.
+/// @return True if there is a valid utf8 character at @a ptr. If size is zero it returns false as well.
+bool step_utf8(const char *&ptr, size_t &size);
+
+/// @return True if the string only has valid utf8 characters.
+bool is_utf8(const std::string &utf8);
 
-std::wstring convert_utf8_to_wide(const std::string &utf8);
+/// Convert a single multi byte code point to a wide character.
+/// @param source Pointer to multibyte character. This will be updated to point to next character if the function returns true.
+/// @param source_size Size of source data. This will be updated to size left after stepping past the next character if the function returns true.
+/// @return True if successful.
+bool utf8_to_wide(const char *&source, size_t &source_size, wchar_t &target);
+
+/// Write one wide character into an utf8 string stream.
+/// @return True if the source character was in the valid range.
+bool wide_to_utf8(wchar_t source, std::stringstream &target);
+
+/// Write one wide character into an utf8 string buffer.
+/// @return True if the source character was in the valid range.
+bool wide_to_utf8(wchar_t source, std::vector<char> &target);
+
+/// Write one wide character into an utf8 string.
+/// @return True if the source character was in the valid range.
+bool wide_to_utf8(wchar_t source, std::string &target);
+
+/// Counts the number of characters in the utf8 string. Stops at invalid characters.
+/// @return Number of valid utf8 characters from the beginning of the string.
+size_t num_utf8_characters(const std::string_view &string);
+
+/// @return A pointer to the n'th utf8 character of @a string, or nullptr if there's no n'th character.
+const char *utf8_offset(const std::string_view &string, size_t n);
 
 /// @}
 

          
M hasher/main.cpp +5 -4
@@ 4,13 4,14 @@ 
 #include <core/strings/utf8.h>
 #include <iostream>
 #include <iomanip>
+#include <string_view>
 
 #if defined(_MSC_VER)
-	int wmain(int argc, wchar_t *argv[])
+	// Windows entry point: prints every command line argument followed by its 64 bit murmur hash.
+	// The arguments have to stay wchar_t strings. That is what std::wcout prints as text and what
+	// converts implicitly to the std::wstring_view taken by core::wide_to_utf8; char16_t does neither.
+	int wmain(int argc, wchar_t *argv[])
 	{
 		std::wcout << std::hex << std::showbase;
 		for (int i = 1; i < argc; ++i)
-			std::wcout << argv[i] << L":" << core::murmur_hash3_string_x64_64(std::wstring(argv[i])) << L'\n';
+			std::wcout << argv[i] << L':' << core::murmur_hash3_string_x64_64(core::wide_to_utf8(argv[i])) << L'\n';
 
 		return 0;
 	}

          
@@ 19,10 20,10 @@ 
 	{
 		std::cout << std::hex << std::showbase;
 		for (int i = 1; i < argc; ++i)
-			std::cout << argv[i] << ":" << core::murmur_hash3_string_x64_64(core::convert_utf8_to_wide(argv[i])) << '\n';
+			std::cout << argv[i] << ':' << core::murmur_hash3_string_x64_64(std::string_view(argv[i])) << '\n';
 
 		return 0;
 	}
 #else
-	#error "Platform not supported"
+	#error "Compiler not supported"
 #endif

          
A => jasm-6502/convert_6502_keyword_case.py +106 -0
@@ 0,0 1,106 @@ 
+# Some old 6502 code uses the "screaming" (upper case) syntax, and jAsm doesn't accept upper case keywords.
+# This script converts the keywords in all .asm files in a directory to lower case to help migrate the
+# source code.
+
+import os
+import argparse
+import sys
+import re
+
+def convert_file(file):
+	"""Rewrite one assembler source file in place with lower case 6502 keywords.
+
+	Every whole-word occurrence of an upper case 6502 mnemonic, and of the register
+	names A, X and Y, is replaced by its lower case form. The match is purely textual,
+	so occurrences inside comments and strings are converted as well.
+
+	file -- path of the file to convert. It is read and written with the default text encoding.
+	"""
+	keywords = [
+		"ADC", "AND", "ASL", "BCC", "BCS", "BEQ", "BIT", "BMI",
+		"BNE", "BPL", "BRK", "BVC", "BVS", "CLC", "CLD", "CLI",
+		"CLV", "CMP", "CPX", "CPY", "DEC", "DEX", "DEY", "EOR",
+		"INC", "INX", "INY", "JMP", "JSR", "LDA", "LDX", "LDY",
+		"LSR", "NOP", "ORA", "PHA", "PHP", "PLA", "PLP", "ROL",
+		"ROR", "RTI", "RTS", "SBC", "SEC", "SED", "SEI", "STA",
+		"STX", "STY", "TAX", "TAY", "TSX", "TXA", "TXS", "TYA",
+		"A", "X", "Y"
+	]
+	# One alternation handles all keywords in a single pass over the text. The word
+	# boundaries make sure that only complete words match, e.g. not the A in LDA.
+	keyword_pattern = re.compile(r"\b(?:" + "|".join(keywords) + r")\b")
+
+	# newline="" turns off newline translation so the file keeps the line endings it
+	# had, no matter which platform the script runs on.
+	with open(file, "r", newline="") as f:
+		contents = f.read()
+
+	contents = keyword_pattern.sub(lambda match: match.group(0).lower(), contents)
+
+	with open(file, "w", newline="") as f:
+		f.write(contents)
+
+
+def convert_dir(dir):
+	"""Convert every entry directly inside dir whose name ends with ".asm"."""
+	for entry in os.listdir(dir):
+		if not entry.endswith(".asm"):
+			continue
+		convert_file(os.path.join(dir, entry))
+
+
+def args_parser():
+	"""Build the command line parser: a single positional argument, the directory to convert."""
+	description = """\
+This tool converts upper case 6502-assembler keywords to lower case for use in jAsm.
+The source directory will be scanned for .asm files and all of them will have
+upper case keywords replaced by lower case ones."""
+
+	cli = argparse.ArgumentParser(description=description)
+	cli.add_argument('dir', action="store", type=str, help='Directory to convert.')
+	return cli
+
+if __name__ == "__main__":
+	# parse the command line first; argparse exits on its own for bad arguments
+	cli_args = args_parser().parse_args()
+	try:
+		convert_dir(cli_args.dir)
+	except Exception as error:
+		# report the problem and signal failure to the caller through the exit code
+		print(error)
+		sys.exit(10)
+

          
M jasm/assembling/assembler.cpp +7 -6
@@ 6,19 6,20 @@ 
 namespace jasm
 {
 
-std::vector<Section> assemble(bool multiple_output_files, bool multi_bank_mode
+std::vector<Section> assemble(bool multiple_output_files, bool multi_bank_mode, bool pseudo_instructions
 							, const std::vector<TokenChain> &syntax, StringRepository &strings
-							, const HashArrayRepository &hash_arrays, const std::vector<std::wstring> &used_files
-							, const std::vector<std::pair<std::wstring, bool>> &predefined_booleans
-							, const std::vector<std::pair<std::wstring, int32_t>> &predefined_integers
-							, const std::vector<std::pair<std::wstring, std::wstring>> &predefined_strings
+							, const HashArrayRepository &hash_arrays, const std::vector<std::string> &used_files
+							, const std::vector<std::pair<std::string, bool>> &predefined_booleans
+							, const std::vector<std::pair<std::string, int32_t>> &predefined_integers
+							, const std::vector<std::pair<std::string, std::string>> &predefined_strings
 							, DataReader &data_reader, int32_t max_errors
-							, const std::wstring &symbol_dump_file, const std::wstring &vice_dump_file, const std::wstring &gba_dump_file)
+							, const std::string &symbol_dump_file, const std::string &vice_dump_file, const std::string &gba_dump_file)
 {
 	std::vector<Section> output;
 	Assembler assembler(
 		multiple_output_files
 		, multi_bank_mode
+		, pseudo_instructions
 		, syntax
 		, strings
 		, hash_arrays

          
M jasm/assembling/assembler.h +6 -6
@@ 16,14 16,14 @@ class HashArrayRepository;
 
 /// Assemble the provided syntax chain.
 /// @return A vector of sections that provides output data. No section is empty and there are only code sections.
-std::vector<Section> assemble(bool multiple_output_files, bool multi_bank_mode
+std::vector<Section> assemble(bool multiple_output_files, bool multi_bank_mode, bool pseudo_instructions
 							, const std::vector<TokenChain> &syntax, StringRepository &strings
-							, const HashArrayRepository &hash_arrays, const std::vector<std::wstring> &used_files
-							, const std::vector<std::pair<std::wstring, bool>> &predefined_booleans
-							, const std::vector<std::pair<std::wstring, int32_t>> &predefined_integers
-							, const std::vector<std::pair<std::wstring, std::wstring>> &predefined_strings
+							, const HashArrayRepository &hash_arrays, const std::vector<std::string> &used_files
+							, const std::vector<std::pair<std::string, bool>> &predefined_booleans
+							, const std::vector<std::pair<std::string, int32_t>> &predefined_integers
+							, const std::vector<std::pair<std::string, std::string>> &predefined_strings
 							, DataReader &data_reader, int32_t max_errors
-							, const std::wstring &symbol_dump_file, const std::wstring &vice_dump_file, const std::wstring &gba_dump_file);
+							, const std::string &symbol_dump_file, const std::string &vice_dump_file, const std::string &gba_dump_file);
 
 /// @}
 

          
M jasm/assembling/assembler_impl/assembler_impl.cpp +164 -149
@@ 16,17 16,18 @@ namespace jasm {
 
 using namespace core;
 
-Assembler::Assembler(bool multiple_output_files, bool multi_bank_mode
+Assembler::Assembler(bool multiple_output_files, bool multi_bank_mode, bool pseudo_instructions
 				   , const std::vector<TokenChain> &syntax, StringRepository &strings
-				   , const HashArrayRepository &hash_arrays, const std::vector<std::wstring> &used_files
-				   , const std::vector<std::pair<std::wstring, bool>> &predefined_booleans
-				   , const std::vector<std::pair<std::wstring, int32_t>> &predefined_integers
-				   , const std::vector<std::pair<std::wstring, std::wstring>> &predefined_strings
+				   , const HashArrayRepository &hash_arrays, const std::vector<std::string> &used_files
+				   , const std::vector<std::pair<std::string, bool>> &predefined_booleans
+				   , const std::vector<std::pair<std::string, int32_t>> &predefined_integers
+				   , const std::vector<std::pair<std::string, std::string>> &predefined_strings
 				   , DataReader &data_reader, int32_t max_errors
-				   , const std::wstring &symbol_dump_file, const std::wstring &vice_dump_file, const std::wstring &gba_dump_file
+				   , const std::string &symbol_dump_file, const std::string &vice_dump_file, const std::string &gba_dump_file
 				   , std::vector<Section> &output)
 	: _multiple_output_files(multiple_output_files)
 	, _multi_bank_mode(multi_bank_mode)
+	, _pseudo_instructions(pseudo_instructions)
 	, _input(syntax)
 	, _strings(strings)
 	, _hash_arrays(hash_arrays)

          
@@ 51,7 52,7 @@ Assembler::Assembler(bool multiple_outpu
 	, _static_float_type(0)
 	, _static_string_type(0)
 	, _static_string_reference_type(0)
-	, _static_range_type(0)
+	, _static_subroutine_type(0)
 	, _static_value_reference_type(0)
 	, _static_byte_offset_type(0)
 	, _static_word_offset_type(0)

          
@@ 84,9 85,9 @@ Assembler::Assembler(bool multiple_outpu
 	_temp_namespace_list.reserve(64);
 
 	// add empty string for zero hash because the root namespace has hash 0
-	_strings.add(0, L"");
+	_strings.add(0, "");
 
-	_symbol_names[0] = L"";
+	_symbol_names[0] = "";
 }
 
 void Assembler::fill_type_integer_operators(TypeDescription &type)

          
@@ 207,8 208,8 @@ void Assembler::setup_fundamental_types(
 		type.operators[static_cast<uint32_t>(OperatorType::ArrayAccess)] = &Assembler::operator_string_array_access;
 		type.operators[static_cast<uint32_t>(OperatorType::Period)] = &Assembler::operator_string_period;
 		type.num_properties = num_properties;
-		type.name_hashes[static_cast<uint32_t>(StringProperties::Substring)] = hash_constant(0x548d19a49d349e3fULL, L"substring");
-		type.name_hashes[static_cast<uint32_t>(StringProperties::Length)] = hash_constant(0xf81b879ec0702403ULL, L"length");
+		type.name_hashes[static_cast<uint32_t>(StringProperties::Substring)] = hash_constant(0x906b1973c53fa0c6ULL, "substring");
+		type.name_hashes[static_cast<uint32_t>(StringProperties::Length)] = hash_constant(0xea9dd03ab3c476a3ULL, "length");
 		_static_string_type = add_type_to_type_map(type, p);
 	}
 	{

          
@@ 226,17 227,18 @@ void Assembler::setup_fundamental_types(
 		type.operators[static_cast<uint32_t>(OperatorType::ArrayAccess)] = &Assembler::operator_string_array_access;
 		type.operators[static_cast<uint32_t>(OperatorType::Period)] = &Assembler::operator_string_period;
 		type.num_properties = num_properties;
-		type.name_hashes[static_cast<uint32_t>(StringProperties::Substring)] = hash_constant(0x548d19a49d349e3fULL, L"substring");
-		type.name_hashes[static_cast<uint32_t>(StringProperties::Length)] = hash_constant(0xf81b879ec0702403ULL, L"length");
+		type.name_hashes[static_cast<uint32_t>(StringProperties::Substring)] = hash_constant(0x906b1973c53fa0c6ULL, "substring");
+		type.name_hashes[static_cast<uint32_t>(StringProperties::Length)] = hash_constant(0xea9dd03ab3c476a3ULL, "length");
 		_static_string_reference_type = add_type_to_type_map(type, p);
 	}
 	{
 		TokenReadPosition p = _current_pass.types.position();
 		uint32_t num_properties = 0;
 		TypeDescription &type = reserve_type(num_properties);
-		type.type = ValueType::RangeValue;
+		type.type = ValueType::SubroutineValue;
+		type.operators[static_cast<uint32_t>(OperatorType::Call)] = &Assembler::operator_subroutine_call;
 		fill_type_integer_operators(type);
-		_static_range_type = add_type_to_type_map(type, p);
+		_static_subroutine_type = add_type_to_type_map(type, p);
 	}
 	{
 		TokenReadPosition p = _current_pass.types.position();

          
@@ 249,11 251,15 @@ void Assembler::setup_fundamental_types(
 	}
 	{
 		TokenReadPosition p = _current_pass.types.position();
-		uint32_t num_properties = 0;
-		TypeDescription &type = reserve_type(num_properties);
+		uint32_t num_properties = static_cast<uint32_t>(WordOffsetProperties::NumProperties);
+		TypeDescriptionWithPayload &type = reserve_type(num_properties);
 		type.type = ValueType::WordOffset;
 		type.byte_size = 2;
 		fill_type_integer_operators(type);
+		type.operators[static_cast<uint32_t>(OperatorType::Period)] = &Assembler::operator_word_offset_period;
+		type.num_properties = num_properties;
+		type.name_hashes[static_cast<uint32_t>(WordOffsetProperties::Hi)] = hash_constant(0x5a2467aa43e6df96ULL, "hi");
+		type.name_hashes[static_cast<uint32_t>(WordOffsetProperties::Lo)] = hash_constant(0x678034356dc3d49aULL, "lo");
 		_static_word_offset_type = add_type_to_type_map(type, p);
 	}
 	{

          
@@ 334,14 340,14 @@ void Assembler::setup_fundamental_types(
 		type.operators[static_cast<uint32_t>(OperatorType::Plus)] = &Assembler::operator_list_add;
 		type.operators[static_cast<uint32_t>(OperatorType::AssignmentAdd)] = &Assembler::operator_list_assignment_add;
 		type.num_properties = num_properties;
-		type.name_hashes[static_cast<uint32_t>(ListProperties::Push)] = hash_constant(0xfa1800cd5dfb79a0ULL, L"push");
-		type.name_hashes[static_cast<uint32_t>(ListProperties::Pop)] = hash_constant(0x6016fa8751cfb62ULL, L"pop");
-		type.name_hashes[static_cast<uint32_t>(ListProperties::Insert)] = hash_constant(0xf45042595a7b3e99ULL, L"insert");
-		type.name_hashes[static_cast<uint32_t>(ListProperties::Erase)] = hash_constant(0xb5d29380f263da2dULL, L"erase");
-		type.name_hashes[static_cast<uint32_t>(ListProperties::Keep)] = hash_constant(0x22a89537675c3808ULL, L"keep");
-		type.name_hashes[static_cast<uint32_t>(ListProperties::Clear)] = hash_constant(0x30b8b5f2ce0e5d40ULL, L"clear");
-		type.name_hashes[static_cast<uint32_t>(ListProperties::Empty)] = hash_constant(0x6fb17151c2e4292dULL, L"empty");
-		type.name_hashes[static_cast<uint32_t>(ListProperties::Length)] = hash_constant(0xf81b879ec0702403ULL, L"length");
+		type.name_hashes[static_cast<uint32_t>(ListProperties::Push)] = hash_constant(0x7ad30941a5437e06ULL, "push");
+		type.name_hashes[static_cast<uint32_t>(ListProperties::Pop)] = hash_constant(0xd1a4d7f17ef16a40ULL, "pop");
+		type.name_hashes[static_cast<uint32_t>(ListProperties::Insert)] = hash_constant(0x8c464fe67490a18aULL, "insert");
+		type.name_hashes[static_cast<uint32_t>(ListProperties::Erase)] = hash_constant(0xa55568a0a6adcd51ULL, "erase");
+		type.name_hashes[static_cast<uint32_t>(ListProperties::Keep)] = hash_constant(0xe994c15f836f30b9ULL, "keep");
+		type.name_hashes[static_cast<uint32_t>(ListProperties::Clear)] = hash_constant(0xdb95283bae679a06ULL, "clear");
+		type.name_hashes[static_cast<uint32_t>(ListProperties::Empty)] = hash_constant(0x5ed3e28128ac2df7ULL, "empty");
+		type.name_hashes[static_cast<uint32_t>(ListProperties::Length)] = hash_constant(0xea9dd03ab3c476a3ULL, "length");
 		_static_list_type = add_type_to_type_map(type, p);
 	}
 	{

          
@@ 360,13 366,13 @@ void Assembler::setup_fundamental_types(
 //		type.operators[static_cast<uint32_t>(OperatorType::ArrayAccess)] = &Assembler::operator_map_array_access;
 		type.operators[static_cast<uint32_t>(OperatorType::Period)] = &Assembler::operator_map_period;
 		type.num_properties = num_properties;
-		type.name_hashes[static_cast<uint32_t>(MapProperties::Get)] = hash_constant(0xafcebfcb5d61ddd8ULL, L"get");
-		type.name_hashes[static_cast<uint32_t>(MapProperties::Set)] = hash_constant(0xdc8e0b42d29bf687ULL, L"set");
-		type.name_hashes[static_cast<uint32_t>(MapProperties::Erase)] = hash_constant(0xb5d29380f263da2dULL, L"erase");
-		type.name_hashes[static_cast<uint32_t>(MapProperties::Clear)] = hash_constant(0x30b8b5f2ce0e5d40ULL, L"clear");
-		type.name_hashes[static_cast<uint32_t>(MapProperties::Has)] = hash_constant(0x619c8e42eb9ab6d8ULL, L"has");
-		type.name_hashes[static_cast<uint32_t>(MapProperties::Empty)] = hash_constant(0x6fb17151c2e4292dULL, L"empty");
-		type.name_hashes[static_cast<uint32_t>(MapProperties::Length)] = hash_constant(0xf81b879ec0702403ULL, L"length");
+		type.name_hashes[static_cast<uint32_t>(MapProperties::Get)] = hash_constant(0x5c50af7d3aca106dULL, "get");
+		type.name_hashes[static_cast<uint32_t>(MapProperties::Set)] = hash_constant(0x81dc8d337b550fb3ULL, "set");
+		type.name_hashes[static_cast<uint32_t>(MapProperties::Erase)] = hash_constant(0xa55568a0a6adcd51ULL, "erase");
+		type.name_hashes[static_cast<uint32_t>(MapProperties::Clear)] = hash_constant(0xdb95283bae679a06ULL, "clear");
+		type.name_hashes[static_cast<uint32_t>(MapProperties::Has)] = hash_constant(0x451fa349ff1558cbULL, "has");
+		type.name_hashes[static_cast<uint32_t>(MapProperties::Empty)] = hash_constant(0x5ed3e28128ac2df7ULL, "empty");
+		type.name_hashes[static_cast<uint32_t>(MapProperties::Length)] = hash_constant(0xea9dd03ab3c476a3ULL, "length");
 		_static_map_type = add_type_to_type_map(type, p);
 	}
 }

          
@@ 398,7 404,7 @@ void Assembler::setup_predefined_constan
 	// add functions
 	for(int i = 0; i < static_cast<int>(FunctionType::NumTypes); ++i) {
 		FunctionType f = static_cast<FunctionType>(i);
-		const std::wstring_view function_name = to_string(f);
+		const std::string_view function_name = to_string(f);
 		symbol_hash = murmur_hash3_string_x64_64(function_name);
 		if (!_strings.has(symbol_hash))
 			_strings.add(symbol_hash, function_name);

          
@@ 407,11 413,11 @@ void Assembler::setup_predefined_constan
 	}
 
 	// add math constants
-	const wchar_t *constant_name;
+	const char *constant_name;
 
 	{
-		constant_name = L"PI";
-		symbol_hash = hash_constant(0xc4dad59240714e9dULL, constant_name);
+		constant_name = "PI";
+		symbol_hash = hash_constant(0x2748bbcafe4477cbULL, constant_name);
 		if (!_strings.has(symbol_hash))
 			_strings.add(symbol_hash, constant_name);
 		Value &new_value = create_unique_label(symbol_hash, is_global);

          
@@ 419,8 425,8 @@ void Assembler::setup_predefined_constan
 	}
 
 	{
-		constant_name = L"E";
-		symbol_hash = hash_constant(0xed4011ab799bcb64ULL, constant_name);
+		constant_name = "E";
+		symbol_hash = hash_constant(0x63836bc0d59ab02eULL, constant_name);
 		if (!_strings.has(symbol_hash))
 			_strings.add(symbol_hash, constant_name);
 		Value &new_value = create_unique_label(symbol_hash, is_global);

          
@@ 465,38 471,38 @@ void Assembler::setup_predefined_constan
 
 
 	// store names of automatic labels explicitly since these will not be used by the user
-	_strings.add(hash_constant(0x9d60c3eb1644552dULL, L"@loop"), std::wstring(L"@loop"));
-	_strings.add(hash_constant(0xdaa803af4141d0e8ULL, L"@continue"), std::wstring(L"@continue"));
-	_strings.add(hash_constant(0x1ded7765ceceebccULL, L"@i"), std::wstring(L"@i"));
+	_strings.add(hash_constant(0xdb831a5e32f85dcfULL, "@loop"), std::string("@loop"));
+	_strings.add(hash_constant(0x232e8dde60eefef3ULL, "@continue"), std::string("@continue"));
+	_strings.add(hash_constant(0x2d8619a103210bb8ULL, "@i"), std::string("@i"));
 }
 
-void Assembler::report_warning(const SourceLocation &location, AssemblyErrorCodes error_code, const std::wstring &msg)
+// Write a warning for a source location to the warning() stream.
+// The line has the form "<file>(<row>,<column>) : Warning <code> : <msg>" where the file name is
+// looked up in _used_files through location.file_index. It is followed by one
+// "Invoked from here" line for each entry of _location_stack, last entry first.
+void Assembler::report_warning(const SourceLocation &location, AssemblyErrorCodes error_code, const std::string &msg)
 {
-	warning() << _used_files[location.file_index] << L"(" << location.row << L"," << location.column << L") : Warning " << static_cast<unsigned>(error_code) << L" : " << msg << L'\n';
+	warning() << _used_files[location.file_index] << "(" << location.row << "," << location.column << ") : Warning " << static_cast<unsigned>(error_code) << " : " << msg << '\n';
 
 	// print previous macro invocation locations backwards from the stack
 	for(auto it = _location_stack.crbegin(); it != _location_stack.crend(); ++it)
-		warning() << _used_files[it->file_index] << L"(" << it->row << L"," << it->column << L") : Invoked from here\n";
+		warning() << _used_files[it->file_index] << "(" << it->row << "," << it->column << ") : Invoked from here\n";
 }
 
-void Assembler::report_error(const SourceLocation &location, AssemblyErrorCodes error_code, const std::wstring &msg, bool fatal)
+// Write an error for a source location to the error() stream and count it.
+// A non-fatal error at a location that has been reported before is dropped without being counted.
+// The line has the form "<file>(<row>,<column>) : Error <code> : <msg>" and is followed by one
+// "Invoked from here" line for each entry of _location_stack, last entry first.
+// Throws AssemblyException when fatal is true or when the error count reaches _max_errors.
+void Assembler::report_error(const SourceLocation &location, AssemblyErrorCodes error_code, const std::string &msg, bool fatal)
 {
 	// avoid reporting same error/location again
 	if (!fatal && _reported_error_locations.find(location) != _reported_error_locations.end())
 		return;
 	_reported_error_locations.insert(location);
 
-	error() << _used_files[location.file_index] << L"(" << location.row << L"," << location.column << L") : Error " << static_cast<unsigned>(error_code) << L" : " << msg << L'\n';
+	error() << _used_files[location.file_index] << "(" << location.row << "," << location.column << ") : Error " << static_cast<unsigned>(error_code) << " : " << msg << '\n';
 
 	// print previous macro invocation locations backwards from the stack
 	for(auto it = _location_stack.crbegin(); it != _location_stack.crend(); ++it)
-		error() << _used_files[it->file_index] << L"(" << it->row << L"," << it->column << L") : Invoked from here\n";
+		error() << _used_files[it->file_index] << "(" << it->row << "," << it->column << ") : Invoked from here\n";
 
+	// the error is counted before either abort condition is checked
 	++_num_errors;
 	if (fatal)
-		throw AssemblyException(L"Fatal error, aborting assembly.");
+		throw AssemblyException("Fatal error, aborting assembly.");
 	if (_num_errors >= _max_errors)
-		throw AssemblyException(L"Too many errors, aborting assembly.");
+		throw AssemblyException("Too many errors, aborting assembly.");
 }
 
 void propagate_data(Section &parent)

          
@@ 543,7 549,7 @@ void Assembler::prepare_next_assembly_pa
 
 void Assembler::run_assembly_pass(bool generate, int pass)
 {
-	debug() << L"Assemble pass " << pass << L'\n';
+	debug() << "Assemble pass " << pass << '\n';
 
 	assert(_call_depth == 0);
 	assert(_data_generation_depth == 0);

          
@@ 558,7 564,7 @@ void Assembler::run_assembly_pass(bool g
 	// and these things will not be reset otherwise
 	_symbol_environment.reset();
 	// set the name of the outermost local scope to avoid unknowns in the symbol dump
-	_symbol_names[_symbol_environment.local_symbol_scope_stack.back()] = L"";
+	_symbol_names[_symbol_environment.local_symbol_scope_stack.back()] = "";
 
 	// allocate the first 8 bytes to be sure that 0 is unused. It may come in handy
 	// as a null pointer.

          
@@ 598,7 604,7 @@ void Assembler::run_assembly_pass(bool g
 	} else {
 		#if 0 // a dump file for each pass to debug
 			if (_dump_symbols) {
-				dump_symbols(base_name(_symbol_dump_file) + std::to_wstring(pass) + file_extension(_symbol_dump_file));
+				dump_symbols(base_name(_symbol_dump_file) + std::to_string(pass) + file_extension(_symbol_dump_file));
 			}
 		#endif
 	}

          
@@ 653,7 659,7 @@ bool Assembler::progress_was_made()
 	}
 	// TODO: hash variable map
 
-	debug() << L"State hash: " << std::hex << storage_hash << std::dec << L'\n';
+	debug() << "State hash: " << std::hex << storage_hash << std::dec << '\n';
 
 	// if the storage hash has existed before we have ended up in a loop where the state
 	// doesn't make any progress

          
@@ 691,7 697,7 @@ bool Assembler::progress_was_made()
 
 struct SymbolInformation
 {
-	SymbolInformation(uint64_t hash_, size_t index_, std::wstring name_, std::wstring value_, bool constant_, ValueType type_)
+	SymbolInformation(uint64_t hash_, size_t index_, std::string name_, std::string value_, bool constant_, ValueType type_)
 		: hash(hash_)
 		, index(index_)
 		, name(std::move(name_))

          
@@ 701,7 707,7 @@ struct SymbolInformation
 		, type(type_)
 	{}
 
-	SymbolInformation(uint64_t hash_, size_t index_, std::wstring name_, std::wstring value_, bool constant_, bool current_pass_, ValueType type_)
+	SymbolInformation(uint64_t hash_, size_t index_, std::string name_, std::string value_, bool constant_, bool current_pass_, ValueType type_)
 		: hash(hash_)
 		, index(index_)
 		, name(std::move(name_))

          
@@ 720,14 726,14 @@ struct SymbolInformation
 
 	uint64_t hash; ///< Combined hash for the symbol.
 	size_t index; ///< Index into variable storage vector.
-	std::wstring name; ///< Readable symbol name.
-	std::wstring value; ///< Readable value contents.
+	std::string name; ///< Readable symbol name.
+	std::string value; ///< Readable value contents.
 	bool constant; ///< True if constant, otherwise variable.
 	bool current_pass; ///< True if found in current pass. This is only used when comparing variables across passes.
 	ValueType type; ///< Type of the value.
 };
 
-void Assembler::create_difference_report(std::wstringstream &ss)
+void Assembler::create_difference_report(std::stringstream &ss)
 {
 	// collect and sort symbol names
 	std::vector<SymbolInformation> symbol_list;

          
@@ 735,13 741,13 @@ void Assembler::create_difference_report
 
 	// find symbols in either pass
 	core::HashMap<uint64_t, bool, core::NullHashCompare<uint64_t>> used_symbols;
-    std::wstring no_name = L"<no name>";
+    std::string no_name = "<no name>";
 	for(const auto &pair : _current_pass.value_lookup) {
 		uint64_t hash = pair.first;
 		size_t value_index = pair.second;
 		const Value &value = _current_pass.values[value_index];
 		auto it = _symbol_names.find(hash);
-        const std::wstring &symbol_name = it == _symbol_names.end() ? no_name : it->second;
+        const std::string &symbol_name = it == _symbol_names.end() ? no_name : it->second;
 		symbol_list.emplace_back(hash, value_index, symbol_name, to_string(_strings, value), value.storage_type != StorageType::Variable, value.type);
 		used_symbols[hash] = true;
 	}

          
@@ 751,7 757,7 @@ void Assembler::create_difference_report
 			size_t value_index = pair.second;
 			const Value &value = _previous_pass.values[value_index];
 			auto it = _symbol_names.find(hash);
-            const std::wstring &symbol_name = it == _symbol_names.end() ? no_name : it->second;
+            const std::string &symbol_name = it == _symbol_names.end() ? no_name : it->second;
 			symbol_list.emplace_back(hash, value_index, symbol_name, to_string(_strings, value), value.storage_type != StorageType::Variable, false, value.type);
 			used_symbols[hash] = true;
 		}

          
@@ 771,12 777,12 @@ void Assembler::create_difference_report
 					continue;
 				} else {
 					ss << std::setw(16) << to_string(_strings, previous) << std::setw(0);
-					ss << L" | ";
+					ss << " | ";
 					ss << std::setw(16) << symbol.value << std::setw(0);
 				}
 			} else {
-				ss << std::setw(16) << L"<missing>" << std::setw(0);
-				ss << L" | ";
+				ss << std::setw(16) << "<missing>" << std::setw(0);
+				ss << " | ";
 				ss << std::setw(16) << symbol.value << std::setw(0);
 			}
 

          
@@ 791,22 797,22 @@ void Assembler::create_difference_report
 					continue;
 				} else {
 					ss << std::setw(16) << symbol.value << std::setw(0);
-					ss << L" | ";
+					ss << " | ";
 					ss << std::setw(16) << to_string(_strings, current) << std::setw(0);
 				}
 			} else {
 				ss << std::setw(16) << symbol.value << std::setw(0);
-				ss << L" | ";
-				ss << std::setw(16) << L"<missing>" << std::setw(0);
+				ss << " | ";
+				ss << std::setw(16) << "<missing>" << std::setw(0);
 			}
 		}
 
-		ss << L" | ";
-		ss << symbol.name << L'\n';
+		ss << " | ";
+		ss << symbol.name << '\n';
 	}
 }
 
-void Assembler::dump_symbols(const std::wstring &filename)
+void Assembler::dump_symbols(const std::string &filename)
 {
 	// extract all relevant symbol information
 	std::vector<SymbolInformation> symbol_list;

          
@@ 817,37 823,36 @@ void Assembler::dump_symbols(const std::
 		size_t value_index = pair.second;
 		const Value &value = _current_pass.values[value_index];
 		auto it = _symbol_names.find(hash);
-		symbol_list.emplace_back(hash, value_index, it == _symbol_names.end() ? L"<no name>" : it->second, to_string(_strings, value), value.storage_type != StorageType::Variable, value.type);
+		symbol_list.emplace_back(hash, value_index, it == _symbol_names.end() ? "<no name>" : it->second, to_string(_strings, value), value.storage_type != StorageType::Variable, value.type);
 	}
 	std::sort(symbol_list.begin(), symbol_list.end());
 
 	// generate output in memory
 	// <hash> | variable_index | <var/const> | <type> | <value> | main::CONSTANT
-	std::wstringstream ss;
+	std::stringstream ss;
 	for(const auto &symbol : symbol_list) {
-		ss << L"0x" << std::hex << std::setw(16) << std::setfill(L'0') << symbol.hash << std::setfill(L' ') << std::setw(0) << std::dec;
-		ss << L" | ";
+		ss << "0x" << std::hex << std::setw(16) << std::setfill('0') << symbol.hash << std::setfill(' ') << std::setw(0) << std::dec;
+		ss << " | ";
 		ss << std::setw(5) << symbol.index << std::setw(0);
-		ss << L" | ";
+		ss << " | ";
 		if (symbol.constant)
-			ss << L"constant";
+			ss << "constant";
 		else
-			ss << L"variable";
-		ss << L" | ";
+			ss << "variable";
+		ss << " | ";
 		ss << std::setw(20) << to_string(symbol.type) << std::setw(0);
-		ss << L" | ";
+		ss << " | ";
 		ss << std::setw(16) << symbol.value << std::setw(0);
-		ss << L" | ";
-		ss << symbol.name << L'\n';
+		ss << " | ";
+		ss << symbol.name << '\n';
 	}
 
-	// convert to utf8
-	std::string utf8 = convert_wide_to_utf8(ss.str());
+	std::string utf8 = ss.str();
 
 	// write to disk
 	FileWriter wr;
 	wr.open(filename);
-	wr.write(reinterpret_cast<const uint8_t *>(utf8.c_str()), static_cast<uint32_t>(utf8.size()));
+	wr.write(reinterpret_cast<const uint8_t *>(utf8.data()), static_cast<uint32_t>(utf8.size()));
 }
 
 struct SimpleSymbolInformation

          
@@ 868,7 873,7 @@ struct SimpleSymbolInformation
 	uint32_t addr; ///< Label address.
 };
 
-bool starts_with_breakpoint(const std::wstring_view &symbol, const std::wstring &beginning)
+bool starts_with_breakpoint(const std::string_view &symbol, const std::string &beginning)
 {
 	if (symbol.size() < beginning.size()) {
 		return false;

          
@@ 880,7 885,7 @@ bool starts_with_breakpoint(const std::w
 	return true;
 }
 
-void Assembler::dump_vice_symbols(const std::wstring &filename)
+void Assembler::dump_vice_symbols(const std::string &filename)
 {
 	// extract all relevant symbol information
 	std::vector<SimpleSymbolInformation> symbol_list;

          
@@ 889,9 894,9 @@ void Assembler::dump_vice_symbols(const 
 	std::set<uint32_t> w_breakpoints;
 	symbol_list.reserve(_current_pass.value_lookup.size());
 
-	std::wstring breakpoint_begin = L"breakpoint";
-	std::wstring r_breakpoint_begin = L"read_breakpoint";
-	std::wstring w_breakpoint_begin = L"write_breakpoint";
+	std::string breakpoint_begin = "breakpoint";
+	std::string r_breakpoint_begin = "read_breakpoint";
+	std::string w_breakpoint_begin = "write_breakpoint";
 
 	for(const auto &pair : _current_pass.value_lookup) {
 		uint64_t hash = pair.first;

          
@@ 914,9 919,9 @@ void Assembler::dump_vice_symbols(const 
 		if (UNLIKELY(!is_integer(value)))
 			continue;
 
-		const std::wstring_view name_str = _strings.get(it->second);
+		const std::string_view name_str = _strings.get(it->second);
 		// skip automatic labels since they may override more interesting names
-		if (name_str.front() == L'@')
+		if (name_str.front() == '@')
 			continue;
 
 		uint32_t integer_value = static_cast<uint32_t>(dereference_integer(value));

          
@@ 943,16 948,16 @@ void Assembler::dump_vice_symbols(const 
 	std::sort(symbol_list.begin(), symbol_list.end());
 
 	// generate output in memory
-	std::wstringstream ss;
-	ss << std::setfill(L'0') << std::hex;
+	std::stringstream ss;
+	ss << std::setfill('0') << std::hex;
 
 	// generate breakpoint list
 	for(auto value : breakpoints)
-		ss << L"break exec " << std::setw(4) << value << std::setw(0) << L'\n';
+		ss << "break exec " << std::setw(4) << value << std::setw(0) << '\n';
 	for(auto value : r_breakpoints)
-		ss << L"break load " << std::setw(4) << value << std::setw(0) << L'\n';
+		ss << "break load " << std::setw(4) << value << std::setw(0) << '\n';
 	for(auto value : w_breakpoints)
-		ss << L"break store " << std::setw(4) << value << std::setw(0) << L'\n';
+		ss << "break store " << std::setw(4) << value << std::setw(0) << '\n';
 
 	// generate symbol list
 	std::set<uint32_t> used_values;

          
@@ 965,9 970,9 @@ void Assembler::dump_vice_symbols(const 
 		uint64_t name = symbol.name;
 		if (used_names.find(name) != used_names.end()) {
 			// symbol exists, try combining the index in the name
-			std::wstring name_with_index(_strings.get(name));
+			std::string name_with_index(_strings.get(name));
 			name_with_index.append(1, '_');
-			name_with_index.append(std::to_wstring(symbol.index));
+			name_with_index.append(std::to_string(symbol.index));
 			name = murmur_hash3_string_x64_64(name_with_index);
 			// give up if still colliding
 			if (used_names.find(name) != used_names.end())

          
@@ 978,12 983,11 @@ void Assembler::dump_vice_symbols(const 
 		used_values.insert(symbol.addr);
 		used_names.insert(name);
 
-		const std::wstring_view name_str = _strings.get(name);
-		ss << L"al C:" << std::setw(4) << symbol.addr << std::setw(0) << L" ." << name_str << L'\n';
+		const std::string_view name_str = _strings.get(name);
+		ss << "al C:" << std::setw(4) << symbol.addr << std::setw(0) << " ." << name_str << '\n';
 	}
 
-	// convert to utf8
-	std::string utf8 = convert_wide_to_utf8(ss.str());
+	std::string utf8 = ss.str();
 
 	// write to disk
 	FileWriter wr;

          
@@ 991,7 995,7 @@ void Assembler::dump_vice_symbols(const 
 	wr.write(reinterpret_cast<const uint8_t *>(utf8.c_str()), static_cast<uint32_t>(utf8.size()));
 }
 
-void Assembler::dump_gba_symbols(const std::wstring &filename)
+void Assembler::dump_gba_symbols(const std::string &filename)
 {
 	// extract all relevant symbol information
 	std::vector<SimpleSymbolInformation> symbol_list;

          
@@ 1018,9 1022,9 @@ void Assembler::dump_gba_symbols(const s
 		if (UNLIKELY(!is_integer(value)))
 			continue;
 
-		const std::wstring_view name_str = _strings.get(it->second);
+		const std::string_view name_str = _strings.get(it->second);
 		// skip automatic labels since they may override more interesting names
-		if (name_str.front() == L'@')
+		if (name_str.front() == '@')
 			continue;
 
 		uint32_t integer_value = static_cast<uint32_t>(dereference_integer(value));

          
@@ 1029,8 1033,8 @@ void Assembler::dump_gba_symbols(const s
 	std::sort(symbol_list.begin(), symbol_list.end());
 
 	// generate output in memory
-	std::wstringstream ss;
-	ss << std::setfill(L'0') << std::hex;
+	std::stringstream ss;
+	ss << std::setfill('0') << std::hex;
 
 	// generate symbol list
 	std::set<uint32_t> used_values;

          
@@ 1043,9 1047,9 @@ void Assembler::dump_gba_symbols(const s
 		uint64_t name = symbol.name;
 		if (used_names.find(name) != used_names.end()) {
 			// symbol exists, try combining the index in the name
-			std::wstring name_with_index(_strings.get(name));
-			name_with_index.append(1, L'_');
-			name_with_index.append(std::to_wstring(symbol.index));
+			std::string name_with_index(_strings.get(name));
+			name_with_index.append(1, '_');
+			name_with_index.append(std::to_string(symbol.index));
 			name = murmur_hash3_string_x64_64(name_with_index);
 			// give up if still colliding
 			if (used_names.find(name) != used_names.end())

          
@@ 1056,12 1060,11 @@ void Assembler::dump_gba_symbols(const s
 		used_values.insert(symbol.addr);
 		used_names.insert(name);
 
-		const std::wstring_view name_str = _strings.get(name);
-		ss << std::setw(4) << (symbol.addr >> 16) << L":" << (symbol.addr & 0xffff) << std::setw(0) << L" " << name_str << L'\n';
+		const std::string_view name_str = _strings.get(name);
+		ss << std::setw(4) << (symbol.addr >> 16) << ":" << (symbol.addr & 0xffff) << std::setw(0) << " " << name_str << '\n';
 	}
 
-	// convert to utf8
-	std::string utf8 = convert_wide_to_utf8(ss.str());
+	std::string utf8 = ss.str();
 
 	// write to disk
 	FileWriter wr;

          
@@ 1074,15 1077,15 @@ void Assembler::recurse_print_sections(c
 	int32_t length = section.section_type == SectionType::Code ? static_cast<int32_t>(section.generated_data().size()) : section.bss_length;
 
 	for(int i = 0; i < indent; ++i) {
-		info() << L"  ";
+		info() << "  ";
 	}
-	info() << std::setfill(L'0');
-	info() << L"$" << std::setw(4) << section.start_address << std::setw(0) << L" - ";
-	info() << L"$" << std::setw(4) << section.start_address + length << std::setw(0);
-	info() << L" ($" << std::setw(4) << length << std::setw(0) << L") ";
-	info() << std::setfill(L' ');
+	info() << std::setfill('0');
+	info() << "$" << std::setw(4) << section.start_address << std::setw(0) << " - ";
+	info() << "$" << std::setw(4) << section.start_address + length << std::setw(0);
+	info() << " ($" << std::setw(4) << length << std::setw(0) << ") ";
+	info() << std::setfill(' ');
 	info() << std::setw(4) << to_string(section.section_type) << std::setw(0);
-	info() << L": " << _strings.get(section.name) << L'\n';
+	info() << ": " << _strings.get(section.name) << '\n';
 
 	for(const Section &child: section.children)
 		recurse_print_sections(child, indent + 1);

          
@@ 1095,28 1098,40 @@ void Assembler::print_sections()
 	for(const Section &section : _sections)
 		recurse_print_sections(section, 0);
 
-	info() << std::setfill(L' ') << std::dec;
+	info() << std::setfill(' ') << std::dec;
+}
+
+void Assembler::verify_sections_recursive(const Section &section)
+{
+	for(const Section &child : section.children) {
+		verify_sections_recursive(child);
+	}
+
+	if (!section.has_end) {
+		return;
+	}
+
+	int32_t length;
+	if (section.section_type == SectionType::Bss) {
+		length = section.bss_length;
+	} else {
+		length = static_cast<int32_t>(section.generated_data().size());
+	}
+
+	int32_t section_end = section.start_address + length;
+
+	if (section_end > section.end_address) {
+		std::stringstream ss;
+		ss << "Section " << _strings.get(section.name) << " exceeds its max size. Section is " << length << " B and the maximum size is " << section.end_address - section.start_address << " B.";
+		report_fatal_error(section.source_location, AssemblyErrorCodes::SectionDataOverflow, ss.str());
+	}
+	
 }
 
 void Assembler::verify_sections()
 {
 	for(const Section &section : _sections) {
-		if (!section.has_end)
-			continue;
-
-		int32_t length;
-		if (section.section_type == SectionType::Bss)
-			length = section.bss_length;
-		else
-			length = static_cast<int32_t>(section.generated_data().size());
-
-		int32_t section_end = section.start_address + length;
-
-		if (section_end > section.end_address) {
-			std::wstringstream ss;
-			ss << L"Section " << _strings.get(section.name) << L" exceeds its max size. Section is " << length << " B and the maximum size is " << section.end_address - section.start_address << L" B.";
-			report_fatal_error(section.source_location, AssemblyErrorCodes::SectionDataOverflow, ss.str());
-		}
+		verify_sections_recursive(section);
 	}
 }
 

          
@@ 1134,7 1149,7 @@ void Assembler::assemble()
 
 	int pass = 1;
 	while (true) {
-		TimerScope timer(L"Assemble pass");
+		TimerScope timer("Assemble pass");
 		prepare_next_assembly_pass();
 		constexpr bool generate_code = false;
 		run_assembly_pass(generate_code, pass);

          
@@ 1147,26 1162,26 @@ void Assembler::assemble()
 
 	if (_oscillating_state) {
 		error() <<
-			L"The variable state doesn't stabilize!\n"
-			L"This may indicate that the source code contains conflicting conditional code.\n"
-			L"Try disabling the latest changes in conditional blocks or code referring to\n"
-			L"conditional blocks to see what triggers this condition. Unfortunately it is\n"
-			L"not possible right now to get detailed information about exactly where the\n"
-			L"problem is.\n";
+			"The variable state doesn't stabilize!\n"
+			"This may indicate that the source code contains conflicting conditional code.\n"
+			"Try disabling the latest changes in conditional blocks or code referring to\n"
+			"conditional blocks to see what triggers this condition. Unfortunately it is\n"
+			"not possible right now to get detailed information about exactly where the\n"
+			"problem is.\n";
 
-		std::wstringstream ss;
-		ss << L"Differing symbols in last two passes:\n";
+		std::stringstream ss;
+		ss << "Differing symbols in last two passes:\n";
 		create_difference_report(ss);
 		debug() << ss.str();
 	}
 
 	{
-		TimerScope timer(L"Generate pass");
+		TimerScope timer("Generate pass");
 		prepare_next_assembly_pass();
 		constexpr bool generate_code = true;
 		run_assembly_pass(generate_code, pass);
 		if (_num_errors != 0 || _oscillating_state)
-			throw AssemblyException(L"Assembly ended with errors.");
+			throw AssemblyException("Assembly ended with errors.");
 	}
 
 	std::sort(_sections.begin(), _sections.end());

          
M jasm/assembling/assembler_impl/assembler_impl.h +66 -52
@@ 14,6 14,7 @@ 
 #include <parsing/syntax_parser.h>
 #include <set>
 #include <strings/string_conversions.h>
+#include <strings/string_locale.h>
 #include <strings/string_repository.h>
 #include <sstream>
 

          
@@ 45,14 46,14 @@ constexpr uint32_t max_call_depth = 100;
 class Assembler
 {
 public:
-	Assembler(bool multiple_output_files, bool multi_bank_mode
+	Assembler(bool multiple_output_files, bool multi_bank_mode, bool pseudo_instructions
 			, const std::vector<TokenChain> &syntax, StringRepository &strings
-			, const HashArrayRepository &hash_arrays, const std::vector<std::wstring> &used_files
-			, const std::vector<std::pair<std::wstring, bool>> &predefined_booleans
-			, const std::vector<std::pair<std::wstring, int32_t>> &predefined_integers
-			, const std::vector<std::pair<std::wstring, std::wstring>> &predefined_strings
+			, const HashArrayRepository &hash_arrays, const std::vector<std::string> &used_files
+			, const std::vector<std::pair<std::string, bool>> &predefined_booleans
+			, const std::vector<std::pair<std::string, int32_t>> &predefined_integers
+			, const std::vector<std::pair<std::string, std::string>> &predefined_strings
 			, DataReader &data_reader, int32_t max_errors
-			, const std::wstring &symbol_dump_file, const std::wstring &vice_dump_file, const std::wstring &gba_dump_file
+			, const std::string &symbol_dump_file, const std::string &vice_dump_file, const std::string &gba_dump_file
 			, std::vector<Section> &output);
 
 	Assembler &operator=(const Assembler &other) = delete;

          
@@ 213,7 214,7 @@ private:
 
 	inline bool is_integer(const Value &value) const
 	{
-		if (value.type == ValueType::IntegerValue || is_member_of(value.type, value_group_offset) || value.type == ValueType::RangeValue)
+		if (value.type == ValueType::IntegerValue || is_member_of(value.type, value_group_offset) || value.type == ValueType::SubroutineValue)
 			return true;
 
 		if (!value.is_value_reference())

          
@@ 222,7 223,7 @@ private:
 		// value reference type must follow the reference and check its type
 		const Value &referred_value = follow_reference(value);
 
-		return referred_value.type == ValueType::IntegerValue || is_member_of(referred_value.type, value_group_offset) || referred_value.type == ValueType::RangeValue;
+		return referred_value.type == ValueType::IntegerValue || is_member_of(referred_value.type, value_group_offset) || referred_value.type == ValueType::SubroutineValue;
 	}
 
 	inline bool is_float(const Value &value) const

          
@@ 244,7 245,7 @@ private:
 		if (value.type == ValueType::IntegerValue
 			|| is_member_of(value.type, value_group_offset)
 			|| value.type == ValueType::FloatValue
-			|| value.type == ValueType::RangeValue)
+			|| value.type == ValueType::SubroutineValue)
 			return true;
 
 		if (!value.is_value_reference())

          
@@ 257,7 258,7 @@ private:
 			referred_value.type == ValueType::IntegerValue
 			|| is_member_of(referred_value.type, value_group_offset)
 			|| referred_value.type == ValueType::FloatValue
-			|| referred_value.type == ValueType::RangeValue;
+			|| referred_value.type == ValueType::SubroutineValue;
 	}
 
 	inline bool is_string(const Value &value) const

          
@@ 354,7 355,7 @@ private:
 	static uint32_t leftmost_node_in_expression(const ExpressionComponent components[], uint32_t value_index);
 
 	/// Throw an error telling that the type of expression is wrong or generate an unknown value.
-	void generate_value_type_error(bool generate, const wchar_t *expected_type, Value &result, const ValueVector &expression_values, const ExpressionComponent components[], uint32_t value_index);
+	void generate_value_type_error(bool generate, const char *expected_type, Value &result, const ValueVector &expression_values, const ExpressionComponent components[], uint32_t value_index);
 	/// Throw an error telling that a result became infinite or generate an unknown value.
 	void generate_infinite_value_error(bool generate, Value &result, const ExpressionComponent components[], uint32_t value_index);
 

          
@@ 369,7 370,7 @@ private:
 	double dereference_float(const Value &value) const;
 	/// Return the char pointer value of a string value. The value argument
 	/// must be a string value or reference to a string value.
-	std::wstring_view dereference_string(const Value &value) const;
+	std::string_view dereference_string(const Value &value) const;
 	/// Return the string hash of a string value. The value argument
 	/// must be a string value or reference to a string value.
 	uint64_t dereference_string_hash(const Value &value) const;

          
@@ 382,15 383,17 @@ private:
 	void set_integer(Value &result, int32_t value) const;
 	void set_float(Value &result, double value) const;
 	void set_string(Value &result, uint64_t value) const;
-	void set_string(Value &result, const std::wstring_view &value) const;
-	void set_string(Value &result, const std::wstring &value) const;
-	void set_string(Value &result, std::wstring &&value) const;
-	void set_range(Value &result, int32_t value, int32_t size) const;
+	void set_string(Value &result, const std::string_view &value) const;
+	void set_string(Value &result, const std::string &value) const;
+	void set_string(Value &result, std::string &&value) const;
+	void set_subroutine(Value &result, int32_t value, int32_t size) const;
 	void set_function(Value &result, FunctionType f) const;
 	void set_method(Value &result, MethodType method, const Value &object) const;
 	void set_list(Value &result) const;
 	void set_list_element_reference(Value &result, StorageType storage_type, const Value &list, int32_t index) const;
 	void set_map(Value &result) const;
+	void set_byte_offset(Value &result, int32_t offset_base, int32_t offset);
+	void set_word_offset(Value &result, int32_t offset_base, int32_t offset);
 	/// @}
 
 	using FloatFunction1 = double (*)(double a);

          
@@ 399,7 402,7 @@ private:
 	using BooleanOperation = bool (*)(bool a, bool b);
 	using IntegerCompareOperation = bool (*)(int32_t a, int32_t b);
 	using FloatCompareOperation = bool (*)(double a, double b);
-	using StringCompareOperation = bool (*)(const std::wstring_view &a, const std::wstring_view &b);
+	using StringCompareOperation = bool (*)(const std::string_view &a, const std::string_view &b);
 
 	/// Get left value, result and the index of the applied property. Returns true if successful or false (with result set to unknown) if it fails to find the property.
 	bool get_operator_period_property_index(bool generate, const ExpressionComponent components[], Value &result, const Value &arg1, uint32_t next_index, uint32_t &key_index);

          
@@ 445,8 448,8 @@ private:
 			// value is out of range
 			if (generate) {
 				const ExpressionComponent &ec = components[leftmost_node_in_expression(components, value_index)];
-				std::wstringstream ss;
-				ss << L"Value " << value << L" is outside range for cast. Value must be in range [" << min_range << L", " << max_range << ").";
+				std::stringstream ss;
+				ss << "Value " << value << " is outside range for cast. Value must be in range [" << min_range << ", " << max_range << ").";
 				report_error(ec.source_location, AssemblyErrorCodes::ValueOutOfRangeForCast, ss.str());
 			}
 			set_unknown(result);

          
@@ 552,6 555,7 @@ private:
 	void operator_array_offset_access(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1, const Value &arg2);
 	void operator_string_array_access(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1, const Value &arg2);
 	void operator_list_array_access(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1_ref, Value &arg1, const Value &arg2);
+	void operator_subroutine_call(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1, uint32_t next_index);
 	void operator_function_call(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1, uint32_t next_index);
 	void operator_method_closure_call(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1, uint32_t next_index);
 	void operator_byte_conversion(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1, uint32_t next_index);

          
@@ 564,7 568,8 @@ private:
 	void operator_list_add(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1, const Value &arg2);
 	void operator_list_assignment_add(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &ref, Value &arg1, const Value &arg2);
 	void operator_map_period(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1, uint32_t next_index);
-
+	void operator_word_offset_period(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1, uint32_t next_index);
+	
 	/// Returns a member function pointer that contains a union of member function pointers.
 	static Function function_pointer(FunctionType type);
 

          
@@ 584,6 589,7 @@ private:
 	void function_pow(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1, uint32_t arg2);
 	void function_log(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1);
 	void function_log10(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1);
+	void function_logn(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1, uint32_t arg2);
 	void function_exp(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1);
 	void function_abs(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1);
 	void function_floor(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1);

          
@@ 602,6 608,8 @@ private:
 	void function_string(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1);
 	void function_hexstring(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1);
 	void function_unicode(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1);
+	void function_uppercase(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1);
+	void function_lowercase(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1);
 	void function_list(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op);
 	void function_map(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op);
 	void function_static_assert(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1, uint32_t arg2);

          
@@ 623,7 631,6 @@ private:
 			RIGHT,
 		};
 
-
 		enum class NumberFormat
 		{
 			OFF,

          
@@ 650,15 657,15 @@ private:
 	/// @param format_string A pointer to the character after the initial curly bracket. This will be modified to point after the closing bracket if parsing was successful.
 	/// @param format Will be updated with the parsing information if parsing was successful.
 	/// @return True if the parsing was successful.
-	bool parse_argument_format(bool generate, const SourceLocation &format_location, std::wstring_view &format_string, ArgFormat &format);
+	bool parse_argument_format(bool generate, const SourceLocation &format_location, std::string_view &format_string, ArgFormat &format);
 
 	/// Generate a string based on the specified format.
 	/// @param depth Recursion depth to stop recursion because objects can contain references to themselves.
-	bool argument_to_string(bool generate, const SourceLocation &format_location, const ArgFormat &format, const Value &arg_value, std::wstringstream &arg_string, uint32_t depth = 0);
+	bool argument_to_string(bool generate, const SourceLocation &format_location, const ArgFormat &format, const Value &arg_value, std::stringstream &arg_string, uint32_t depth = 0);
 
 	/// Parse the arguments for the format or print functions.
 	/// @return True if successful and @a result will contain the formatted string.
-	bool parse_format_arguments(bool generate, const ValueVector &expression_values, const ExpressionComponent components[], uint32_t arg1, std::wstring &result);
+	bool parse_format_arguments(bool generate, const ValueVector &expression_values, const ExpressionComponent components[], uint32_t arg1, std::string &result);
 
 	/// @}
 

          
@@ 714,10 721,10 @@ private:
 	/// @param hash Will be set to the symbol hash, if correctly formatted.
 	/// @param global Will be set according to symbol type, if correctly formatted.
 	/// @return True if the string is correctly formatted.
-	static bool parse_symbol_string(const std::wstring_view &symbol, uint64_t &hash, bool &global);
+	bool parse_symbol_string(const std::string_view &symbol, uint64_t &hash, bool &global);
 
 	/// Store a symbol name in the string repository, taking away the initial '.' on a local symbol before.
-	void store_symbol_string_in_repository(uint64_t hash, const std::wstring_view &symbol);
+	void store_symbol_string_in_repository(uint64_t hash, const std::string_view &symbol);
 
 	/// Store a symbol in the current pass using a prehashed symbol name (with namespace or instance hash).
 	void store_symbol_value(uint64_t symbol_hash, const Value &value, bool global, bool is_address, StorageType type);

          
@@ 907,6 914,10 @@ private:
 	const SyntaxToken *parse_section_map(bool generate, const SyntaxToken *t);
 	const SyntaxToken *parse_declare(bool generate, const SyntaxToken *t);
 	const SyntaxToken *parse_declaration(bool generate, const SyntaxToken *t, bool export_enabled);
+	#if SUPPORTS(M6502)
+		void generate_instruction_data_label(bool generate, bool export_enabled, const InstructionToken &token, int address, int offset, uint8_t size);
+	#endif
+	void generate_subroutine_instruction(bool generate, int32_t address, const SourceLocation &source_location);
 	const SyntaxToken *parse_instruction(bool generate, const SyntaxToken *t, bool export_enabled);
 	const SyntaxToken *parse_reserve(bool generate, const SyntaxToken *t, bool export_enabled);
 	const SyntaxToken *parse_statement(bool generate, const SyntaxToken *t, bool &early_return);

          
@@ 929,24 940,24 @@ private:
 	const SyntaxToken *parse_incbin(bool generate, const SyntaxToken *t);
 
 	/// Get the variable name for a global or local variable.
-	inline std::wstring variable_name(uint64_t symbol_hash, bool global)
+	inline std::string variable_name(uint64_t symbol_hash, bool global)
 	{
 		if (global)
-			return std::wstring(_strings.get(symbol_hash));
+			return std::string(_strings.get(symbol_hash)); // global: the stored name is returned as is
 		else
-			return L"." + std::wstring(_strings.get(symbol_hash));
+			return "." + std::string(_strings.get(symbol_hash)); // local: the stored name gets a '.' prefix
 	}
 
-	static inline std::wstring instance_path(const SourceLocation &source_location)
+	static inline std::string instance_path(const SourceLocation &source_location)
 	{
-		return std::to_wstring(source_location.file_index) + L"/"
-			 + std::to_wstring(source_location.row) + L"/"
-			 + std::to_wstring(source_location.column);
+		return std::to_string(source_location.file_index) + "/" // result has the form "<file_index>/<row>/<column>"
+			 + std::to_string(source_location.row) + "/"
+			 + std::to_string(source_location.column);
 	}
 
 	/// Combine a name with a parent and store the combined name for lookup using the combined hash.
 	/// This is used to make a reverse lookup table from hash to symbol names for the symbol dump.
-	void combine_and_store_hash_name(uint64_t parent_hash, uint64_t combined_hash, const std::wstring &child_name);
+	void combine_and_store_hash_name(uint64_t parent_hash, uint64_t combined_hash, const std::string &child_name);
 
 	/// Store a relation between a combined name and a symbol name for lookup using the combined hash.
 	/// This is used to find simple symbol names for the VICE symbol dump so only address label symbols

          
@@ 957,22 968,22 @@ private:
 	}
 
 	/// Log a warning.
-	void report_warning(const SourceLocation &location, AssemblyErrorCodes error_code, const std::wstring &msg);
+	void report_warning(const SourceLocation &location, AssemblyErrorCodes error_code, const std::string &msg);
 	/// Log an error and throw an exception if max number of errors have been printed.
-	void report_error(const SourceLocation &location, AssemblyErrorCodes error_code, const std::wstring &msg)
+	void report_error(const SourceLocation &location, AssemblyErrorCodes error_code, const std::string &msg) // forwards to the four-argument overload with fatal = false
 	{
 		report_error(location, error_code, msg, false);
 	}
 	/// Log a fatal error and throw an exception immediately.
-	void report_fatal_error(const SourceLocation &location, AssemblyErrorCodes error_code, const std::wstring &msg)
+	void report_fatal_error(const SourceLocation &location, AssemblyErrorCodes error_code, const std::string &msg) // forwards to the four-argument report_error with fatal = true
 	{
 		report_error(location, error_code, msg, true);
 	}
 	/// Log any type of error.
-	void report_error(const SourceLocation &location, AssemblyErrorCodes error_code, const std::wstring &msg, bool fatal);
+	void report_error(const SourceLocation &location, AssemblyErrorCodes error_code, const std::string &msg, bool fatal);
 
 	/// Create a report of the difference between current and previous pass.
-	void create_difference_report(std::wstringstream &ss);
+	void create_difference_report(std::stringstream &ss);
 
 	/// Move state from current pass to previous pass.
 	void prepare_next_assembly_pass();

          
@@ 982,11 993,11 @@ private:
 	bool progress_was_made();
 
 	/// Dump all symbol information to a file.
-	void dump_symbols(const std::wstring &filename);
+	void dump_symbols(const std::string &filename);
 	/// Dump all symbol information to a VICE compatible file.
-	void dump_vice_symbols(const std::wstring &filename);
+	void dump_vice_symbols(const std::string &filename);
 	/// Dump all symbol information to a No$GBA compatible file.
-	void dump_gba_symbols(const std::wstring &filename);
+	void dump_gba_symbols(const std::string &filename);
 
 	/// Recursively collect and store the section additional parts sizes for the next assembly pass.
 	void collect_section_sizes(const std::vector<Section> &sections);

          
@@ 1004,6 1015,7 @@ private:
 	void print_sections();
 	/// Verify section sizes.
 	void verify_sections();
+	void verify_sections_recursive(const Section &section);
 	/// Remove the bss sections to only leave the ones producing output.
 	void cleanup_sections();
 

          
@@ 1027,20 1039,22 @@ private:
 	// input data
 	bool _multiple_output_files; ///< When true, write one file per section. Otherwise merge them together.
 	bool _multi_bank_mode; ///< When true, the assembler will truncate addresses in instructions.
+	bool _pseudo_instructions; ///< When true, some extra instructions or addressing modes can be added to simplify programming.
 	const std::vector<TokenChain> &_input; ///< The syntax token stream to run assemble passes on.
 	StringRepository &_strings; ///< The lookup table from uint64_t to strings.
 	const HashArrayRepository &_hash_arrays; ///< The lookup table from uint64_t handles to uint64_t arrays.
-	const std::vector<std::wstring> &_used_files; ///< Filenames of assembler files to be able to print them.
-	const std::vector<std::pair<std::wstring, bool>> &_predefined_booleans;
-	const std::vector<std::pair<std::wstring, int32_t>> &_predefined_integers;
-	const std::vector<std::pair<std::wstring, std::wstring>> &_predefined_strings;
+	const std::vector<std::string> &_used_files; ///< Filenames of assembler files to be able to print them.
+	const std::vector<std::pair<std::string, bool>> &_predefined_booleans;
+	const std::vector<std::pair<std::string, int32_t>> &_predefined_integers;
+	const std::vector<std::pair<std::string, std::string>> &_predefined_strings;
 	DataReader &_data_reader; ///< This handles reading binary files.
 	int32_t _max_errors; ///< Max number of errors before aborting assembly generation pass.
-	const std::wstring _symbol_dump_file; ///< The filename of the symbol dump.
-	const std::wstring _vice_dump_file; ///< The filename of the vice symbol dump.
-	const std::wstring _gba_dump_file; ///< The filename of the No$GBA style symbol dump.
+	const std::string _symbol_dump_file; ///< The filename of the symbol dump.
+	const std::string _vice_dump_file; ///< The filename of the vice symbol dump.
+	const std::string _gba_dump_file; ///< The filename of the No$GBA style symbol dump.
 	bool _dump_symbols; ///< True if symbol dump is enabled.
 
+	StringLocale _string_locale; ///< Converts locale names to names to be used in std::locale.
 	StringConversions _string_conversions; ///< Converts strings to platform specific formats. 
 	TokenReader _input_reader; ///< Reads the syntax input tokens.
 

          
@@ 1066,7 1080,7 @@ private:
 	uint64_t _static_float_type;
 	uint64_t _static_string_type;
 	uint64_t _static_string_reference_type;
-	uint64_t _static_range_type;
+	uint64_t _static_subroutine_type;
 	uint64_t _static_value_reference_type;
 	uint64_t _static_byte_offset_type;
 	uint64_t _static_word_offset_type;

          
@@ 1092,7 1106,7 @@ private:
 	core::HashMap<uint64_t, int32_t, core::NullHashCompare<uint64_t>> _previous_pass_section_part_size; ///< This stores the extra size added to outer sections (by name hash) by section parts. This is needed to be able to adjust the program counter after each outer section.
 
 	std::vector<uint64_t> _temp_namespace_list; ///< When combining namespaces with using, this is used temporarily.
-	std::vector<wchar_t> _temp_string_combine; ///< When combining symbol strings this is used to avoid reallocations.
+	std::vector<char> _temp_string_combine; ///< When combining symbol strings this is used to avoid reallocations.
 
 	uint32_t _call_depth; ///< Depth of, for now, macro calls to detect never ending recursion.
 	uint32_t _data_generation_depth; ///< Depth of data generating code. This must never be higher than 1 because it is possible that the assembler breaks down if something generating code calls something that generates code.

          
@@ 1102,7 1116,7 @@ private:
 	std::set<SourceLocation> _reported_error_locations; ///< Keep track of reported error positions to avoid duplicated errors in a loop construction.
 	int32_t _num_errors; ///< Errors encountered in the generation pass so far.
 
-	using SymbolNameMap = core::HashMap<uint64_t, std::wstring, core::NullHashCompare<uint64_t>>;
+	using SymbolNameMap = core::HashMap<uint64_t, std::string, core::NullHashCompare<uint64_t>>;
 	using SymbolHashNameMap = core::HashMap<uint64_t, uint64_t, core::NullHashCompare<uint64_t>>; // map from combined hash to symbol hash
 	SymbolNameMap _symbol_names; ///< Lookup from hash to symbol name in case symbol output is required.
 	SymbolHashNameMap _symbol_hash_names; ///< Lookup from combined hash to symbol hash. This is used to obtain the short name of symbols for the VICE symbol dump.

          
M jasm/assembling/assembler_impl/expressions_impl.cpp +13 -13
@@ 74,8 74,8 @@ void Assembler::evaluate_operator(bool g
 	if (operator_func.empty()) {
 		if (generate) {
 			// no operator for type is an error in the generation phase
-			std::wstringstream ss;
-			ss << L"Operator " << to_string(root.operator_type) << L" is not defined for left hand side " << to_string(left_value->type) << L" type.";
+			std::stringstream ss;
+			ss << "Operator " << to_string(root.operator_type) << " is not defined for left hand side " << to_string(left_value->type) << " type.";
 			report_error(root.source_location, AssemblyErrorCodes::OperatorNotSupportingType, ss.str());
 		}
 		// no operator for type returns an error type in non-generation phases

          
@@ 91,8 91,8 @@ void Assembler::evaluate_operator(bool g
 		bool allowed_to_be_modified = is_mutable(left_ref_value) || left_value->is_module_import();
 		if (!allowed_to_be_modified) {
 			if (generate) {
-				std::wstringstream ss;
-				ss << L"Cannot modify constant value.";
+				std::stringstream ss;
+				ss << "Cannot modify constant value.";
 				report_error(root.source_location, AssemblyErrorCodes::CannotModifyConstant, ss.str());
 			}
 			set_unknown(result);

          
@@ 248,8 248,8 @@ void Assembler::evaluate_expression_with
 
 	// report later than the expression evaluation because errors occurring on the same line are hiding each other
 	if (!has_side_effect) {
-		std::wstringstream ss;
-		ss << L"Expression must have side effect. If this seems confusing, the row before may lack a semicolon to end the statement or expression.";
+		std::stringstream ss;
+		ss << "Expression must have side effect. If this seems confusing, the row before may lack a semicolon to end the statement or expression.";
 		report_error(expression_header->source_location, AssemblyErrorCodes::StatementHasNoSideEffect, ss.str());
 	}
 }

          
@@ 270,8 270,8 @@ int32_t Assembler::evaluate_integer_expr
 		argument_value = dereference_integer(argument);
 	} else {
 		if (generate) {
-			std::wstringstream ss;
-			ss << L"Addressing mode needs an integer value. Argument type was " << to_string(type_of_value(argument)) << L".";
+			std::stringstream ss;
+			ss << "Addressing mode needs an integer value. Argument type was " << to_string(type_of_value(argument)) << ".";
 			report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresIntegerArgument, ss.str());
 		}
 	}

          
@@ 285,15 285,15 @@ uint32_t Assembler::leftmost_node_in_exp
 	return value_index;
 }
 
-void Assembler::generate_value_type_error(bool generate, const wchar_t *expected_type, Value &result, const ValueVector &expression_values, const ExpressionComponent components[], uint32_t value_index)
+void Assembler::generate_value_type_error(bool generate, const char *expected_type, Value &result, const ValueVector &expression_values, const ExpressionComponent components[], uint32_t value_index)
 {
 	if (generate) {
 		auto leftmost_index = leftmost_node_in_expression(components, value_index);
 
 		const ExpressionComponent &ec = components[leftmost_index];
 		const Value &value = follow_reference_or_value(expression_values[value_index]);
-		std::wstringstream ss;
-		ss << L"Expected " << expected_type << " but got " << to_string(value.type) << L".";
+		std::stringstream ss;
+		ss << "Expected " << expected_type << " but got " << to_string(value.type) << ".";
 		report_error(ec.source_location, AssemblyErrorCodes::UnexpectedValueType, ss.str());
 	}
 	set_unknown(result);

          
@@ 305,8 305,8 @@ void Assembler::generate_infinite_value_
 		auto leftmost_index = leftmost_node_in_expression(components, value_index);
 
 		const ExpressionComponent &ec = components[leftmost_index];
-		std::wstringstream ss;
-		ss << L"An infinite value was created.";
+		std::stringstream ss;
+		ss << "An infinite value was created.";
 		report_error(ec.source_location, AssemblyErrorCodes::InfiniteValueWasCreated, ss.str());
 	}
 	set_unknown(result);

          
M jasm/assembling/assembler_impl/functions_impl.cpp +302 -145
@@ 4,7 4,9 @@ 
 #include <assembling/functions.h>
 #include <cmath>
 #include <core/environment/log.h>
+#include <core/math/sign.h>
 #include <core/strings/string_helpers.h>
+#include <core/strings/utf8.h>
 #include <iomanip>
 #include <ios>
 

          
@@ 31,6 33,7 @@ Function Assembler::function_pointer(Fun
 		{ &Assembler::function_pow },
 		{ &Assembler::function_log },
 		{ &Assembler::function_log10 },
+		{ &Assembler::function_logn },
 		{ &Assembler::function_exp },
 		{ &Assembler::function_abs },
 		{ &Assembler::function_floor },

          
@@ 49,6 52,8 @@ Function Assembler::function_pointer(Fun
 		{ &Assembler::function_string },
 		{ &Assembler::function_hexstring },
 		{ &Assembler::function_unicode },
+		{ &Assembler::function_uppercase },
+		{ &Assembler::function_lowercase },
 		{ &Assembler::function_list },
 		{ &Assembler::function_map },
 		{ &Assembler::function_static_assert },

          
@@ 67,7 72,7 @@ void Assembler::apply_float_function_wit
 	Value &result = expression_values[op];
 	const Value &value = follow_reference_or_value(expression_values[arg]);
 	if (!is_numeric(value)) {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg);
 		return;
 	}
 

          
@@ 84,11 89,11 @@ void Assembler::apply_float_function_wit
 	const Value &value1 = follow_reference_or_value(expression_values[arg1]);
 	const Value &value2 = follow_reference_or_value(expression_values[arg2]);
 	if (!is_numeric(value1)) {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg1);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg1);
 		return;
 	}
 	if (!is_numeric(value2)) {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg2);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg2);
 		return;
 	}
 

          
@@ 103,13 108,13 @@ void Assembler::function_sizeof(bool gen
 {
 	Value &result = expression_values[op];
 	const Value &v1 = follow_reference_or_value(expression_values[arg1]);
-	bool has_size = is_offset(v1) || v1.type == ValueType::RangeValue;
+	bool has_size = is_offset(v1) || v1.type == ValueType::SubroutineValue;
 	if (!has_size) {
-		generate_value_type_error(generate, L"offset or range type", result, expression_values, components, arg1);
+		generate_value_type_error(generate, "offset or range type", result, expression_values, components, arg1);
 		return;
 	}
 
-	if (v1.type == ValueType::RangeValue) {
+	if (v1.type == ValueType::SubroutineValue) {
 		set_integer(result, v1.range_size);
 	} else {
 		// lookup type and get size

          
@@ 123,7 128,7 @@ void Assembler::function_offsetof(bool g
 	Value &result = expression_values[op];
 	const Value &v1 = follow_reference_or_value(expression_values[arg1]);
 	if (!is_offset(v1)) {
-		generate_value_type_error(generate, L"offset type", result, expression_values, components, arg1);
+		generate_value_type_error(generate, "offset type", result, expression_values, components, arg1);
 		return;
 	}
 

          
@@ 214,6 219,12 @@ void Assembler::function_log10(bool gene
 	apply_float_function_with_validation(generate, expression_values, components, op, arg1, float_fn);
 }
 
+void Assembler::function_logn(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1, uint32_t arg2)
+{
+	auto float_fn = [](double a, double b) { return log(a) / log(b); }; // change of base: log(a) / log(b) is the logarithm of a in base b
+	apply_float_function_with_validation(generate, expression_values, components, op, arg1, arg2, float_fn); // the two-argument helper applies float_fn; it reports non-numeric arguments (see its hunk above)
+}
+
 void Assembler::function_exp(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1)
 {
 	auto float_fn = [](double a) { return exp(a); };

          
@@ 231,7 242,7 @@ void Assembler::function_abs(bool genera
 		double v = dereference_float(value1);
 		set_float(result, v < 0.0 ? -v : v);
 	} else {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg1);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg1);
 	}
 }
 

          
@@ 259,11 270,11 @@ void Assembler::apply_integer_module_fun
 	const Value &value1 = follow_reference_or_value(expression_values[arg1]);
 	const Value &value2 = follow_reference_or_value(expression_values[arg2]);
 	if (!is_integer(value1)) {
-		generate_value_type_error(generate, L"integer type", result, expression_values, components, arg1);
+		generate_value_type_error(generate, "integer type", result, expression_values, components, arg1);
 		return;
 	}
 	if (!is_integer(value2)) {
-		generate_value_type_error(generate, L"integer type", result, expression_values, components, arg2);
+		generate_value_type_error(generate, "integer type", result, expression_values, components, arg2);
 		return;
 	}
 

          
@@ 273,8 284,8 @@ void Assembler::apply_integer_module_fun
 	if (b == 0) {
 		if (generate) {
 			const ExpressionComponent &operator_component = components[op];
-			std::wstringstream ss;
-			ss << L"Division by zero.";
+			std::stringstream ss;
+			ss << "Division by zero.";
 			report_error(operator_component.source_location, AssemblyErrorCodes::DivisionByZero, ss.str());
 		}
 		// the result is an error

          
@@ 319,7 330,7 @@ void Assembler::aggregate_numeric_args(b
 	Value &result = expression_values[op];
 	const Value &value1 = follow_reference_or_value(expression_values[arg1]);
 	if (!is_numeric(value1)) {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg1);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg1);
 		return;
 	}
 

          
@@ 331,7 342,7 @@ void Assembler::aggregate_numeric_args(b
 	while (argument_index != 0) {
 		const Value &value_n = follow_reference_or_value(expression_values[argument_index]);
 		if (!is_numeric(value_n)) {
-			generate_value_type_error(generate, L"numeric type", result, expression_values, components, argument_index);
+			generate_value_type_error(generate, "numeric type", result, expression_values, components, argument_index);
 			return;
 		}
 

          
@@ 366,15 377,15 @@ void Assembler::function_clamp(bool gene
 	const Value &value2 = follow_reference_or_value(expression_values[arg2]);
 	const Value &value3 = follow_reference_or_value(expression_values[arg3]);
 	if (!is_numeric(value1)) {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg1);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg1);
 		return;
 	}
 	if (!is_numeric(value2)) {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg2);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg2);
 		return;
 	}
 	if (!is_numeric(value3)) {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg3);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg3);
 		return;
 	}
 

          
@@ 401,8 412,8 @@ void Assembler::function_clamp(bool gene
 	// the range is inverted
 	if (generate) {
 		const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg2)];
-		std::wstringstream ss;
-		ss << L"Clamp range is inverted. The smaller number must come first.";
+		std::stringstream ss;
+		ss << "Clamp range is inverted. The smaller number must come first.";
 		report_error(ec.source_location, AssemblyErrorCodes::ClampRangeIsInverted, ss.str());
 	}
 	set_unknown(result);

          
@@ 415,15 426,15 @@ void Assembler::function_lerp(bool gener
 	const Value &value2 = follow_reference_or_value(expression_values[arg2]);
 	const Value &value3 = follow_reference_or_value(expression_values[arg3]);
 	if (!is_numeric(value1)) {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg1);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg1);
 		return;
 	}
 	if (!is_numeric(value2)) {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg2);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg2);
 		return;
 	}
 	if (!is_numeric(value3)) {
-		generate_value_type_error(generate, L"numeric type", result, expression_values, components, arg3);
+		generate_value_type_error(generate, "numeric type", result, expression_values, components, arg3);
 		return;
 	}
 

          
@@ 454,8 465,8 @@ void Assembler::function_int(bool genera
 
 	if (generate) {
 		const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg1)];
-		std::wstringstream ss;
-		ss << L"Integer cast from " << to_string(value1.type) << L" is not allowed.";
+		std::stringstream ss;
+		ss << "Integer cast from " << to_string(value1.type) << " is not allowed.";
 		report_error(ec.source_location, AssemblyErrorCodes::TypeCastNotAllowed, ss.str());
 	}
 	set_unknown(result);

          
@@ 473,8 484,8 @@ void Assembler::function_float(bool gene
 
 	if (generate) {
 		const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg1)];
-		std::wstringstream ss;
-		ss << L"Float cast from " << to_string(value1.type) << L" is not allowed.";
+		std::stringstream ss;
+		ss << "Float cast from " << to_string(value1.type) << " is not allowed.";
 		report_error(ec.source_location, AssemblyErrorCodes::TypeCastNotAllowed, ss.str());
 	}
 	set_unknown(result);

          
@@ 487,14 498,14 @@ void Assembler::function_string(bool gen
 
 	uint32_t arg2_index = components[arg1].next_sibling; // optional argument for string conversion
 
-	std::wstring unconverted_result;
+	std::string unconverted_result;
 
 	// first convert to a unicode string
 	if (is_integer(value1)) {
-		unconverted_result = std::to_wstring(dereference_integer(value1));
+		unconverted_result = std::to_string(dereference_integer(value1));
 
 	} else if (is_float(value1)) {
-		unconverted_result = std::to_wstring(dereference_float(value1));
+		unconverted_result = std::to_string(dereference_float(value1));
 
 	} else if (is_string(value1)) {
 		unconverted_result = dereference_string(value1);

          
@@ 502,8 513,8 @@ void Assembler::function_string(bool gen
 	} else {
 		if (generate) {
 			const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg1)];
-			std::wstringstream ss;
-			ss << L"String cast from " << to_string(value1.type) << L" is not allowed.";
+			std::stringstream ss;
+			ss << "String cast from " << to_string(value1.type) << " is not allowed.";
 			report_error(ec.source_location, AssemblyErrorCodes::TypeCastNotAllowed, ss.str());
 		}
 		set_unknown(result);

          
@@ 533,33 544,33 @@ void Assembler::function_string(bool gen
 			const Value &conversion_value = follow_reference_or_value(expression_values[current_arg_index]);
 			if (LIKELY(is_string(conversion_value))) {
 
-				const std::wstring_view format_string = dereference_string(conversion_value);
+				const std::string_view format_string = dereference_string(conversion_value);
 				bool duplicate = false;
-				const wchar_t *duplicate_property = nullptr;
+				const char *duplicate_property = nullptr;
 				if (_string_conversions.is_format(format_string, this_format)) {
 					format = this_format;
 					duplicate = has_format;
-					duplicate_property = L"Format";
+					duplicate_property = "Format";
 					has_format = true;
 				} else if (_string_conversions.is_subformat(format_string, this_subformat)) {
 					subformat = this_subformat;
 					duplicate = has_subformat;
-					duplicate_property = L"Subformat";
+					duplicate_property = "Subformat";
 					has_subformat = true;
 				} else if (_string_conversions.is_locale(format_string, this_locale)) {
 					locale = this_locale;
 					duplicate = has_locale;
-					duplicate_property = L"Locale";
+					duplicate_property = "Locale";
 					has_locale = true;
 				} else if (_string_conversions.is_flag(format_string, this_flag)) {
 					duplicate = (flags & this_flag) != 0;
-					duplicate_property = L"Flag";
+					duplicate_property = "Flag";
 					flags = static_cast<StringConversions::Flags>(flags | this_flag);
 				} else {
 					if (generate) {
 						const ExpressionComponent &ec = components[leftmost_node_in_expression(components, current_arg_index)];
-						std::wstringstream ss;
-						ss << L"Invalid string conversion format property '" << format_string << L"'.";
+						std::stringstream ss;
+						ss << "Invalid string conversion format property '" << format_string << "'.";
 						report_error(ec.source_location, AssemblyErrorCodes::InvalidStringConversionFormatProperty, ss.str());
 					}
 					valid_formats = false;

          
@@ 568,8 579,8 @@ void Assembler::function_string(bool gen
 				if (duplicate) {
 					if (generate) {
 						const ExpressionComponent &ec = components[leftmost_node_in_expression(components, current_arg_index)];
-						std::wstringstream ss;
-						ss << duplicate_property << L" property specified twice.";
+						std::stringstream ss;
+						ss << duplicate_property << " property specified twice.";
 						report_error(ec.source_location, AssemblyErrorCodes::AmbiguousStringConversionProperty, ss.str());
 					}
 					valid_formats = false;

          
@@ 578,8 589,8 @@ void Assembler::function_string(bool gen
 			} else {
 				if (generate) {
 					const ExpressionComponent &ec = components[leftmost_node_in_expression(components, current_arg_index)];
-					std::wstringstream ss;
-					ss << L"String conversion format expected but got " << to_string(conversion_value.type) << L".";
+					std::stringstream ss;
+					ss << "String conversion format expected but got " << to_string(conversion_value.type) << ".";
 					report_error(ec.source_location, AssemblyErrorCodes::StringConversionFormatExpected, ss.str());
 				}
 				valid_formats = false;

          
@@ 592,15 603,15 @@ void Assembler::function_string(bool gen
 		if (valid_formats) {
 			if (_string_conversions.has_conversion(format, subformat, locale)) {
 				size_t error_pos = 0;
-				std::wstring converted_result;
+				std::string converted_result;
 				if (_string_conversions.convert(unconverted_result, format, subformat, locale, flags, converted_result, error_pos)) {
 					set_string(result, converted_result);
 					return;
 				} else {
 					if (generate) {
 						const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg2_index)];
-						std::wstringstream ss;
-						ss << L"String conversion format failed at position " << error_pos << ". No conversion for character.";
+						std::stringstream ss;
+						ss << "String conversion format failed at position " << error_pos << ". No conversion for character.";
 						report_error(ec.source_location, AssemblyErrorCodes::StringConversionFailed, ss.str());
 					}
 					set_unknown(result);

          
@@ 610,8 621,8 @@ void Assembler::function_string(bool gen
 			} else {
 				if (generate) {
 					const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg2_index)];
-					std::wstringstream ss;
-					ss << L"Unsupported string conversion property combination.";
+					std::stringstream ss;
+					ss << "Unsupported string conversion property combination.";
 					report_error(ec.source_location, AssemblyErrorCodes::UnknownStringConversionFormatCombination, ss.str());
 				}
 				set_unknown(result);

          
@@ 638,8 649,8 @@ void Assembler::function_hexstring(bool 
 	} else {
 		if (generate) {
 			const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg1)];
-			std::wstringstream ss;
-			ss << L"String cast from " << to_string(value1.type) << L" is not allowed.";
+			std::stringstream ss;
+			ss << "String cast from " << to_string(value1.type) << " is not allowed.";
 			report_error(ec.source_location, AssemblyErrorCodes::TypeCastNotAllowed, ss.str());
 		}
 		set_unknown(result);

          
@@ 655,15 666,15 @@ void Assembler::function_unicode(bool ge
 		int value = dereference_integer(value1);
 
 		if (value >= 0 && value < (1 << 21)) {
-			std::wstring char_string;
-			char_string.push_back(static_cast<wchar_t>(value));
+			std::string char_string;
+			core::wide_to_utf8(static_cast<wchar_t>(value), char_string);
 			set_string(result, std::move(char_string));
 
 		} else {
 			if (generate) {
 				const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg1)];
-				std::wstringstream ss;
-				ss << L"Unicode character " << value << L" is outside valid range [0..2097151].";
+				std::stringstream ss;
+				ss << "Unicode character " << value << " is outside valid range [0..2097151].";
 				report_error(ec.source_location, AssemblyErrorCodes::TypeCastNotAllowed, ss.str());
 			}
 			set_unknown(result);

          
@@ 672,14 683,160 @@ void Assembler::function_unicode(bool ge
 	} else {
 		if (generate) {
 			const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg1)];
-			std::wstringstream ss;
-			ss << L"String cast from " << to_string(value1.type) << L" is not allowed.";
+			std::stringstream ss;
+			ss << "String cast from " << to_string(value1.type) << " is not allowed.";
 			report_error(ec.source_location, AssemblyErrorCodes::TypeCastNotAllowed, ss.str());
 		}
 		set_unknown(result);
 	}
 }
 
+void Assembler::function_uppercase(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1) // Uppercases an integer character code or a UTF-8 string; an optional second argument names the locale to use.
+{
+	Value &result = expression_values[op]; // slot that receives this function's result
+	const Value &value1 = follow_reference_or_value(expression_values[arg1]); // first argument: the value to convert
+
+	StringLocale::Locale locale_type = StringLocale::Locale::Default; // stays Default unless a locale argument is supplied
+	uint32_t arg2_index = components[arg1].next_sibling; // optional second argument: locale name (index 0 is treated as "absent")
+	if (arg2_index != 0) {
+		uint32_t arg3_index = components[arg2_index].next_sibling;
+		if (arg3_index != 0) { // NOTE(review): a third argument is reported as an error, but there is no early return here unlike the checks below; confirm intended
+			if (generate) { // errors are only reported when generate is set
+				const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg3_index)];
+				std::stringstream ss;
+				ss << "Too many arguments to function.";
+				report_error(ec.source_location, AssemblyErrorCodes::TooManyArguments, ss.str());
+			}
+		}
+		
+		const Value &value2 = follow_reference_or_value(expression_values[arg2_index]);
+		if (!is_string(value2)) { // the locale argument must be a string
+			if (generate) {
+				const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg2_index)];
+				std::stringstream ss;
+				ss << "Function needs string argument but got " << to_string(value2.type) << '.';
+				report_error(ec.source_location, AssemblyErrorCodes::ExpectedStringArgument, ss.str());
+			}
+			set_unknown(result);
+			return;
+		}
+		if (!_string_locale.is_locale(dereference_string(value2), locale_type)) { // presumably fills locale_type on success; is_locale is defined elsewhere, verify
+			if (generate) {
+				const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg2_index)];
+				std::stringstream ss;
+				ss << "String '" << dereference_string(value2) << "' isn't a locale name.\n";
+				ss << "Possible values are:\n";
+				for(std::string_view name : _string_locale.supported_locales()) { // list every supported locale name, one per line
+					ss << name << '\n';
+				}
+				report_error(ec.source_location, AssemblyErrorCodes::UnsupportedLocaleName, ss.str());
+			}
+			set_unknown(result);
+			return;
+		}
+	}
+
+	std::locale locale; // default-constructed std::locale is a copy of the current global C++ locale
+	if (locale_type != StringLocale::Locale::Default) {
+		locale = std::locale(std::string(_string_locale.std_locale_name(locale_type))); // NOTE(review): std::locale(name) throws std::runtime_error when the named locale is unavailable; confirm a caller handles it
+	}
+	const std::ctype<wchar_t> &facet(std::use_facet<std::ctype<wchar_t>>(locale)); // wide-character case conversion facet of the chosen locale
+	
+	if (is_integer(value1)) { // integer input: treated as a single character code
+		int value = dereference_integer(value1);
+		wchar_t uppercase = facet.toupper(static_cast<wchar_t>(value)); // value is cast to wchar_t before conversion
+		set_integer(result, static_cast<int>(uppercase));
+
+	} else if (is_string(value1)) { // string input: round-trip through a wide string
+		std::string_view s = dereference_string(value1);
+		std::wstring wide = core::utf8_to_wide(s);
+		facet.toupper(wide.data(), wide.data() + wide.size()); // range overload converts the buffer in place
+		set_string(result, core::wide_to_utf8(wide));
+		
+	} else { // any other type is rejected and the result is marked unknown
+		if (generate) {
+			const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg1)];
+			std::stringstream ss;
+			ss << "Expected string or integer but got " << to_string(value1.type) << '.';
+			report_error(ec.source_location, AssemblyErrorCodes::IntegerOrStringValueExpected, ss.str());
+		}
+		set_unknown(result);
+	}
+}
+
+void Assembler::function_lowercase(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t op, uint32_t arg1)
+{
+	Value &result = expression_values[op];
+	const Value &value1 = follow_reference_or_value(expression_values[arg1]);
+
+	StringLocale::Locale locale_type = StringLocale::Locale::Default;
+	uint32_t arg2_index = components[arg1].next_sibling; // optional argument for string conversion
+	if (arg2_index != 0) {
+		uint32_t arg3_index = components[arg2_index].next_sibling;
+		if (arg3_index != 0) {
+			if (generate) {
+				const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg3_index)];
+				std::stringstream ss;
+				ss << "Too many arguments to function.";
+				report_error(ec.source_location, AssemblyErrorCodes::TooManyArguments, ss.str());
+			}
+		}
+
+		const Value &value2 = follow_reference_or_value(expression_values[arg2_index]);
+		if (!is_string(value2)) {
+			if (generate) {
+				const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg2_index)];
+				std::stringstream ss;
+				ss << "Function needs string argument but got " << to_string(value2.type) << '.';
+				report_error(ec.source_location, AssemblyErrorCodes::ExpectedStringArgument, ss.str());
+			}
+			set_unknown(result);
+			return;
+		}
+		if (!_string_locale.is_locale(dereference_string(value2), locale_type)) {
+			if (generate) {
+				const ExpressionComponent &ec = components[leftmost_node_in_expression(components, arg2_index)];
+				std::stringstream ss;
+				ss << "String '" << dereference_string(value2) << "' isn't a locale name.\n";
+				ss << "Possible values are:";
+				for(std::string_view name : _string_locale.supported_locales()) {
+					ss << '\n' << name;
+				}