M .kdev4/jasm.kdev4 +7 -25
@@ 16,42 16,24 @@ Install Directory=/usr/local
Runtime=Host System
[Launch]
-Launch Configurations=Launch Configuration 0,Launch Configuration 1,Launch Configuration 2
+Launch Configurations=Launch Configuration 2,Launch Configuration 0
[Launch][Launch Configuration 0]
Configured Launch Modes=execute
Configured Launchers=nativeAppLauncher
-Name=jasm-6502
+Name=jasm
Type=Native Application
[Launch][Launch Configuration 0][Data]
-Arguments=test.jasm -v3 test.prg
-Dependencies=@Variant(\x00\x00\x00\t\x00\x00\x00\x00\x01\x00\x00\x00\x0b\x00\x00\x00\x00\x03\x00\x00\x00\x08\x00j\x00a\x00s\x00m\x00\x00\x00\x12\x00j\x00a\x00s\x00m\x00-\x006\x005\x000\x002\x00\x00\x00\x12\x00j\x00a\x00s\x00m\x00-\x006\x005\x000\x002)
-Dependency Action=Build
+Arguments=-v3 test/test.jasm test.prg
+Dependencies=@Variant(\x00\x00\x00\t\x00\x00\x00\x00\x00)
+Dependency Action=Nothing
EnvironmentGroup=
Executable=file:///home/jonas/dev/c64/jasm
External Terminal=konsole --noclose --workdir %workdir -e %exe
-Project Target=jasm,jasm-6502,jasm-6502
+Project Target=jasm,jasm,jasm
Use External Terminal=false
-Working Directory=file:///home/jonas/dev/c64/jasm/jasm/test
-isExecutable=false
-
-[Launch][Launch Configuration 1]
-Configured Launch Modes=execute
-Configured Launchers=nativeAppLauncher
-Name=jasm-z80
-Type=Native Application
-
-[Launch][Launch Configuration 1][Data]
-Arguments=test.jasm -v3 test.prg
-Dependencies=@Variant(\x00\x00\x00\t\x00\x00\x00\x00\x01\x00\x00\x00\x0b\x00\x00\x00\x00\x03\x00\x00\x00\x08\x00j\x00a\x00s\x00m\x00\x00\x00\x10\x00j\x00a\x00s\x00m\x00-\x00z\x008\x000\x00\x00\x00\x10\x00j\x00a\x00s\x00m\x00-\x00z\x008\x000)
-Dependency Action=Build
-EnvironmentGroup=
-Executable=file:///home/jonas/dev/c64/jasm
-External Terminal=konsole --noclose --workdir %workdir -e %exe
-Project Target=jasm,jasm-z80,jasm-z80
-Use External Terminal=false
-Working Directory=file:///home/jonas/dev/c64/jasm/jasm/test
+Working Directory=file:///home/jonas/dev/c64/jasm/jasm
isExecutable=false
[Launch][Launch Configuration 2]
M CMakeLists.txt +1 -2
@@ 17,5 17,4 @@ endif("${CMAKE_BUILD_TYPE}" STREQUAL "Re
add_subdirectory(core)
add_subdirectory(hasher)
-add_subdirectory(jasm-6502)
-add_subdirectory(jasm-z80)
+add_subdirectory(jasm)
M core/core.cbp +1 -0
@@ 102,6 102,7 @@
<Unit filename="core/collections/hash_map.h" />
<Unit filename="core/collections/null_hash_compare.h" />
<Unit filename="core/collections/static_array.h" />
+ <Unit filename="core/collections/string_hash_functor.h" />
<Unit filename="core/debug/timer.cpp" />
<Unit filename="core/debug/timer.h" />
<Unit filename="core/environment/log.cpp" />
M core/core.vcxproj +1 -0
@@ 210,6 210,7 @@
<ClInclude Include="core\collections\hash_map.h" />
<ClInclude Include="core\collections\null_hash_compare.h" />
<ClInclude Include="core\collections\static_array.h" />
+ <ClInclude Include="core\collections\string_hash_functor.h" />
<ClInclude Include="core\debug\timer.h" />
<ClInclude Include="core\environment\log.h" />
<ClInclude Include="core\exceptions\exception.h" />
M core/core.vcxproj.filters +3 -0
@@ 40,6 40,9 @@
<ClInclude Include="core\collections\null_hash_compare.h">
<Filter>collections</Filter>
</ClInclude>
+ <ClInclude Include="core\collections\string_hash_functor.h">
+ <Filter>collections</Filter>
+ </ClInclude>
<ClInclude Include="core\debug\timer.h">
<Filter>debug</Filter>
</ClInclude>
M core/core/collections/null_hash_compare.h +0 -2
@@ 1,7 1,5 @@
#pragma once
-#include <functional>
-
namespace core
{
A => core/core/collections/string_hash_functor.h +22 -0
@@ 0,0 1,22 @@
+#pragma once
+
+#include <core/strings/murmur_hash.h>
+
+namespace core
+{
+
+/// @addtogroup collections
+/// @{
+
+/// Hash functor for hash maps keyed by std::string; hashes the key with murmur_hash3_string_x64_64.
+struct StringHashFunctor
+{
+ size_t operator()(const std::string &key) const // returns the hash of @a key
+ {
+ return murmur_hash3_string_x64_64(key);
+ }
+};
+
+/// @}
+
+} // namespace core
M core/core/io/file_helpers.cpp +8 -0
@@ 1,5 1,6 @@
#include "pch.h"
+#include <algorithm>
#include <core/io/file_helpers.h>
#include <sstream>
@@ 53,4 54,11 @@ std::string file_extension(const std::st
return filename.substr(pos, std::string::npos);
}
+std::string to_front_slashes(const std::string &path) // Returns a copy of path in which every backslash is replaced by a forward slash.
+{
+ std::string front_slash_path(path); // work on a copy since the argument is const
+ std::replace(front_slash_path.begin(), front_slash_path.end(), '\\', '/');
+ return front_slash_path;
+}
+
} // namespace core
M core/core/io/file_helpers.h +5 -1
@@ 7,7 7,7 @@ namespace core
/// @{
/// Determines if there is an include directory that matches a file part.
-/// @ return True if a file part matches an include directory. @a path is updated in that case.
+/// @return True if a file part matches an include directory. @a result is updated in that case.
bool match_include_dir_and_file(const std::string &file, const std::vector<std::string> &include_dirs, std::string &result);
/// Check if a file exists.
@@ 20,6 20,10 @@ std::string base_name(const std::string
/// Returns the file extension including the punctual character.
std::string file_extension(const std::string &filename);
+/// Returns a copy of @a path with all back-slashes changed to front-slashes.
+std::string to_front_slashes(const std::string &path);
+
+
/// @}
} // namespace core
M core/core/io/file_id_linux.h +7 -0
@@ 17,6 17,13 @@ struct FileId
{
return a.device == b.device && a.inode == b.inode;
}
+ friend bool operator<(const FileId &a, const FileId &b) // Lexicographic order: device first, then inode.
+ {
+ if (a.device != b.device) { // the inode may only break ties, otherwise a < b and b < a could both hold
+ return a.device < b.device;
+ }
+ return a.inode < b.inode;
+ }
dev_t device;
ino_t inode;
};
M core/core/io/file_id_win.h +10 -0
@@ 17,6 17,16 @@ struct FileId
&& a.file_index_hi == b.file_index_hi
&& a.file_index_lo == b.file_index_lo;
}
+ friend bool operator<(const FileId &a, const FileId &b) // Lexicographic order: serial_number, then file_index_hi, then file_index_lo.
+ {
+ if (a.serial_number != b.serial_number) { // later fields may only break ties, otherwise a < b and b < a could both hold
+ return a.serial_number < b.serial_number;
+ }
+ if (a.file_index_hi != b.file_index_hi) {
+ return a.file_index_hi < b.file_index_hi;
+ }
+ return a.file_index_lo < b.file_index_lo;
+ }
DWORD serial_number;
DWORD file_index_hi;
DWORD file_index_lo;
M hasher/main.cpp +2 -2
@@ 9,9 9,9 @@
#if defined(_MSC_VER)
int wmain(int argc, char16_t *argv[])
{
- std::wcout << std::hex << std::showbase;
+ std::cout << std::hex << std::showbase; // hashes are printed as 0x-prefixed hexadecimal
for (int i = 1; i < argc; ++i)
- std::wcout << argv[i] << L':' << core::murmur_hash3_string_x64_64(core::wide_to_utf8(argv[i])) << L'\n';
+ std::cout << core::wide_to_utf8(argv[i]) << ':' << core::murmur_hash3_string_x64_64(core::wide_to_utf8(argv[i])) << '\n'; // print the converted text: a raw char16_t* has no narrow-stream inserter and would be written as a pointer address
return 0;
}
R jasm-6502/CMakeLists.txt => +0 -33
@@ 1,33 0,0 @@
-file(GLOB_RECURSE jasm_src
- "../jasm/*.h"
- "../jasm/*.cpp"
-)
-
-include_directories("../jasm")
-
-if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
- # using Clang
- add_compile_options(-Wall -Wnon-virtual-dtor -Wbind-to-temporary-copy -Wambiguous-member-template -Wextra-tokens -Weverything)
-elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
- # using GCC
-elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
- # using Visual Studio C++
-endif()
-
-if (${MINGW})
- add_compile_options(-static-libstdc++)
-endif()
-
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPROCESSOR=0")
-
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_DEBUG")
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")
-
-add_executable(jasm-6502 ${jasm_src})
-
-set_property(TARGET jasm-6502 PROPERTY CXX_STANDARD 17)
-set_property(TARGET jasm-6502 PROPERTY CXX_STANDARD_REQUIRED ON)
-
-target_link_libraries(jasm-6502 core)
-
-install(TARGETS jasm-6502 CONFIGURATIONS Release RUNTIME DESTINATION /usr/bin)
R jasm-z80/CMakeLists.txt => +0 -33
@@ 1,33 0,0 @@
-file(GLOB_RECURSE jasm_src
- "../jasm/*.h"
- "../jasm/*.cpp"
-)
-
-include_directories("../jasm")
-
-if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
- # using Clang
- add_compile_options(-Wall -Wnon-virtual-dtor -Wbind-to-temporary-copy -Wambiguous-member-template -Wextra-tokens -Weverything)
-elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
- # using GCC
-elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
- # using Visual Studio C++
-endif()
-
-if (${MINGW})
- add_compile_options(-static-libstdc++)
-endif()
-
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPROCESSOR=1")
-
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_DEBUG")
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")
-
-add_executable(jasm-z80 ${jasm_src})
-
-set_property(TARGET jasm-z80 PROPERTY CXX_STANDARD 17)
-set_property(TARGET jasm-z80 PROPERTY CXX_STANDARD_REQUIRED ON)
-
-target_link_libraries(jasm-z80 core)
-
-install(TARGETS jasm-z80 CONFIGURATIONS Release RUNTIME DESTINATION /usr/bin)
R jasm.sln => +0 -62
@@ 1,62 0,0 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 14
-VisualStudioVersion = 14.0.23107.0
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jasm", "jasm\jasm.vcxproj", "{D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}"
- ProjectSection(ProjectDependencies) = postProject
- {082DD209-F99C-4F50-B360-7F0E75103418} = {082DD209-F99C-4F50-B360-7F0E75103418}
- EndProjectSection
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hasher", "hasher\hasher.vcxproj", "{1F130AF3-A85E-4BF5-9629-2787E58344E2}"
- ProjectSection(ProjectDependencies) = postProject
- {082DD209-F99C-4F50-B360-7F0E75103418} = {082DD209-F99C-4F50-B360-7F0E75103418}
- EndProjectSection
-EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "core", "core\core.vcxproj", "{082DD209-F99C-4F50-B360-7F0E75103418}"
-EndProject
-Global
- GlobalSection(SolutionConfigurationPlatforms) = preSolution
- debug-hasher|x64 = debug-hasher|x64
- debug-jasm-6502|x64 = debug-jasm-6502|x64
- debug-jasm-z80|x64 = debug-jasm-z80|x64
- release-hasher|x64 = release-hasher|x64
- release-jasm-6502|x64 = release-jasm-6502|x64
- release-jasm-z80|x64 = release-jasm-z80|x64
- EndGlobalSection
- GlobalSection(ProjectConfigurationPlatforms) = postSolution
- {D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}.debug-hasher|x64.ActiveCfg = debug-hasher|x64
- {D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}.debug-jasm-6502|x64.ActiveCfg = debug-jasm-6502|x64
- {D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}.debug-jasm-6502|x64.Build.0 = debug-jasm-6502|x64
- {D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}.debug-jasm-z80|x64.ActiveCfg = debug-jasm-z80|x64
- {D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}.debug-jasm-z80|x64.Build.0 = debug-jasm-z80|x64
- {D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}.release-hasher|x64.ActiveCfg = release-hasher|x64
- {D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}.release-jasm-6502|x64.ActiveCfg = release-jasm-6502|x64
- {D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}.release-jasm-6502|x64.Build.0 = release-jasm-6502|x64
- {D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}.release-jasm-z80|x64.ActiveCfg = release-jasm-z80|x64
- {D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}.release-jasm-z80|x64.Build.0 = release-jasm-z80|x64
- {1F130AF3-A85E-4BF5-9629-2787E58344E2}.debug-hasher|x64.ActiveCfg = debug-hasher|x64
- {1F130AF3-A85E-4BF5-9629-2787E58344E2}.debug-hasher|x64.Build.0 = debug-hasher|x64
- {1F130AF3-A85E-4BF5-9629-2787E58344E2}.debug-jasm-6502|x64.ActiveCfg = debug-jasm-6502|x64
- {1F130AF3-A85E-4BF5-9629-2787E58344E2}.debug-jasm-z80|x64.ActiveCfg = debug-jasm-z80|x64
- {1F130AF3-A85E-4BF5-9629-2787E58344E2}.release-hasher|x64.ActiveCfg = release-hasher|x64
- {1F130AF3-A85E-4BF5-9629-2787E58344E2}.release-hasher|x64.Build.0 = release-hasher|x64
- {1F130AF3-A85E-4BF5-9629-2787E58344E2}.release-jasm-6502|x64.ActiveCfg = release-jasm-6502|x64
- {1F130AF3-A85E-4BF5-9629-2787E58344E2}.release-jasm-z80|x64.ActiveCfg = release-jasm-z80|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.debug-hasher|x64.ActiveCfg = debug-hasher|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.debug-hasher|x64.Build.0 = debug-hasher|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.debug-jasm-6502|x64.ActiveCfg = debug-jasm-6502|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.debug-jasm-6502|x64.Build.0 = debug-jasm-6502|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.debug-jasm-z80|x64.ActiveCfg = debug-jasm-z80|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.debug-jasm-z80|x64.Build.0 = debug-jasm-z80|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.release-hasher|x64.ActiveCfg = release-hasher|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.release-hasher|x64.Build.0 = release-hasher|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.release-jasm-6502|x64.ActiveCfg = release-jasm-6502|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.release-jasm-6502|x64.Build.0 = release-jasm-6502|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.release-jasm-z80|x64.ActiveCfg = release-jasm-z80|x64
- {082DD209-F99C-4F50-B360-7F0E75103418}.release-jasm-z80|x64.Build.0 = release-jasm-z80|x64
- EndGlobalSection
- GlobalSection(SolutionProperties) = preSolution
- HideSolutionNode = FALSE
- EndGlobalSection
-EndGlobal
A => jasm/CMakeLists.txt +33 -0
@@ 0,0 1,33 @@
+file(GLOB_RECURSE jasm_src
+ "./*.h"
+ "./*.cpp"
+) # jasm_src: every header and source file found recursively below this directory
+
+include_directories(".") # sources include their headers relative to this directory
+
+if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+ # using Clang
+ add_compile_options(-Wall -Wnon-virtual-dtor -Wbind-to-temporary-copy -Wambiguous-member-template -Wextra-tokens -Weverything)
+elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+ # using GCC
+elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
+ # using Visual Studio C++
+endif()
+
+if (${MINGW})
+ add_compile_options(-static-libstdc++)
+endif()
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") # NOTE(review): no-op self-assignment left over from the removed per-processor lists, which appended -DPROCESSOR=<n> here
+
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_DEBUG")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")
+
+add_executable(jasm ${jasm_src}) # single executable replacing the former jasm-6502 and jasm-z80 targets
+
+set_property(TARGET jasm PROPERTY CXX_STANDARD 17)
+set_property(TARGET jasm PROPERTY CXX_STANDARD_REQUIRED ON)
+
+target_link_libraries(jasm core)
+
+install(TARGETS jasm CONFIGURATIONS Release RUNTIME DESTINATION /usr/bin) # installed for Release configurations only, into an absolute path
M jasm/assembling/assembler.cpp => jasm/assemble/assembler.cpp +25 -14
@@ 1,37 1,48 @@
#include "pch.h"
-#include <assembling/assembler.h>
-#include <assembling/assembler_impl/assembler_impl.h>
+#include <assemble/assembler.h>
+#include <assemble/assembler_impl/assembler_impl.h>
namespace jasm
{
-std::vector<Section> assemble(bool multiple_output_files, bool multi_bank_mode, bool pseudo_instructions
- , const std::vector<TokenChain> &syntax, StringRepository &strings
- , const HashArrayRepository &hash_arrays, const std::vector<std::string> &used_files
- , const std::vector<std::pair<std::string, bool>> &predefined_booleans
- , const std::vector<std::pair<std::string, int32_t>> &predefined_integers
- , const std::vector<std::pair<std::string, std::string>> &predefined_strings
- , DataReader &data_reader, int32_t max_errors
- , const std::string &symbol_dump_file, const std::string &vice_dump_file, const std::string &gba_dump_file)
+std::vector<Section> assemble(
+ bool multiple_output_files,
+ bool multi_bank_mode,
+ bool pseudo_instructions,
+ ProcessorType default_processor,
+ const std::string &filename,
+ const std::vector<std::string> &include_dirs,
+ StringRepository &strings,
+ std::vector<std::string> &used_files,
+ const std::vector<std::pair<std::string, bool>> &predefined_booleans,
+ const std::vector<std::pair<std::string, int32_t>> &predefined_integers,
+ const std::vector<std::pair<std::string, std::string>> &predefined_strings,
+ int32_t max_errors,
+ const std::string &symbol_dump_file,
+ const std::string &vice_dump_file,
+ const std::string &gba_dump_file,
+ const std::string &output_hex_file
+)
{
std::vector<Section> output;
Assembler assembler(
- multiple_output_files
+ multiple_output_files
, multi_bank_mode
, pseudo_instructions
- , syntax
+ , default_processor
+ , filename
+ , include_dirs
, strings
- , hash_arrays
, used_files
, predefined_booleans
, predefined_integers
, predefined_strings
- , data_reader
, max_errors
, symbol_dump_file
, vice_dump_file
, gba_dump_file
+ , output_hex_file
, output
);
assembler.assemble();
M jasm/assembling/assembler.h => jasm/assemble/assembler.h +23 -13
@@ 1,29 1,39 @@
#pragma once
-#include <parsing/token_chain.h>
-#include <parsing/section.h>
+#include <core/collections/split_vector.h>
+#include <processor/processor.h>
+#include <syntax/section.h>
+#include <utility/token_chain.h>
#include <vector>
namespace jasm
{
-class DataReader;
class StringRepository;
-class HashArrayRepository;
-/// @addtogroup assembling
+/// @addtogroup assemble
/// @{
/// Assemble the provided syntax chain.
/// @return A vector of sections that provides output data. No section is empty and there are only code sections.
-std::vector<Section> assemble(bool multiple_output_files, bool multi_bank_mode, bool pseudo_instructions
- , const std::vector<TokenChain> &syntax, StringRepository &strings
- , const HashArrayRepository &hash_arrays, const std::vector<std::string> &used_files
- , const std::vector<std::pair<std::string, bool>> &predefined_booleans
- , const std::vector<std::pair<std::string, int32_t>> &predefined_integers
- , const std::vector<std::pair<std::string, std::string>> &predefined_strings
- , DataReader &data_reader, int32_t max_errors
- , const std::string &symbol_dump_file, const std::string &vice_dump_file, const std::string &gba_dump_file);
+std::vector<Section> assemble(
+ bool multiple_output_files
+ , bool multi_bank_mode
+ , bool pseudo_instructions
+ , ProcessorType default_processor
+ , const std::string &filename
+ , const std::vector<std::string> &include_dirs
+ , StringRepository &strings
+ , std::vector<std::string> &used_files
+ , const std::vector<std::pair<std::string, bool>> &predefined_booleans
+ , const std::vector<std::pair<std::string, int32_t>> &predefined_integers
+ , const std::vector<std::pair<std::string, std::string>> &predefined_strings
+ , int32_t max_errors
+ , const std::string &symbol_dump_file
+ , const std::string &vice_dump_file
+ , const std::string &gba_dump_file
+ , const std::string &output_hex_file
+);
/// @}
M jasm/assembling/assembler_impl/assembler_impl.cpp => jasm/assemble/assembler_impl/assembler_impl.cpp +210 -23
@@ 1,46 1,69 @@
#include "pch.h"
-#include <assembling/assembler_impl/assembler_impl.h>
-#include <assembling/type_description.h>
+#include <assemble/assembler_impl/assembler_impl.h>
+#include <assemble/type_description.h>
#include <core/debug/timer.h>
#include <core/environment/log.h>
#include <core/io/file_helpers.h>
+#include <core/io/file_id.h>
#include <core/io/file_writer.h>
+#include <core/math/sign.h>
#include <core/strings/utf8.h>
#include <exceptions/assembly_exception.h>
#include <exceptions/error_codes.h>
+#include <io/hex_source_writer.h>
#include <iomanip>
+#include <syntax/syntax_parse.h>
#include <sstream>
+#include <tokenize/tokenize.h>
namespace jasm {
using namespace core;
-Assembler::Assembler(bool multiple_output_files, bool multi_bank_mode, bool pseudo_instructions
- , const std::vector<TokenChain> &syntax, StringRepository &strings
- , const HashArrayRepository &hash_arrays, const std::vector<std::string> &used_files
- , const std::vector<std::pair<std::string, bool>> &predefined_booleans
- , const std::vector<std::pair<std::string, int32_t>> &predefined_integers
- , const std::vector<std::pair<std::string, std::string>> &predefined_strings
- , DataReader &data_reader, int32_t max_errors
- , const std::string &symbol_dump_file, const std::string &vice_dump_file, const std::string &gba_dump_file
- , std::vector<Section> &output)
+Assembler::Assembler(
+ bool multiple_output_files
+ , bool multi_bank_mode
+ , bool pseudo_instructions
+ , ProcessorType default_processor
+ , const std::string &filename
+ , const std::vector<std::string> &include_dirs
+ , StringRepository &strings
+ , std::vector<std::string> &used_files
+ , const std::vector<std::pair<std::string, bool>> &predefined_booleans
+ , const std::vector<std::pair<std::string, int32_t>> &predefined_integers
+ , const std::vector<std::pair<std::string, std::string>> &predefined_strings
+ , int32_t max_errors
+ , const std::string &symbol_dump_file
+ , const std::string &vice_dump_file
+ , const std::string &gba_dump_file
+ , const std::string &output_hex_file
+ , std::vector<Section> &output
+)
: _multiple_output_files(multiple_output_files)
, _multi_bank_mode(multi_bank_mode)
, _pseudo_instructions(pseudo_instructions)
- , _input(syntax)
+ , _default_processor(default_processor)
+ , _input_filename(filename)
+ , _include_dirs(include_dirs)
, _strings(strings)
- , _hash_arrays(hash_arrays)
, _used_files(used_files)
, _predefined_booleans(predefined_booleans)
, _predefined_integers(predefined_integers)
, _predefined_strings(predefined_strings)
- , _data_reader(data_reader)
, _max_errors(max_errors)
, _symbol_dump_file(symbol_dump_file)
, _vice_dump_file(vice_dump_file)
, _gba_dump_file(gba_dump_file)
+ , _output_hex_file(output_hex_file)
, _dump_symbols(!symbol_dump_file.empty() || !vice_dump_file.empty() || !gba_dump_file.empty())
+ , _dump_hex(!output_hex_file.empty())
+ , _hash_arrays(128)
+ , _processor(nullptr)
+ , _processor_type(ProcessorType::Unspecified)
+ , _catalogue(pseudo_instructions)
+ , _hex_source_writer(nullptr)
+ , _data_reader(include_dirs)
, _current_pass(token_chain_type_buffer_size)
, _previous_pass(token_chain_type_buffer_size)
, _oscillating_state(false)
@@ 79,6 102,7 @@ Assembler::Assembler(bool multiple_outpu
_section_mapping_stack.reserve(16);
_location_stack.reserve(8);
+ _processor_stack.reserve(4);
_sections.clear();
_sections.reserve(16);
@@ 90,6 114,13 @@ Assembler::Assembler(bool multiple_outpu
_symbol_names[0] = "";
}
+Assembler::~Assembler() // Frees the hex source writer if one is still allocated.
+{
+ if (_hex_source_writer != nullptr) { // assemble() deletes and nulls the writer itself unless it throws first
+ delete _hex_source_writer;
+ }
+}
+
void Assembler::fill_type_integer_operators(TypeDescription &type)
{
type.operators[static_cast<uint32_t>(OperatorType::Plus)] = &Assembler::operator_integer_add;
@@ 554,12 585,16 @@ void Assembler::run_assembly_pass(bool g
assert(_call_depth == 0);
assert(_data_generation_depth == 0);
- _input_reader = TokenReader(_input[0]);
_current_pass.type_reader = TokenReader(_current_pass.types);
_previous_pass.type_reader = TokenReader(_previous_pass.types);
_sections.clear();
_section = nullptr;
+ _processor_type = _default_processor;
+ _processor = _catalogue.processor(_processor_type);
+ _processor_stack.clear();
+ _processor_stack.push_back(_processor_type);
+
// reset scopes since we may place things directly inside the global scope
// and these things will not be reset otherwise
_symbol_environment.reset();
@@ 579,13 614,8 @@ void Assembler::run_assembly_pass(bool g
// reset program counter
set_integer(_program_counter, 0);
- const SyntaxToken *t = consume_next_token();
- bool early_return = false;
- t = parse_inner_scope(generate, t, early_return);
- assert(!early_return); // this can't be inside a macro so this should not be possible
-
- assert(t->type == SyntaxTokenType::End);
-
+ parse_file(generate, _input_filename, nullptr);
+
if (generate) {
if (_dump_symbols) {
if (!_symbol_dump_file.empty()) {
@@ 611,6 641,152 @@ void Assembler::run_assembly_pass(bool g
}
+void Assembler::parse_file(bool generate, const std::string &filename, const SourceLocation *include_location) // Runs the current pass over one source file; include_location is nullptr for the main file.
+{
+ size_t file_index = syntax_analyze(filename, include_location); // index of the (possibly already stored) syntax result for this file
+
+ const SyntaxResult &syntax_result = _syntax_results[file_index]; // NOTE(review): not read after parse_inner_scope below; confirm nested includes cannot invalidate it before that point
+
+ if (syntax_result.error_code != AssemblyErrorCodes::Ok) {
+ if (generate) { // stored syntax errors are only reported when generating
+ report_error(syntax_result.error_location, syntax_result.error_code, syntax_result.error_message);
+ }
+ return;
+ }
+
+ if (std::find(_include_id_history.begin(), _include_id_history.end(), syntax_result.file_id) != _include_id_history.end()) { // this file is already on the include stack
+ std::stringstream ss;
+ ss << "Include file recursion. '" << _used_files[file_index] << "' is included twice from:";
+ for (auto it = _include_file_history.rbegin(); it != _include_file_history.rend(); ++it) // innermost include first
+ ss << "\n " << to_front_slashes(*it);
+ throw AssemblyException(_used_files[file_index], 1, 1, AssemblyErrorCodes::RecursiveIncludes, ss.str());
+ }
+
+ // save read chain and pointer to move back after the file
+ TokenChainScope tcs(_input_reader);
+
+ _include_id_history.push_back(syntax_result.file_id);
+ _include_file_history.push_back(filename);
+
+ size_t processor_depth = _processor_stack.size(); // remembered so processor changes made by this file can be undone below
+
+ _input_reader = TokenReader(*_input[syntax_result.token_chain_index]);
+ const SyntaxToken *t = consume_next_token();
+ bool early_return = false;
+ t = parse_inner_scope(generate, t, early_return);
+ assert(!early_return); // this can't be inside a macro so this should not be possible
+ assert(t->type == SyntaxTokenType::End);
+
+ assert(_processor_stack.size() >= processor_depth);
+ _processor_stack.resize(processor_depth); // drop processor entries added while parsing this file
+ _processor_type = _processor_stack.back();
+ _processor = _catalogue.processor(_processor_type);
+
+ _include_file_history.pop_back();
+ _include_id_history.pop_back();
+}
+
+size_t Assembler::syntax_analyze(const std::string &filename, const SourceLocation *include_location) // Returns the index of the syntax result for filename, tokenizing and syntax parsing the file on first use.
+{
+ auto file_index_it = _file_to_index.find(filename);
+ if (file_index_it != _file_to_index.end()) { // NOTE(review): nothing in this function inserts into _file_to_index; confirm it is filled elsewhere
+ // we already processed this file
+ return file_index_it->second;
+ }
+
+ // check if the same file exists but as another name
+ core::FileId fid;
+ std::string file_path = filename;
+ match_include_dir_and_file(filename, _include_dirs, file_path); // file_path is updated when an include directory matches
+ if (!core::file_id(file_path, fid)) { // no file id could be obtained: store a failed result for the caller to report
+ size_t file_index = _used_files.size();
+ // make sure that the file has front slashes so that the output from linux and pc unit tests matches
+ _used_files.emplace_back(core::to_front_slashes(filename));
+ _input.emplace_back();
+ if (_dump_hex) {
+ _file_row_locations.emplace_back();
+ _file_contents.emplace_back();
+ }
+
+ SyntaxResult result;
+ result.file_id = fid; // NOTE(review): fid may be unset when file_id() fails, yet it takes part in the file id comparison further down; confirm
+ result.error_code = AssemblyErrorCodes::CantFindIncludeFile;
+ result.token_chain_index = 0;
+
+ std::stringstream ss;
+ ss << "Failed to open '" << filename << "'";
+ result.error_message = ss.str();
+
+ if (include_location == nullptr) {
+ // this is the main file
+ result.error_location.file_index = static_cast<uint32_t>(file_index);
+ result.error_location.column = 1;
+ result.error_location.row = 1;
+ } else {
+ // we came from an include statement
+ result.error_location.file_index = include_location->file_index;
+ result.error_location.column = include_location->column;
+ result.error_location.row = include_location->row;
+ }
+ _syntax_results.emplace_back(std::move(result));
+ return file_index;
+ }
+ // file exists, now check for previous file with different name
+ auto syntax_result_it = std::find_if(std::begin(_syntax_results), std::end(_syntax_results), [&fid](const SyntaxResult &r){ return r.file_id == fid; });
+ if (syntax_result_it != std::end(_syntax_results)) {
+ // the file has already been parsed!
+ return core::unsign_cast(syntax_result_it - std::begin(_syntax_results));
+ }
+
+ size_t file_index = _used_files.size();
+
+ // record this new file
+ _used_files.emplace_back(core::to_front_slashes(filename));
+ _syntax_results.emplace_back();
+ if (_dump_hex) {
+ _file_row_locations.emplace_back();
+ _file_contents.emplace_back();
+ }
+ SyntaxResult &syntax_result = _syntax_results.back();
+ syntax_result.file_id = fid;
+ syntax_result.error_code = AssemblyErrorCodes::Ok;
+ syntax_result.token_chain_index = _input.size(); // presumably the slot parse_syntax fills next; verify against parse_syntax
+
+ // tokenize
+ std::vector<size_t> *row_locations_ptr = nullptr;
+ std::wstring *contents_ptr = nullptr;
+ if (_dump_hex) { // row locations and file contents are only collected when a hex dump was requested
+ row_locations_ptr = &_file_row_locations.back();
+ contents_ptr = &_file_contents.back();
+ }
+
+ // TODO: Catch exceptions here to allow parse errors in files that don't contribute to the final output.
+ // However, to do that the tokenizer and syntax parser must return more error details.
+ TokenChain tokens = tokenize(
+ static_cast<uint32_t>(file_index),
+ core::to_front_slashes(filename),
+ file_path,
+ _catalogue,
+ _processor_type,
+ _strings,
+ row_locations_ptr,
+ contents_ptr
+ );
+
+ // syntax parse
+ parse_syntax(
+ tokens,
+ _input,
+ _catalogue,
+ _processor_type,
+ _strings,
+ _hash_arrays,
+ _used_files
+ );
+
+ return file_index;
+}
+
void Assembler::collect_section_sizes(const std::vector<Section> §ions)
{
for(const auto §ion : sections) {
@@ 1175,6 1351,11 @@ void Assembler::assemble()
debug() << ss.str();
}
+ if (_dump_hex) {
+ _hex_source_writer = new HexSourceWriter(_file_row_locations, _file_contents, _used_files);
+ _hex_source_writer->open_output(_output_hex_file);
+ }
+
{
TimerScope timer("Generate pass");
prepare_next_assembly_pass();
@@ 1183,7 1364,13 @@ void Assembler::assemble()
if (_num_errors != 0 || _oscillating_state)
throw AssemblyException("Assembly ended with errors.");
}
-
+
+ if (_hex_source_writer != nullptr) {
+ _hex_source_writer->close();
+ delete _hex_source_writer;
+ _hex_source_writer = nullptr;
+ }
+
std::sort(_sections.begin(), _sections.end());
print_sections();
M jasm/assembling/assembler_impl/assembler_impl.h => jasm/assemble/assembler_impl/assembler_impl.h +103 -27
@@ 1,29 1,41 @@
#pragma once
-#include <assembling/function_pointer.h>
-#include <assembling/method_pointer.h>
-#include <assembling/symbol_environment.h>
-#include <assembling/type_description.h>
-#include <assembling/value.h>
+#include <assemble/function_pointer.h>
+#include <assemble/method_pointer.h>
+#include <assemble/symbol_environment.h>
+#include <assemble/type_description.h>
+#include <assemble/value.h>
#include <core/math/algorithm.h>
#include <core/collections/hash_map.h>
+#include <core/collections/split_vector.h>
+#include <core/collections/string_hash_functor.h>
+#include <core/io/file_id.h>
#include <exceptions/assembly_exception.h>
#include <exceptions/error_codes.h>
#include <functional>
-#include <parsing/hasharray_repository.h>
-#include <parsing/syntax_parser.h>
+#include <io/data_reader.h>
#include <set>
#include <strings/string_conversions.h>
#include <strings/string_locale.h>
#include <strings/string_repository.h>
#include <sstream>
+#include <syntax/syntax_parser.h>
+#include <utility/hasharray_repository.h>
namespace jasm
{
+ namespace mos6502
+ {
+ class Processor6502;
+ }
+ namespace z80
+ {
+ class ProcessorZ80;
+ }
-class DataReader;
+class HexSourceWriter;
-/// @addtogroup assembling
+/// @addtogroup assemble
/// @{
constexpr uint32_t token_chain_type_buffer_size = 65536;
@@ 45,16 57,30 @@ constexpr uint32_t max_call_depth = 100;
/// to form the final hash.
class Assembler
{
+ friend mos6502::Processor6502;
+ friend z80::ProcessorZ80;
+
public:
- Assembler(bool multiple_output_files, bool multi_bank_mode, bool pseudo_instructions
- , const std::vector<TokenChain> &syntax, StringRepository &strings
- , const HashArrayRepository &hash_arrays, const std::vector<std::string> &used_files
- , const std::vector<std::pair<std::string, bool>> &predefined_booleans
- , const std::vector<std::pair<std::string, int32_t>> &predefined_integers
- , const std::vector<std::pair<std::string, std::string>> &predefined_strings
- , DataReader &data_reader, int32_t max_errors
- , const std::string &symbol_dump_file, const std::string &vice_dump_file, const std::string &gba_dump_file
- , std::vector<Section> &output);
+ Assembler(
+ bool multiple_output_files
+ , bool multi_bank_mode
+ , bool pseudo_instructions
+ , ProcessorType default_processor
+ , const std::string &filename
+ , const std::vector<std::string> &include_dirs
+ , StringRepository &strings
+ , std::vector<std::string> &used_files
+ , const std::vector<std::pair<std::string, bool>> &predefined_booleans
+ , const std::vector<std::pair<std::string, int32_t>> &predefined_integers
+ , const std::vector<std::pair<std::string, std::string>> &predefined_strings
+ , int32_t max_errors
+ , const std::string &symbol_dump_file
+ , const std::string &vice_dump_file
+ , const std::string &gba_dump_file
+ , const std::string &output_hex_file
+ , std::vector<Section> &output
+ );
+ ~Assembler();
Assembler &operator=(const Assembler &other) = delete;
@@ 805,6 831,11 @@ private:
/// Translate section name according to current section mappings.
uint64_t translate_section(uint64_t section_hash) const;
+ bool in_code_section() const // True when _section is non-null and its type is SectionType::Code.
+ {
+ return _section != nullptr && _section->section_type == SectionType::Code;
+ }
+
/// Add a combined namespace to be used when looking up global symbols.
/// This will exist until the end of the current scope.
void add_using_namespace(uint64_t combined_hash);
@@ 914,11 945,6 @@ private:
const SyntaxToken *parse_section_map(bool generate, const SyntaxToken *t);
const SyntaxToken *parse_declare(bool generate, const SyntaxToken *t);
const SyntaxToken *parse_declaration(bool generate, const SyntaxToken *t, bool export_enabled);
- #if SUPPORTS(M6502)
- void generate_instruction_data_label(bool generate, bool export_enabled, const InstructionToken &token, int address, int offset, uint8_t size);
- #endif
- void generate_subroutine_instruction(bool generate, int32_t address, const SourceLocation &source_location);
- const SyntaxToken *parse_instruction(bool generate, const SyntaxToken *t, bool export_enabled);
const SyntaxToken *parse_reserve(bool generate, const SyntaxToken *t, bool export_enabled);
const SyntaxToken *parse_statement(bool generate, const SyntaxToken *t, bool &early_return);
const SyntaxToken *parse_statement_after_export(bool generate, const SyntaxToken *t, const SourceLocation &location);
@@ 927,6 953,7 @@ private:
const SyntaxToken *parse_namespace(bool generate, const SyntaxToken *t);
const SyntaxToken *parse_module(bool generate, const SyntaxToken *t);
const SyntaxToken *parse_export(bool generate, const SyntaxToken *t);
+ const SyntaxToken *parse_processor(const SyntaxToken *t);
const SyntaxToken *parse_define(bool generate, const SyntaxToken *t, bool export_enabled);
const SyntaxToken *parse_macro(bool generate, const SyntaxToken *t, bool export_enabled);
const SyntaxToken *parse_return(bool generate, const SyntaxToken *t);
@@ 938,6 965,7 @@ private:
const SyntaxToken *parse_using(bool generate, const SyntaxToken *t);
const SyntaxToken *parse_subroutine(bool generate, const SyntaxToken *t, bool export_enabled);
const SyntaxToken *parse_incbin(bool generate, const SyntaxToken *t);
+ const SyntaxToken *parse_include(bool generate, const SyntaxToken *t);
/// Get the variable name for a global or local variable.
inline std::string variable_name(uint64_t symbol_hash, bool global)
@@ 992,6 1020,13 @@ private:
bool progress_was_made();
+ /// Tokenize, syntax analyze and parse a file.
+ void parse_file(bool generate, const std::string &filename, const SourceLocation *include_location);
+ /// Attempt to parse or just return the file index if an attempt has already been made.
+ /// @param include_location Pointer to source location where include was made, or nullptr if this is the main file.
+ /// @return Index into _used_files and _syntax_results.
+ size_t syntax_analyze(const std::string &filename, const SourceLocation *include_location);
+
/// Dump all symbol information to a file.
void dump_symbols(const std::string &filename);
/// Dump all symbol information to a VICE compatible file.
@@ 1040,23 1075,64 @@ private:
bool _multiple_output_files; ///< When true, write one file per section. Otherwise merge them together.
bool _multi_bank_mode; ///< When true, the assembler will truncate addresses in instructions.
bool _pseudo_instructions; ///< When true, some extra instructions or addressing modes can be added to simplify programming.
- const std::vector<TokenChain> &_input; ///< The syntax token stream to run assemble passes on.
+ ProcessorType _default_processor; ///< The default processor to begin assembling with.
+ const std::string _input_filename; ///< The file to start assembling in.
+ const std::vector<std::string> &_include_dirs;
StringRepository &_strings; ///< The lookup table from uint64_t to strings.
- const HashArrayRepository &_hash_arrays; ///< The lookup table from uint64_t handles to uint64_t arrays.
- const std::vector<std::string> &_used_files; ///< Filenames of assembler files to be able to print them.
+ std::vector<std::string> &_used_files; ///< Filenames of assembler files to be able to print them.
const std::vector<std::pair<std::string, bool>> &_predefined_booleans;
const std::vector<std::pair<std::string, int32_t>> &_predefined_integers;
const std::vector<std::pair<std::string, std::string>> &_predefined_strings;
- DataReader &_data_reader; ///< This handles reading binary files.
int32_t _max_errors; ///< Max number of errors before aborting assembly generation pass.
const std::string _symbol_dump_file; ///< The filename of the symbol dump.
const std::string _vice_dump_file; ///< The filename of the vice symbol dump.
const std::string _gba_dump_file; ///< The filename of the No$GBA style symbol dump.
+ const std::string _output_hex_file; ///< The filename of the hex output dump.
bool _dump_symbols; ///< True if symbol dump is enabled.
+ bool _dump_hex; ///< True if hex dump is enabled.
+ // file handling
+
+ /// Cache with map from path to index into _used_files and _syntax_results.
+ core::HashMap<std::string, size_t, core::StringHashFunctor> _file_to_index;
+
+ struct SyntaxResult // Per-file outcome of loading, tokenizing and syntax analysis.
+ {
+ /// ID of the file, used to recognize the same file when it is referred to by different names.
+ core::FileId file_id;
+ /// Error code, or 0 if syntax analysis was successful.
+ AssemblyErrorCodes error_code;
+ /// Empty if no error occurred. Otherwise the message describing the problem with loading, tokenizing or syntax analysis.
+ std::string error_message;
+ /// Location of the error, if any.
+ SourceLocation error_location;
+ /// Index of the token chain where the syntax output is stored (presumably an index into _input, TODO confirm).
+ size_t token_chain_index;
+ };
+ // List of syntax results, in the same order as _used_files.
+ std::vector<SyntaxResult> _syntax_results;
+
+ // tokenizer
+ std::vector<core::FileId> _include_id_history; ///< An array of include file identifiers used to determine file recursion.
+ std::vector<std::string> _include_file_history; ///< An array of include files in the same order as @a _include_id_history to be able to print the history in error messages.
+ core::SplitVector<std::vector<size_t>> _file_row_locations; ///< Character index of each row in each file, or empty if hex output is disabled.
+ /// All source file contents, or empty if hex output is disabled.
+ std::vector<std::wstring> _file_contents;
+
+ // syntax parser output
+ HashArrayRepository _hash_arrays; // repository to replace hash array references with a fixed size handle. The lookup table from uint64_t handles to uint64_t arrays.
+
+ const Processor *_processor; ///< Currently used processor.
+ ProcessorType _processor_type; ///< Currently used processor type.
+ std::vector<ProcessorType> _processor_stack; ///< Stack of processors where the top one is the current. This will never be empty.
+ ProcessorCatalogue _catalogue; ///< Keeps processor implementations.
StringLocale _string_locale; ///< Converts locale names to names to be used in std::locale.
StringConversions _string_conversions; ///< Converts strings to platform specific formats.
TokenReader _input_reader; ///< Reads the syntax input tokens.
+ HexSourceWriter *_hex_source_writer; ///< Writes hex data combined with source code.
+
+ std::vector<std::unique_ptr<TokenChain>> _input; ///< The syntax token stream to run assemble passes on.
+ DataReader _data_reader; ///< This handles reading binary files.
Pass _current_pass;
Pass _previous_pass;
M jasm/assembling/assembler_impl/expressions_impl.cpp => jasm/assemble/assembler_impl/expressions_impl.cpp +3 -3
@@ 1,8 1,8 @@
#include "pch.h"
-#include <assembling/assembler_impl/assembler_impl.h>
-#include <assembling/type_description.h>
-#include <parsing/operators.h>
+#include <assemble/assembler_impl/assembler_impl.h>
+#include <assemble/type_description.h>
+#include <tokenize/operators.h>
namespace jasm {
M jasm/assembling/assembler_impl/functions_impl.cpp => jasm/assemble/assembler_impl/functions_impl.cpp +2 -2
@@ 1,7 1,7 @@
#include "pch.h"
-#include <assembling/assembler_impl/assembler_impl.h>
-#include <assembling/functions.h>
+#include <assemble/assembler_impl/assembler_impl.h>
+#include <assemble/functions.h>
#include <cmath>
#include <core/environment/log.h>
#include <core/math/sign.h>
M jasm/assembling/assembler_impl/methods_impl.cpp => jasm/assemble/assembler_impl/methods_impl.cpp +2 -2
@@ 1,7 1,7 @@
#include "pch.h"
-#include <assembling/assembler_impl/assembler_impl.h>
-#include <assembling/methods.h>
+#include <assemble/assembler_impl/assembler_impl.h>
+#include <assemble/methods.h>
#include <core/math/sign.h>
#include <core/strings/utf8.h>
M jasm/assembling/assembler_impl/operators_impl.cpp => jasm/assemble/assembler_impl/operators_impl.cpp +3 -3
@@ 1,6 1,6 @@
#include "pch.h"
-#include <assembling/assembler_impl/assembler_impl.h>
+#include <assemble/assembler_impl/assembler_impl.h>
#include <core/math/sign.h>
#include <core/ownership/destruct_call.h>
#include <core/strings/utf8.h>
@@ 1044,7 1044,7 @@ void Assembler::operator_subroutine_call
return;
}
- generate_subroutine_instruction(generate, dereference_integer(arg1), components[operator_index].source_location);
+ _processor->generate_subroutine_instruction(*this, generate, dereference_integer(arg1), components[operator_index].source_location);
}
void Assembler::operator_function_call(bool generate, ValueVector &expression_values, const ExpressionComponent components[], uint32_t operator_index, Value &result, Value &arg1, uint32_t next_index)
@@ 1393,7 1393,7 @@ void Assembler::operator_call_macro(bool
// get first token in macro
const Value &macro_value = arg1;
- TokenReader macro_reader(_input[macro_value.macro_chain_index]);
+ TokenReader macro_reader(*_input[macro_value.macro_chain_index]);
const MacroDefinitionToken *macro_def = macro_reader.next_type<MacroDefinitionToken>();
assert(macro_def->type == SyntaxTokenType::MacroDef);
M jasm/assembling/assembler_impl/symbols_impl.cpp => jasm/assemble/assembler_impl/symbols_impl.cpp +1 -1
@@ 1,6 1,6 @@
#include "pch.h"
-#include <assembling/assembler_impl/assembler_impl.h>
+#include <assemble/assembler_impl/assembler_impl.h>
#include <core/math/sign.h>
#include <core/strings/utf8.h>
#include <cstring>
M jasm/assembling/assembler_impl/syntax_impl.cpp => jasm/assemble/assembler_impl/syntax_impl.cpp +126 -553
@@ 1,13 1,13 @@
#include "pch.h"
#include <algorithm>
-#include <assembling/assembler_impl/assembler_impl.h>
-#include <assembling/scope_counter.h>
+#include <assemble/assembler_impl/assembler_impl.h>
+#include <assemble/scope_counter.h>
#include <core/environment/log.h>
#include <core/math/algorithm.h>
#include <core/math/sign.h>
#include <core/strings/utf8.h>
-#include <io/data_reader.h>
+#include <io/hex_source_writer.h>
#include <limits>
namespace jasm {
@@ 475,539 475,6 @@ const SyntaxToken *Assembler::parse_decl
return t;
}
-#if SUPPORTS(M6502)
-
-void Assembler::generate_subroutine_instruction(bool generate, int32_t address, const SourceLocation &source_location)
-{
- // instructions are only allowed within code sections.
- bool instructions_allowed = _section != nullptr && _section->section_type == SectionType::Code;
- if (UNLIKELY(!instructions_allowed)) {
- // this is an unrecoverable error
- std::stringstream ss;
- ss << "Instructions must be in a code section.";
- report_fatal_error(source_location, AssemblyErrorCodes::CodeMustBeInCodeSection, ss.str());
- }
-
- // recursive data generation may not be safe
- if (_data_generation_depth != 0) {
- // this is an unrecoverable error
- std::stringstream ss;
- ss << "Recursive data generation isn't allowed.";
- report_fatal_error(source_location, AssemblyErrorCodes::RecursiveDataGenerationNotAllowed, ss.str());
- }
-
- ScopeCounter<uint32_t> sc(_data_generation_depth);
-
- if (generate && address < 0) {
- std::stringstream ss;
- ss << "Addressing mode needs a positive argument. Argument value was evaluated to " << address << ".";
- report_error(source_location, AssemblyErrorCodes::AddressingModeRequiresPositiveArgument, ss.str());
- }
-
- if (_multi_bank_mode) {
- // in this mode, addresses gets truncated to support memory banks
- address &= 0xffff;
- }
-
- if (generate) {
- if (address > 65535) {
- std::stringstream ss;
- ss << "Addressing mode needs a word size argument. Argument was evaluated to " << address << ".";
- report_error(source_location, AssemblyErrorCodes::AddressingModeRequiresWordSizeArgument, ss.str());
- }
- Section::Contents ending_instruction = Section::Contents::ContinueExecutionInstruction;
- auto &data = _section->generated_data(ending_instruction);
- data.push_back(opcode(InstructionType::Jsr, AddressingModeType::AbsoluteAddr));
- data.push_back(static_cast<uint8_t>(address));
- data.push_back(static_cast<uint8_t>(address >> 8));
- }
- _program_counter.integer_value += 3;
-}
-
-void Assembler::generate_instruction_data_label(bool generate, bool export_enabled, const InstructionToken &token, int address, int offset, uint8_t size)
-{
- // exporting local variables is not allowed
- if (generate && export_enabled && !token.global_data_label) {
- std::stringstream ss;
- ss << variable_name(token.data_label_symbol_hash, token.global_data_label) << " cannot be exported since it is local.";
- report_error(token.address_label_location, AssemblyErrorCodes::ExportingLocalIsNotAllowed, ss.str());
- }
-
- if (create_label(generate, token.data_label_symbol_hash, token.global_data_label, StorageType::Constant, token.address_label_location)) {
- Value &new_label = _current_pass.values.back();
- if (size == 1) {
- set_byte_offset(new_label, address, offset);
- } else if (size == 2) {
- set_word_offset(new_label, address, offset);
- } else {
- assert(false);
- }
- new_label.set_contains_address(true);
- if (export_enabled) {
- new_label.set_is_public(true);
- }
- }
-}
-
-const SyntaxToken *Assembler::parse_instruction(bool generate, const SyntaxToken *t, bool export_enabled)
-{
- assert(t->type == SyntaxTokenType::Instruction);
- const InstructionToken &instruction_token = *static_cast<const InstructionToken *>(t);
-
- // instructions are only allowed within code sections.
- bool instructions_allowed = _section != nullptr && _section->section_type == SectionType::Code;
- if (UNLIKELY(!instructions_allowed)) {
- // this is an unrecoverable error
- std::stringstream ss;
- ss << "Instructions must be in a code section.";
- report_fatal_error(instruction_token.source_location, AssemblyErrorCodes::CodeMustBeInCodeSection, ss.str());
- }
-
- // recursive data generation may not be safe
- if (_data_generation_depth != 0) {
- // this is an unrecoverable error
- std::stringstream ss;
- ss << "Recursive data generation isn't allowed.";
- report_fatal_error(instruction_token.source_location, AssemblyErrorCodes::RecursiveDataGenerationNotAllowed, ss.str());
- }
- ScopeCounter<uint32_t> sc(_data_generation_depth);
-
- InstructionType instruction = instruction_token.instruction;
- uint16_t addr_mode = instruction_token.addressing_modes;
- Section::Contents ending_instruction = is_ending_instruction(instruction) ? Section::Contents::EndExecutionInstruction : Section::Contents::ContinueExecutionInstruction;
-
- t = consume_next_token(); // instruction
-
- // in the generation pass, the program counter is guaranteed to be an integer value
- // so there is no need to verify this
-
- if (addr_mode == AddressingModeMask::Imp) {
- if (generate) {
- _section->generated_data(ending_instruction).push_back(opcode(instruction, AddressingModeType::Implied));
- }
- ++_program_counter.integer_value;
- return t;
- }
-
- // in all other instructions, we have to parse the expression for the argument
- const ExpressionToken *expr = static_cast<const ExpressionToken *>(t);
- const Value argument = evaluate_expression(generate, t);
- t = consume_next_token();
-
- // The argument is guaranteed to be a valid type in the generation pass.
- // In an assembly pass this can be Unknown.
- int32_t argument_value = 0;
-
- // in case of an assembly pass, unknown will be converted to 0, which is ok for all addressing modes except relative
- if (is_integer(argument))
- argument_value = dereference_integer(argument);
- else if (!is_unknown(argument)) {
- if (generate) {
- std::stringstream ss;
- ss << "Addressing mode needs an integer value. Argument type was " << to_string(type_of_value(argument)) << ".";
- report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresIntegerArgument, ss.str());
- }
- }
-
- if (addr_mode == AddressingModeMask::Imm) {
- if (generate) {
- // handle the case where the value doesn't fit in a byte
- if (argument_value < -128 || argument_value > 255) {
- std::stringstream ss;
- ss << "Addressing mode needs a byte size argument. Argument was evaluated to " << argument_value << ".";
- report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresByteSizeArgument, ss.str());
- }
- auto &data = _section->generated_data(ending_instruction);
- data.push_back(opcode(instruction, AddressingModeType::Immediate));
- data.push_back(static_cast<uint8_t>(argument_value));
- }
- if (UNLIKELY(instruction_token.has_instruction_data_label)) {
- generate_instruction_data_label(generate, export_enabled, instruction_token, _program_counter.integer_value + 1, 0, 1);
- }
-
- _program_counter.integer_value += 2;
- return t;
- }
-
- if (generate && argument_value < 0) {
- std::stringstream ss;
- ss << "Addressing mode needs a positive argument. Argument value was evaluated to " << argument_value << ".";
- report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresPositiveArgument, ss.str());
- }
-
- if (_multi_bank_mode && addr_mode != AddressingModeMask::Rel) {
- // in this mode, addresses gets truncated to support memory banks
- argument_value &= 0xffff;
- }
-
- if (addr_mode == (AddressingModeMask::Zp | AddressingModeMask::Abs)
- || addr_mode == (AddressingModeMask::Zpx | AddressingModeMask::AbsX)
- || addr_mode == (AddressingModeMask::Zpy | AddressingModeMask::AbsY))
- {
- // mask off the zero page or absolute addressing mode regardless of modes
- addr_mode = argument_value > 255 ? select_word_mode(addr_mode) : select_byte_mode(addr_mode);
- }
-
- if (generate) {
- _section->generated_data(ending_instruction).push_back(opcode(instruction, mask_to_addressing_mode(addr_mode)));
- }
- ++_program_counter.integer_value;
-
- if (addr_mode == AddressingModeMask::Zp
- || addr_mode == AddressingModeMask::Zpx
- || addr_mode == AddressingModeMask::Zpy
- || addr_mode == AddressingModeMask::IndX
- || addr_mode == AddressingModeMask::IndY)
- {
- if (generate) {
- if (argument_value > 255) {
- std::stringstream ss;
- ss << "Addressing mode needs a byte size argument. Argument was evaluated to " << argument_value << ".";
- report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresByteSizeArgument, ss.str());
- }
- _section->generated_data(ending_instruction).push_back(static_cast<uint8_t>(argument_value));
- }
- if (UNLIKELY(instruction_token.has_instruction_data_label)) {
- generate_instruction_data_label(generate, export_enabled, instruction_token, _program_counter.integer_value, 0, 1);
- }
- ++_program_counter.integer_value;
- return t;
- }
-
- if (addr_mode == AddressingModeMask::Abs
- || addr_mode == AddressingModeMask::AbsX
- || addr_mode == AddressingModeMask::AbsY
- || addr_mode == AddressingModeMask::Ind)
- {
- if (generate) {
- if (argument_value > 65535) {
- std::stringstream ss;
- ss << "Addressing mode needs a word size argument. Argument was evaluated to " << argument_value << ".";
- report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresWordSizeArgument, ss.str());
- }
- auto &data = _section->generated_data(ending_instruction);
- data.push_back(static_cast<uint8_t>(argument_value));
- data.push_back(static_cast<uint8_t>(argument_value >> 8));
- }
- if (UNLIKELY(instruction_token.has_instruction_data_label)) {
- generate_instruction_data_label(generate, export_enabled, instruction_token, _program_counter.integer_value, 0, 2);
- }
- _program_counter.integer_value += 2;
- return t;
- }
-
- // only relative address left
- assert(addr_mode == AddressingModeMask::Rel);
-
- if (generate) {
- int32_t reference_addr = _program_counter.integer_value + 1; // program counter has already been increased once
- int32_t relative_addr = argument_value - reference_addr;
- if (relative_addr < -128 || relative_addr > 127) {
- std::stringstream ss;
- ss << "Relative address out of range. Offset is " << relative_addr << " and needs to be in a [-128..127] range.";
- report_error(expr->source_location, AssemblyErrorCodes::RelativeAddressOutOfRange, ss.str());
- }
- _section->generated_data(ending_instruction).push_back(static_cast<uint8_t>(relative_addr));
- }
- if (UNLIKELY(instruction_token.has_instruction_data_label)) {
- generate_instruction_data_label(generate, export_enabled, instruction_token, _program_counter.integer_value, 0, 1);
- }
- ++_program_counter.integer_value;
- return t;
-}
-
-#elif SUPPORTS(Z80)
-
-void Assembler::generate_subroutine_instruction(bool generate, int32_t address, const SourceLocation &source_location)
-{
- // instructions are only allowed within code sections.
- bool instructions_allowed = _section != nullptr && _section->section_type == SectionType::Code;
- if (UNLIKELY(!instructions_allowed)) {
- // this is an unrecoverable error
- std::stringstream ss;
- ss << "Instructions must be in a code section.";
- report_fatal_error(source_location, AssemblyErrorCodes::CodeMustBeInCodeSection, ss.str());
- }
-
- // recursive data generation may not be safe
- if (_data_generation_depth != 0) {
- // this is an unrecoverable error
- std::stringstream ss;
- ss << "Recursive data generation isn't allowed.";
- report_fatal_error(source_location, AssemblyErrorCodes::RecursiveDataGenerationNotAllowed, ss.str());
- }
-
- ScopeCounter<uint32_t> sc(_data_generation_depth);
-
- const InstructionOpCode &opcode_data = opcode(InstructionType::Call, 0);
- uint8_t mutable_opcode_data[4];
- mutable_opcode_data[0] = opcode_data.op[0];
- mutable_opcode_data[1] = opcode_data.op[1];
- mutable_opcode_data[2] = opcode_data.op[2];
- mutable_opcode_data[3] = opcode_data.op[3];
-
- if (generate) {
- if (UNLIKELY(_multi_bank_mode && address >= 0)) {
- address &= 0xffff;
- }
- if (address < -32768 || address > 65535) {
- std::stringstream ss;
- ss << "Addressing mode needs a word size argument. Argument was evaluated to " << address << ".";
- report_error(source_location, AssemblyErrorCodes::AddressingModeRequiresWordSizeArgument, ss.str());
- }
- mutable_opcode_data[opcode_data.offset_to_data[0] + 0] = static_cast<uint8_t>(address & 0xff);
- mutable_opcode_data[opcode_data.offset_to_data[0] + 1] = static_cast<uint8_t>(address >> 8);
-
- Section::Contents ending_instruction = Section::Contents::ContinueExecutionInstruction;
- auto &data = _section->generated_data(ending_instruction);
- for(decltype(opcode_data.total_size) i = 0; i < opcode_data.total_size; ++i) {
- data.push_back(mutable_opcode_data[i]);
- }
- }
- _program_counter.integer_value += static_cast<int32_t>(opcode_data.total_size);
-}
-
-const SyntaxToken *Assembler::parse_instruction(bool generate, const SyntaxToken *t, bool export_enabled)
-{
- assert(t->type == SyntaxTokenType::Instruction);
- const InstructionToken &instruction_token = *static_cast<const InstructionToken *>(t);
-
- // instructions are only allowed within code sections.
- bool instructions_allowed = _section != nullptr && _section->section_type == SectionType::Code;
- if (UNLIKELY(!instructions_allowed)) {
- // this is an unrecoverable error
- std::stringstream ss;
- ss << "Instructions must be in a code section.";
- report_fatal_error(instruction_token.source_location, AssemblyErrorCodes::CodeMustBeInCodeSection, ss.str());
- }
-
- // recursive data generation may not be safe
- if (_data_generation_depth != 0) {
- // this is an unrecoverable error
- std::stringstream ss;
- ss << "Recursive data generation isn't allowed.";
- report_fatal_error(instruction_token.source_location, AssemblyErrorCodes::RecursiveDataGenerationNotAllowed, ss.str());
- }
- ScopeCounter<uint32_t> sc(_data_generation_depth);
-
- InstructionType instruction = instruction_token.instruction;
- const InstructionOpCode &opcode_data = opcode(instruction, instruction_token.addressing_mode_index);
- Section::Contents ending_instruction = is_ending_instruction(instruction) ? Section::Contents::EndExecutionInstruction : Section::Contents::ContinueExecutionInstruction;
-
- if (UNLIKELY(generate && !_pseudo_instructions && opcode_data.category == InstructionCategory::Pseudo)) {
- std::stringstream ss;
- ss << "Pseudo instructions require the pseudo instruction mode to be enabled.";
- report_error(instruction_token.source_location, AssemblyErrorCodes::UseOfPseudoInstructionInStandardMode, ss.str());
- }
-
- t = consume_next_token(); // instruction
-
- // in the generation pass, the program counter is guaranteed to be an integer value
- // so there is no need to verify this
-
- // generate labels to instruction data
- for(int i = 0; i < 2; ++i) {
- if (UNLIKELY(instruction_token.has_instruction_data_label[i])) {
- // verify that the data label is valid for the addressing mode (can point to actual data)
- uint8_t data_size = argument_data_size(i, opcode_data);
- if (data_size != 0) {
- // exporting local variables is not allowed
- if (export_enabled && !instruction_token.global_data_label[i]) {
- std::stringstream ss;
- ss << variable_name(instruction_token.data_label_symbol_hash[i], instruction_token.global_data_label[i]) << " cannot be exported since it is local.";
- report_error(instruction_token.address_label_location[i], AssemblyErrorCodes::ExportingLocalIsNotAllowed, ss.str());
- }
-
- if (create_label(generate, instruction_token.data_label_symbol_hash[i], instruction_token.global_data_label[i], StorageType::Constant, instruction_token.address_label_location[i])) {
- Value &new_label = _current_pass.values.back();
- if (data_size == 1) {
- set_byte_offset(new_label, _program_counter.integer_value + argument_data_offset(i, opcode_data), 0);
- } else if (data_size == 2) {
- set_word_offset(new_label, _program_counter.integer_value + argument_data_offset(i, opcode_data), 0);
- } else {
- assert(false);
- }
- new_label.set_contains_address(true);
- if (export_enabled) {
- new_label.set_is_public(true);
- }
- }
- } else {
- std::stringstream ss;
- ss << "Addressing mode argument cannot have label to instruction data.";
- report_error(instruction_token.address_label_location[i], AssemblyErrorCodes::AddressingModeArgumentCannotHaveDataLabel, ss.str());
- }
- }
- }
-
- // copy the opcode to be able to modify it before writing it to the data stream
- uint8_t mutable_opcode_data[4];
- mutable_opcode_data[0] = opcode_data.op[0];
- mutable_opcode_data[1] = opcode_data.op[1];
- mutable_opcode_data[2] = opcode_data.op[2];
- mutable_opcode_data[3] = opcode_data.op[3];
-
- if (opcode_data.format != OpCodeFormat::OpcodeOnly) {
- assert(t->type == SyntaxTokenType::Expression);
- const ExpressionToken *expr1 = static_cast<const ExpressionToken *>(t);
- int32_t arg1 = evaluate_integer_expression_for_instruction_argument(generate, t);
- t = consume_next_token();
-
- switch (opcode_data.format)
- {
- case OpCodeFormat::OpcodeOnly:
- break;
- case OpCodeFormat::ByteArg:
- if (generate) {
- if (arg1 < -128 || arg1 > 255) {
- std::stringstream ss;
- ss << "Addressing mode needs a byte size argument. Argument was evaluated to " << arg1 << ".";
- report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresByteSizeArgument, ss.str());
- }
- mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(arg1);
- }
- break;
- case OpCodeFormat::WordArg:
- if (generate) {
- if (UNLIKELY(_multi_bank_mode && arg1 >= 0)) {
- arg1 &= 0xffff;
- }
- if (arg1 < -32768 || arg1 > 65535) {
- std::stringstream ss;
- ss << "Addressing mode needs a word size argument. Argument was evaluated to " << arg1 << ".";
- report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresWordSizeArgument, ss.str());
- }
- mutable_opcode_data[opcode_data.offset_to_data[0] + 0] = static_cast<uint8_t>(arg1 & 0xff);
- mutable_opcode_data[opcode_data.offset_to_data[0] + 1] = static_cast<uint8_t>(arg1 >> 8);
- }
- break;
- case OpCodeFormat::OffsetArg:
- if (generate) {
- if (arg1 < -128 || arg1 > 127) {
- std::stringstream ss;
- ss << "Addressing mode needs an offset in range [-128..127]. Argument was evaluated to " << arg1 << ".";
- report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresOffsetSizeArgument, ss.str());
- }
- mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(arg1);
- }
- break;
- case OpCodeFormat::OffsetAndByteArg:
- {
- assert(t->type == SyntaxTokenType::Expression);
- const ExpressionToken *expr2 = static_cast<const ExpressionToken *>(t);
- int32_t arg2 = evaluate_integer_expression_for_instruction_argument(generate, t);
- t = consume_next_token();
-
- if (generate) {
- if (arg1 < -128 || arg1 > 127) {
- std::stringstream ss;
- ss << "Addressing mode needs an offset in range [-128..127]. Argument was evaluated to " << arg1 << ".";
- report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresOffsetSizeArgument, ss.str());
- }
- if (arg2 < -128 || arg2 > 255) {
- std::stringstream ss;
- ss << "Addressing mode needs a byte size argument. Argument was evaluated to " << arg2 << ".";
- report_error(expr2->source_location, AssemblyErrorCodes::AddressingModeRequiresByteSizeArgument, ss.str());
- }
-
- mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(arg1);
- mutable_opcode_data[opcode_data.offset_to_data[1]] = static_cast<uint8_t>(arg2);
- }
- break;
- }
- case OpCodeFormat::InterruptModeArg:
- {
- if (generate) {
- if (arg1 < 0 || arg1 > 2) {
- std::stringstream ss;
- ss << "Addressing mode needs an interrupt mode in range [0..2]. Argument was evaluated to " << arg1 << ".";
- report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresByteSizeArgument, ss.str());
- }
- uint8_t opcode_part = 0;
- if (arg1 == 0) {
- opcode_part = 0x46;
- } else if (arg1 == 1) {
- opcode_part = 0x56;
- } else if (arg1 == 2) {
- opcode_part = 0x5e;
- }
- mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(opcode_part);
- }
- break;
- }
- case OpCodeFormat::BitAndRegisterArg:
- {
- if (generate) {
- if (arg1 < 0 || arg1 > 7) {
- std::stringstream ss;
- ss << "Addressing mode needs a bit argument in range [0..7]. Argument was evaluated to " << arg1 << ".";
- report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresBitArgument, ss.str());
- }
- size_t bit_argument_offset = opcode_data.total_size - 1; // the bit is always placed in the last opcode byte
- mutable_opcode_data[bit_argument_offset] |= static_cast<uint8_t>(arg1 << 3);
- }
- break;
- }
- case OpCodeFormat::BitAndOffsetArg:
- {
- assert(t->type == SyntaxTokenType::Expression);
- const ExpressionToken *expr2 = static_cast<const ExpressionToken *>(t);
- int32_t arg2 = evaluate_integer_expression_for_instruction_argument(generate, t);
- t = consume_next_token();
-
- if (generate) {
- if (arg1 < 0 || arg1 > 7) {
- std::stringstream ss;
- ss << "Addressing mode needs a bit argument in range [0..7]. Argument was evaluated to " << arg1 << ".";
- report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresBitArgument, ss.str());
- }
- if (arg2 < -128 || arg2 > 127) {
- std::stringstream ss;
- ss << "Addressing mode needs an offset in range [-128..127]. Argument was evaluated to " << arg2 << ".";
- report_error(expr2->source_location, AssemblyErrorCodes::AddressingModeRequiresOffsetSizeArgument, ss.str());
- }
- mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(arg2);
- size_t bit_argument_offset = opcode_data.total_size - 1; // the bit is always placed in the last opcode byte
- mutable_opcode_data[bit_argument_offset] |= static_cast<uint8_t>(arg1 << 3);
- }
- break;
- }
- case OpCodeFormat::BranchOffsetArg:
- if (generate) {
- int32_t relative_offset = arg1 - (_program_counter.integer_value + 2);
- if (relative_offset < -128 || relative_offset > 127) {
- std::stringstream ss;
- ss << "Relative address out of range. Offset is " << relative_offset << " and needs to be in a [-128..127] range.";
- report_error(expr1->source_location, AssemblyErrorCodes::RelativeAddressOutOfRange, ss.str());
- }
- mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(relative_offset);
- }
- break;
- case OpCodeFormat::PageZeroArg:
- if (generate) {
- if ((arg1 & (~0b111000)) != 0) {
- std::stringstream ss;
- ss << "Zero page address must be 0, 8, 16, 24, 32, 40, 48, 56. Argument was evaluated to " << arg1 << ".";
- report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresZeroPageArgument, ss.str());
- }
- mutable_opcode_data[0] |= static_cast<uint8_t>(mutable_opcode_data[0] | arg1);
- }
- break;
- }
- }
-
- if (generate) {
- for(decltype(opcode_data.total_size) i = 0; i < opcode_data.total_size; ++i) {
- _section->generated_data(ending_instruction).push_back(mutable_opcode_data[i]);
- }
- }
- _program_counter.integer_value += static_cast<int32_t>(opcode_data.total_size);
-
- return t;
-}
-
-#endif
-
bool Assembler::resolve_type_hash(bool generate, const TypeReference &type_reference, const SourceLocation &source_location, uint64_t &type_hash)
{
switch (type_reference.type) {
@@ 1385,9 852,10 @@ const SyntaxToken *Assembler::parse_defi
return t;
}
- if (!is_fixed_array)
+ if (!is_fixed_array) {
array_length = defined_array_elements;
-
+ }
+
success = true;
return t;
}
@@ 1575,9 1043,13 @@ const SyntaxToken *Assembler::parse_defi
// set the value base and offset (this will enable use of offsetof() and further offsetting
- value.offset_base = _program_counter.integer_value;
+ int32_t program_counter_before = _program_counter.integer_value;
+ value.offset_base = program_counter_before;
value.offset = 0;
+ auto &data = _section->generated_data();
+ size_t data_size_before = data.size();
+
bool is_array_type = define.type_reference.array_flag != ArrayFlag::Single;
bool success;
//t = parse_define_data(generate, success, t, array_updated_type, find_type(value.type_hash), is_array_type, array_length);
@@ 1597,6 1069,16 @@ const SyntaxToken *Assembler::parse_defi
return skip_to_end_of_define(t);
}
+	// Hand the bytes produced by this define to the hex source writer, together with
+	// the source rows that produced them. end_line is one past the define's last row.
+	if (generate && _hex_source_writer != nullptr) {
+		uint32_t generated_size = static_cast<uint32_t>(data.size() - data_size_before);
+		uint32_t end_line = define.end_source_location.row + 1;
+		// if for some reason the end of the define is in a separate file, just print one line
+		if (define.source_location.file_index != define.end_source_location.file_index) {
+			// the end row belongs to another file, so fall back to the row after the
+			// define's first row (a line number, not a file index)
+			end_line = define.source_location.row + 1;
+		}
+		_hex_source_writer->write_data(static_cast<uint32_t>(program_counter_before), &data[data_size_before], generated_size, define.source_location.file_index, define.source_location.row, end_line);
+	}
+
t = skip_to_end_of_define(t);
return t;
}
@@ 2401,6 1883,26 @@ const SyntaxToken *Assembler::parse_expo
}
+/// Handles a processor token by switching the active processor.
+/// A token carrying ProcessorType::Unspecified pops the processor stack and makes the
+/// entry below it active again; any other processor type is pushed and becomes active.
+/// @param t The processor token to handle.
+/// @return The token returned by consume_next_token().
+const SyntaxToken *Assembler::parse_processor(const SyntaxToken *t)
+{
+	const ProcessorType processor_type = t->processor;
+	if (processor_type == ProcessorType::Unspecified) {
+		// pop the processor stack; an entry must remain below the current one to fall
+		// back to, so verify that before popping instead of after
+		assert(_processor_stack.size() > 1);
+		_processor_stack.pop_back();
+		_processor_type = _processor_stack.back();
+	} else {
+		_processor_type = processor_type;
+		_processor_stack.push_back(_processor_type);
+	}
+
+	// look up the processor once, whichever branch selected the type
+	_processor = _catalogue.processor(_processor_type);
+
+	return consume_next_token();
+}
+
+
const SyntaxToken *Assembler::parse_subroutine(bool generate, const SyntaxToken *t, bool export_enabled)
{
const SubroutineToken &subroutine = *static_cast<const SubroutineToken *>(t);
@@ 2472,6 1974,32 @@ const SyntaxToken *Assembler::parse_incb
t = consume_next_token(); // statement token
+ // parse filename
+ assert(t->type == SyntaxTokenType::Expression);
+ const ExpressionToken &filename_expression = *static_cast<const ExpressionToken *>(t);
+ const Value filename_value = evaluate_expression(generate, t);
+ t = consume_next_token();
+
+ if (!is_string(filename_value)) {
+ if (generate) {
+ std::stringstream ss;
+ ss << "Incbin expects a filename string but got " << to_string(filename_value.type) << ".";
+ report_error(filename_expression.source_location, AssemblyErrorCodes::ExpectedStringArgument, ss.str());
+ }
+ if (incbin.has_start_offset) {
+ // consume start offset expression
+ t = consume_next_token();
+ }
+ if (incbin.has_max_size) {
+ // consume max size expression
+ t = consume_next_token();
+ }
+ return t;
+ }
+
+ std::string_view filename = dereference_string(filename_value);
+ uint64_t handle = _data_reader.queue_load(filename);
+
int32_t offset = 0;
int32_t max_size = std::numeric_limits<int32_t>::max();
@@ 2515,24 2043,58 @@ const SyntaxToken *Assembler::parse_incb
}
}
- try {
- int64_t data_size = static_cast<int64_t>(_data_reader.size(incbin.load_handle));
- int64_t data_start = clamp_by_value<int64_t>(static_cast<int64_t>(offset), core::Range<int64_t>{0LL, data_size});
- int64_t data_end = clamp_by_value<int64_t>(static_cast<int64_t>(offset) + max_size, core::Range<int64_t>{0LL, data_size});
- int32_t inserted_size = static_cast<int32_t>(data_end - data_start);
- _program_counter.integer_value += inserted_size;
+ size_t data_size = 0;
+ std::string error;
+ if (!_data_reader.size(handle, data_size, error)) {
if (generate) {
- const std::vector<uint8_t> &data = _data_reader.data(incbin.load_handle);
- auto §ion_data = _section->generated_data(Section::Contents::DataDefinition);
- section_data.insert(section_data.end(), data.begin() + data_start, data.begin() + data_end);
+ report_error(filename_expression.source_location, AssemblyErrorCodes::FailedToIncludeBinary, error.c_str());
}
- } catch (Exception &e) {
- // rethrow with link to problem
- throw AssemblyException(_used_files, incbin.source_location, AssemblyErrorCodes::FailedToIncludeBinary, e.message.c_str());
+ return t;
+ }
+
+ int64_t data_start = clamp_by_value<int64_t>(static_cast<int64_t>(offset), core::Range<int64_t>{0LL, static_cast<int64_t>(data_size)});
+ int64_t data_end = clamp_by_value<int64_t>(static_cast<int64_t>(offset) + max_size, core::Range<int64_t>{0LL, static_cast<int64_t>(data_size)});
+ int32_t inserted_size = static_cast<int32_t>(data_end - data_start);
+ _program_counter.integer_value += inserted_size;
+
+ if (generate) {
+ const std::vector<uint8_t> *data = nullptr;
+ if (!_data_reader.data(handle, data, error)) {
+ report_error(filename_expression.source_location, AssemblyErrorCodes::FailedToIncludeBinary, error.c_str());
+ return t;
+ }
+
+ auto §ion_data = _section->generated_data(Section::Contents::DataDefinition);
+ section_data.insert(section_data.end(), data->begin() + data_start, data->begin() + data_end);
}
return t;
}
+/// Parses an include statement. The filename expression is evaluated and, when it is a
+/// string, the named file is parsed. A non-string filename is reported as an error only
+/// when generate is set.
+/// @param generate Forwarded to expression evaluation and file parsing; also gates error reporting.
+/// @param t The include statement token.
+/// @return The token following the filename expression.
+const SyntaxToken *Assembler::parse_include(bool generate, const SyntaxToken *t)
+{
+	// step past the statement token to reach the filename expression
+	t = consume_next_token();
+
+	assert(t->type == SyntaxTokenType::Expression);
+	const ExpressionToken &name_expression = *static_cast<const ExpressionToken *>(t);
+	const Value name_value = evaluate_expression(generate, t);
+	t = consume_next_token();
+
+	if (is_string(name_value)) {
+		// copy the view into an owned string before handing it to the file parser
+		const std::string_view name_view = dereference_string(name_value);
+		std::string filename(name_view.data(), name_view.size());
+		parse_file(generate, filename, &name_expression.source_location);
+		return t;
+	}
+
+	// the filename did not evaluate to a string
+	if (generate) {
+		std::stringstream message;
+		message << "Include expects a filename string but got " << to_string(name_value.type) << ".";
+		report_error(name_expression.source_location, AssemblyErrorCodes::ExpectedStringArgument, message.str());
+	}
+	return t;
+}
const SyntaxToken *Assembler::parse_statement_after_export(bool generate, const SyntaxToken *t, const SourceLocation &location)
{
@@ 2564,10 2126,11 @@ const SyntaxToken *Assembler::parse_stat
break;
case SyntaxTokenType::Instruction:
- t = parse_instruction(generate, t, export_enabled);
+ t = _processor->parse_instruction(*this, generate, t, export_enabled);
break;
case SyntaxTokenType::Incbin:
+ case SyntaxTokenType::Include:
case SyntaxTokenType::Align:
case SyntaxTokenType::Using:
case SyntaxTokenType::ForLoop:
@@ 2599,6 2162,7 @@ const SyntaxToken *Assembler::parse_stat
case SyntaxTokenType::ForLoopEnd:
case SyntaxTokenType::RepeatEnd:
case SyntaxTokenType::Return:
+ case SyntaxTokenType::Processor:
#if defined(_DEBUG)
case SyntaxTokenType::Debug:
#endif
@@ 2654,8 2218,13 @@ const SyntaxToken *Assembler::parse_stat
t = parse_export(generate, t);
break;
+ case SyntaxTokenType::Processor:
+ t = parse_processor(t);
+ break;
+
case SyntaxTokenType::Instruction:
- t = parse_instruction(generate, t, export_enabled);
+ assert(t->processor == _processor_type);
+ t = _processor->parse_instruction(*this, generate, t, export_enabled);
break;
case SyntaxTokenType::If:
@@ 2716,6 2285,10 @@ const SyntaxToken *Assembler::parse_stat
t = parse_incbin(generate, t);
break;
+ case SyntaxTokenType::Include:
+ t = parse_include(generate, t);
+ break;
+
case SyntaxTokenType::Optimize:
case SyntaxTokenType::StructDef:
case SyntaxTokenType::StructMember:
M jasm/assembling/function_pointer.h => jasm/assemble/function_pointer.h +2 -2
@@ 1,13 1,13 @@
#pragma once
-#include <assembling/value.h>
+#include <assemble/value.h>
namespace jasm {
class SINGLE_INHERITANCE Assembler;
struct ExpressionComponent;
-/// @addtogroup assembling
+/// @addtogroup assemble
/// @{
constexpr uint32_t max_fixed_number_of_function_args = 3;
M jasm/assembling/functions.cpp => jasm/assemble/functions.cpp +2 -2
@@ 1,6 1,6 @@
#include "pch.h"
-#include <assembling/functions.h>
+#include <assemble/functions.h>
namespace jasm {
@@ 63,7 63,7 @@ const FunctionDesc &function_info(Functi
return desc[static_cast<int>(type)];
}
-const std::string_view to_string(FunctionType type)
+std::string_view to_string(FunctionType type)
{
static const std::string_view names[] = {
std::string_view("sizeof"),
M jasm/assembling/functions.h => jasm/assemble/functions.h +2 -2
@@ 2,7 2,7 @@
namespace jasm {
-/// @addtogroup assembling
+/// @addtogroup assemble
/// @{
enum class FunctionType : uint8_t
@@ 63,7 63,7 @@ struct FunctionDesc {
/// Returns information about a function.
const FunctionDesc &function_info(FunctionType type);
-const std::string_view to_string(FunctionType type);
+std::string_view to_string(FunctionType type);
/// @}
M jasm/assembling/method_pointer.h => jasm/assemble/method_pointer.h +1 -1
@@ 1,6 1,6 @@
#pragma once
-#include <assembling/value.h>
+#include <assemble/value.h>
namespace jasm {
M jasm/assembling/methods.cpp => jasm/assemble/methods.cpp +3 -3
@@ 1,7 1,7 @@
#include "pch.h"
-#include <assembling/assembler_impl/assembler_impl.h>
-#include <assembling/methods.h>
+#include <assemble/assembler_impl/assembler_impl.h>
+#include <assemble/methods.h>
namespace jasm {
@@ 34,7 34,7 @@ const MethodDesc &method_info(MethodType
return desc[static_cast<int>(type)];
}
-const std::string_view to_string(MethodType type)
+std::string_view to_string(MethodType type)
{
static const std::string_view names[] = {
std::string_view("substring"),
M jasm/assembling/methods.h => jasm/assemble/methods.h +2 -2
@@ 6,7 6,7 @@ class SINGLE_INHERITANCE Assembler;
struct ExpressionComponent;
struct Value;
-/// @addtogroup assembling
+/// @addtogroup assemble
/// @{
enum class StringProperties : uint8_t
@@ 89,7 89,7 @@ struct MethodDesc {
/// Returns information about a function.
const MethodDesc &method_info(MethodType type);
-const std::string_view to_string(MethodType type);
+std::string_view to_string(MethodType type);
/// @}
M jasm/assembling/scope_counter.h => jasm/assemble/scope_counter.h +1 -1
@@ 2,7 2,7 @@
namespace jasm {
-/// @addtogroup assembling
+/// @addtogroup assemble
/// @{
template<typename T>
M jasm/assembling/symbol_environment.cpp => jasm/assemble/symbol_environment.cpp +1 -1
@@ 1,6 1,6 @@
#include "pch.h"
-#include <assembling/symbol_environment.h>
+#include <assemble/symbol_environment.h>
#include <core/strings/murmur_hash.h>
namespace jasm
M jasm/assembling/symbol_environment.h => jasm/assemble/symbol_environment.h +2 -2
@@ 1,13 1,13 @@
#pragma once
-#include <parsing/source_location.h>
#include <core/collections/hash_map.h>
#include <core/collections/null_hash_compare.h>
+#include <tokenize/source_location.h>
namespace jasm
{
-/// @addtogroup assembling
+/// @addtogroup assemble
/// @{
/// This contains the connection between a local name and a combined name.
M jasm/assembling/type_description.h => jasm/assemble/type_description.h +3 -3
@@ 1,8 1,8 @@
#pragma once
-#include <assembling/value.h>
-#include <parsing/syntax_parser.h>
-#include <parsing/tokenizer.h>
+#include <assemble/value.h>
+#include <syntax/syntax_parser.h>
+#include <tokenize/tokenizer.h>
namespace jasm
{
M jasm/assembling/value.cpp => jasm/assemble/value.cpp +2 -2
@@ 1,6 1,6 @@
#include "pch.h"
-#include <assembling/value.h>
+#include <assemble/value.h>
#include <core/exceptions/exception.h>
#include <cstring>
#include <strings/string_repository.h>
@@ 9,7 9,7 @@ namespace jasm {
using namespace core;
-const std::string_view to_string(ValueType type)
+std::string_view to_string(ValueType type)
{
assert(type < ValueType::NumTypes);
switch(type)
M jasm/assembling/value.h => jasm/assemble/value.h +7 -7
@@ 1,14 1,14 @@
#pragma once
#include <algorithm>
-#include <assembling/functions.h>
-#include <assembling/methods.h>
-#include <assembling/symbol_environment.h>
+#include <assemble/functions.h>
+#include <assemble/methods.h>
+#include <assemble/symbol_environment.h>
#include <core/collections/split_vector.h>
#include <core/strings/murmur_hash.h>
-#include <parsing/storage_type.h>
-#include <parsing/token_chain.h>
-#include <parsing/types.h>
+#include <syntax/storage_type.h>
+#include <tokenize/types.h>
+#include <utility/token_chain.h>
namespace jasm
{
@@ 69,7 69,7 @@ enum class ValueType : uint8_t
};
-const std::string_view to_string(ValueType type);
+std::string_view to_string(ValueType type);
/// This is the POD part of a value.
struct StaticValue
M +2 -0
@@ 4,6 4,8 @@
<title>jAsm Documentation</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="description" content="This is the documentation for the jAsm assembler.">
<meta name="keywords" content="jAsm,6502,z80,assembler,asm,cross-assembler">
<meta name="author" content="Jonas Hultén">
<link rel="shortcut icon" href="images/favicon.ico">
<link href="jasm.css" rel="stylesheet">
</head>
M jasm/docs/jasm.md +187 -57
@@ 42,22 42,24 @@ This documentation covers the language a
* [Compiling jAsm](#compiling-jasm)
* [Fetching Source Code](#fetching-source-code)
* [Compiling Using CMake](#compiling-using-cmake)
- * [Compiling Using Visual Studio](#compiling-using-vs)
* [Starting jAsm](#starting-jasm)
* [Bank Mode](#bank-mode)
* [Predefined Constants](#predefined-constants)
* [Symbol Dumps](#symbol-dumps)
+ * [Hex Output](#hex-output)
* [Binary Header](#binary-header)
* [Include Paths](#include-paths)
* [Max Errors](#max-errors)
* [Output Files and Sections](#output-files-and-sections)
- * [Verboseness](#verboseness)
+ * [Default Processor](#default-processor)
* [Pseudo Instructions](#pseudo-instructions)
* [6502 Pseudo Instructions](#6502-pseudo-instructions)
* [Z80 Pseudo Instructions](#z80-pseudo-instructions)
+ * [Verboseness](#verboseness)
* [Return Codes](#return-codes)
* [Language Reference](#language-reference)
* [Input Format](#input-format)
+ * [Selecting Processor](#selecting-processor)
* [Comments](#comments)
* [Assembler Instruction Syntax](#assembler-instruction-syntax)
* [Constants](#constants)
@@ 122,6 124,11 @@ jAsm supports all regular instructions o
lda #0
sta $d020
+Due to the large amount of source code with upper case instruction keywords, a Python script is provided to convert upper case keywords to lower case in all .asm files in a directory. Run it like this.
+
+ [text]
+ python3 tools/convert_6502_keyword_case.py <my_source_directory>
+
<div id="z80"></div>
## Z80
@@ 131,10 138,10 @@ jAsm supports all regular instructions o
ld a, 0
ld (hl), a
-Due to the large amount of source code with upper case instruction keywords, a python script is provided to convert upper case keywords in all .asm files in a directory. Run that like this.
+There's also a script to convert Z80 uppercase keywords to lowercase. Run that like this.
[text]
- python3 jasm-z80/convert_z80_keyword_case.py <my_source_directory>
+ python3 tools/convert_z80_keyword_case.py <my_source_directory>
<div id="starter-guide"></div>
# Starter Guide
@@ 144,6 151,8 @@ Due to the large amount of source code w
We'll start by creating a small program in a text file.
[6502]
+ processor "6502"
+
section code, "main", $8000
{
inc $d020
@@ 153,7 162,7 @@ We'll start by creating a small program
Save this to a file named main.jasm. Use utf-8 format, because this is what jAsm expects. 7-bit ASCII is also ok since that is compatible with the utf-8 format. Now we'll assemble it into a binary. Open a command line window and change the current directory to where the main.jasm file is. Type this on the command line.
[text]
- jasm-6502 -hla main.jasm main.prg
+ jasm -hla main.jasm main.prg
Now you have a program that changes the border color on a Commodore 64. Load it into an emulator or onto a real machine.
@@ 173,6 182,8 @@ The border color changes.
If you want to start it on a Commodore 64 with a BASIC line, you need to add the necessary data to produce a SYS line at the BASIC start. This is specific to the Commodore BASIC v2. This example shows how to do that in jAsm.
[6502]
+ processor "6502"
+
section code, "main", $0801
{
define word = .next_basic_line // next BASIC line
@@ 200,6 211,8 @@ Stuff written after `[6502]|//` are comm
This BASIC line thing will be used a lot in programs since almost all programs loaded from disk will need it. Let's break out this code into a handy macro that we can reuse. The macro will need two arguments, one is the line number and one is the address to start the program from.
[6502]
+ processor "6502"
+
macro basic_sys_line(.line_number, .sys_address)
{
define word = .next_basic_line // next BASIC line
@@ 230,6 243,8 @@ The main section of our example looks a
Move the macro code into a file called macros.jasm and place it where main.jasm lies. We can now include the macros in main.jasm.
[6502]
+ processor "6502"
+
include "macros.jasm"
section code, "main", $0801
@@ 247,6 262,8 @@ Move the macro code into a file called m
The border color changing address isn't exactly self explanatory. The BASIC start address is also a naked constant that isn't exactly self explained. Let's make this a bit better.
[6502]
+ processor "6502"
+
include "macros.jasm"
const BASIC_START = $0801
@@ 264,6 281,8 @@ The border color changing address isn't
I use uppercase characters for fixed address constants (basically any naked constant) to make it easy to identify them. `[6502]|BASIC_START` and `[6502]|BORDER_COLOR` can now be used instead of the naked constants. Let's move the constants out into their own file as well. Call this c64.jasm since they describe constants specific to Commodore 64. We'll include this as well in the program.
[6502]
+ processor "6502"
+
include "macros.jasm"
include "c64.jasm"
@@ 288,6 307,8 @@ Now, what if we wanted to port this to V
Now, what we need is a way to include either the c64.jasm or vic20.jasm file based on an option somewhere. Let's add the selection first.
[6502]
+ processor "6502"
+
include "macros.jasm"
if (C64_BUILD) {
include "c64.jasm"
@@ 310,8 331,8 @@ Now, what we need is a way to include ei
The `[6502]|if` statement wants a boolean expression within the parentheses and if true the first block of code is used, otherwise the second block is used. We can feed constants from the command line to solve this. The command line option is `[text]|-d` and it needs to be followed by an assignment. In this case we want to assign `[6502]|C64_BUILD` to `[6502]|true` or `[6502]|false`.
[text]
- jasm-6502 -d C64_BUILD=true main.jasm main.prg
- jasm-6502 -d C64_BUILD=false main.jasm main.prg
+ jasm -d C64_BUILD=true main.jasm main.prg
+ jasm -d C64_BUILD=false main.jasm main.prg
<div id="starter-guide-definining-data"></div>
## Defining Data
@@ 324,6 345,8 @@ Let's try a hello world example. We'll d
Now we'll add the loop to print the text.
[6502]
+ processor "6502"
+
include "macros.jasm"
include "c64.jasm"
@@ 352,6 375,8 @@ The define now has a name before the equ
This works but is hard to read. It isn't obvious where the loop starts and ends unless we read the instructions. Let's improve it using indentation.
[6502]
+ processor "6502"
+
include "macros.jasm"
include "c64.jasm"
@@ 378,6 403,8 @@ This works but is hard to read. It isn't
This is better but can be improved further. jAsm supports an automatic `[6502]|@loop` label at the beginning of a scope defined by curly braces.
[6502]
+ processor "6502"
+
include "macros.jasm"
include "c64.jasm"
@@ 407,6 434,8 @@ It's now much easier to read the loop an
If we want to print more text we need to move the loop into a subroutine which can be called with a jsr instruction and some parameters in registers.
[6502]
+ processor "6502"
+
include "macros.jasm"
include "c64.jasm"
@@ 450,10 479,10 @@ If we want to print more text we need to
`[6502]|*` in the subroutine represents the current program counter. `[6502]|* + 1` points one byte into the next instruction, which is where the instruction argument is. All is well, except that it doesn't assemble!
[text]
- main.jasm(23,7) : Error 3004 : Reference to undefined symbol .addr
- main.jasm(24,7) : Error 3004 : Reference to undefined symbol .addr
- main.jasm(24,13) : Error 3000 : Operator + is not defined for left hand side unknown type.
- main.jasm(25,7) : Error 3004 : Reference to undefined symbol .size
+ main.jasm(25,7) : Error 3004 : Reference to undefined symbol .addr
+ main.jasm(26,7) : Error 3004 : Reference to undefined symbol .addr
+ main.jasm(26,13) : Error 3000 : Operator + is not defined for left hand side unknown type.
+ main.jasm(27,7) : Error 3004 : Reference to undefined symbol .size
<div id="starter-guide-declaring-symbols"></div>
## Declaring Symbols
@@ 463,6 492,8 @@ There is something wrong with `[6502]|.a
To solve this we can declare the symbol names in the subroutine scope but define the constants inside the loop. This is the working subroutine.
[6502]
+ processor "6502"
+
// -> xa: address to text
// -> y: size of text
subroutine print_text
@@ 492,6 523,8 @@ To solve this we can declare the symbol
There is a more intuitive way to declare the `[6502]|.addr` and `[6502]|.size` addresses. Instruction data labels can point directly to the instruction argument by placing a label definition between the instruction and the argument.
[6502]
+ processor "6502"
+
// -> xa: address to text
// -> y: size of text
subroutine print_text
@@ 519,6 552,8 @@ There is a more intuitive way to declare
This subroutine can be reused so let's move it to its own file. Name a new file screen\_io.jasm and paste the subroutine into it. Now we'll modify the main file to include this new file. Note that we now must include the file inside the section because otherwise generated code or data would lie outside any section and that isn't allowed. Only code sections can contain code or data. The other include files only contain constant definitions and macros and they don't directly produce any code or data themselves. That's why they can be outside a section.
[6502]
+ processor "6502"
+
include "macros.jasm"
include "c64.jasm"
@@ 544,6 579,8 @@ This subroutine can be reused so let's m
Self modifying code is handy and can improve efficiency but it doesn't work if the code is in a cartridge ROM, because it can't be modified. Let's try modifying the code to use the zero page instead. To do this we need to reserve some space for variables in the zero page area. This is done with a bss section. BSS stands for "Block Started by Symbol" and means a static memory block that is part of the program, but without its content stored in the executable file. The bss section doesn't generate any code or data, it just reserves uninitialized space. I reserved the last 5 bytes in the zero page area from $fb to, but not including, $100.
[6502]
+ processor "6502"
+
include "macros.jasm"
include "c64.jasm"
@@ 600,6 637,8 @@ Now we need to modify the print subrouti
It would also be nice to avoid having to specify the length of the string when printing it. The code became a bit kludgy when swapping registers. We can solve this by removing the need for the size argument. If we zero terminate the string we can get rid of it (or swap argument registers).
[6502]
+ processor "6502"
+
include "macros.jasm"
include "c64.jasm"
@@ 653,6 692,8 @@ One thing that isn't really great is tha
This is what main.jasm looks like after the change.
[6502]
+ processor "6502"
+
include "macros.jasm"
include "c64.jasm"
@@ 753,6 794,8 @@ The reference to the print subroutine mu
If `[6502]|print_text` is used a lot in one place it is also possible to specify that a namespace should be used in a scope. As long as other names don't start to collide, this is just as good.
[6502]
+ processor "6502"
+
include "macros.jasm"
include "c64.jasm"
@@ 822,7 865,7 @@ Accessing the print\_text subroutine in
jAsm can assist debugging in the VICE emulator by exporting the names of addresses for use in the emulator. Add [--dump-vice-symbols](#symbol-dumps) and a filename to the command line arguments to export this information.
[text]
- jasm-6502 --dump-vice-symbols main.vs main.jasm main.prg
+ jasm --dump-vice-symbols main.vs main.jasm main.prg
Now, a symbol file will be created called `[text]|main.vs`. Let's start the emulator (install it first if you don't have it) and use the file.
@@ 934,12 977,12 @@ Now you know the basics of jAsm and shou
<div id="fetching-source-code"></div>
## Fetching Source Code
-You need to fetch the source code from BitBucket to get started. If you have a command line Mercurial client you can clone the repository like this.
+You need to fetch the source code from SourceHut to get started. If you have a command line Mercurial client you can clone the repository like this.
[text]
- hg clone ssh://hg@bitbucket.org/bjonte/jasm
-
-jAsm compiles using CMake and Clang or using Code::Blocks or Visual Studio.
+ hg clone https://hg.sr.ht/~bjonte/jasm
+
+jAsm compiles using CMake and Clang.
<div id="compiling-using-cmake"></div>
## Compiling Using CMake
@@ 952,7 995,7 @@ To build with CMake you need CMake 3.5,
Clone the repository into a directory called 'jasm' and build it like this.
[text]
- hg clone ssh://hg@bitbucket.org/bjonte/jasm
+ hg clone https://hg.sr.ht/~bjonte/jasm
cd jasm
export CXX=/usr/bin/clang++
mkdir build
@@ 968,26 1011,19 @@ If you want to cross compile binaries fo
Cross compile like this.
[text]
- hg clone ssh://hg@bitbucket.org/bjonte/jasm
+ hg clone https://hg.sr.ht/~bjonte/jasm
cd jasm
mkdir build
cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../win64_cross_compile_toolchain.txt -DCMAKE_BUILD_TYPE=Release ..
make
-You will find the binaries in build/jasm-6502 and build/jasm-z80. You will also need the MingW dynamic link libraries found here in Linux Mint.
+You will find the binaries in build/jasm. You will also need the MinGW dynamic link libraries found here in Linux Mint.
[text]
/usr/lib/gcc/x86_64-w64-mingw32/7.3-win32/libgcc_s_seh-1.dll
/usr/lib/gcc/x86_64-w64-mingw32/7.3-win32/libstdc++-6.dll
-<div id="compiling-using-vs"></div>
-## Compiling Using Visual Studio
-
-<i>The Visual Studio solution is no longer actively maintained and may not work. Cross compiling on Linux is the supported method to build Windows binaries.</i>
-
-Download Visual Studio 2015 from www.microsoft.com and install it. Double click on the `[text]|jasm.sln` file to open the project. Select the `[text]|Release` configuration and build the solution. You will get a `[text]|jasm.exe` binary in `[text]|x64\Release`.
-
<div id="starting-jasm"></div>
# Starting jAsm
@@ 996,12 1032,7 @@ Download Visual Studio 2015 from www.mic
jAsm is a command line tool. It will print its arguments if started without any. Basically it needs an input file and an output file.
[text]
- jasm-6502 input.jasm output.bin
-
-If you are assembling for Z80, use that version of the assembler instead.
-
- [text]
- jasm-z80 input.jasm output.bin
+ jasm input.jasm output.bin
There are some flags to tweak how the assembler behaves.
@@ 1011,7 1042,7 @@ There are some flags to tweak how the as
When working with several memory banks it is handy to place them after each other in memory. That way it is possible to check which bank code or data belongs to just looking at the address. For example, cartridge bank 0 could be located at $08000-$0a000 and bank 1 at $18000-$1a000. However, jAsm will generate an error when trying to reference bank 1 in data definitions or instructions because the addresses exceeds 16 bits. This can be overridden with the `[text]|--bank-mode` flag, which automatically truncates long addresses.
[text]
- jasm-6502 --bank-mode input.jasm output.bin
+ jasm --bank-mode input.jasm output.bin
_A shortcut alternative is `[text]|-bm`._
@@ 1023,8 1054,8 @@ This also have implications on the high
You can instruct the assembler to create some initial constants that can be accessed in the source code with the `[text]|--define` flag.
[text]
- jasm-6502 --define INFINITE_LIVES=true --define STARTING_LIVES=3 input.jasm output.bin
- jasm-6502 --define DEFAULT_NAME=bobo input.jasm output.bin
+ jasm --define INFINITE_LIVES=true --define STARTING_LIVES=3 input.jasm output.bin
+ jasm --define DEFAULT_NAME=bobo input.jasm output.bin
You can feed it with integers, booleans and strings, like in the examples above.
@@ 1047,31 1078,66 @@ The constants and variables in the assem
Dump jAsm symbols like this.
[text]
- jasm-6502 --dump-symbols symbols.txt input.jasm output.bin
+ jasm --dump-symbols symbols.txt input.jasm output.bin
_A shortcut alternative is `[text]|-ds`._
Dump VICE symbols like this.
[text]
- jasm-6502 --dump-vice-symbols symbols.vs input.jasm output.bin
+ jasm --dump-vice-symbols symbols.vs input.jasm output.bin
_A shortcut alternative is `[text]|-dv`._
Dump No$GBA symbols like this.
[text]
- jasm-6502 --dump-gba-symbols symbols.sym input.jasm output.bin
+ jasm --dump-gba-symbols symbols.sym input.jasm output.bin
_A shortcut alternative is `[text]|-dg`._
+<div id="hex-output"></div>
+## Hex Output
+
+The assembled program can be written as a hex file interleaved with the source lines that produced the output, to help you understand what the assembler generated.
+
+Write hex output like this.
+
+ [text]
+ jasm --dump-hex hex_output.txt input.jasm output.bin
+
+_A shortcut alternative is `[text]|-dh`._
+
+The file lists all source lines that generate data. The first column is the program counter, followed by up to four columns of binary data. After that comes the line number and then the source code that produced the generated data.
+
+ [text]
+ ./source/main_loop.jasm
+ --------------------------------------------------------------------------------
+ 0400: 20 17 04 7: jsr setup_cpu
+ 8:
+ 0403: 20 46 04 9: jsr blank_screen
+ 10:
+ 0406: 20 00 1f 11: jsr mmu::setup
+ 0409: 20 6b 04 12: jsr init_reset_vector
+
+When the source file changes, the file name and a line with dashes will be added. In case there is a longer jump in line numbers or a jump backwards, a partially dashed line is printed.
+
+ [text]
+ 046b: ad 06 d5 51: lda MMURCR
+ 046e: 48 52: pha
+ -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+ 046f: ad 06 d5 67: lda MMURCR
+ 0472: 29 f7 68: and #~MMURCR_COMMON_TOP
+ 0474: 09 04 69: ora #MMURCR_COMMON_BOTTOM
+ 0476: 8d 06 d5 70: sta MMURCR
+
<div id="binary-header"></div>
## Binary Header
By default, jAsm outputs only the binary data without any header. To generate a program file for Commodore 64 that can be loaded from BASIC, a two byte header must be added containing the load address in little endian format. You can add this header using `[text]|--header-little-endian-address`.
[text]
- jasm-6502 --header-little-endian-address input.jasm output.prg
+ jasm --header-little-endian-address input.jasm output.prg
_A shortcut alternative is `[text]|-hla`._
@@ 1081,7 1147,7 @@ By default, jAsm outputs only the binary
You can add include paths using the `[text]|--include-dir` flag. jAsm will look in these for included files.
[text]
- jasm-6502 --include-dir some/dir --include-dir other/dir input.jasm output.bin
+ jasm --include-dir some/dir --include-dir other/dir input.jasm output.bin
_A shortcut alternative is `[text]|-i`._
@@ 1091,7 1157,7 @@ You can add include paths using the `[te
With the `[text]|--max-errors` flag, you can specify the number of errors that will be printed before jAsm stops assembling.
[text]
- jasm-6502 --max-errors 4 input.jasm output.bin
+ jasm --max-errors 4 input.jasm output.bin
_A shortcut alternative is `[text]|-me`._
@@ 1101,37 1167,36 @@ With the `[text]|--max-errors` flag, you
The default output mode will merge all code sections into one big binary and pad the inbetween space with zero. With the flag `[text]|--output-multiple-files`, this can be changed to store one file per section instead. Each file will be named after the output file but add the section name before the file extension.
[text]
- jasm-6502 --output-multiple-files input.jasm output.bin
+ jasm --output-multiple-files input.jasm output.bin
_A shortcut alternative is `[text]|-om`._
You can choose to have jAsm name the files after the sections by not specifying an output file name.
[text]
- jasm-6502 --output-multiple-files input.jasm
+ jasm --output-multiple-files input.jasm
You may want to add an extension to the section names when using them as file names. Use the option `[text]|--file-extension` to do that.
[text]
- jasm-6502 --output-multiple-files --file-extension prg input.jasm
+ jasm --output-multiple-files --file-extension prg input.jasm
_A shortcut alternative is `[text]|-ext`._
-<div id="verboseness"></div>
-## Verboseness
-
-jAsm supports several levels of output during assembly. This is controlled by the `[text]|-v0`, `[text]|-v1`, `[text]|-v2` and `[text]|-v3` flags.
+<div id="default-processor"></div>
+## Default Processor
+
+You can set the default processor used when assembling the source code with the option `[text]|--processor`. If you do this, you won't need to specify the processor in the source code unless you need to switch it.
[text]
- jasm-6502 -v2 input.jasm output.bin
-
-<table>
- <tr><th>Flag</th><th>Meaning</th></tr>
- <tr><td><code>[text]|-v0</code></td><td>Show errors</td></tr>
- <tr><td><code>[text]|-v1</code></td><td>Show errors and warnings</td></tr>
- <tr><td><code>[text]|-v2</code></td><td>Show errors, warnings, printouts and general information</td></tr>
- <tr><td><code>[text]|-v3</code></td><td>Show errors, warnings, general information and debugging information</td></tr>
-</table>
+ jasm --processor 6502 input.jasm output.bin
+
+or
+
+ [text]
+ jasm --processor z80 input.jasm output.bin
+
+_A shortcut alternative is `[text]|-p`._
<div id="pseudo-instructions"></div>
## Pseudo Instructions
@@ 1166,6 1231,22 @@ These are the pseudo instructions for Z8
They are implemented using two instructions under the hood. First the high register part is loaded and then the low.
+<div id="verboseness"></div>
+## Verboseness
+
+jAsm supports several levels of output during assembly. This is controlled by the `[text]|-v0`, `[text]|-v1`, `[text]|-v2` and `[text]|-v3` flags.
+
+ [text]
+ jasm -v2 input.jasm output.bin
+
+<table>
+ <tr><th>Flag</th><th>Meaning</th></tr>
+ <tr><td><code>[text]|-v0</code></td><td>Show errors</td></tr>
+ <tr><td><code>[text]|-v1</code></td><td>Show errors and warnings</td></tr>
+ <tr><td><code>[text]|-v2</code></td><td>Show errors, warnings, printouts and general information</td></tr>
+ <tr><td><code>[text]|-v3</code></td><td>Show errors, warnings, general information and debugging information</td></tr>
+</table>
+
<div id="return-codes"></div>
## Return Codes
@@ 1183,6 1264,55 @@ This section documents the entire syntax
jAsm uses Unicode utf-8 encoded text files only. If you provide something that can't be interpreted as utf-8, an error will be returned.
+<div id="selecting-processor"></div>
+## Selecting Processor
+To assemble instructions, jAsm needs to know which processor to target. This is done either by specifying the processor using [command line flags](#default-processor) or by a keyword in the source code. Specify the processor in a source file like this.
+
+ [6502]
+ processor "6502"
+
+After this statement, the assembler can handle 6502 processor instructions. You can switch processor in a source file several times.
+
+ [6502]
+ processor "6502"
+ rts
+ processor "z80"
+ ret
+
+It is also possible to temporarily change the processor and then switch back to whatever it was before. The `[6502]|processor pop` statement restores the previously set processor.
+
+ [6502]
+ processor "6502"
+ rts
+ processor "z80"
+ ret
+ processor pop
+ rts
+ processor pop
+
+Included files inherit the processor from the file containing the include statement, but a processor set in the included file won't affect the including file.
+
+Suppose we have a file named test.jasm:
+
+ [z80]
+ // processor 6502 inherited from main.jasm
+ rts
+
+ processor "z80"
+ // processor is now set to z80
+ ret
+
+and a file named main.jasm:
+
+ [6502]
+ processor "6502"
+ // processor is now set to 6502
+ include "test.jasm"
+ // processor is still 6502
+ lda #0
+
+When including `[text]|test.jasm`, the `[6502]|rts` instruction is assembled as 6502 because the processor was inherited from `[text]|main.jasm`. The `[z80]|ret` instruction is assembled as z80 since the processor was changed in the included file before the instruction. After the included file, the processor is still 6502, since an included file doesn't affect the file it is included from.
+
<div id="comments"></div>
## Comments
M jasm/docs/syntax_highlight.py +1 -1
@@ 43,7 43,7 @@ def replace_with_token(text, replace_map
return text
def syntax_highlight_code(code_class, text):
- keywords = re.compile(r'\b(address|align|basic|bss|byte|code|const|declare|define|dynamic|elif|else|enum|export|fill|for|function|if|import|incbin|include|long|macro|mapping|module|namespace|optimize|part|reserve|return|section|struct|subroutine|using|var|word)\b')
+ keywords = re.compile(r'\b(address|align|basic|bss|byte|code|const|declare|define|dynamic|elif|else|enum|export|fill|for|function|if|import|incbin|include|long|macro|mapping|module|namespace|optimize|part|pop|processor|reserve|return|section|struct|subroutine|using|var|word)\b')
functions = re.compile(r'\b(abs|acos|asin|atan|atan2|ceil|clamp|cos|cosh|degrees|exp|float|floor|format|hexstring|int|lerp|log|log10|logn|max|min|modulo|offsetof|pow|print|radians|remainder|round|select|sin|sinh|sizeof|sqrt|static_assert|string|symbol|tan|tanh|unicode|uppercase|lowercase)\b')
instructions_6502 = re.compile(r'\b(adc|and|asl|bcc|bcs|beq|bit|bmi|bne|bpl|brk|bvc|bvs|clc|cld|cli|clv|cmp|cpx|cpy|dec|dex|dey|eor|inc|inx|iny|jmp|jsr|lda|ldx|ldy|lsr|nop|ora|pha|php|pla|plp|rol|ror|rti|rts|sbc|sec|sed|sei|sta|stx|sty|tax|tay|tsx|txa|txs|tya|bhs|blt)\b')
instructions_z80 = re.compile(r'\b(adc|add|and|bit|call|ccf|cp|cpd|cpdr|cpi|cpir|cpl|daa|dec|di|djnz|ei|ex|exx|halt|im|in|inc|ind|indr|ini|inir|jp|jr|ld|ldd|lddr|ldi|ldir|neg|nop|or|otdr|otir|out|outd|outi|pop|push|res|ret|reti|retn|rl|rla|rlc|rlca|rld|rr|rra|rrc|rrca|rrd|rst|sbc|scf|set|sla|sra|srl|sub|xor)\b')
M jasm/environment/command_line_args.cpp +44 -6
@@ 8,6 8,7 @@ using namespace core;
CommandLineArgs::CommandLineArgs(std::string version)
: verbose_level(ErrorLevel::Errors)
+ , default_processor(ProcessorType::Unspecified)
, max_errors(20)
, multiple_output_files(false)
, section_names_as_file_names(false)
@@ 44,14 45,14 @@ bool CommandLineArgs::parse(int argc, co
}
// verify that there is exactly one '=' in that string
std::string define = argv[i];
- if (std::count(define.begin(), define.end(), L'=') != 1) {
+ if (std::count(define.begin(), define.end(), '=') != 1) {
error() << arg << " option must have NAME=VALUE argument\n";
print_usage(argv[0]);
return false;
}
// split string on equal sign
- auto equal_position = std::find(define.begin(), define.end(), L'=');
+ auto equal_position = std::find(define.begin(), define.end(), '=');
std::string symbol(define.begin(), equal_position);
std::string value_str(equal_position + 1, define.end());
bool boolean_true = value_str == "true";
@@ 104,6 105,15 @@ bool CommandLineArgs::parse(int argc, co
}
gba_sym_dump_file = argv[i];
+ } else if (arg == "-dh" || arg == "--dump-hex") {
+ ++i;
+ if (i == argc) {
+ error() << "Missing " << arg << " argument\n";
+ print_usage(argv[0]);
+ return false;
+ }
+ output_hex_file = argv[i];
+
} else if (arg == "-os" || arg == "--output-single-file") {
multiple_output_files = false;
@@ 142,6 152,26 @@ bool CommandLineArgs::parse(int argc, co
}
file_extension = argv[i];
+ } else if (arg == "-p" || arg == "--processor") {
+ ++i;
+ if (i == argc) {
+ error() << "Missing " << arg << " processor argument\n";
+ print_usage(argv[0]);
+ return false;
+ }
+ if (!is_processor(argv[i], default_processor)) {
+ error() << "Invalid processor '" << argv[i] << "' specified.\n";
+ error() << "Possible values are ";
+ for(uint8_t p = 1; p < static_cast<uint8_t>(ProcessorType::NumProcessors); ++p) {
+ if (p != 1) {
+ error() << ", ";
+ }
+ error() << to_string(static_cast<ProcessorType>(p));
+ }
+ error() << '\n';
+ return false;
+ }
+
} else if (arg == "-pi" || arg == "--pseudo-instructions") {
pseudo_instructions = true;
@@ 221,15 251,18 @@ void CommandLineArgs::print_usage(const
error() << " -d NAME=VALUE\n";
error() << " --define NAME=VALUE\n";
error() << " Add predefined symbol (integer, string or boolean value)\n\n";
+ error() << " -dg FILE\n";
+ error() << " --dump-gba-symbols FILE\n";
+ error() << " Dump No$GBA style symbols to the specified file\n\n";
+ error() << " -dh FILE\n";
+ error() << " --dump-hex FILE\n";
+ error() << " Dump hex output interleaved with source code to the specified file\n\n";
error() << " -ds FILE\n";
error() << " --dump-symbols FILE\n";
error() << " Dump symbols to the specified file\n\n";
error() << " -dv FILE\n";
error() << " --dump-vice-symbols FILE\n";
error() << " Dump VICE symbol commands to the specified file\n\n";
- error() << " -dg FILE\n";
- error() << " --dump-gba-symbols FILE\n";
- error() << " Dump No$GBA style symbols to the specified file\n\n";
error() << " -ext EXT\n";
error() << " --file-extension EXT\n";
error() << " File extension to use when naming files after sections\n\n";
@@ 241,13 274,18 @@ void CommandLineArgs::print_usage(const
error() << " Add include directory\n\n";
error() << " -me NUM\n";
error() << " --max-errors NUM\n";
- error() << " Max number of errors before aborting assembler generation pass (default is 20)\n\n";
+ error() << " Max number of errors before aborting assembler generation pass\n\n";
+ error() << " (default is 20)\n\n";
error() << " -om\n";
error() << " --output-multiple-files\n";
error() << " Assemble an output file per section\n\n";
error() << " -os\n";
error() << " --output-single-file\n";
error() << " Assemble a single output file (default)\n\n";
+ error() << " -p PROCESSOR\n";
+ error() << " --processor PROCESSOR\n";
+ error() << " Sets the default processor. This removes the need to specify the\n";
+ error() << " processor in the source code.\n\n";
error() << " -pi\n";
error() << " --pseudo-instructions\n";
error() << " Adds support for additional pseudo instructions or addressing modes to\n";
M jasm/environment/command_line_args.h +3 -0
@@ 1,6 1,7 @@
#pragma once
#include <core/environment/log.h>
+#include <processor/processor.h>
namespace jasm
{
@@ 20,11 21,13 @@ public:
std::string vice_dump_file; ///< File to dump VICE symbol commands to, or empty.
std::string gba_sym_dump_file; ///< File to dump NO$GBA style symbol to, or empty.
std::string file_extension; ///< File extension to use if files are named after sections.
+ std::string output_hex_file; ///< File to dump hex output with interleaved source to.
std::vector<std::string> include_dirs; ///< List of include directories.
std::vector<std::pair<std::string, bool>> predefined_booleans; ///< List of predefined constants and values.
std::vector<std::pair<std::string, int32_t>> predefined_integers; ///< List of predefined constants and values.
std::vector<std::pair<std::string, std::string>> predefined_strings; ///< List of predefined constants and values.
core::ErrorLevel verbose_level;
+ ProcessorType default_processor;
int32_t max_errors; ///< Max number of errors before aborting assembler generation pass.
bool multiple_output_files; ///< True means use one file per section.
bool section_names_as_file_names; ///< True means name files after sections.
M jasm/exceptions/assembly_exception.h +1 -1
@@ 2,7 2,7 @@
#include <core/exceptions/exception.h>
#include <exceptions/error_codes.h>
-#include <parsing/source_location.h>
+#include <tokenize/source_location.h>
namespace jasm
{
M jasm/exceptions/error_codes.h +10 -4
@@ 12,6 12,8 @@ namespace jasm
/// instead.
enum class AssemblyErrorCodes
{
+ Ok = 0,
+
// tokenizer
UnexpectedCharacter = 1000,
AlphaFollowingNumberLiteral,
@@ 21,13 23,13 @@ enum class AssemblyErrorCodes
TooLongCharacterConstant,
MultilineCommentWasNotTerminated,
MissingClosingStringQuote,
- StringTooLong, // not used!
+ Unused1, // not used!
IllegalCharacterInBinaryConstant,
IllegalCharacterInHexConstant,
CantFindIncludeFile,
- RecursiveIncludes,
- ExpectedPathString,
- Unused1, // not used!
+ UnmatchedProcessorPop,
+ InvalidProcessorName,
+ ExpectedProcessorNameOrPop,
FloatOutOfRange,
// syntax parser
@@ 85,6 87,8 @@ enum class AssemblyErrorCodes
SectionMappingExpectsStringLiteral,
SectionMappingHasDoubleSources,
ModulesNotAllowedInMacros,
+ IncludeNotAllowedInMacros,
+ IncludeNotAllowedInSubroutine,
// assembler
OperatorNotSupportingType = 3000,
@@ 197,6 201,8 @@ enum class AssemblyErrorCodes
TooManyArguments,
ArgumentsPassedToSubroutineCall,
UseOfPseudoInstructionInStandardMode,
+ ExpectedPathString,
+ RecursiveIncludes,
// assembler warnings
SubroutineFallthrough = 3500,
M jasm/io/data_reader.cpp +52 -25
@@ 22,8 22,12 @@ DataReader::~DataReader()
{
#if defined(USE_THREADS)
// wait for threaded operations
- for(auto &file : _files)
- file.file_request.wait();
+ for(auto &file : _files) {
+ try {
+ file.file_request.wait();
+ } catch (Exception &) {
+ }
+ }
#endif
}
@@ 58,43 62,64 @@ uint64_t DataReader::queue_load(const st
}
-size_t DataReader::size(uint64_t handle)
+bool DataReader::size(uint64_t handle, size_t &size, std::string &error)
{
HandleMap::const_iterator it = _handle_map.find(handle);
assert(it != _handle_map.end());
FileInfo &info = *it->second;
- #if defined(USE_THREADS)
- if (info.size_future.valid())
- info.size = info.size_future.get();
- #else
- if (!info.loaded) {
- load(&info);
+ if (!info.size_obtained) {
+ #if defined(USE_THREADS)
+ if (info.size_future.valid()) {
+ try {
+ info.size = info.size_future.get();
+ } catch (Exception &e) {
+ info.file_size_error = e.message;
+ }
+ }
+ #else
+ try {
+ load(&info);
+ } catch (Exception &e) {
+ info.file_size_error = e.message;
+ info.file_data_error = e.message;
+ }
info.loaded = true;
- }
- #endif
+ #endif
+ info.size_obtained = true;
+ }
- return info.size;
+ size = info.size;
+ error = info.file_size_error;
+ return info.file_size_error.empty();
}
-const std::vector<uint8_t> &DataReader::data(uint64_t handle)
+bool DataReader::data(uint64_t handle, const std::vector<uint8_t> *&data_ptr, std::string &error)
{
HandleMap::const_iterator it = _handle_map.find(handle);
assert(it != _handle_map.end());
FileInfo &info = *it->second;
- #if defined(USE_THREADS)
- if (info.data_future.valid())
- return info.data_future.get();
- #else
- if (!info.loaded) {
- load(&info);
- info.loaded = true;
+ if (!info.loaded) {
+ try {
+ #if defined(USE_THREADS)
+ if (info.data_future.valid()) {
+ info.data_future.get();
+ }
+ #else
+ load(&info);
+ #endif
+ } catch (Exception &e) {
+ info.file_data_error = e.message;
}
- #endif
+ info.loaded = true;
+ }
- return info.data;
+ bool success = info.file_data_error.empty();
+ data_ptr = success ? &info.data : nullptr;
+ error = info.file_data_error;
+ return success;
}
@@ 122,8 147,9 @@ void DataReader::load(FileInfo *file_inf
#else
#error "Platform not supported"
#endif
- if (!file.is_open())
+ if (!file.is_open()) {
throw FileException("Failed to open " + file_path);
+ }
// get file size
size = static_cast<size_t>(file.tellg());
@@ 152,12 178,13 @@ void DataReader::load(FileInfo *file_inf
file.seekg(0, std::ios::beg);
file.read(reinterpret_cast<char *>(file_info->data.data()), static_cast<int64_t>(size));
- if (!file.good())
+ if (!file.good()) {
throw FileException("Failed to read " + file_path);
+ }
#if defined(USE_THREADS)
// notify the caller of the data
- file_info->data_promise.set_value(file_info->data);
+ file_info->data_promise.set_value(&file_info->data);
#endif
#if defined(USE_THREADS)
M jasm/io/data_reader.h +20 -10
@@ 29,14 29,16 @@ public:
uint64_t queue_load(const std::string_view &filename);
/// This will block until the size has been determined and then return it or a fail state.
- /// @throw An exception is thrown if a file error occurred.
- /// @return The size of the file in bytes.
- size_t size(uint64_t handle);
+ /// @param size Will be set to the size of the file in bytes, if successful.
+ /// @param error Will be set to an error message, if not successful.
+ /// @return True if successful.
+ bool size(uint64_t handle, size_t &size, std::string &error);
/// This will block until the data has been loaded and then return it or a fail state.
- /// @throw An exception is thrown if a file error occurred.
- /// @return A pointer to the data.
- const std::vector<uint8_t> &data(uint64_t handle);
+ /// @param data_ptr Will be set to a pointer to the data, if successful.
+ /// @param error Will be set to an error message, if not successful.
+ /// @return True if successful.
+ bool data(uint64_t handle, const std::vector<uint8_t> *&data_ptr, std::string &error);
private:
struct FileInfo;
@@ 49,17 51,25 @@ private:
const std::vector<std::string> _include_dirs;
struct FileInfo {
+ FileInfo()
+ : size_obtained(false)
+ , loaded(false)
+ , size(0)
+ {}
+
std::string filename;
+ std::string file_size_error; ///< Non-empty if an error occurred while obtaining the size.
+ std::string file_data_error; ///< Non-empty if an error occurred while loading.
#if defined(USE_THREADS)
std::promise<size_t> size_promise;
std::future<size_t> size_future;
- std::promise<std::vector<uint8_t> &> data_promise;
- std::future<std::vector<uint8_t> &> data_future;
+ std::promise<std::vector<uint8_t> *> data_promise;
+ std::future<std::vector<uint8_t> *> data_future;
std::future<void> file_request;
- #else
- bool loaded;
#endif
+ bool size_obtained;
+ bool loaded;
size_t size; ///< Cached size for when future has been consumed.
std::vector<uint8_t> data;
A => jasm/io/hex_source_writer.cpp +195 -0
@@ 0,0 1,195 @@
+#include "pch.h"
+
+#include <core/exceptions/file_exception.h>
+#include <core/math/sign.h>
+#include <core/strings/utf8.h>
+#include <cstdio>
+#include <cstring>
+#include <io/hex_source_writer.h>
+#include <limits>
+#include <string_view>
+#include <core/math/sign.h>
+
+namespace jasm
+{
+
+HexSourceWriter::HexSourceWriter(const core::SplitVector<std::vector<size_t>> &file_row_locations, const std::vector<std::wstring> &file_contents, const std::vector<std::string> &used_files)
+ : _file_row_locations(file_row_locations)
+ , _file_contents(file_contents)
+ , _used_files(used_files)
+ , _current_file(std::numeric_limits<uint32_t>::max())
+ , _current_line(1)
+{
+}
+
+void HexSourceWriter::open_output(const std::string &filename) // opens (truncating) the hex dump file; throws core::FileException on failure
+{
+ #if defined(_MSC_VER)
+ std::wstring wide_filename;
+ try {
+ wide_filename = core::convert_utf8_to_wide(filename);
+ } catch (const core::Exception &) { // a path conversion failure is reported as a file error
+ throw core::FileException("Path cannot be converted to wide byte format: " + filename);
+ }
+ _output_file.open(wide_filename, std::ios::out | std::ios::trunc | std::ios::binary);
+ #elif defined(__GNUC__)
+ _output_file.open(filename, std::ios::out | std::ios::trunc | std::ios::binary);
+ #else
+ #error "Platform not supported"
+ #endif
+ // the open result is checked the same way on every platform
+ if (!_output_file.is_open()) {
+ throw core::FileException("Failed to open " + filename);
+ }
+}
+
+void HexSourceWriter::close()
+{
+ _output_file.close();
+}
+
+void HexSourceWriter::write_data(uint32_t address, const uint8_t *data, uint32_t size, uint32_t file_index, uint32_t start_line, uint32_t end_line)
+{
+ // switch to a new source file (this prints the file header) and start tracking at start_line
+ if (file_index != _current_file) {
+ change_file(file_index);
+ _current_line = start_line;
+ }
+
+ // for a short forward gap (fewer than _source_jump_threshold lines), print the skipped source lines without data
+ if (start_line > _current_line && start_line - _current_line < _source_jump_threshold) {
+ while (_current_line < start_line) {
+ write_source_only();
+ ++_current_line;
+ }
+ }
+
+ if (start_line != _current_line && start_line + 1 != _current_line) {
+ _output_file << " -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --\n";
+ }
+
+ // continue from the starting line, skipping any lines that were not printed above
+ _current_line = start_line;
+
+ // print one source line per row together with up to four data bytes
+ while(_current_line < end_line && size > 0) {
+ uint32_t size_to_write = std::min(size, 4U);
+ write_combined(address, data, size_to_write);
+ address += size_to_write;
+ data += size_to_write;
+ size -= size_to_write;
+ ++_current_line;
+ }
+
+ // more data than source lines: print the remaining bytes, up to four per row, without source
+ while(size > 0) {
+ uint32_t size_to_write = std::min(size, 4U);
+ write_data_only(address, data, size_to_write);
+ address += size_to_write;
+ data += size_to_write;
+ size -= size_to_write;
+ }
+ // more source lines than data rows: print the remaining lines (up to, not including, end_line) without data
+ while(_current_line < end_line) {
+ write_source_only();
+ ++_current_line;
+ }
+}
+
+void HexSourceWriter::write_source_only()
+{
+ // blank out the program counter and hex columns and format only the line number column
+ std::memset(_program_counter_buffer, ' ', sizeof(_program_counter_buffer));
+ std::memset(_hex_buffer, ' ', sizeof(_hex_buffer));
+ snprintf(_line_number_buffer, sizeof(_line_number_buffer), "%*u: ", _line_number_width - 2, _current_line);
+
+ _output_file
+ << std::string_view(_program_counter_buffer, _program_counter_width)
+ << std::string_view(_hex_buffer, _hex_width)
+ << std::string_view(_line_number_buffer, _line_number_width);
+
+ write_source_part();
+}
+
+void HexSourceWriter::write_combined(uint32_t address, const uint8_t *data, uint32_t size)
+{
+ constexpr bool include_row_number = true;
+ write_data_part(address, data, size, include_row_number);
+ write_source_part();
+}
+
+void HexSourceWriter::write_data_only(uint32_t address, const uint8_t *data, uint32_t size)
+{
+ constexpr bool include_row_number = false;
+ write_data_part(address, data, size, include_row_number);
+ _output_file << '\n';
+}
+
+void HexSourceWriter::write_data_part(uint32_t address, const uint8_t *data, uint32_t size, bool include_row_number)
+{
+ // format the program counter and hex columns; NOTE(review): the full _hex_width is written below, so each format must fill the column, and sizes outside 1-4 leave _hex_buffer unchanged - confirm
+ snprintf(_program_counter_buffer, sizeof(_program_counter_buffer), "%*.4x: ", _program_counter_width - 2, address);
+ switch (size) {
+ case 1:
+ snprintf(_hex_buffer, sizeof(_hex_buffer), "%02x ", data[0]);
+ break;
+ case 2:
+ snprintf(_hex_buffer, sizeof(_hex_buffer), "%02x %02x ", data[0], data[1]);
+ break;
+ case 3:
+ snprintf(_hex_buffer, sizeof(_hex_buffer), "%02x %02x %02x ", data[0], data[1], data[2]);
+ break;
+ case 4:
+ snprintf(_hex_buffer, sizeof(_hex_buffer), "%02x %02x %02x %02x ", data[0], data[1], data[2], data[3]);
+ break;
+ default:
+ break;
+ }
+ if (include_row_number) {
+ snprintf(_line_number_buffer, sizeof(_line_number_buffer), "%*u: ", _line_number_width - 2, _current_line);
+ } else {
+ memset(_line_number_buffer, ' ', sizeof(_line_number_buffer));
+ }
+
+ _output_file
+ << std::string_view(_program_counter_buffer, _program_counter_width)
+ << std::string_view(_hex_buffer, _hex_width)
+ << std::string_view(_line_number_buffer, _line_number_width);
+}
+
+void HexSourceWriter::write_source_part()
+{
+ // look up the start offset of the current line in the current file, then scan forward to the line break or terminating NUL
+ size_t start_index = _file_row_locations[_current_file][_current_line];
+ const wchar_t *start = &_file_contents[_current_file][start_index];
+ const wchar_t *end = start;
+ while(true) {
+ wchar_t c = *end;
+ if (c == 0 || c == L'\n' || c == L'\r') {
+ break;
+ }
+ ++end;
+ }
+
+ // convert the wide-character line to utf-8 before writing it, followed by a newline
+ std::string narrow_line = core::wide_to_utf8(std::wstring_view(start, core::unsign_cast(end - start)));
+
+ _output_file << narrow_line << '\n';
+}
+
+void HexSourceWriter::change_file(uint32_t file_index)
+{
+ _output_file
+ << '\n'
+ << _used_files[file_index]
+ << '\n'
+ << "--------------------------------------------------------------------------------"
+ << '\n';
+ _current_file = file_index;
+ _current_line = 1;
+
+
+}
+
+
+}
A => jasm/io/hex_source_writer.h +58 -0
@@ 0,0 1,58 @@
+#pragma once
+
+#include <core/collections/split_vector.h>
+#include <fstream>
+
+namespace jasm
+{
+
+/// This class writes output data in hex interleaved with source code.
+class HexSourceWriter
+{
+public:
+ /// References to the original collection objects will be stored so they must live
+ /// until the class is destroyed.
+ HexSourceWriter(
+ const core::SplitVector<std::vector<size_t>> &file_row_locations
+ , const std::vector<std::wstring> &file_contents
+ , const std::vector<std::string> &used_files
+ );
+
+ /// @throw FileException If a file error occurs.
+ void open_output(const std::string &filename);
+ /// @throw FileException If a file error occurs. NOTE(review): the implementation does not check the stream after closing - confirm.
+ void close();
+
+ /// Write data as hex to the file and interleave with source data from start_line to end_line (not including).
+ void write_data(uint32_t address, const uint8_t *data, uint32_t size, uint32_t file_index, uint32_t start_line, uint32_t end_line);
+
+private:
+ void write_source_only();
+ void write_combined(uint32_t address, const uint8_t *data, uint32_t size);
+ void write_data_only(uint32_t address, const uint8_t *data, uint32_t size);
+ void write_data_part(uint32_t address, const uint8_t *data, uint32_t size, bool include_row_number);
+ void write_source_part();
+ void change_file(uint32_t file_index);
+
+ const core::SplitVector<std::vector<size_t>> &_file_row_locations;
+ const std::vector<std::wstring> &_file_contents;
+ const std::vector<std::string> &_used_files;
+
+ std::ofstream _output_file;
+
+ static constexpr uint8_t _line_number_width = 6 + 2;
+ static constexpr uint8_t _program_counter_width = 8 + 2;
+ static constexpr uint8_t _hex_width = 3*4;
+ static constexpr uint8_t _left_width = _line_number_width + _program_counter_width + _hex_width;
+ static constexpr uint8_t _source_jump_threshold = 10; // forward gaps shorter than this many lines are filled with source lines instead of being jumped over
+
+ char _line_number_buffer[_line_number_width + 1];
+ char _program_counter_buffer[_program_counter_width + 1];
+ char _hex_buffer[_hex_width + 1];
+
+ // Remember the last position so intermediate source lines can be filled in, unless the distance to the next row is too big.
+ uint32_t _current_file;
+ uint32_t _current_line; ///< The line after the one just printed.
+};
+
+}
R jasm/jasm.cbp => +0 -173
@@ 1,173 0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
-<CodeBlocks_project_file>
- <FileVersion major="1" minor="6" />
- <Project>
- <Option title="jasm" />
- <Option pch_mode="2" />
- <Option compiler="clang" />
- <Build>
- <Target title="debug-6502">
- <Option output="bin/debug/jasm-6502" prefix_auto="1" extension_auto="1" />
- <Option object_output="obj/debug-6502/" />
- <Option type="1" />
- <Option compiler="clang" />
- <Option parameters="-v2 test/test.jasm test/test.prg" />
- <Compiler>
- <Add option="-g" />
- <Add option="-D_GLIBCXX_DEBUG" />
- <Add option="-D_DEBUG" />
- <Add option="-DPROCESSOR=0" />
- </Compiler>
- <Linker>
- <Add library="../core/bin/debug/libcore.a" />
- </Linker>
- </Target>
- <Target title="debug-z80">
- <Option output="bin/debug/jasm-z80" prefix_auto="1" extension_auto="1" />
- <Option object_output="obj/debug-z80/" />
- <Option type="1" />
- <Option compiler="clang" />
- <Option parameters="-v2 test/test.jasm test/test.prg" />
- <Compiler>
- <Add option="-g" />
- <Add option="-D_GLIBCXX_DEBUG" />
- <Add option="-D_DEBUG" />
- <Add option="-DPROCESSOR=1" />
- </Compiler>
- <Linker>
- <Add library="../core/bin/debug/libcore.a" />
- </Linker>
- </Target>
- <Target title="release-6502">
- <Option output="bin/release/jasm-6502" prefix_auto="1" extension_auto="1" />
- <Option object_output="obj/release-6502/" />
- <Option type="1" />
- <Option compiler="clang" />
- <Compiler>
- <Add option="-O3" />
- <Add option="-DPROCESSOR=0" />
- </Compiler>
- <Linker>
- <Add option="-s" />
- <Add library="../core/bin/release/libcore.a" />
- </Linker>
- </Target>
- <Target title="release-z80">
- <Option output="bin/release/jasm-z80" prefix_auto="1" extension_auto="1" />
- <Option object_output="obj/release-z80/" />
- <Option type="1" />
- <Option compiler="clang" />
- <Compiler>
- <Add option="-O3" />
- <Add option="-DPROCESSOR=1" />
- </Compiler>
- <Linker>
- <Add option="-s" />
- <Add library="../core/bin/release/libcore.a" />
- </Linker>
- </Target>
- </Build>
- <Compiler>
- <Add option="-Wnon-virtual-dtor" />
- <Add option="-Wbind-to-temporary-copy" />
- <Add option="-Wambiguous-member-template" />
- <Add option="-Wextra-tokens" />
- <Add option="-Weverything" />
- <Add option="-Wall" />
- <Add option="-std=c++14" />
- <Add directory="../jasm/" />
- <Add directory="../core/" />
- </Compiler>
- <Linker>
- <Add option="-pthread" />
- </Linker>
- <Unit filename="assembling/assembler.cpp" />
- <Unit filename="assembling/assembler.h" />
- <Unit filename="assembling/assembler_impl/assembler_impl.cpp" />
- <Unit filename="assembling/assembler_impl/assembler_impl.h" />
- <Unit filename="assembling/assembler_impl/expressions_impl.cpp" />
- <Unit filename="assembling/assembler_impl/functions_impl.cpp" />
- <Unit filename="assembling/assembler_impl/methods_impl.cpp" />
- <Unit filename="assembling/assembler_impl/operators_impl.cpp" />
- <Unit filename="assembling/assembler_impl/symbols_impl.cpp" />
- <Unit filename="assembling/assembler_impl/syntax_impl.cpp" />
- <Unit filename="assembling/function_pointer.h" />
- <Unit filename="assembling/functions.cpp" />
- <Unit filename="assembling/functions.h" />
- <Unit filename="assembling/instructions.h" />
- <Unit filename="assembling/instructions_6502.cpp" />
- <Unit filename="assembling/instructions_6502.h" />
- <Unit filename="assembling/instructions_common.h" />
- <Unit filename="assembling/instructions_z80.cpp" />
- <Unit filename="assembling/instructions_z80.h" />
- <Unit filename="assembling/method_pointer.h" />
- <Unit filename="assembling/methods.cpp" />
- <Unit filename="assembling/methods.h" />
- <Unit filename="assembling/scope_counter.h" />
- <Unit filename="assembling/symbol_environment.cpp" />
- <Unit filename="assembling/symbol_environment.h" />
- <Unit filename="assembling/type_description.h" />
- <Unit filename="assembling/value.cpp" />
- <Unit filename="assembling/value.h" />
- <Unit filename="environment/command_line_args.cpp" />
- <Unit filename="environment/command_line_args.h" />
- <Unit filename="exceptions/assembly_exception.h" />
- <Unit filename="exceptions/error_codes.h" />
- <Unit filename="io/data_reader.cpp" />
- <Unit filename="io/data_reader.h" />
- <Unit filename="main.cpp" />
- <Unit filename="parsing/hasharray_repository.cpp" />
- <Unit filename="parsing/hasharray_repository.h" />
- <Unit filename="parsing/keyword_finder.cpp" />
- <Unit filename="parsing/keyword_finder.h" />
- <Unit filename="parsing/keywords.cpp" />
- <Unit filename="parsing/keywords.h" />
- <Unit filename="parsing/operators.cpp" />
- <Unit filename="parsing/operators.h" />
- <Unit filename="parsing/processor_keywords_6502.cpp" />
- <Unit filename="parsing/processor_keywords_6502.h" />
- <Unit filename="parsing/processor_keywords_z80.cpp" />
- <Unit filename="parsing/processor_keywords_z80.h" />
- <Unit filename="parsing/section.h" />
- <Unit filename="parsing/source_location.h" />
- <Unit filename="parsing/storage_type.h" />
- <Unit filename="parsing/syntax_parser.cpp" />
- <Unit filename="parsing/syntax_parser.h" />
- <Unit filename="parsing/syntax_tokens.cpp" />
- <Unit filename="parsing/syntax_tokens.h" />
- <Unit filename="parsing/token_chain.cpp" />
- <Unit filename="parsing/token_chain.h" />
- <Unit filename="parsing/token_print.cpp" />
- <Unit filename="parsing/token_print.h" />
- <Unit filename="parsing/tokenizer.cpp" />
- <Unit filename="parsing/tokenizer.h" />
- <Unit filename="parsing/types.cpp" />
- <Unit filename="parsing/types.h" />
- <Unit filename="pch.cpp" />
- <Unit filename="pch.h" />
- <Unit filename="revision.h" />
- <Unit filename="revision_hash.h" />
- <Unit filename="strings/string_conversions.cpp" />
- <Unit filename="strings/string_conversions.h" />
- <Unit filename="strings/string_hasher.h" />
- <Unit filename="strings/string_locale.cpp" />
- <Unit filename="strings/string_locale.h" />
- <Unit filename="strings/string_repository.cpp" />
- <Unit filename="strings/string_repository.h" />
- <Unit filename="version.h" />
- <Extensions>
- <code_completion />
- <debugger />
- <DoxyBlocks>
- <comment_style block="2" line="2" />
- <doxyfile_project />
- <doxyfile_build />
- <doxyfile_warnings />
- <doxyfile_output />
- <doxyfile_dot />
- <general />
- </DoxyBlocks>
- <envvars />
- </Extensions>
- </Project>
-</CodeBlocks_project_file>
R jasm/jasm.vcxproj => +0 -328
@@ 1,328 0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup Label="ProjectConfigurations">
- <ProjectConfiguration Include="debug-hasher|x64">
- <Configuration>debug-hasher</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="debug-jasm-6502|x64">
- <Configuration>debug-jasm-6502</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="debug-jasm-z80|x64">
- <Configuration>debug-jasm-z80</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="release-hasher|x64">
- <Configuration>release-hasher</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="release-jasm-6502|x64">
- <Configuration>release-jasm-6502</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="release-jasm-z80|x64">
- <Configuration>release-jasm-z80</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
- </ItemGroup>
- <PropertyGroup Label="Globals">
- <ProjectGuid>{D326DF3F-FBFA-4467-8918-B8B3D89BE0F3}</ProjectGuid>
- <Keyword>Win32Proj</Keyword>
- <RootNamespace>jasm</RootNamespace>
- <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='debug-jasm-z80|x64'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>true</UseDebugLibraries>
- <PlatformToolset>v140</PlatformToolset>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='debug-jasm-6502|x64'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>true</UseDebugLibraries>
- <PlatformToolset>v140</PlatformToolset>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='debug-hasher|x64'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>true</UseDebugLibraries>
- <PlatformToolset>v140</PlatformToolset>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='release-jasm-z80|x64'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>false</UseDebugLibraries>
- <PlatformToolset>v140</PlatformToolset>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='release-jasm-6502|x64'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>false</UseDebugLibraries>
- <PlatformToolset>v140</PlatformToolset>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='release-hasher|x64'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>false</UseDebugLibraries>
- <PlatformToolset>v140</PlatformToolset>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- <CharacterSet>Unicode</CharacterSet>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
- <ImportGroup Label="ExtensionSettings">
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='debug-jasm-z80|x64'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='debug-jasm-6502|x64'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='debug-hasher|x64'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='release-jasm-z80|x64'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='release-jasm-6502|x64'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='release-hasher|x64'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <PropertyGroup Label="UserMacros" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='debug-jasm-z80|x64'">
- <LinkIncremental>true</LinkIncremental>
- <TargetName>$(ProjectName)-z80</TargetName>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='debug-jasm-6502|x64'">
- <LinkIncremental>true</LinkIncremental>
- <TargetName>$(ProjectName)-6502</TargetName>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='debug-hasher|x64'">
- <LinkIncremental>true</LinkIncremental>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='release-jasm-z80|x64'">
- <LinkIncremental>false</LinkIncremental>
- <TargetName>$(ProjectName)-z80</TargetName>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='release-jasm-6502|x64'">
- <LinkIncremental>false</LinkIncremental>
- <TargetName>$(ProjectName)-6502</TargetName>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='release-hasher|x64'">
- <LinkIncremental>false</LinkIncremental>
- </PropertyGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='debug-jasm-z80|x64'">
- <ClCompile>
- <PrecompiledHeader>Use</PrecompiledHeader>
- <WarningLevel>Level4</WarningLevel>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>PROCESSOR=1;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <SDLCheck>true</SDLCheck>
- <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
- <AdditionalIncludeDirectories>$(ProjectDir);$(SolutionDir)core;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <TreatWarningAsError>true</TreatWarningAsError>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
- <AdditionalDependencies>core.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='debug-jasm-6502|x64'">
- <ClCompile>
- <PrecompiledHeader>Use</PrecompiledHeader>
- <WarningLevel>Level4</WarningLevel>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>PROCESSOR=0;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <SDLCheck>true</SDLCheck>
- <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
- <AdditionalIncludeDirectories>$(ProjectDir);$(SolutionDir)core;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <TreatWarningAsError>true</TreatWarningAsError>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
- <AdditionalDependencies>core.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='debug-hasher|x64'">
- <ClCompile>
- <PrecompiledHeader>Use</PrecompiledHeader>
- <WarningLevel>Level4</WarningLevel>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <SDLCheck>true</SDLCheck>
- <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
- <AdditionalIncludeDirectories>$(ProjectDir);$(SolutionDir)core;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <TreatWarningAsError>true</TreatWarningAsError>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
- <AdditionalDependencies>core.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='release-jasm-z80|x64'">
- <ClCompile>
- <WarningLevel>Level4</WarningLevel>
- <PrecompiledHeader>Use</PrecompiledHeader>
- <Optimization>MaxSpeed</Optimization>
- <FunctionLevelLinking>true</FunctionLevelLinking>
- <IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>PROCESSOR=1;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <SDLCheck>true</SDLCheck>
- <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
- <AdditionalIncludeDirectories>$(ProjectDir);$(SolutionDir)core;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <TreatWarningAsError>true</TreatWarningAsError>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <OptimizeReferences>true</OptimizeReferences>
- <AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
- <AdditionalDependencies>core.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='release-jasm-6502|x64'">
- <ClCompile>
- <WarningLevel>Level4</WarningLevel>
- <PrecompiledHeader>Use</PrecompiledHeader>
- <Optimization>MaxSpeed</Optimization>
- <FunctionLevelLinking>true</FunctionLevelLinking>
- <IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>PROCESSOR=0;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <SDLCheck>true</SDLCheck>
- <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
- <AdditionalIncludeDirectories>$(ProjectDir);$(SolutionDir)core;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <TreatWarningAsError>true</TreatWarningAsError>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <OptimizeReferences>true</OptimizeReferences>
- <AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
- <AdditionalDependencies>core.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
- <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='release-hasher|x64'">
- <ClCompile>
- <WarningLevel>Level4</WarningLevel>
- <PrecompiledHeader>Use</PrecompiledHeader>
- <Optimization>MaxSpeed</Optimization>
- <FunctionLevelLinking>true</FunctionLevelLinking>
- <IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <SDLCheck>true</SDLCheck>
- <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
- <AdditionalIncludeDirectories>$(ProjectDir);$(SolutionDir)core;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <TreatWarningAsError>true</TreatWarningAsError>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <OptimizeReferences>true</OptimizeReferences>
- <AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
- <AdditionalDependencies>core.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemGroup>
- <ClCompile Include="assembling\assembler.cpp" />
- <ClCompile Include="assembling\assembler_impl\assembler_impl.cpp" />
- <ClCompile Include="assembling\assembler_impl\expressions_impl.cpp" />
- <ClCompile Include="assembling\assembler_impl\functions_impl.cpp" />
- <ClCompile Include="assembling\assembler_impl\methods_impl.cpp" />
- <ClCompile Include="assembling\assembler_impl\operators_impl.cpp" />
- <ClCompile Include="assembling\assembler_impl\symbols_impl.cpp" />
- <ClCompile Include="assembling\assembler_impl\syntax_impl.cpp" />
- <ClCompile Include="assembling\functions.cpp" />
- <ClCompile Include="assembling\instructions_6502.cpp" />
- <ClCompile Include="assembling\instructions_z80.cpp" />
- <ClCompile Include="assembling\methods.cpp" />
- <ClCompile Include="assembling\symbol_environment.cpp" />
- <ClCompile Include="assembling\value.cpp" />
- <ClCompile Include="environment\command_line_args.cpp" />
- <ClCompile Include="io\data_reader.cpp" />
- <ClCompile Include="main.cpp" />
- <ClCompile Include="parsing\hasharray_repository.cpp" />
- <ClCompile Include="parsing\keyword_finder.cpp" />
- <ClCompile Include="parsing\keywords.cpp" />
- <ClCompile Include="parsing\operators.cpp" />
- <ClCompile Include="parsing\processor_keywords_6502.cpp" />
- <ClCompile Include="parsing\processor_keywords_z80.cpp" />
- <ClCompile Include="parsing\syntax_parser.cpp" />
- <ClCompile Include="parsing\syntax_tokens.cpp" />
- <ClCompile Include="parsing\token_chain.cpp" />
- <ClCompile Include="parsing\token_print.cpp" />
- <ClCompile Include="parsing\tokenizer.cpp" />
- <ClCompile Include="parsing\types.cpp" />
- <ClCompile Include="pch.cpp">
- <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='debug-jasm-z80|x64'">Create</PrecompiledHeader>
- <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='debug-jasm-6502|x64'">Create</PrecompiledHeader>
- <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='debug-hasher|x64'">Create</PrecompiledHeader>
- <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='release-jasm-z80|x64'">Create</PrecompiledHeader>
- <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='release-jasm-6502|x64'">Create</PrecompiledHeader>
- <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='release-hasher|x64'">Create</PrecompiledHeader>
- </ClCompile>
- <ClCompile Include="strings\string_conversions.cpp" />
- <ClCompile Include="strings\string_locale.cpp" />
- <ClCompile Include="strings\string_repository.cpp" />
- </ItemGroup>
- <ItemGroup>
- <ClInclude Include="assembling\assembler.h" />
- <ClInclude Include="assembling\assembler_impl\assembler_impl.h" />
- <ClInclude Include="assembling\function_pointer.h" />
- <ClInclude Include="assembling\functions.h" />
- <ClInclude Include="assembling\instructions.h" />
- <ClInclude Include="assembling\instructions_6502.h" />
- <ClInclude Include="assembling\instructions_common.h" />
- <ClInclude Include="assembling\instructions_z80.h" />
- <ClInclude Include="assembling\method_pointer.h" />
- <ClInclude Include="assembling\methods.h" />
- <ClInclude Include="assembling\scope_counter.h" />
- <ClInclude Include="assembling\symbol_environment.h" />
- <ClInclude Include="assembling\type_description.h" />
- <ClInclude Include="assembling\value.h" />
- <ClInclude Include="environment\command_line_args.h" />
- <ClInclude Include="exceptions\assembly_exception.h" />
- <ClInclude Include="exceptions\error_codes.h" />
- <ClInclude Include="io\data_reader.h" />
- <ClInclude Include="parsing\hasharray_repository.h" />
- <ClInclude Include="parsing\keyword_finder.h" />
- <ClInclude Include="parsing\keywords.h" />
- <ClInclude Include="parsing\operators.h" />
- <ClInclude Include="parsing\processor_keywords_6502.h" />
- <ClInclude Include="parsing\processor_keywords_z80.h" />
- <ClInclude Include="parsing\section.h" />
- <ClInclude Include="parsing\source_location.h" />
- <ClInclude Include="parsing\storage_type.h" />
- <ClInclude Include="parsing\syntax_parser.h" />
- <ClInclude Include="parsing\syntax_tokens.h" />
- <ClInclude Include="parsing\token_chain.h" />
- <ClInclude Include="parsing\token_print.h" />
- <ClInclude Include="parsing\tokenizer.h" />
- <ClInclude Include="parsing\types.h" />
- <ClInclude Include="pch.h" />
- <ClInclude Include="strings\string_conversions.h" />
- <ClInclude Include="strings\string_hasher.h" />
- <ClInclude Include="strings\string_locale.h" />
- <ClInclude Include="strings\string_repository.h" />
- </ItemGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
- <ImportGroup Label="ExtensionTargets">
- </ImportGroup>
-</Project>
No newline at end of file
R jasm/jasm.vcxproj.filters => +0 -245
@@ 1,245 0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup>
- <Filter Include="Resource Files">
- <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
- <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
- </Filter>
- <Filter Include="environment">
- <UniqueIdentifier>{270e7fa6-8ddb-4ff5-b222-979c5abac0d9}</UniqueIdentifier>
- </Filter>
- <Filter Include="parsing">
- <UniqueIdentifier>{ca988a3b-0475-4ae5-b5fd-fa4ddc5503b2}</UniqueIdentifier>
- </Filter>
- <Filter Include="io">
- <UniqueIdentifier>{381b2789-884b-4614-ab89-3f5392386939}</UniqueIdentifier>
- </Filter>
- <Filter Include="exceptions">
- <UniqueIdentifier>{315c5cf7-7a84-4867-b10c-8fef08cddba3}</UniqueIdentifier>
- </Filter>
- <Filter Include="strings">
- <UniqueIdentifier>{3cbd6ba9-628c-40f4-ad9e-dac18c4a0c35}</UniqueIdentifier>
- </Filter>
- <Filter Include="collections">
- <UniqueIdentifier>{458e776a-2c4e-4276-acc3-18caea7ad055}</UniqueIdentifier>
- </Filter>
- <Filter Include="assembling">
- <UniqueIdentifier>{246a6177-a06b-445b-91d0-9d7eb37ff4b2}</UniqueIdentifier>
- </Filter>
- <Filter Include="assembling\assembler_impl">
- <UniqueIdentifier>{288660f3-5cff-46bc-8765-4a5e02b59f77}</UniqueIdentifier>
- </Filter>
- <Filter Include="debug">
- <UniqueIdentifier>{837cdb39-47a2-4fcb-8e24-874b62a957ea}</UniqueIdentifier>
- </Filter>
- </ItemGroup>
- <ItemGroup>
- <ClCompile Include="main.cpp" />
- <ClCompile Include="environment\command_line_args.cpp">
- <Filter>environment</Filter>
- </ClCompile>
- <ClCompile Include="pch.cpp" />
- <ClCompile Include="parsing\tokenizer.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="parsing\token_chain.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="parsing\keyword_finder.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="strings\string_conversions.cpp">
- <Filter>strings</Filter>
- </ClCompile>
- <ClCompile Include="strings\string_locale.cpp">
- <Filter>strings</Filter>
- </ClCompile>
- <ClCompile Include="strings\string_repository.cpp">
- <Filter>strings</Filter>
- </ClCompile>
- <ClCompile Include="parsing\syntax_parser.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="parsing\token_print.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="parsing\hasharray_repository.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="assembling\assembler.cpp">
- <Filter>assembling</Filter>
- </ClCompile>
- <ClCompile Include="assembling\instructions_6502.cpp">
- <Filter>assembling</Filter>
- </ClCompile>
- <ClCompile Include="assembling\instructions_z80.cpp">
- <Filter>assembling</Filter>
- </ClCompile>
- <ClCompile Include="assembling\value.cpp">
- <Filter>assembling</Filter>
- </ClCompile>
- <ClCompile Include="io\data_reader.cpp">
- <Filter>io</Filter>
- </ClCompile>
- <ClCompile Include="parsing\keywords.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="parsing\operators.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="parsing\types.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="assembling\assembler_impl\assembler_impl.cpp">
- <Filter>assembling\assembler_impl</Filter>
- </ClCompile>
- <ClCompile Include="assembling\assembler_impl\expressions_impl.cpp">
- <Filter>assembling\assembler_impl</Filter>
- </ClCompile>
- <ClCompile Include="assembling\assembler_impl\functions_impl.cpp">
- <Filter>assembling\assembler_impl</Filter>
- </ClCompile>
- <ClCompile Include="assembling\assembler_impl\operators_impl.cpp">
- <Filter>assembling\assembler_impl</Filter>
- </ClCompile>
- <ClCompile Include="assembling\assembler_impl\symbols_impl.cpp">
- <Filter>assembling\assembler_impl</Filter>
- </ClCompile>
- <ClCompile Include="assembling\assembler_impl\syntax_impl.cpp">
- <Filter>assembling\assembler_impl</Filter>
- </ClCompile>
- <ClCompile Include="assembling\functions.cpp">
- <Filter>assembling</Filter>
- </ClCompile>
- <ClCompile Include="assembling\methods.cpp">
- <Filter>assembling</Filter>
- </ClCompile>
- <ClCompile Include="assembling\assembler_impl\methods_impl.cpp">
- <Filter>assembling\assembler_impl</Filter>
- </ClCompile>
- <ClCompile Include="parsing\syntax_tokens.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="assembling\symbol_environment.cpp">
- <Filter>assembling</Filter>
- </ClCompile>
- <ClCompile Include="parsing\processor_keywords_z80.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- <ClCompile Include="parsing\processor_keywords_6502.cpp">
- <Filter>parsing</Filter>
- </ClCompile>
- </ItemGroup>
- <ItemGroup>
- <ClInclude Include="environment\command_line_args.h">
- <Filter>environment</Filter>
- </ClInclude>
- <ClInclude Include="pch.h" />
- <ClInclude Include="parsing\tokenizer.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="parsing\token_chain.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="parsing\keyword_finder.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="exceptions\assembly_exception.h">
- <Filter>exceptions</Filter>
- </ClInclude>
- <ClInclude Include="exceptions\error_codes.h">
- <Filter>exceptions</Filter>
- </ClInclude>
- <ClInclude Include="strings\string_conversions.h">
- <Filter>strings</Filter>
- </ClInclude>
- <ClInclude Include="strings\string_locale.h">
- <Filter>strings</Filter>
- </ClInclude>
- <ClInclude Include="strings\string_hasher.h">
- <Filter>strings</Filter>
- </ClInclude>
- <ClInclude Include="strings\string_repository.h">
- <Filter>strings</Filter>
- </ClInclude>
- <ClInclude Include="parsing\syntax_parser.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="parsing\token_print.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="parsing\hasharray_repository.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="assembling\assembler.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="assembling\instructions.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="assembling\instructions_6502.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="assembling\instructions_common.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="assembling\instructions_z80.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="assembling\type_description.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="assembling\value.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="io\data_reader.h">
- <Filter>io</Filter>
- </ClInclude>
- <ClInclude Include="parsing\keywords.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="parsing\operators.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="parsing\section.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="parsing\source_location.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="parsing\types.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="assembling\assembler_impl\assembler_impl.h">
- <Filter>assembling\assembler_impl</Filter>
- </ClInclude>
- <ClInclude Include="assembling\functions.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="parsing\storage_type.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="assembling\methods.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="parsing\syntax_tokens.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="assembling\symbol_environment.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="parsing\processor_keywords_z80.h">
- <Filter>parsing</Filter>
- </ClInclude>
- <ClInclude Include="assembling\function_pointer.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="assembling\method_pointer.h">
- <Filter>assembling</Filter>
- </ClInclude>
- <ClInclude Include="assembling\scope_counter.h" />
- <ClInclude Include="parsing\processor_keywords_6502.h">
- <Filter>parsing</Filter>
- </ClInclude>
- </ItemGroup>
-</Project>
No newline at end of file
M jasm/main.cpp +24 -24
@@ 1,6 1,7 @@
#include "pch.h"
-#include <assembling/assembler.h>
+#include <assemble/assembler.h>
+#include <clocale>
#include <core/debug/timer.h>
#include <core/exceptions/file_exception.h>
#include <core/io/file_writer.h>
@@ 10,12 11,9 @@
#include <exceptions/assembly_exception.h>
#include <exceptions/error_codes.h>
#include <io/data_reader.h>
-#include <parsing/hasharray_repository.h>
-#include <parsing/syntax_parser.h>
-#include <parsing/tokenizer.h>
+#include <sstream>
#include <strings/string_repository.h>
-#include <clocale>
-#include <sstream>
+#include <utility/hasharray_repository.h>
using namespace jasm;
using namespace core;
@@ 114,15 112,26 @@ void write_sections(CommandLineArgs &arg
void assemble(CommandLineArgs &args)
{
StringRepository strings; // repository to map hashes back to strings
- HashArrayRepository hash_arrays(128); // repository to replace hash array references with a fixed size handle
std::vector<std::string> used_files; // the files used in the source code, used to connect tokens to files
- DataReader data_reader(args.include_dirs); // this loads binary files in the background
-
- auto tokens = tokenize(args.pseudo_instructions, args.input_file, strings, args.include_dirs, used_files, data_reader);
- auto syntax = parse_syntax(tokens, strings, hash_arrays, used_files, data_reader);
- auto sections = assemble(args.multiple_output_files, args.multi_bank_mode, args.pseudo_instructions, syntax, strings, hash_arrays, used_files,
- args.predefined_booleans, args.predefined_integers, args.predefined_strings,
- data_reader, args.max_errors, args.symbol_dump_file, args.vice_dump_file, args.gba_sym_dump_file);
+
+ auto sections = assemble(
+ args.multiple_output_files,
+ args.multi_bank_mode,
+ args.pseudo_instructions,
+ args.default_processor,
+ args.input_file,
+ args.include_dirs,
+ strings,
+ used_files,
+ args.predefined_booleans,
+ args.predefined_integers,
+ args.predefined_strings,
+ args.max_errors,
+ args.symbol_dump_file,
+ args.vice_dump_file,
+ args.gba_sym_dump_file,
+ args.output_hex_file
+ );
if (sections.empty()) {
warning() << "No sections with data, so no file output.\n";
@@ 146,19 155,10 @@ bool is_little_endian()
return u.c[0] == 1;
}
-const char *supported_processor()
-{
- const std::array<const char *, NUM_PROCESSORS> processors {{
- "6502",
- "z80",
- }};
- return processors[PROCESSOR];
-}
-
std::string version_string()
{
std::stringstream ss;
- ss << "jAsm " << supported_processor() << " assembler v" << version[0] << "." << version[1] << "." << revision << " (" << revision_hash << ")";
+ ss << "jAsm assembler v" << version[0] << "." << version[1] << "." << revision << " (" << revision_hash << ")";
return ss.str();
}
R jasm/parsing/token_print.cpp => +0 -59
@@ 1,59 0,0 @@
-#include "pch.h"
-
-#include <parsing/token_print.h>
-#include <parsing/tokenizer.h>
-
-namespace jasm
-{
-
-std::ostream& operator<<(std::ostream& out, const Token &t)
-{
- if (is_instruction(t)) {
- out << "instruction";
- } else {
- out << to_string(t.type);
- }
-
- switch (t.type) {
- case TokenType::Whitespace:
- case TokenType::Char:
- case TokenType::Integer:
- case TokenType::Float:
- case TokenType::String:
- case TokenType::End:
- case TokenType::Newline:
- case TokenType::NumTypes:
- break;
-
- case TokenType::Symbol:
- {
- if (t.instruction_index != InstructionType::NumTypes) {
- out << " " << to_string(t.instruction_index);
- }
- break;
- }
-
- case TokenType::Boolean:
- out << " " << to_string(t.boolean_index);
- break;
-
- case TokenType::Operator:
- out << " " << to_string(t.operator_index);
- break;
-
- case TokenType::Keyword:
- out << " " << to_string(t.keyword_index);
- break;
-
- case TokenType::ProcessorKeyword:
- out << " " << to_string(t.processor_keyword_index);
- break;
-
- case TokenType::Typename:
- out << " " << to_string(t.typename_index);
- break;
- }
- return out;
-}
-
-} // namespace jasm
R jasm/parsing/token_print.h => +0 -17
@@ 1,17 0,0 @@
-#pragma once
-
-#include <ostream>
-
-namespace jasm
-{
-
-struct Token;
-
-/// @addtogroup tokenize
-/// @{
-
-std::ostream& operator<<(std::ostream& out, const Token &t);
-
-/// @}
-
-} // namespace jasm
M jasm/pch.h +0 -7
@@ 2,13 2,6 @@
#define MARK_USE(x) ((void)x)
-/// The supported processors. Check against the PROCESSOR define to see which one is currently compiling.
-#define M6502 0
-#define Z80 1
-#define NUM_PROCESSORS 2
-
-#define SUPPORTS(x) (PROCESSOR == (x))
-
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wc++98-compat"
#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
M jasm/assembling/instructions_6502.cpp => jasm/processor/6502/instructions_6502.cpp +7 -8
@@ 1,11 1,11 @@
#include "pch.h"
-#if SUPPORTS(M6502)
-
-#include <assembling/instructions_6502.h>
+#include <processor/6502/instructions_6502.h>
namespace jasm
{
+ namespace mos6502
+ {
using namespace AddressingModeMask;
@@ 247,7 247,7 @@ bool is_ending_instruction(InstructionTy
|| type == InstructionType::Rts;
}
-const std::string_view to_string(AddressingModeType type)
+std::string_view to_string(AddressingModeType type)
{
const static std::string_view names[] = {
std::string_view("zero page address"),
@@ 269,7 269,7 @@ const std::string_view to_string(Address
return names[static_cast<size_t>(type)];
}
-const std::string_view to_string(InstructionType type)
+std::string_view to_string(InstructionType type)
{
static const std::string_view names[] = {
std::string_view("adc"),
@@ 338,6 338,5 @@ const std::string_view to_string(Instruc
return names[static_cast<size_t>(type)];
}
-}
-
-#endif
+ } // namespace mos6502
+} // namespace jasm
M jasm/assembling/instructions_6502.h => jasm/processor/6502/instructions_6502.h +8 -9
@@ 1,13 1,13 @@
#pragma once
-#if SUPPORTS(M6502)
-
-#include <assembling/instructions_common.h>
+#include <processor/instructions_common.h>
namespace jasm
{
-
-/// @addtogroup assembling
+ namespace mos6502
+ {
+
+/// @addtogroup assemble
/// @{
enum class InstructionType : uint8_t
@@ 157,12 157,11 @@ InstructionType inverse_branch(Instructi
bool is_ending_instruction(InstructionType type);
/// Convert an addressing mode to a string for printing.
-const std::string_view to_string(AddressingModeType type);
+std::string_view to_string(AddressingModeType type);
-const std::string_view to_string(InstructionType type);
+std::string_view to_string(InstructionType type);
/// @}
+ } // namespace mos6502
} // namespace jasm
-
-#endif
A => jasm/processor/6502/processor_6502.cpp +708 -0
@@ 0,0 1,708 @@
+#include "pch.h"
+
+#include <assemble/assembler_impl/assembler_impl.h>
+#include <assemble/scope_counter.h>
+#include <exceptions/assembly_exception.h>
+#include <io/hex_source_writer.h>
+#include <processor/6502/processor_6502.h>
+#include <processor/6502/processor_keywords_6502.h>
+#include <sstream>
+#include <syntax/syntax_parser.h>
+#include <syntax/syntax_tokens.h>
+#include <tokenize/tokens.h>
+
+namespace jasm
+{
+ namespace mos6502
+ {
+
+namespace
+{
+ enum // File-local bit masks: each constant has exactly the bit of one AddressingModeType value set.
+ {
+ Imp = 1 << static_cast<int>(AddressingModeType::Implied),
+ Imm = 1 << static_cast<int>(AddressingModeType::Immediate),
+ Zp = 1 << static_cast<int>(AddressingModeType::ZeroPageAddr),
+ Zpx = 1 << static_cast<int>(AddressingModeType::ZeroPageIndexX),
+ Zpy = 1 << static_cast<int>(AddressingModeType::ZeroPageIndexY),
+ Abs = 1 << static_cast<int>(AddressingModeType::AbsoluteAddr),
+ AbsX = 1 << static_cast<int>(AddressingModeType::AbsoluteIndexX),
+ AbsY = 1 << static_cast<int>(AddressingModeType::AbsoluteIndexY),
+ Rel = 1 << static_cast<int>(AddressingModeType::RelativeAddr),
+ Ind = 1 << static_cast<int>(AddressingModeType::IndirectAddr),
+ IndX = 1 << static_cast<int>(AddressingModeType::IndirectIndexX),
+ IndY = 1 << static_cast<int>(AddressingModeType::IndirectIndexY),
+ };
+} // anonymous namespace
+
+struct InstructionToken : public SyntaxToken // Written by the syntax pass parse_instruction and read back by the assembly pass parse_instruction.
+{
+ InstructionType instruction; ///< The 6502 instruction this token represents.
+ uint8_t padding1;
+ uint16_t addressing_modes; ///< Mask with AddressingModeType bits set for each addressing mode that is still possible.
+ SourceLocation source_location; ///< Source location of the instruction.
+ // 8 byte aligned
+ bool has_instruction_data_label; ///< True if there is a label defined that points to the instruction data.
+ bool global_data_label; ///< True if the label is global.
+ uint8_t padding2[2];
+ SourceLocation address_label_location; ///< Source location of the data label. Only meaningful when has_instruction_data_label is true.
+ // 8 byte aligned
+ uint64_t data_label_symbol_hash; ///< Symbol to define as the data label.
+ // 8 byte aligned
+};
+
+void Processor6502::register_processor_keywords(std::vector<std::string> &keywords)
+{
+ add_type_tokens<ProcessorKeywordType>(keywords, TokenType::ProcessorKeyword); // NOTE(review): presumably adds one ProcessorKeyword entry per ProcessorKeywordType value to keywords — confirm in add_type_tokens
+}
+
+void Processor6502::register_processor_instructions(bool pseudo_instructions)
+{
+ // Generate the instruction lookup. Indices from NumStandard up to NumTypes are only registered when pseudo_instructions is true.
+ uint8_t num_instructions = static_cast<uint8_t>(pseudo_instructions ? InstructionType::NumTypes : InstructionType::NumStandard);
+ for (uint8_t i = 0; i < num_instructions; ++i) {
+ const std::string_view name = to_string(static_cast<InstructionType>(i));
+ _instructions.insert(core::murmur_hash3_string_x64_64(name)) = i; // keyed by the hash of the instruction name, value is the instruction index
+ }
+}
+
+bool Processor6502::allow_processor_keyword_with_prim(uint64_t &/*keyword_hash*/) const
+{
+ // No prim translation for this processor: the hash argument is ignored and the answer is always false.
+ return false;
+}
+
+std::string Processor6502::token_to_string(const Token &t) const
+{
+ std::stringstream ss;
+ if (jasm::is_instruction(t)) { // NOTE(review): presumably true for tokens that matched an instruction name — confirm
+ ss << "instruction";
+ } else {
+ ss << to_string(t.type);
+ }
+
+ switch (t.type) {
+ case TokenType::Whitespace:
+ case TokenType::Char:
+ case TokenType::Integer:
+ case TokenType::Float:
+ case TokenType::String:
+ case TokenType::End:
+ case TokenType::Newline:
+ case TokenType::Processor:
+ case TokenType::NumTypes:
+ break; // nothing beyond the type name is printed for these
+
+ case TokenType::Symbol:
+ {
+ if (t.instruction_index != invalid_instruction) { // append the instruction name only when the symbol carries a valid instruction index
+ ss << " " << to_string(static_cast<InstructionType>(t.instruction_index));
+ }
+ break;
+ }
+
+ case TokenType::Boolean:
+ ss << " " << to_string(t.boolean_index);
+ break;
+
+ case TokenType::Operator:
+ ss << " " << to_string(t.operator_index);
+ break;
+
+ case TokenType::Keyword:
+ ss << " " << to_string(t.keyword_index);
+ break;
+
+ case TokenType::ProcessorKeyword:
+ ss << " " << to_string(static_cast<ProcessorKeywordType>(t.processor_keyword_index)); // the raw index is cast to the 6502 keyword enum to select the 6502 to_string overload
+ break;
+
+ case TokenType::Typename:
+ ss << " " << to_string(t.typename_index);
+ break;
+ }
+ return ss.str();
+}
+
+uint16_t Processor6502::try_parse_addressing_mode(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *t) const
+{
+ // Save the read state so it can be restored later; this function only reads ahead to classify the operand.
+ TokenChainScope rewind_scope(parser.create_rewind_scope());
+
+ if (parser.is_operator(t, OperatorType::Hash)) {
+ // Immediate
+ return 1 << static_cast<int>(AddressingModeType::Immediate);
+
+ }
+ if (parser.is_operator(t, OperatorType::LeftParenthesis)) {
+ // (<something>
+ // possibly indirect addressing mode but could also be an expression beginning with parenthesis
+ // ZeroPageAddr
+ // ZeroPageIndexX
+ // ZeroPageIndexY
+ // AbsoluteAddr
+ // AbsoluteIndexX
+ // AbsoluteIndexY
+ // RelativeAddr
+ // IndirectAddr
+ // IndirectIndexX
+ // IndirectIndexY
+
+ // Try to parse an expression, ignoring the first parenthesis. That makes it possible to see
+ // if there is a comma within the parenthesis. This is not optimal for performance because
+ // a full expression parse does more work than this check needs.
+ const Token *next;
+ {
+ constexpr bool end_at_unmatched_right_parenthesis = true;
+ constexpr bool end_at_newline = true;
+ next = parser.parse_expression(parser.consume_next_token(), end_at_unmatched_right_parenthesis, end_at_newline);
+ }
+ if (parser.is_operator(next, OperatorType::Comma)) {
+ // (<expression>,
+ // IndirectIndexX
+
+ // verify that "x" follows
+ next = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ if (next->type != TokenType::ProcessorKeyword || static_cast<ProcessorKeywordType>(next->processor_keyword_index) != ProcessorKeywordType::X) {
+ std::stringstream ss;
+ ss << "Expected x for indirect addressing mode, but got " << token_to_string(*next);
+ throw AssemblyException(source_files, next->source_location, AssemblyErrorCodes::InvalidIndexRegisterInAddressingMode, ss.str());
+ }
+ // (<expression>,x
+
+ // verify that right parenthesis follows
+ next = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ if (next->type != TokenType::Operator || next->operator_index != OperatorType::RightParenthesis) {
+ std::stringstream ss;
+ ss << "Expected closing parenthesis in indirect addressing mode, but got " << token_to_string(*next);
+ throw AssemblyException(source_files, next->source_location, AssemblyErrorCodes::ExpectedEndingParenthesisInIndirectAddressingMode, ss.str());
+ }
+ // (<expression>,x)
+
+ return 1 << static_cast<int>(AddressingModeType::IndirectIndexX);
+
+ }
+
+ if (parser.is_operator(next, OperatorType::RightParenthesis)) {
+ // (<expression>)
+ // ZeroPageAddr
+ // ZeroPageIndexX
+ // ZeroPageIndexY
+ // AbsoluteAddr
+ // AbsoluteIndexX
+ // AbsoluteIndexY
+ // RelativeAddr
+ // IndirectAddr
+ // IndirectIndexY
+ next = parser.skip_spaces_and_tabs(parser.consume_next_token());
+
+ if (next->type == TokenType::Operator && next->operator_index < OperatorType::NumOperatorFunctions) { // NOTE(review): presumably operators below NumOperatorFunctions are the ones that can continue an expression — confirm
+ // (<expression>)<operator>
+
+ // go back and parse the whole thing to get to the end of the expression and check if it ends
+ // with ,x or ,y. This is not optimal from a performance perspective. I could write specialized
+ // code to skip an expression.
+ rewind_scope.rewind();
+ {
+ constexpr bool end_at_unmatched_right_parenthesis = false;
+ constexpr bool end_at_newline = true;
+ next = parser.parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
+ }
+
+ if (parser.is_operator(next, OperatorType::Comma)) {
+ // <expression>,
+ // ZeroPageIndexX
+ // AbsoluteIndexX
+ // ZeroPageIndexY
+ // AbsoluteIndexY
+ next = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ if (next->type == TokenType::ProcessorKeyword && static_cast<ProcessorKeywordType>(next->processor_keyword_index) == ProcessorKeywordType::X) {
+ // <expression>,x
+ // ZeroPageIndexX
+ // AbsoluteIndexX
+ return 1 << static_cast<int>(AddressingModeType::ZeroPageIndexX) |
+ 1 << static_cast<int>(AddressingModeType::AbsoluteIndexX);
+ }
+ if (next->type == TokenType::ProcessorKeyword && static_cast<ProcessorKeywordType>(next->processor_keyword_index) == ProcessorKeywordType::Y) {
+ // <expression>,y
+ // ZeroPageIndexY
+ // AbsoluteIndexY
+ return 1 << static_cast<int>(AddressingModeType::ZeroPageIndexY) |
+ 1 << static_cast<int>(AddressingModeType::AbsoluteIndexY);
+ }
+ // the index register is invalid
+ std::stringstream ss;
+ ss << "Invalid index register in addressing mode. Expected x or y but got " << token_to_string(*next);
+ throw AssemblyException(source_files, next->source_location, AssemblyErrorCodes::InvalidIndexRegisterInAddressingMode, ss.str());
+ }
+ // (<expression>)<operator><expression>
+ // ZeroPageAddr
+ // AbsoluteAddr
+ // RelativeAddr
+ return 1 << static_cast<int>(AddressingModeType::ZeroPageAddr) |
+ 1 << static_cast<int>(AddressingModeType::AbsoluteAddr) |
+ 1 << static_cast<int>(AddressingModeType::RelativeAddr);
+ }
+ if (parser.is_operator(next, OperatorType::Comma)) {
+ // (<expression>),
+ next = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ if (next->type == TokenType::ProcessorKeyword && static_cast<ProcessorKeywordType>(next->processor_keyword_index) == ProcessorKeywordType::Y) {
+ // (<expression>),y
+ // IndirectIndexY
+ return 1 << static_cast<int>(AddressingModeType::IndirectIndexY);
+ }
+ // the index register is invalid
+ std::stringstream ss;
+ ss << "Invalid index register in addressing mode. Expected y but got " << token_to_string(*next);
+ throw AssemblyException(source_files, next->source_location, AssemblyErrorCodes::InvalidIndexRegisterInAddressingMode, ss.str());
+ }
+ // (<expression>)
+ // IndirectAddr
+ return 1 << static_cast<int>(AddressingModeType::IndirectAddr);
+
+ }
+
+ // the expression ended on neither a comma nor a right parenthesis, so the left parenthesis is unmatched
+ std::stringstream ss;
+ ss << "Unmatched left parenthesis in expression";
+ throw AssemblyException(source_files, t->source_location, AssemblyErrorCodes::UnmatchedLeftParenthesis, ss.str());
+ }
+
+ if (t->type == TokenType::Newline || parser.is_operator(t, OperatorType::Semicolon)) {
+ // Implied
+ return 1 << static_cast<int>(AddressingModeType::Implied);
+
+ }
+
+ // ZeroPageAddr
+ // ZeroPageIndexX
+ // ZeroPageIndexY
+ // AbsoluteAddr
+ // AbsoluteIndexX
+ // AbsoluteIndexY
+ // RelativeAddr
+ const Token *next;
+ {
+ constexpr bool end_at_unmatched_right_parenthesis = false;
+ constexpr bool end_at_newline = true;
+ next = parser.parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
+ }
+
+ if (parser.is_operator(next, OperatorType::Comma)) {
+ // <expression>,
+ // ZeroPageIndexX
+ // AbsoluteIndexX
+ // ZeroPageIndexY
+ // AbsoluteIndexY
+ next = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ if (next->type == TokenType::ProcessorKeyword && static_cast<ProcessorKeywordType>(next->processor_keyword_index) == ProcessorKeywordType::X) {
+ // <expression>,x
+ // ZeroPageIndexX
+ // AbsoluteIndexX
+ return 1 << static_cast<int>(AddressingModeType::ZeroPageIndexX) |
+ 1 << static_cast<int>(AddressingModeType::AbsoluteIndexX);
+ }
+ if (next->type == TokenType::ProcessorKeyword && static_cast<ProcessorKeywordType>(next->processor_keyword_index) == ProcessorKeywordType::Y) {
+ // <expression>,y
+ // ZeroPageIndexY
+ // AbsoluteIndexY
+ return 1 << static_cast<int>(AddressingModeType::ZeroPageIndexY) |
+ 1 << static_cast<int>(AddressingModeType::AbsoluteIndexY);
+ }
+ // the index register is invalid
+ std::stringstream ss;
+ ss << "Invalid index register in addressing mode. Expected x or y but got " << token_to_string(*next);
+ throw AssemblyException(source_files, next->source_location, AssemblyErrorCodes::InvalidIndexRegisterInAddressingMode, ss.str());
+ }
+ // <expression>
+ // ZeroPageAddr
+ // AbsoluteAddr
+ // RelativeAddr
+ return 1 << static_cast<int>(AddressingModeType::ZeroPageAddr) |
+ 1 << static_cast<int>(AddressingModeType::AbsoluteAddr) |
+ 1 << static_cast<int>(AddressingModeType::RelativeAddr);
+}
+
+void Processor6502::print_addressing_modes(std::stringstream &ss, uint16_t addressing_mode_mask)
+{
+ for (int i = 0; i < static_cast<int>(AddressingModeType::NumAddressingModes); ++i) { // bit i of the mask corresponds to AddressingModeType value i
+ if (((1 << i) & addressing_mode_mask) != 0)
+ ss << "\n " << to_string(static_cast<AddressingModeType>(i)); // each set mode is written on its own line
+ }
+}
+
+const Token *Processor6502::parse_instruction(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *t, uint8_t /*InstructionType*/ instruction_index) const
+{
+ InstructionType instruction = static_cast<InstructionType>(instruction_index);
+ const Token *begin_token = t; // kept so the output token can record where the instruction started
+
+ t = parser.consume_next_token(); // instruction token
+ t = parser.skip_spaces_and_tabs(t);
+
+ // check for optional label to define at the instruction argument address
+ bool has_label_definition = false;
+ bool global_label = false;
+ uint64_t symbol_hash_label = 0;
+ const Token *label_token = t; // used for the label's source location in the output token and in errors
+ if (parser.label_definition_follows(t)) {
+ has_label_definition = true;
+
+ t = parser.parse_symbol_definition(t, global_label, symbol_hash_label);
+ t = parser.consume_next_token(); // the colon
+ t = parser.skip_spaces_and_tabs(t);
+ }
+
+ // determine which addressing modes the source text could mean and intersect with those the instruction supports
+ uint16_t parsed_addressing_modes = try_parse_addressing_mode(parser, source_files, t);
+ uint16_t possible_addressing_modes = addressing_modes(instruction);
+ uint16_t selected_addressing_modes = parsed_addressing_modes & possible_addressing_modes;
+ if (selected_addressing_modes == 0) {
+ std::stringstream ss;
+ ss << "Invalid addressing mode used. Code indicates one of the following:";
+ print_addressing_modes(ss, parsed_addressing_modes);
+ ss << "\nbut possible addressing modes for " << to_string(instruction) << " are:";
+ print_addressing_modes(ss, possible_addressing_modes);
+ throw AssemblyException(source_files, t->source_location, AssemblyErrorCodes::InvalidAddressingMode, ss.str());
+ }
+
+ // store the instruction with addressing mode mask in the output
+ InstructionToken &instruction_token = parser.reserve_token_space<InstructionToken>();
+ instruction_token.type = SyntaxTokenType::Instruction;
+ instruction_token.processor = ProcessorType::Mos6502;
+ instruction_token.size = sizeof(InstructionToken);
+ instruction_token.instruction = instruction;
+ instruction_token.addressing_modes = selected_addressing_modes;
+ instruction_token.source_location = begin_token->source_location;
+ instruction_token.has_instruction_data_label = has_label_definition;
+ instruction_token.global_data_label = global_label;
+ instruction_token.data_label_symbol_hash = symbol_hash_label;
+ instruction_token.address_label_location = label_token->source_location;
+
+ // now we should be able to parse the operand of the instruction
+ if (selected_addressing_modes == Imp) {
+ if (has_label_definition) {
+ std::stringstream ss;
+ ss << "Implied addressing modes cannot have label to instruction data. Add a newline or a semicolon before the label to resolve this.";
+ throw AssemblyException(source_files, label_token->source_location, AssemblyErrorCodes::AddressingModeCannotHaveDataLabel, ss.str());
+ }
+ return t;
+ }
+ if (selected_addressing_modes == Imm) {
+ assert(parser.is_operator(t, OperatorType::Hash));
+ constexpr bool end_at_unmatched_parenthesis = false;
+ constexpr bool end_at_newline = true;
+ return parser.parse_and_output_expression(parser.consume_next_token(), end_at_unmatched_parenthesis, end_at_newline);
+ }
+ if ((selected_addressing_modes & (Ind | IndX | IndY)) != 0) {
+ // an indirect mode
+ if ((selected_addressing_modes & IndX) != 0) {
+ // skip parenthesis, parse address expression, skip comma and x
+ assert(parser.is_operator(t, OperatorType::LeftParenthesis));
+ constexpr bool end_at_unmatched_parenthesis = false;
+ constexpr bool end_at_newline = true;
+ t = parser.parse_and_output_expression(parser.consume_next_token(), end_at_unmatched_parenthesis, end_at_newline);
+ assert(parser.is_operator(t, OperatorType::Comma));
+ t = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ assert(t->type == TokenType::ProcessorKeyword && static_cast<ProcessorKeywordType>(t->processor_keyword_index) == ProcessorKeywordType::X);
+ t = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ assert(parser.is_operator(t, OperatorType::RightParenthesis));
+ return parser.consume_next_token();
+ }
+ // parse address expression; t still points at the left parenthesis, so the parentheses are parsed as part of the expression
+ constexpr bool end_at_unmatched_parenthesis = false;
+ constexpr bool end_at_newline = true;
+ t = parser.parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
+
+ if ((selected_addressing_modes & IndY) != 0) {
+ // skip comma and y
+ assert(parser.is_operator(t, OperatorType::Comma));
+ t = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ assert(t->type == TokenType::ProcessorKeyword && static_cast<ProcessorKeywordType>(t->processor_keyword_index) == ProcessorKeywordType::Y);
+ t = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ }
+ return t;
+ }
+
+ // parse address expression
+ constexpr bool end_at_unmatched_parenthesis = false;
+ constexpr bool end_at_newline = true;
+ t = parser.parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
+
+ if ((selected_addressing_modes & (Zpx | AbsX)) != 0) {
+ // skip comma and x
+ assert(parser.is_operator(t, OperatorType::Comma));
+ t = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ assert(t->type == TokenType::ProcessorKeyword && static_cast<ProcessorKeywordType>(t->processor_keyword_index) == ProcessorKeywordType::X);
+ t = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ }
+ if ((selected_addressing_modes & (Zpy | AbsY)) != 0) {
+ // skip comma and y
+ assert(parser.is_operator(t, OperatorType::Comma));
+ t = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ assert(t->type == TokenType::ProcessorKeyword && static_cast<ProcessorKeywordType>(t->processor_keyword_index) == ProcessorKeywordType::Y);
+ t = parser.skip_spaces_and_tabs(parser.consume_next_token());
+ }
+
+ return t;
+}
+
+void Processor6502::generate_subroutine_instruction(Assembler &assembler, bool generate, int32_t address, const SourceLocation &source_location) const
+{
+ // instructions are only allowed within code sections.
+ bool instructions_allowed = assembler.in_code_section();
+ if (UNLIKELY(!instructions_allowed)) {
+ // this is an unrecoverable error
+ std::stringstream ss;
+ ss << "Instructions must be in a code section.";
+ assembler.report_fatal_error(source_location, AssemblyErrorCodes::CodeMustBeInCodeSection, ss.str());
+ }
+
+ // recursive data generation may not be safe
+ if (assembler._data_generation_depth != 0) {
+ // this is an unrecoverable error
+ std::stringstream ss;
+ ss << "Recursive data generation isn't allowed.";
+ assembler.report_fatal_error(source_location, AssemblyErrorCodes::RecursiveDataGenerationNotAllowed, ss.str());
+ }
+
+ ScopeCounter<uint32_t> sc(assembler._data_generation_depth); // NOTE(review): presumably increments the depth for the duration of this scope — confirm in ScopeCounter
+
+ if (generate && address < 0) {
+ std::stringstream ss;
+ ss << "Addressing mode needs a positive argument. Argument value was evaluated to " << address << ".";
+ assembler.report_error(source_location, AssemblyErrorCodes::AddressingModeRequiresPositiveArgument, ss.str());
+ }
+
+ if (assembler._multi_bank_mode) {
+ // in this mode, addresses get truncated to 16 bits to support memory banks
+ address &= 0xffff;
+ }
+
+ if (generate) {
+ if (address > 65535) {
+ std::stringstream ss;
+ ss << "Addressing mode needs a word size argument. Argument was evaluated to " << address << ".";
+ assembler.report_error(source_location, AssemblyErrorCodes::AddressingModeRequiresWordSizeArgument, ss.str());
+ }
+ Section::Contents ending_instruction = Section::Contents::ContinueExecutionInstruction;
+ auto &data = assembler._section->generated_data(ending_instruction);
+ data.push_back(opcode(InstructionType::Jsr, AddressingModeType::AbsoluteAddr));
+ data.push_back(static_cast<uint8_t>(address)); // low byte of the address first
+ data.push_back(static_cast<uint8_t>(address >> 8)); // then the high byte
+ if (assembler._hex_source_writer != nullptr) {
+ assembler._hex_source_writer->write_data(static_cast<uint32_t>(assembler._program_counter.integer_value), &data[data.size() - 3], 3, source_location.file_index, source_location.row, source_location.row + 1);
+ }
+ }
+ assembler._program_counter.integer_value += 3; // opcode plus two address bytes; advanced whether or not data was generated
+}
+
+void Processor6502::generate_instruction_data_label(Assembler &assembler, bool generate, bool export_enabled, const InstructionToken &token, int address, int offset, uint8_t size) const
+{
+ // exporting local variables is not allowed
+ if (generate && export_enabled && !token.global_data_label) {
+ std::stringstream ss;
+ ss << assembler.variable_name(token.data_label_symbol_hash, token.global_data_label) << " cannot be exported since it is local.";
+ assembler.report_error(token.address_label_location, AssemblyErrorCodes::ExportingLocalIsNotAllowed, ss.str());
+ }
+
+ if (assembler.create_label(generate, token.data_label_symbol_hash, token.global_data_label, StorageType::Constant, token.address_label_location)) {
+ Value &new_label = assembler._current_pass.values.back(); // NOTE(review): presumably the value create_label just added — confirm
+ if (size == 1) {
+ assembler.set_byte_offset(new_label, address, offset);
+ } else if (size == 2) {
+ assembler.set_word_offset(new_label, address, offset);
+ } else {
+ assert(false); // only sizes 1 and 2 are handled
+ }
+ new_label.set_contains_address(true);
+ if (export_enabled) {
+ new_label.set_is_public(true);
+ }
+ }
+}
+
+const SyntaxToken *Processor6502::parse_instruction(Assembler &assembler, bool generate, const SyntaxToken *t, bool export_enabled) const
+{
+ assert(t->type == SyntaxTokenType::Instruction);
+ const InstructionToken &instruction_token = *static_cast<const InstructionToken *>(t);
+
+ // instructions are only allowed within code sections.
+ bool instructions_allowed = assembler.in_code_section();
+ if (UNLIKELY(!instructions_allowed)) {
+ // this is an unrecoverable error
+ std::stringstream ss;
+ ss << "Instructions must be in a code section.";
+ assembler.report_fatal_error(instruction_token.source_location, AssemblyErrorCodes::CodeMustBeInCodeSection, ss.str());
+ }
+
+ // recursive data generation may not be safe
+ if (assembler._data_generation_depth != 0) {
+ // this is an unrecoverable error
+ std::stringstream ss;
+ ss << "Recursive data generation isn't allowed.";
+ assembler.report_fatal_error(instruction_token.source_location, AssemblyErrorCodes::RecursiveDataGenerationNotAllowed, ss.str());
+ }
+ ScopeCounter<uint32_t> sc(assembler._data_generation_depth);
+
+ InstructionType instruction = instruction_token.instruction;
+ uint16_t addr_mode = instruction_token.addressing_modes; // mask of candidate modes stored by the syntax pass
+ Section::Contents ending_instruction = is_ending_instruction(instruction) ? Section::Contents::EndExecutionInstruction : Section::Contents::ContinueExecutionInstruction;
+
+ t = assembler.consume_next_token(); // instruction
+
+ // in the generation pass, the program counter is guaranteed to be an integer value
+ // so there is no need to verify this
+
+ if (addr_mode == AddressingModeMask::Imp) {
+ if (generate) {
+ auto &data = assembler._section->generated_data(ending_instruction);
+ data.push_back(opcode(instruction, AddressingModeType::Implied));
+ if (assembler._hex_source_writer != nullptr) {
+ assembler._hex_source_writer->write_data(static_cast<uint32_t>(assembler._program_counter.integer_value), &data[data.size() - 1], 1, instruction_token.source_location.file_index, instruction_token.source_location.row, instruction_token.source_location.row + 1);
+ }
+ }
+ ++assembler._program_counter.integer_value;
+ return t;
+ }
+
+ // in all other instructions, we have to parse the expression for the argument
+ const ExpressionToken *expr = static_cast<const ExpressionToken *>(t);
+ const Value argument = assembler.evaluate_expression(generate, t);
+ t = assembler.consume_next_token();
+
+ // The argument is guaranteed to be a valid type in the generation pass.
+ // In an assembly pass this can be Unknown.
+ int32_t argument_value = 0;
+
+ // in case of an assembly pass, unknown will be converted to 0, which is ok for all addressing modes except relative
+ if (assembler.is_integer(argument))
+ argument_value = assembler.dereference_integer(argument);
+ else if (!assembler.is_unknown(argument)) {
+ if (generate) {
+ std::stringstream ss;
+ ss << "Addressing mode needs an integer value. Argument type was " << to_string(assembler.type_of_value(argument)) << ".";
+ assembler.report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresIntegerArgument, ss.str());
+ }
+ }
+
+ if (addr_mode == AddressingModeMask::Imm) {
+ if (generate) {
+ // handle the case where the value doesn't fit in a byte (both signed and unsigned byte ranges are accepted)
+ if (argument_value < -128 || argument_value > 255) {
+ std::stringstream ss;
+ ss << "Addressing mode needs a byte size argument. Argument was evaluated to " << argument_value << ".";
+ assembler.report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresByteSizeArgument, ss.str());
+ }
+ auto &data = assembler._section->generated_data(ending_instruction);
+ data.push_back(opcode(instruction, AddressingModeType::Immediate));
+ data.push_back(static_cast<uint8_t>(argument_value));
+ if (assembler._hex_source_writer != nullptr) {
+ assembler._hex_source_writer->write_data(static_cast<uint32_t>(assembler._program_counter.integer_value), &data[data.size() - 2], 2, instruction_token.source_location.file_index, instruction_token.source_location.row, instruction_token.source_location.row + 1);
+ }
+ }
+ if (UNLIKELY(instruction_token.has_instruction_data_label)) {
+ generate_instruction_data_label(assembler, generate, export_enabled, instruction_token, assembler._program_counter.integer_value + 1, 0, 1);
+ }
+
+ assembler._program_counter.integer_value += 2;
+ return t;
+ }
+
+ if (generate && argument_value < 0) {
+ std::stringstream ss;
+ ss << "Addressing mode needs a positive argument. Argument value was evaluated to " << argument_value << ".";
+ assembler.report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresPositiveArgument, ss.str());
+ }
+
+ if (assembler._multi_bank_mode && addr_mode != AddressingModeMask::Rel) {
+ // in this mode, addresses get truncated to 16 bits to support memory banks
+ argument_value &= 0xffff;
+ }
+
+ if (addr_mode == (AddressingModeMask::Zp | AddressingModeMask::Abs)
+ || addr_mode == (AddressingModeMask::Zpx | AddressingModeMask::AbsX)
+ || addr_mode == (AddressingModeMask::Zpy | AddressingModeMask::AbsY))
+ {
+ // both a zero page and an absolute candidate remain: narrow the mask to a single mode depending on whether the argument exceeds 255
+ addr_mode = argument_value > 255 ? select_word_mode(addr_mode) : select_byte_mode(addr_mode);
+ }
+
+ if (addr_mode == AddressingModeMask::Zp
+ || addr_mode == AddressingModeMask::Zpx
+ || addr_mode == AddressingModeMask::Zpy
+ || addr_mode == AddressingModeMask::IndX
+ || addr_mode == AddressingModeMask::IndY)
+ {
+ if (generate) {
+ if (argument_value > 255) {
+ std::stringstream ss;
+ ss << "Addressing mode needs a byte size argument. Argument was evaluated to " << argument_value << ".";
+ assembler.report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresByteSizeArgument, ss.str());
+ }
+ auto &data = assembler._section->generated_data(ending_instruction);
+ data.push_back(opcode(instruction, mask_to_addressing_mode(addr_mode)));
+ data.push_back(static_cast<uint8_t>(argument_value));
+ if (assembler._hex_source_writer != nullptr) {
+ assembler._hex_source_writer->write_data(static_cast<uint32_t>(assembler._program_counter.integer_value), &data[data.size() - 2], 2, instruction_token.source_location.file_index, instruction_token.source_location.row, instruction_token.source_location.row + 1);
+ }
+ }
+ if (UNLIKELY(instruction_token.has_instruction_data_label)) {
+ generate_instruction_data_label(assembler, generate, export_enabled, instruction_token, assembler._program_counter.integer_value + 1, 0, 1);
+ }
+ assembler._program_counter.integer_value += 2;
+ return t;
+ }
+
+ if (addr_mode == AddressingModeMask::Abs
+ || addr_mode == AddressingModeMask::AbsX
+ || addr_mode == AddressingModeMask::AbsY
+ || addr_mode == AddressingModeMask::Ind)
+ {
+ if (generate) {
+ if (argument_value > 65535) {
+ std::stringstream ss;
+ ss << "Addressing mode needs a word size argument. Argument was evaluated to " << argument_value << ".";
+ assembler.report_error(expr->source_location, AssemblyErrorCodes::AddressingModeRequiresWordSizeArgument, ss.str());
+ }
+ auto &data = assembler._section->generated_data(ending_instruction);
+ data.push_back(opcode(instruction, mask_to_addressing_mode(addr_mode)));
+ data.push_back(static_cast<uint8_t>(argument_value)); // low byte of the address first
+ data.push_back(static_cast<uint8_t>(argument_value >> 8)); // then the high byte
+ if (assembler._hex_source_writer != nullptr) {
+ assembler._hex_source_writer->write_data(static_cast<uint32_t>(assembler._program_counter.integer_value), &data[data.size() - 3], 3, instruction_token.source_location.file_index, instruction_token.source_location.row, instruction_token.source_location.row + 1);
+ }
+ }
+ if (UNLIKELY(instruction_token.has_instruction_data_label)) {
+ generate_instruction_data_label(assembler, generate, export_enabled, instruction_token, assembler._program_counter.integer_value + 1, 0, 2);
+ }
+ assembler._program_counter.integer_value += 3;
+ return t;
+ }
+
+ // only relative address left
+ assert(addr_mode == AddressingModeMask::Rel);
+
+ if (generate) {
+ int32_t reference_addr = assembler._program_counter.integer_value + 2; // move past the instruction
+ int32_t relative_addr = argument_value - reference_addr; // offset is measured from the address after this 2 byte instruction
+ if (relative_addr < -128 || relative_addr > 127) {
+ std::stringstream ss;
+ ss << "Relative address out of range. Offset is " << relative_addr << " and needs to be in a [-128..127] range.";
+ assembler.report_error(expr->source_location, AssemblyErrorCodes::RelativeAddressOutOfRange, ss.str());
+ }
+ auto &data = assembler._section->generated_data(ending_instruction);
+ data.push_back(opcode(instruction, mask_to_addressing_mode(addr_mode)));
+ data.push_back(static_cast<uint8_t>(relative_addr));
+ if (assembler._hex_source_writer != nullptr) {
+ assembler._hex_source_writer->write_data(static_cast<uint32_t>(assembler._program_counter.integer_value), &data[data.size() - 2], 2, instruction_token.source_location.file_index, instruction_token.source_location.row, instruction_token.source_location.row + 1);
+ }
+ }
+ if (UNLIKELY(instruction_token.has_instruction_data_label)) {
+ generate_instruction_data_label(assembler, generate, export_enabled, instruction_token, assembler._program_counter.integer_value + 1, 0, 1);
+ }
+ assembler._program_counter.integer_value += 2;
+ return t;
+}
+
+ }
+}
A => jasm/processor/6502/processor_6502.h +37 -0
@@ 0,0 1,37 @@
+#pragma once
+
+#include <processor/processor.h>
+
+namespace jasm
+{
+ namespace mos6502
+ {
+
+struct InstructionToken;
+
+class Processor6502 : public Processor // Processor implementation declared in the mos6502 namespace.
+{
+public:
+ virtual void register_processor_keywords(std::vector<std::string> &keywords) override; // tokenize group, see Processor::register_processor_keywords
+ virtual void register_processor_instructions(bool pseudo_instructions) override; // tokenize group, see Processor::register_processor_instructions
+ virtual bool allow_processor_keyword_with_prim(uint64_t &keyword_hash) const override; // tokenize group, see Processor::allow_processor_keyword_with_prim
+
+ virtual std::string token_to_string(const Token &t) const override; // syntax group
+ virtual const Token *parse_instruction(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *t, uint8_t /*InstructionType*/ instruction_index) const override; // syntax group overload, operates on Token
+
+ virtual void generate_subroutine_instruction(Assembler &assembler, bool generate, int32_t address, const SourceLocation &source_location) const override; // assemble group, see Processor::generate_subroutine_instruction
+ virtual const SyntaxToken *parse_instruction(Assembler &assembler, bool generate, const SyntaxToken *t, bool export_enabled) const override; // assemble group overload, operates on SyntaxToken
+
+private:
+ /// Try to parse as much as needed to determine all possible addressing modes.
+ /// @return A mask with possible addressing modes.
+ uint16_t try_parse_addressing_mode(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *t) const;
+
+ static void print_addressing_modes(std::stringstream &ss, uint16_t addressing_mode_mask); // NOTE(review): presumably writes the modes set in the mask to ss - confirm against the definition
+
+ void generate_instruction_data_label(Assembler &assembler, bool generate, bool export_enabled, const InstructionToken &token, int address, int offset, uint8_t size) const; // NOTE(review): presumably defines the optional label pointing at the instruction data - confirm against the definition
+};
+
+
+ }
+}
M jasm/parsing/processor_keywords_6502.cpp => jasm/processor/6502/processor_keywords_6502.cpp +5 -6
@@ 1,13 1,13 @@
#include "pch.h"
-#include <parsing/processor_keywords_6502.h>
-
-#if SUPPORTS(M6502)
+#include <processor/6502/processor_keywords_6502.h>
namespace jasm
{
+ namespace mos6502
+ {
-const std::string_view to_string(ProcessorKeywordType type)
+std::string_view to_string(ProcessorKeywordType type)
{
static const std::string_view names[] = {
// instruction registers
@@ 20,6 20,5 @@ const std::string_view to_string(Process
return names[static_cast<size_t>(type)];
}
+ } // namespace mos6502
} // namespace jasm
-
-#endif
M jasm/parsing/processor_keywords_6502.h => jasm/processor/6502/processor_keywords_6502.h +4 -5
@@ 1,9 1,9 @@
#pragma once
-#if SUPPORTS(M6502)
-
namespace jasm
{
+ namespace mos6502
+ {
/// @addtogroup tokenize
/// @{
@@ 17,10 17,9 @@ enum class ProcessorKeywordType : uint8_
NumTypes,
};
-const std::string_view to_string(ProcessorKeywordType type);
+std::string_view to_string(ProcessorKeywordType type);
/// @}
+ } // namespace mos6502
} // namespace jasm
-
-#endif
M jasm/assembling/instructions.h => jasm/processor/instructions.h +9 -2
@@ 1,4 1,11 @@
#pragma once
-#include <assembling/instructions_6502.h>
-#include <assembling/instructions_z80.h>
+#include <processor/6502/instructions_6502.h>
+#include <processor/z80/instructions_z80.h>
+
+namespace jasm
+{
+
+ constexpr uint8_t invalid_instruction = 0xff;
+
+}
M jasm/assembling/instructions_common.h => jasm/processor/instructions_common.h +0 -0
A => jasm/processor/processor.cpp +174 -0
@@ 0,0 1,174 @@
+#include "pch.h"
+
+#include <core/collections/array_helper.h>
+#include <processor/6502/processor_6502.h>
+#include <processor/processor.h>
+#include <processor/processor_unspecified.h>
+#include <processor/z80/processor_z80.h>
+#include <string_view>
+#include <tokenize/keywords.h>
+#include <tokenize/operators.h>
+#include <tokenize/types.h>
+
+namespace jasm
+{
+
+namespace {
+ static const std::string_view names[] = { // display names indexed by ProcessorType; the element count is checked against ProcessorType::NumProcessors in to_string()
+ std::string_view("unspecified"),
+ std::string_view("6502"),
+ std::string_view("6510"),
+ std::string_view("8502"),
+ std::string_view("z80"),
+ };
+}
+
+std::string_view to_string(ProcessorType p) // Returns the display name stored in names[] for processor type p.
+{
+ static_assert(core::array_num_elements(names) == static_cast<size_t>(ProcessorType::NumProcessors), "Number of processors doesn't match number of strings"); // compile time guard that names[] and the enum stay in sync
+
+ assert(p < ProcessorType::NumProcessors); // NumProcessors and above would index past the end of names[]
+ return names[static_cast<size_t>(p)];
+}
+
+bool is_processor(const std::string_view &processor_name, ProcessorType &processor) // Looks up a processor by display name; @a processor is only written on a match.
+{
+ // slot 0 holds the "unspecified" entry, which is never matched by name
+ constexpr auto num_names = static_cast<uint32_t>(ProcessorType::NumProcessors);
+ uint32_t index = 1;
+ while (index < num_names && names[index] != processor_name) { ++index; }
+ if (index == num_names) { return false; }
+ processor = static_cast<ProcessorType>(index);
+ return true;
+}
+
+Processor::Processor() = default; // nothing to set up here; the lookup tables are filled by init()
+
+Processor::~Processor() = default; // declared virtual in the class so derived processors are destroyed correctly through a Processor pointer
+
+void Processor::init(bool pseudo_instructions) // Fills the keyword, operator, token and instruction lookup tables; pseudo_instructions is forwarded to register_processor_instructions().
+{
+ // add token data for keywords
+ std::vector<std::string> keywords;
+ add_type_tokens<KeywordType>(keywords, TokenType::Keyword);
+ add_type_tokens<TypenameType>(keywords, TokenType::Typename);
+ add_type_tokens<BooleanType>(keywords, TokenType::Boolean);
+ register_processor_keywords(keywords); // lets the concrete processor append its own keywords
+ _keyword_finder.set_keywords(keywords);
+
+ // the extra operators that are not used in the tokenizer are not included here!
+ OperatorType operators_types[] = {
+ OperatorType::Period,
+ OperatorType::BooleanNot,
+ OperatorType::BitwiseNot,
+ OperatorType::Multiply,
+ OperatorType::Divide,
+ OperatorType::Plus,
+ OperatorType::Minus,
+ OperatorType::LeftShift,
+ OperatorType::RightShift,
+ OperatorType::Less,
+ OperatorType::Greater,
+ OperatorType::LessOrEqual,
+ OperatorType::GreaterOrEqual,
+ OperatorType::Equal,
+ OperatorType::NotEqual,
+ OperatorType::BitwiseAnd,
+ OperatorType::BitwiseXor,
+ OperatorType::BitwiseOr,
+ OperatorType::BooleanAnd,
+ OperatorType::BooleanOr,
+ OperatorType::Assignment,
+ OperatorType::AssignmentAdd,
+ OperatorType::AssignmentSubtract,
+ OperatorType::AssignmentMultiply,
+ OperatorType::AssignmentDivide,
+ OperatorType::AssignmentBooleanAnd,
+ OperatorType::AssignmentBooleanOr,
+ OperatorType::AssignmentBitwiseAnd,
+ OperatorType::AssignmentBitwiseOr,
+ OperatorType::AssignmentBitwiseXor,
+ OperatorType::AssignmentLeftShift,
+ OperatorType::AssignmentRightShift,
+ OperatorType::Colon,
+ OperatorType::Namespace,
+ OperatorType::Semicolon,
+ OperatorType::Comma,
+ OperatorType::Hash,
+ OperatorType::Percent,
+ OperatorType::LeftParenthesis,
+ OperatorType::RightParenthesis,
+ OperatorType::LeftBracket,
+ OperatorType::RightBracket,
+ OperatorType::LeftCurly,
+ OperatorType::RightCurly,
+ OperatorType::Increment,
+ OperatorType::Decrement,
+ OperatorType::At,
+ OperatorType::Ellipsis,
+ };
+
+ constexpr int num_generated_operators = 10; // number of OperatorType values deliberately left out of the list above
+ static_assert(sizeof(operators_types) / sizeof(operators_types[0]) + num_generated_operators == static_cast<int>(OperatorType::NumTypes), "Number of types doesn't match number of definitions"); // fails to compile if an operator is added to the enum without updating the list or the constant
+
+ std::vector<std::string> operators;
+ for (size_t i = 0; i < sizeof(operators_types) / sizeof(operators_types[0]); ++i) {
+ TokenData token { std::string(to_string(operators_types[i])), TokenType::Operator, static_cast<uint8_t>(operators_types[i]) };
+ operators.push_back(token.name);
+ _hash_to_token.insert(core::murmur_hash3_string_x64_64(token.name)) = token; // operators share the hash-to-token table with the keywords
+ }
+
+ _operator_finder.set_keywords(operators);
+
+ register_processor_instructions(pseudo_instructions);
+}
+
+const TokenData &Processor::hash_to_token(uint64_t hash) const // Returns the TokenData registered in _hash_to_token for a token name hash.
+{
+ auto it = _hash_to_token.find(hash);
+ assert(it != _hash_to_token.end()); // the hash must be registered; with asserts compiled out an unknown hash dereferences end()
+ return it->second;
+}
+
+bool Processor::is_instruction(uint64_t symbol_hash, uint8_t &instruction_index) const // Reports whether symbol_hash is a registered instruction name hash.
+{
+ // instruction_index is only written when the hash is found
+ const auto entry = _instructions.find(symbol_hash);
+ const bool found = entry != _instructions.end();
+ if (found) {
+ instruction_index = entry->second;
+ }
+ return found;
+}
+
+ProcessorCatalogue::ProcessorCatalogue(bool pseudo_instructions) // Creates one instance of each processor implementation and initializes it.
+{
+ _unspecified = std::make_unique<unspecified::ProcessorUnspecified>();
+ _unspecified->init(pseudo_instructions); // init() calls the pure virtual register_* hooks, so it is a separate step after construction
+ _mos6502 = std::make_unique<mos6502::Processor6502>();
+ _mos6502->init(pseudo_instructions);
+ _z80 = std::make_unique<z80::ProcessorZ80>();
+ _z80->init(pseudo_instructions);
+}
+
+const Processor *ProcessorCatalogue::processor(ProcessorType type) const // Returns the processor instance serving @a type; never returns null after construction.
+{
+ switch (type) { // every enumerator stays listed so the compiler can warn when a new ProcessorType is added
+ case ProcessorType::Mos6502:
+ case ProcessorType::Mos6510:
+ case ProcessorType::Mos8502:
+ return _mos6502.get(); // all 6502 family members share one implementation
+
+ case ProcessorType::Zilog80:
+ return _z80.get();
+ case ProcessorType::Unspecified:
+ case ProcessorType::NumProcessors:
+ break;
+ }
+ return _unspecified.get(); // also reached for values outside the enumerators, where the function previously fell off its end (undefined behaviour)
+}
+
+}
A => jasm/processor/processor.h +138 -0
@@ 0,0 1,138 @@
+#pragma once
+
+#include <core/collections/hash_map.h>
+#include <core/collections/null_hash_compare.h>
+#include <core/strings/murmur_hash.h>
+#include <tokenize/keyword_finder.h>
+#include <tokenize/operators.h>
+#include <tokenize/token_type.h>
+
+namespace jasm
+{
+
+class Assembler;
+class SyntaxParser;
+struct SourceLocation;
+struct SyntaxToken;
+struct Token;
+
+enum class ProcessorType : uint8_t // Identifies a processor; the order must match the names[] table in processor.cpp.
+{
+ Unspecified, // named "unspecified"; is_processor() never returns it
+ Mos6502, // Mos6502, Mos6510 and Mos8502 are all served by the same Processor6502 instance in ProcessorCatalogue
+ Mos6510,
+ Mos8502,
+ Zilog80,
+ NumProcessors // count of the entries above, not a processor
+};
+
+std::string_view to_string(ProcessorType p);
+
+bool is_processor(const std::string_view &processor_name, ProcessorType &processor);
+
+struct TokenData // Describes one keyword or operator token; stored in Processor::_hash_to_token keyed by the hash of name.
+{
+ std::string name; // the token text that is hashed
+ TokenType token_type; // which token category the text belongs to
+ uint8_t token_type_index; // value of the category specific enum (e.g. OperatorType) cast to uint8_t
+};
+
+/// The interface to a processor implementation.
+class Processor
+{
+public:
+ Processor();
+ virtual ~Processor();
+
+ void init(bool pseudo_instructions); // fills all lookup tables; calls the register_* hooks below, so it cannot be part of the constructor
+
+ const KeywordFinder &keywords() const // finder loaded with the keyword names by init()
+ {
+ return _keyword_finder;
+ }
+
+ const KeywordFinder &operators() const // finder loaded with the operator names by init()
+ {
+ return _operator_finder;
+ }
+
+ const TokenData &hash_to_token(uint64_t hash) const; // the hash must be registered (asserted in the definition)
+
+ bool is_instruction(uint64_t symbol_hash, uint8_t &instruction_index) const; // instruction_index is only written when true is returned
+
+ /// @addtogroup tokenize
+ /// @{
+
+ /// Implementations are expected to append their processor keyword names to @a keywords
+ /// and register the matching TokenData in @a _hash_to_token (see add_type_tokens).
+ virtual void register_processor_keywords(std::vector<std::string> &keywords) = 0;
+ /// This registers all instructions into @a _instructions.
+ virtual void register_processor_instructions(bool pseudo_instructions) = 0;
+
+ /// This is called during tokenizing in case a keyword with a following prim (') character
+ /// is found. On Z80 this might need to be translated to a new keyword, including the prim.
+ /// @return True if the keyword is replaced with a keyword including a trailing prim.
+ virtual bool allow_processor_keyword_with_prim(uint64_t &keyword_hash) const = 0;
+
+ /// @}
+
+ /// @addtogroup syntax
+ /// @{
+
+ virtual std::string token_to_string(const Token &t) const = 0;
+
+ virtual const Token *parse_instruction(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *t, uint8_t /*InstructionType*/ instruction_index) const = 0;
+
+ /// @}
+
+ /// @addtogroup assemble
+ /// @{
+
+ /// This is called when generating a subroutine call instruction when the macro style call mechanism is used.
+ virtual void generate_subroutine_instruction(Assembler &assembler, bool generate, int32_t address, const SourceLocation &source_location) const = 0;
+
+ /// This parses an instruction syntax token and optionally generates the output data.
+ virtual const SyntaxToken *parse_instruction(Assembler &assembler, bool generate, const SyntaxToken *t, bool export_enabled) const = 0;
+
+ /// @}
+
+protected:
+ template<typename T>
+ void add_type_tokens(std::vector<std::string> &names, TokenType type) { // registers every value of enum T below T::NumTypes: its name is appended to names and its TokenData stored in _hash_to_token
+ for (uint8_t i = 0; i < static_cast<uint8_t>(T::NumTypes); ++i) {
+ TokenData token { std::string(to_string(static_cast<T>(i))), type, i };
+ names.push_back(token.name);
+ _hash_to_token.insert(core::murmur_hash3_string_x64_64(token.name)) = token;
+ }
+ }
+
+ /// This is used to find a keyword in a string.
+ KeywordFinder _keyword_finder;
+ /// This is used to find an operator in a string.
+ KeywordFinder _operator_finder;
+ /// A map to look up token details based on a hash of the token name.
+ core::HashMap<uint64_t, TokenData, core::NullHashCompare<uint64_t>> _hash_to_token;
+ /// Lookup table from hashed instruction name to index convertible to InstructionType.
+ core::HashMap<uint64_t, uint8_t, core::NullHashCompare<uint64_t>> _instructions;
+
+ // options
+ static const size_t max_addressing_mode_printout_lines = 11; ///< Longer lists than this will not be printed at all.
+};
+
+/// This class has instances of all processors and provides easy access to them.
+class ProcessorCatalogue
+{
+public:
+ ProcessorCatalogue(bool pseudo_instructions); // pseudo_instructions is passed to init() of every processor
+
+ /// Get a pointer to a specific processor type. The pointer is valid until
+ /// the catalogue is destroyed.
+ const Processor *processor(ProcessorType type) const;
+
+private:
+ std::unique_ptr<Processor> _unspecified; // returned for ProcessorType::Unspecified and NumProcessors
+ std::unique_ptr<Processor> _mos6502; // returned for Mos6502, Mos6510 and Mos8502
+ std::unique_ptr<Processor> _z80; // returned for Zilog80
+};
+
+}
A => jasm/processor/processor_unspecified.cpp +87 -0
@@ 0,0 1,87 @@
+#include "pch.h"
+
+#include <processor/instructions.h>
+#include <processor/processor_unspecified.h>
+#include <sstream>
+#include <tokenize/tokens.h>
+
+namespace jasm
+{
+ namespace unspecified
+ {
+
+void ProcessorUnspecified::register_processor_keywords(std::vector<std::string> &/*keywords*/) // Intentionally empty: no processor keywords are added.
+{
+}
+
+void ProcessorUnspecified::register_processor_instructions(bool /*pseudo_instructions*/) // Intentionally empty: no instructions are registered, so _instructions stays empty.
+{
+}
+
+bool ProcessorUnspecified::allow_processor_keyword_with_prim(uint64_t &/*keyword_hash*/) const // Always returns false and leaves the hash untouched.
+{
+ // no prim translation
+ return false;
+}
+
+std::string ProcessorUnspecified::token_to_string(const Token &t) const // Builds a description of a token: its kind, followed by the specific value for boolean, operator, keyword and typename tokens.
+{
+ std::stringstream ss;
+ if (jasm::is_instruction(t)) { // qualified to call the free function in namespace jasm
+ ss << "instruction";
+ } else {
+ ss << to_string(t.type);
+ }
+
+ switch (t.type) {
+ case TokenType::Whitespace: // these kinds get no extra detail
+ case TokenType::Char:
+ case TokenType::Integer:
+ case TokenType::Float:
+ case TokenType::String:
+ case TokenType::End:
+ case TokenType::Newline:
+ case TokenType::Processor:
+ case TokenType::Symbol:
+ case TokenType::ProcessorKeyword:
+ case TokenType::NumTypes:
+ break;
+
+ case TokenType::Boolean:
+ ss << " " << to_string(t.boolean_index);
+ break;
+
+ case TokenType::Operator:
+ ss << " " << to_string(t.operator_index);
+ break;
+
+ case TokenType::Keyword:
+ ss << " " << to_string(t.keyword_index);
+ break;
+
+ case TokenType::Typename:
+ ss << " " << to_string(t.typename_index);
+ break;
+ }
+ return ss.str();
+}
+
+const Token *ProcessorUnspecified::parse_instruction(SyntaxParser &/*parser*/, const std::vector<std::string> &/*source_files*/, const Token *t, uint8_t /*InstructionType*/ /*instruction_index*/) const // Syntax pass stub: asserts, and returns t unchanged when asserts are compiled out.
+{
+ assert(false); // this should never be called
+ return t;
+}
+
+void ProcessorUnspecified::generate_subroutine_instruction(Assembler &/*assembler*/, bool /*generate*/, int32_t /*address*/, const SourceLocation &/*source_location*/) const // Stub: asserts, and does nothing when asserts are compiled out.
+{
+ assert(false); // this should never be called
+}
+
+const SyntaxToken *ProcessorUnspecified::parse_instruction(Assembler &/*assembler*/, bool /*generate*/, const SyntaxToken *t, bool /*export_enabled*/) const // Assemble pass stub: asserts, and returns t unchanged when asserts are compiled out.
+{
+ assert(false); // this should never be called
+ return t;
+}
+
+ }
+}
A => jasm/processor/processor_unspecified.h +26 -0
@@ 0,0 1,26 @@
+#pragma once
+
+#include <processor/processor.h>
+
+namespace jasm
+{
+ namespace unspecified
+ {
+
+class ProcessorUnspecified : public Processor // Processor used while no processor type is selected; it registers no processor keywords or instructions.
+{
+public:
+ virtual void register_processor_keywords(std::vector<std::string> &keywords) override; // adds nothing
+ virtual void register_processor_instructions(bool pseudo_instructions) override; // adds nothing
+ virtual bool allow_processor_keyword_with_prim(uint64_t &keyword_hash) const override; // always false
+
+ virtual std::string token_to_string(const Token &t) const override;
+ virtual const Token *parse_instruction(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *t, uint8_t /*InstructionType*/ instruction_index) const override; // asserts, should never be called
+
+ virtual void generate_subroutine_instruction(Assembler &assembler, bool generate, int32_t address, const SourceLocation &source_location) const override; // asserts, should never be called
+ virtual const SyntaxToken *parse_instruction(Assembler &assembler, bool generate, const SyntaxToken *t, bool export_enabled) const override; // asserts, should never be called
+};
+
+
+ }
+}
M jasm/assembling/instructions_z80.cpp => jasm/processor/z80/instructions_z80.cpp +8 -9
@@ 1,15 1,15 @@
#include "pch.h"
-#if SUPPORTS(Z80)
-
-#include <assembling/instructions_z80.h>
#include <core/collections/array_helper.h>
#include <core/exceptions/exception.h>
+#include <processor/z80/instructions_z80.h>
namespace jasm
{
-
-const std::string_view to_string(InstructionType type)
+ namespace z80
+ {
+
+std::string_view to_string(InstructionType type)
{
static const std::string_view names[] = {
std::string_view("adc"),
@@ 86,7 86,7 @@ const std::string_view to_string(Instruc
return names[static_cast<size_t>(type)];
}
-const std::string_view to_string(InstructionArgumentType type)
+std::string_view to_string(InstructionArgumentType type)
{
static const std::string_view names[] = {
std::string_view("<none>"),
@@ 1814,6 1814,5 @@ bool can_have_indexed_offset(ProcessorKe
keyword == ProcessorKeywordType::IY;
}
-}
-
-#endif
+ } // namespace z80
+} // namespace jasm
M jasm/assembling/instructions_z80.h => jasm/processor/z80/instructions_z80.h +9 -10
@@ 1,15 1,15 @@
#pragma once
-#if SUPPORTS(Z80)
-
-#include <assembling/instructions_common.h>
#include <core/collections/static_array.h>
-#include <parsing/processor_keywords_z80.h>
+#include <processor/instructions_common.h>
+#include <processor/z80/processor_keywords_z80.h>
namespace jasm
{
+ namespace z80
+ {
-/// @addtogroup assembling
+/// @addtogroup assemble
/// @{
enum class InstructionType : uint8_t
@@ 244,11 244,10 @@ const core::StaticArray<AddressingMode>
/// where the addressing mode was found.
const InstructionOpCode &opcode(InstructionType instruction, uint8_t addressing_mode_index);
-const std::string_view to_string(InstructionType type);
-const std::string_view to_string(InstructionArgumentType type);
+std::string_view to_string(InstructionType type);
+std::string_view to_string(InstructionArgumentType type);
/// @}
-}
-
-#endif
+ } // namespace z80
+} // namespace jasm
M jasm/parsing/processor_keywords_z80.cpp => jasm/processor/z80/processor_keywords_z80.cpp +5 -6
@@ 1,13 1,13 @@
#include "pch.h"
-#include <parsing/processor_keywords_z80.h>
-
-#if SUPPORTS(Z80)
+#include <processor/z80/processor_keywords_z80.h>
namespace jasm
{
+ namespace z80
+ {
-const std::string_view to_string(ProcessorKeywordType type)
+std::string_view to_string(ProcessorKeywordType type)
{
static const std::string_view names[] = {
@@ 44,6 44,5 @@ const std::string_view to_string(Process
return names[static_cast<size_t>(type)];
}
+ } // namespace z80
} // namespace jasm
-
-#endif
M jasm/parsing/processor_keywords_z80.h => jasm/processor/z80/processor_keywords_z80.h +5 -6
@@ 1,10 1,10 @@
#pragma once
-#if SUPPORTS(Z80)
-
namespace jasm
{
-
+ namespace z80
+ {
+
/// @addtogroup tokenize
/// @{
@@ 52,10 52,9 @@ inline bool is_register(ProcessorKeyword
return type >= ProcessorKeywordType::NumBranchConditions || type == ProcessorKeywordType::C;
}
-const std::string_view to_string(ProcessorKeywordType type);
+std::string_view to_string(ProcessorKeywordType type);
/// @}
+ } // namespace z80
} // namespace jasm
-
-#endif
A => jasm/processor/z80/processor_z80.cpp +765 -0
@@ 0,0 1,765 @@
+#include "pch.h"
+
+#include <algorithm>
+#include <assemble/assembler_impl/assembler_impl.h>
+#include <assemble/scope_counter.h>
+#include <exceptions/assembly_exception.h>
+#include <exceptions/error_codes.h>
+#include <io/hex_source_writer.h>
+#include <processor/z80/processor_keywords_z80.h>
+#include <processor/z80/processor_z80.h>
+#include <sstream>
+#include <string_view>
+#include <syntax/syntax_parser.h>
+#include <syntax/syntax_tokens.h>
+
+namespace jasm
+{
+ namespace z80
+ {
+
+struct InstructionToken : public SyntaxToken // Syntax token for a Z80 instruction; NOTE(review): the two-element arrays presumably hold one entry per instruction argument - confirm in parse_instruction
+{
+ InstructionType instruction;
+ uint8_t addressing_mode_index; ///< The selected addressing mode index used to lookup the instruction data using @a opcode().
+ uint8_t padding1[2]; // explicit padding, see the alignment markers below
+ SourceLocation source_location; ///< Source location to instruction.
+ // 8 byte aligned
+ bool has_instruction_data_label[2]; ///< True if there is a label defined that points to the instruction data.
+ bool global_data_label[2]; ///< True if the label is global.
+ uint32_t padding2; // explicit padding, see the alignment markers
+ // 8 byte aligned
+ SourceLocation address_label_location[2]; ///< Source location to address label, if existing.
+ // 8 byte aligned
+ uint64_t data_label_symbol_hash[2]; ///< Symbol to define as the data label.
+ // 8 byte aligned
+};
+
+
+void ProcessorZ80::register_processor_keywords(std::vector<std::string> &keywords) // Appends every z80::ProcessorKeywordType name to keywords and registers its TokenData.
+{
+ add_type_tokens<ProcessorKeywordType>(keywords, TokenType::ProcessorKeyword);
+}
+
+void ProcessorZ80::register_processor_instructions(bool pseudo_instructions) // Fills _instructions with the hash of every instruction name mapped to its InstructionType index.
+{
+ // the registered range ends at NumTypes when pseudo instructions are enabled, otherwise at NumStandard
+ const InstructionType end_type = pseudo_instructions ? InstructionType::NumTypes : InstructionType::NumStandard;
+ const uint8_t end_index = static_cast<uint8_t>(end_type);
+ for (uint8_t index = 0; index != end_index; ++index) {
+ const std::string_view instruction_name = to_string(static_cast<InstructionType>(index));
+ const auto name_hash = core::murmur_hash3_string_x64_64(instruction_name);
+ _instructions.insert(name_hash) = index;
+ }
+}
+
+bool ProcessorZ80::allow_processor_keyword_with_prim(uint64_t &keyword_hash) const // Replaces the hash of a, af, b, c, d, e, h or l with the hash of the same name followed by a prim ('); any other hash is left untouched.
+{
+ if (keyword_hash == core::hash_constant(0x85555565f6597889ULL, "a")) { // each hash_constant pairs a hash value with the string it belongs to
+ keyword_hash = core::hash_constant(0x45a6060b75dcb28bULL, "a'");
+ return true;
+ } else if (keyword_hash == core::hash_constant(0x3265a8a124914099ULL, "af")) {
+ keyword_hash = core::hash_constant(0x75f0ca5c1761bc10, "af'");
+ return true;
+ } else if (keyword_hash == core::hash_constant(0x7a98a957b1d3d1ee, "b")) {
+ keyword_hash = core::hash_constant(0x4e710923ab8a5de3, "b'");
+ return true;
+ } else if (keyword_hash == core::hash_constant(0x8e38df6c4a1f74d7, "c")) {
+ keyword_hash = core::hash_constant(0x51f9b2208ed849be, "c'");
+ return true;
+ } else if (keyword_hash == core::hash_constant(0xcb72f2cd8447f776, "d")) {
+ keyword_hash = core::hash_constant(0x516d90c3787d85ce, "d'");
+ return true;
+ } else if (keyword_hash == core::hash_constant(0xc5b69249a3d5e994, "e")) {
+ keyword_hash = core::hash_constant(0x14115437bd14d165, "e'");
+ return true;
+ } else if (keyword_hash == core::hash_constant(0xd6fcb2bb61cb4523, "h")) {
+ keyword_hash = core::hash_constant(0x3a55eb6b0fcaef4f, "h'");
+ return true;
+ } else if (keyword_hash == core::hash_constant(0xf539fdab7bdf9f62, "l")) {
+ keyword_hash = core::hash_constant(0x2729f07f03d07daa, "l'");
+ return true;
+ }
+ return false; // not a keyword that has a prim variant
+}
+
+std::string ProcessorZ80::token_to_string(const Token &t) const // Builds a description of a token: its kind, followed by the specific instruction, keyword, operator or type name where one applies.
+{
+ std::stringstream ss;
+ if (jasm::is_instruction(t)) { // qualified to call the free function in namespace jasm
+ ss << "instruction";
+ } else {
+ ss << to_string(t.type);
+ }
+
+ switch (t.type) {
+ case TokenType::Whitespace: // these kinds get no extra detail
+ case TokenType::Char:
+ case TokenType::Integer:
+ case TokenType::Float:
+ case TokenType::String:
+ case TokenType::End:
+ case TokenType::Newline:
+ case TokenType::Processor:
+ case TokenType::NumTypes:
+ break;
+
+ case TokenType::Symbol:
+ {
+ if (t.instruction_index != invalid_instruction) { // a symbol token carries a valid index when it names an instruction
+ ss << " " << to_string(static_cast<InstructionType>(t.instruction_index));
+ }
+ break;
+ }
+
+ case TokenType::Boolean:
+ ss << " " << to_string(t.boolean_index);
+ break;
+
+ case TokenType::Operator:
+ ss << " " << to_string(t.operator_index);
+ break;
+
+ case TokenType::Keyword:
+ ss << " " << to_string(t.keyword_index);
+ break;
+
+ case TokenType::ProcessorKeyword:
+ ss << " " << to_string(static_cast<ProcessorKeywordType>(t.processor_keyword_index)); // the cast selects the z80 keyword names
+ break;
+
+ case TokenType::Typename:
+ ss << " " << to_string(t.typename_index);
+ break;
+ }
+ return ss.str();
+}
+
+InstructionArgumentType ProcessorZ80::try_parse_addressing_mode(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *&t, InstructionType instruction, bool &has_label, bool &global_label, SourceLocation &label_location, uint64_t &label_hash) const
+{ // Reads ahead over one instruction argument and classifies it. t is advanced past the argument. has_label, global_label and label_hash are always written; label_location only when a label is found. Throws AssemblyException for a keyword that cannot be used indirectly.
+ InstructionArgumentType a = InstructionArgumentType::None;
+
+ t = parser.skip_spaces_and_tabs(t);
+
+ // check for optional label to define at the instruction argument address
+ has_label = false;
+ global_label = false;
+ label_hash = 0;
+ if (UNLIKELY(parser.label_definition_follows(t))) {
+ has_label = true;
+ label_location = t->source_location;
+ t = parser.parse_symbol_definition(t, global_label, label_hash);
+ t = parser.consume_next_token(); // the colon
+ t = parser.skip_spaces_and_tabs(t);
+ }
+
+ // save read state to restore later since we are reading ahead.
+ const TokenReadPosition start_position = parser.get_read_position();
+ const Token *start_token = t;
+
+ if (t->type == TokenType::ProcessorKeyword) {
+ // can only be that keyword and nothing else
+ a = keyword_to_instruction_argument(instruction, static_cast<ProcessorKeywordType>(t->processor_keyword_index));
+ t = parser.consume_next_token();
+ } else if (parser.is_operator(t, OperatorType::LeftParenthesis)) {
+ // could be
+ // (<register>)
+ // (<register>+/-<expression>)
+ // (<expression>)
+ // (<expression>)<operator><expression>
+ t = parser.consume_next_token(); // left parenthesis
+ t = parser.skip_spaces_and_tabs(t);
+ if (t->type == TokenType::ProcessorKeyword) {
+ ProcessorKeywordType keyword = static_cast<ProcessorKeywordType>(t->processor_keyword_index);
+ if (is_valid_indirect_keyword(keyword)) {
+ // could be
+ // (BC), (DE), (HL), (SP), (C), (IX), (IY), (IX+d), (IY+d)
+ t = parser.consume_next_token(); // register
+ if (can_have_indexed_offset(keyword)) {
+ // could be
+ // (IX), (IY), (IX+d), (IY+d)
+ t = parser.skip_spaces_and_tabs(t);
+ if (parser.is_operator(t, OperatorType::RightParenthesis)) {
+ // could be
+ // (IX), (IY)
+ a = keyword_to_indirect_instruction_argument(keyword);
+
+ } else if (parser.is_operator(t, OperatorType::Plus) || parser.is_operator(t, OperatorType::Minus)) {
+ // could be
+ // (IX+d), (IY+d)
+ a = keyword_to_indexed_instruction_argument(keyword);
+
+ // skip past the index expression
+ constexpr bool end_at_unmatched_right_parenthesis = true;
+ constexpr bool end_at_newline = true;
+ t = parser.parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
+ }
+ } else {
+ // could be
+ // (BC), (DE), (HL), (SP), (C)
+ a = keyword_to_indirect_instruction_argument(keyword);
+ }
+ } else {
+ // invalid register or keyword
+ std::stringstream ss;
+ ss << to_string(keyword) << " cannot be used for indirect addressing.";
+ throw AssemblyException(source_files, t->source_location, AssemblyErrorCodes::KeywordCannotBeUsedForIndirectAddressing, ss.str());
+ }
+ t = parser.skip_spaces_and_tabs(t);
+ t = parser.parse_operator(t, OperatorType::RightParenthesis);
+
+ } else {
+ // could be
+ // (<expression>)
+ // (<expression>)<operator><expression>
+
+ {
+ constexpr bool end_at_unmatched_right_parenthesis = true;
+ constexpr bool end_at_newline = true;
+ t = parser.parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
+ }
+
+ t = parser.parse_operator(t, OperatorType::RightParenthesis);
+ t = parser.skip_spaces_and_tabs(t);
+
+ if (t->type == TokenType::Operator && t->operator_index < OperatorType::NumOperatorFunctions) {
+ // could be
+ // (<expression>)<operator><expression>
+ a = InstructionArgumentType::Number;
+
+ // Rewind and reparse the expression fully, otherwise we may not get calls or indexing right
+ // if first part is generating an object that is processed in the later part.
+ t = start_token;
+ parser.set_read_position(start_position);
+
+ constexpr bool end_at_unmatched_right_parenthesis = false;
+ constexpr bool end_at_newline = true;
+ t = parser.parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
+
+ } else {
+ // could be
+ // (<expression>)
+ a = InstructionArgumentType::IndirectAddress;
+ }
+
+ }
+ } else if (t->type == TokenType::Newline) {
+ // none
+ } else {
+ // could be
+ // <expression>
+ a = InstructionArgumentType::Number;
+
+ constexpr bool end_at_unmatched_right_parenthesis = false;
+ constexpr bool end_at_newline = true;
+ t = parser.parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
+ }
+
+ t = parser.skip_spaces_and_tabs(t);
+
+ return a;
+}
+
+AddressingModeArguments ProcessorZ80::try_parse_addressing_modes(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *t, InstructionType instruction, bool has_label[2], bool global_label[2], SourceLocation label_location[2], uint64_t label_hash[2]) const
+{ // Classifies up to two comma separated instruction arguments; index 0 of each array describes the first argument and index 1 the second. Index 1 is only written when a second argument is parsed.
+ // save read state to restore later since we are reading ahead.
+ TokenChainScope rewind_scope(parser.create_rewind_scope());
+
+ InstructionArgumentType arg1 = try_parse_addressing_mode(parser, source_files, t, instruction, has_label[0], global_label[0], label_location[0], label_hash[0]); // advances the local copy of t
+ InstructionArgumentType arg2 = InstructionArgumentType::None;
+ if (arg1 != InstructionArgumentType::None && parser.is_operator(t, OperatorType::Comma)) { // a second argument requires a first one followed by a comma
+ t = parser.consume_next_token(); // the comma
+ arg2 = try_parse_addressing_mode(parser, source_files, t, instruction, has_label[1], global_label[1], label_location[1], label_hash[1]);
+ }
+ return AddressingModeArguments{arg1, arg2};
+}
+
+bool find_addressing_mode(AddressingModeArguments args, const core::StaticArray<AddressingMode> &available, size_t &index) // Finds the first entry of available whose simplified arguments equal args.
+{
+ const auto first = available.begin();
+ const auto last = available.end();
+ const auto match = std::find_if(first, last, [args](auto candidate){ return args == candidate.simplified; });
+ const bool found = match != last;
+ // index is reset to 0 when nothing matches
+ index = found ? static_cast<size_t>(match - first) : 0;
+ return found;
+}
+
+void print_addressing_mode(std::stringstream &ss, InstructionType instruction, AddressingModeArguments args) // Writes one line to ss: the instruction name, then " arg1" and ", arg2" for the arguments that are not None.
+{
+ const bool has_first = args.arg1 != InstructionArgumentType::None;
+ const bool has_second = has_first && args.arg2 != InstructionArgumentType::None; // a second argument is only printed after a first
+ ss << to_string(instruction);
+ if (has_first) {
+ ss << ' ' << to_string(args.arg1);
+ }
+ if (has_second) {
+ ss << ", " << to_string(args.arg2);
+ }
+ ss << '\n';
+}
+
+const Token *ProcessorZ80::parse_and_output_instruction_argument(SyntaxParser &parser, const Token *t, InstructionArgumentType arg, bool argument_has_label) const
+{ // Consumes the tokens of one instruction argument whose type arg is already known and returns the token after it; expressions are parsed with parse_and_output_expression, registers and conditions are only skipped.
+ if (UNLIKELY(argument_has_label)) {
+ // just skip the label definition
+ bool global = false;
+ uint64_t symbol = 0;
+ t = parser.parse_symbol_definition(t, global, symbol); // results are discarded here
+ t = parser.parse_operator(t, OperatorType::Colon);
+ }
+
+ switch (arg)
+ {
+ case InstructionArgumentType::None:
+ break;
+
+ case InstructionArgumentType::Number: // this is the generic "don't know the type yet"
+ case InstructionArgumentType::ByteValue:
+ case InstructionArgumentType::WordValue:
+ case InstructionArgumentType::RelativeAddress:
+ case InstructionArgumentType::Bit:
+ case InstructionArgumentType::PageZeroAddressing:
+ case InstructionArgumentType::InterruptNumber:
+ { // plain expression argument
+ constexpr bool end_at_unmatched_parenthesis = false;
+ constexpr bool end_at_newline = true;
+ t = parser.parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
+
+ break;
+ }
+
+ case InstructionArgumentType::IndirectAddress:
+ case InstructionArgumentType::IndirectByteValue:
+ case InstructionArgumentType::IndirectWordValue:
+ { // (<expression>)
+ t = parser.skip_spaces_and_tabs(t);
+ t = parser.parse_operator(t, OperatorType::LeftParenthesis);
+
+ constexpr bool end_at_unmatched_parenthesis = true;
+ constexpr bool end_at_newline = true;
+ t = parser.parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
+
+ t = parser.parse_operator(t, OperatorType::RightParenthesis);
+ break;
+ }
+
+ case InstructionArgumentType::IndirectRegisterC:
+ case InstructionArgumentType::IndirectRegisterBC:
+ case InstructionArgumentType::IndirectRegisterDE:
+ case InstructionArgumentType::IndirectRegisterHL:
+ case InstructionArgumentType::IndirectRegisterIX:
+ case InstructionArgumentType::IndirectRegisterIY:
+ case InstructionArgumentType::IndirectRegisterSP:
+ { // (<register>)
+ t = parser.skip_spaces_and_tabs(t);
+ t = parser.parse_operator(t, OperatorType::LeftParenthesis);
+
+ t = parser.skip_spaces_and_tabs(t);
+ assert(t->type == TokenType::ProcessorKeyword);
+ t = parser.consume_next_token(); // processor keyword
+
+ t = parser.skip_spaces_and_tabs(t);
+ t = parser.parse_operator(t, OperatorType::RightParenthesis);
+ break;
+ }
+
+ case InstructionArgumentType::IndirectIndexedRegisterIX:
+ case InstructionArgumentType::IndirectIndexedRegisterIY:
+ { // (<register>+/-<expression>)
+ t = parser.skip_spaces_and_tabs(t);
+ t = parser.parse_operator(t, OperatorType::LeftParenthesis);
+
+ t = parser.skip_spaces_and_tabs(t);
+ assert(t->type == TokenType::ProcessorKeyword);
+ t = parser.consume_next_token(); // processor keyword
+
+ constexpr bool end_at_unmatched_parenthesis = true;
+ constexpr bool end_at_newline = true;
+ t = parser.parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline); // the offset expression, parsed from the token following the register
+
+ t = parser.parse_operator(t, OperatorType::RightParenthesis);
+ break;
+ }
+
+ case InstructionArgumentType::RegisterA:
+ case InstructionArgumentType::RegisterB:
+ case InstructionArgumentType::RegisterC:
+ case InstructionArgumentType::RegisterD:
+ case InstructionArgumentType::RegisterE:
+ case InstructionArgumentType::RegisterH:
+ case InstructionArgumentType::RegisterL:
+ case InstructionArgumentType::RegisterI:
+ case InstructionArgumentType::RegisterR:
+ case InstructionArgumentType::RegisterAF:
+ case InstructionArgumentType::RegisterBC:
+ case InstructionArgumentType::RegisterDE:
+ case InstructionArgumentType::RegisterHL:
+ case InstructionArgumentType::RegisterIX:
+ case InstructionArgumentType::RegisterIY:
+ case InstructionArgumentType::RegisterSP:
+ case InstructionArgumentType::RegisterAFPrim:
+ case InstructionArgumentType::ConditionC:
+ case InstructionArgumentType::ConditionM:
+ case InstructionArgumentType::ConditionNC:
+ case InstructionArgumentType::ConditionNZ:
+ case InstructionArgumentType::ConditionP:
+ case InstructionArgumentType::ConditionPE:
+ case InstructionArgumentType::ConditionPO:
+ case InstructionArgumentType::ConditionZ:
+ { // a single register or condition keyword
+ t = parser.skip_spaces_and_tabs(t);
+ assert(t->type == TokenType::ProcessorKeyword);
+ t = parser.consume_next_token(); // processor keyword
+ break;
+ }
+
+ case InstructionArgumentType::NumTypes: // a count, never a real argument type
+ assert(false);
+ throw AssemblyException("Internal error");
+ };
+ return t;
+}
+
+const Token *ProcessorZ80::parse_instruction(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *t, uint8_t /*InstructionType*/ instruction_index) const
+{
+ InstructionType instruction = static_cast<InstructionType>(instruction_index);
+ const Token *begin_token = t; // kept so the output token can carry the source location of the instruction itself
+
+ t = parser.consume_next_token(); // instruction token
+ t = parser.skip_spaces_and_tabs(t);
+
+ InstructionToken &instruction_token = parser.reserve_token_space<InstructionToken>(); // reserved up front; the remaining fields are filled in below
+ instruction_token.type = SyntaxTokenType::Instruction;
+ instruction_token.processor = ProcessorType::Zilog80;
+ instruction_token.size = sizeof(InstructionToken);
+ instruction_token.instruction = instruction;
+
+ // determine the addressing mode used in the source; the data label fields of the output token are handed to the call as well
+ AddressingModeArguments addressing_mode = try_parse_addressing_modes(parser, source_files, t, instruction, instruction_token.has_instruction_data_label, instruction_token.global_data_label, instruction_token.address_label_location, instruction_token.data_label_symbol_hash);
+
+ // search through the array of possible addressing modes for the instruction
+ const core::StaticArray<AddressingMode> &available_modes = addressing_modes(instruction);
+ size_t found_index;
+ if (UNLIKELY(!find_addressing_mode(addressing_mode, available_modes, found_index))) {
+ std::stringstream ss;
+ ss << "Invalid addressing mode used. Code indicates:\n";
+ print_addressing_mode(ss, instruction, addressing_mode);
+ if (available_modes.size() <= max_addressing_mode_printout_lines) { // only list the alternatives when the list is short enough
+ ss << "but possible addressing modes are:\n";
+ for(auto mode : available_modes) {
+ print_addressing_mode(ss, instruction, mode.detailed);
+ }
+ }
+ throw AssemblyException(source_files, t->source_location, AssemblyErrorCodes::InvalidAddressingMode, ss.str());
+ }
+
+ // store the index of the matched addressing mode and the instruction's source location in the output token
+ instruction_token.addressing_mode_index = static_cast<uint8_t>(found_index);
+ instruction_token.source_location = begin_token->source_location;
+
+ // now we need to parse the expressions required by the arguments
+ t = parse_and_output_instruction_argument(parser, t, addressing_mode.arg1, instruction_token.has_instruction_data_label[0]);
+ if (addressing_mode.arg2 != InstructionArgumentType::None) { // a second argument is separated from the first by a comma
+ t = parser.skip_spaces_and_tabs(t);
+ t = parser.parse_operator(t, OperatorType::Comma);
+ t = parse_and_output_instruction_argument(parser, t, addressing_mode.arg2, instruction_token.has_instruction_data_label[1]);
+ }
+
+ return t; // first token after the parsed instruction arguments
+}
+
+void ProcessorZ80::generate_subroutine_instruction(Assembler &assembler, bool generate, int32_t address, const SourceLocation &source_location) const
+{
+ // instructions are only allowed within code sections.
+ bool instructions_allowed = assembler.in_code_section();
+ if (UNLIKELY(!instructions_allowed)) {
+ // this is an unrecoverable error
+ std::stringstream ss;
+ ss << "Instructions must be in a code section.";
+ assembler.report_fatal_error(source_location, AssemblyErrorCodes::CodeMustBeInCodeSection, ss.str());
+ }
+
+ // recursive data generation may not be safe
+ if (assembler._data_generation_depth != 0) {
+ // this is an unrecoverable error
+ std::stringstream ss;
+ ss << "Recursive data generation isn't allowed.";
+ assembler.report_fatal_error(source_location, AssemblyErrorCodes::RecursiveDataGenerationNotAllowed, ss.str());
+ }
+
+ ScopeCounter<uint32_t> sc(assembler._data_generation_depth); // presumably raises the depth for the duration of this call - confirm against ScopeCounter
+
+ const InstructionOpCode &opcode_data = opcode(InstructionType::Call, 0); // always a Call instruction, using its addressing mode at index 0
+ uint8_t mutable_opcode_data[4]; // copy of the opcode so the address can be patched in before it is written
+ mutable_opcode_data[0] = opcode_data.op[0];
+ mutable_opcode_data[1] = opcode_data.op[1];
+ mutable_opcode_data[2] = opcode_data.op[2];
+ mutable_opcode_data[3] = opcode_data.op[3];
+
+ if (generate) {
+ if (UNLIKELY(assembler._multi_bank_mode && address >= 0)) { // in multi bank mode, non-negative addresses are truncated to 16 bits before the range check
+ address &= 0xffff;
+ }
+ if (address < -32768 || address > 65535) {
+ std::stringstream ss;
+ ss << "Addressing mode needs a word size argument. Argument was evaluated to " << address << ".";
+ assembler.report_error(source_location, AssemblyErrorCodes::AddressingModeRequiresWordSizeArgument, ss.str());
+ }
+ mutable_opcode_data[opcode_data.offset_to_data[0] + 0] = static_cast<uint8_t>(address & 0xff); // low byte first
+ mutable_opcode_data[opcode_data.offset_to_data[0] + 1] = static_cast<uint8_t>(address >> 8); // then the high byte
+
+ Section::Contents ending_instruction = Section::Contents::ContinueExecutionInstruction;
+ auto &data = assembler._section->generated_data(ending_instruction);
+ for(decltype(opcode_data.total_size) i = 0; i < opcode_data.total_size; ++i) {
+ data.push_back(mutable_opcode_data[i]);
+ }
+ if (assembler._hex_source_writer != nullptr) { // the hex source writer is optional
+ assembler._hex_source_writer->write_data(static_cast<uint32_t>(assembler._program_counter.integer_value), &data[data.size() - opcode_data.total_size], opcode_data.total_size, source_location.file_index, source_location.row, source_location.row + 1);
+ }
+
+ }
+ assembler._program_counter.integer_value += static_cast<int32_t>(opcode_data.total_size); // the program counter advances whether or not data was generated
+}
+
+const SyntaxToken *ProcessorZ80::parse_instruction(Assembler &assembler, bool generate, const SyntaxToken *t, bool export_enabled) const
+{
+ assert(t->type == SyntaxTokenType::Instruction);
+ const InstructionToken &instruction_token = *static_cast<const InstructionToken *>(t);
+
+ // instructions are only allowed within code sections.
+ bool instructions_allowed = assembler.in_code_section();
+ if (UNLIKELY(!instructions_allowed)) {
+ // this is an unrecoverable error
+ std::stringstream ss;
+ ss << "Instructions must be in a code section.";
+ assembler.report_fatal_error(instruction_token.source_location, AssemblyErrorCodes::CodeMustBeInCodeSection, ss.str());
+ }
+
+ // recursive data generation may not be safe
+ if (assembler._data_generation_depth != 0) {
+ // this is an unrecoverable error
+ std::stringstream ss;
+ ss << "Recursive data generation isn't allowed.";
+ assembler.report_fatal_error(instruction_token.source_location, AssemblyErrorCodes::RecursiveDataGenerationNotAllowed, ss.str());
+ }
+ ScopeCounter<uint32_t> sc(assembler._data_generation_depth); // presumably raises the depth for the duration of this call - confirm against ScopeCounter
+
+ InstructionType instruction = instruction_token.instruction;
+ const InstructionOpCode &opcode_data = opcode(instruction, instruction_token.addressing_mode_index);
+ Section::Contents ending_instruction = is_ending_instruction(instruction) ? Section::Contents::EndExecutionInstruction : Section::Contents::ContinueExecutionInstruction;
+
+ if (UNLIKELY(generate && !assembler._pseudo_instructions && opcode_data.category == InstructionCategory::Pseudo)) {
+ std::stringstream ss;
+ ss << "Pseudo instructions require the pseudo instruction mode to be enabled.";
+ assembler.report_error(instruction_token.source_location, AssemblyErrorCodes::UseOfPseudoInstructionInStandardMode, ss.str());
+ }
+
+ t = assembler.consume_next_token(); // instruction
+
+ // in the generation pass, the program counter is guaranteed to be an integer value
+ // so there is no need to verify this
+
+ // generate labels to instruction data
+ for(int i = 0; i < 2; ++i) { // the label fields of the instruction token are indexed 0 and 1
+ if (UNLIKELY(instruction_token.has_instruction_data_label[i])) {
+ // verify that the data label is valid for the addressing mode (can point to actual data)
+ uint8_t data_size = argument_data_size(i, opcode_data);
+ if (data_size != 0) {
+ // exporting local variables is not allowed
+ if (export_enabled && !instruction_token.global_data_label[i]) {
+ std::stringstream ss;
+ ss << assembler.variable_name(instruction_token.data_label_symbol_hash[i], instruction_token.global_data_label[i]) << " cannot be exported since it is local.";
+ assembler.report_error(instruction_token.address_label_location[i], AssemblyErrorCodes::ExportingLocalIsNotAllowed, ss.str());
+ }
+
+ if (assembler.create_label(generate, instruction_token.data_label_symbol_hash[i], instruction_token.global_data_label[i], StorageType::Constant, instruction_token.address_label_location[i])) {
+ Value &new_label = assembler._current_pass.values.back(); // presumably the value create_label just added - confirm
+ if (data_size == 1) {
+ assembler.set_byte_offset(new_label, assembler._program_counter.integer_value + argument_data_offset(i, opcode_data), 0); // program counter plus the offset of the argument data
+ } else if (data_size == 2) {
+ assembler.set_word_offset(new_label, assembler._program_counter.integer_value + argument_data_offset(i, opcode_data), 0); // program counter plus the offset of the argument data
+ } else {
+ assert(false); // only data sizes 1 and 2 are handled here
+ }
+ new_label.set_contains_address(true);
+ if (export_enabled) {
+ new_label.set_is_public(true);
+ }
+ }
+ } else {
+ std::stringstream ss;
+ ss << "Addressing mode argument cannot have label to instruction data.";
+ assembler.report_error(instruction_token.address_label_location[i], AssemblyErrorCodes::AddressingModeArgumentCannotHaveDataLabel, ss.str());
+ }
+ }
+ }
+
+ // copy the opcode to be able to modify it before writing it to the data stream
+ uint8_t mutable_opcode_data[4];
+ mutable_opcode_data[0] = opcode_data.op[0];
+ mutable_opcode_data[1] = opcode_data.op[1];
+ mutable_opcode_data[2] = opcode_data.op[2];
+ mutable_opcode_data[3] = opcode_data.op[3];
+
+ if (opcode_data.format != OpCodeFormat::OpcodeOnly) { // every other format has at least one expression argument
+ assert(t->type == SyntaxTokenType::Expression);
+ const ExpressionToken *expr1 = static_cast<const ExpressionToken *>(t);
+ int32_t arg1 = assembler.evaluate_integer_expression_for_instruction_argument(generate, t);
+ t = assembler.consume_next_token();
+
+ switch (opcode_data.format)
+ {
+ case OpCodeFormat::OpcodeOnly: // not reachable because of the enclosing check
+ break;
+ case OpCodeFormat::ByteArg:
+ if (generate) {
+ if (arg1 < -128 || arg1 > 255) { // accepts both the signed and the unsigned byte range
+ std::stringstream ss;
+ ss << "Addressing mode needs a byte size argument. Argument was evaluated to " << arg1 << ".";
+ assembler.report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresByteSizeArgument, ss.str());
+ }
+ mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(arg1);
+ }
+ break;
+ case OpCodeFormat::WordArg:
+ if (generate) {
+ if (UNLIKELY(assembler._multi_bank_mode && arg1 >= 0)) { // in multi bank mode, non-negative values are truncated to 16 bits before the range check
+ arg1 &= 0xffff;
+ }
+ if (arg1 < -32768 || arg1 > 65535) { // accepts both the signed and the unsigned word range
+ std::stringstream ss;
+ ss << "Addressing mode needs a word size argument. Argument was evaluated to " << arg1 << ".";
+ assembler.report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresWordSizeArgument, ss.str());
+ }
+ mutable_opcode_data[opcode_data.offset_to_data[0] + 0] = static_cast<uint8_t>(arg1 & 0xff); // low byte first
+ mutable_opcode_data[opcode_data.offset_to_data[0] + 1] = static_cast<uint8_t>(arg1 >> 8); // then the high byte
+ }
+ break;
+ case OpCodeFormat::OffsetArg:
+ if (generate) {
+ if (arg1 < -128 || arg1 > 127) {
+ std::stringstream ss;
+ ss << "Addressing mode needs an offset in range [-128..127]. Argument was evaluated to " << arg1 << ".";
+ assembler.report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresOffsetSizeArgument, ss.str());
+ }
+ mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(arg1);
+ }
+ break;
+ case OpCodeFormat::OffsetAndByteArg:
+ {
+ assert(t->type == SyntaxTokenType::Expression); // this format carries a second expression
+ const ExpressionToken *expr2 = static_cast<const ExpressionToken *>(t);
+ int32_t arg2 = assembler.evaluate_integer_expression_for_instruction_argument(generate, t);
+ t = assembler.consume_next_token();
+
+ if (generate) {
+ if (arg1 < -128 || arg1 > 127) {
+ std::stringstream ss;
+ ss << "Addressing mode needs an offset in range [-128..127]. Argument was evaluated to " << arg1 << ".";
+ assembler.report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresOffsetSizeArgument, ss.str());
+ }
+ if (arg2 < -128 || arg2 > 255) {
+ std::stringstream ss;
+ ss << "Addressing mode needs a byte size argument. Argument was evaluated to " << arg2 << ".";
+ assembler.report_error(expr2->source_location, AssemblyErrorCodes::AddressingModeRequiresByteSizeArgument, ss.str());
+ }
+
+ mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(arg1); // offset goes to the first data slot
+ mutable_opcode_data[opcode_data.offset_to_data[1]] = static_cast<uint8_t>(arg2); // byte value goes to the second data slot
+ }
+ break;
+ }
+ case OpCodeFormat::InterruptModeArg:
+ {
+ if (generate) {
+ if (arg1 < 0 || arg1 > 2) {
+ std::stringstream ss;
+ ss << "Addressing mode needs an interrupt mode in range [0..2]. Argument was evaluated to " << arg1 << ".";
+ assembler.report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresByteSizeArgument, ss.str()); // NOTE(review): reuses the byte size error code for an interrupt mode range failure - confirm this is intended
+ }
+ uint8_t opcode_part = 0; // stays 0 when the mode is out of range (an error was reported above)
+ if (arg1 == 0) {
+ opcode_part = 0x46;
+ } else if (arg1 == 1) {
+ opcode_part = 0x56;
+ } else if (arg1 == 2) {
+ opcode_part = 0x5e;
+ }
+ mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(opcode_part); // the mode selects an opcode byte rather than being stored as data
+ }
+ break;
+ }
+ case OpCodeFormat::BitAndRegisterArg:
+ {
+ if (generate) {
+ if (arg1 < 0 || arg1 > 7) {
+ std::stringstream ss;
+ ss << "Addressing mode needs a bit argument in range [0..7]. Argument was evaluated to " << arg1 << ".";
+ assembler.report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresBitArgument, ss.str());
+ }
+ size_t bit_argument_offset = opcode_data.total_size - 1; // the bit is always placed in the last opcode byte
+ mutable_opcode_data[bit_argument_offset] |= static_cast<uint8_t>(arg1 << 3); // the bit number is merged in shifted left by three
+ }
+ break;
+ }
+ case OpCodeFormat::BitAndOffsetArg:
+ {
+ assert(t->type == SyntaxTokenType::Expression); // this format carries a second expression
+ const ExpressionToken *expr2 = static_cast<const ExpressionToken *>(t);
+ int32_t arg2 = assembler.evaluate_integer_expression_for_instruction_argument(generate, t);
+ t = assembler.consume_next_token();
+
+ if (generate) {
+ if (arg1 < 0 || arg1 > 7) {
+ std::stringstream ss;
+ ss << "Addressing mode needs a bit argument in range [0..7]. Argument was evaluated to " << arg1 << ".";
+ assembler.report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresBitArgument, ss.str());
+ }
+ if (arg2 < -128 || arg2 > 127) {
+ std::stringstream ss;
+ ss << "Addressing mode needs an offset in range [-128..127]. Argument was evaluated to " << arg2 << ".";
+ assembler.report_error(expr2->source_location, AssemblyErrorCodes::AddressingModeRequiresOffsetSizeArgument, ss.str());
+ }
+ mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(arg2); // here the second argument is the offset
+ size_t bit_argument_offset = opcode_data.total_size - 1; // the bit is always placed in the last opcode byte
+ mutable_opcode_data[bit_argument_offset] |= static_cast<uint8_t>(arg1 << 3); // the bit number is merged in shifted left by three
+ }
+ break;
+ }
+ case OpCodeFormat::BranchOffsetArg:
+ if (generate) {
+ int32_t relative_offset = arg1 - (assembler._program_counter.integer_value + 2); // NOTE(review): the 2 is presumably the size of the branch instruction - confirm
+ if (relative_offset < -128 || relative_offset > 127) {
+ std::stringstream ss;
+ ss << "Relative address out of range. Offset is " << relative_offset << " and needs to be in a [-128..127] range.";
+ assembler.report_error(expr1->source_location, AssemblyErrorCodes::RelativeAddressOutOfRange, ss.str());
+ }
+ mutable_opcode_data[opcode_data.offset_to_data[0]] = static_cast<uint8_t>(relative_offset);
+ }
+ break;
+ case OpCodeFormat::PageZeroArg:
+ if (generate) {
+ if ((arg1 & (~0b111000)) != 0) { // only bits 3..5 may be set
+ std::stringstream ss;
+ ss << "Zero page address must be 0, 8, 16, 24, 32, 40, 48, 56. Argument was evaluated to " << arg1 << ".";
+ assembler.report_error(expr1->source_location, AssemblyErrorCodes::AddressingModeRequiresZeroPageArgument, ss.str());
+ }
+ mutable_opcode_data[0] |= static_cast<uint8_t>(mutable_opcode_data[0] | arg1); // NOTE(review): the inner '| mutable_opcode_data[0]' is redundant with '|='
+ }
+ break;
+ }
+ }
+
+ if (generate) {
+ auto &data = assembler._section->generated_data(ending_instruction);
+ for(decltype(opcode_data.total_size) i = 0; i < opcode_data.total_size; ++i) {
+ data.push_back(mutable_opcode_data[i]);
+ }
+ if (assembler._hex_source_writer != nullptr) { // the hex source writer is optional
+ assembler._hex_source_writer->write_data(static_cast<uint32_t>(assembler._program_counter.integer_value), &data[data.size() - opcode_data.total_size], opcode_data.total_size, instruction_token.source_location.file_index, instruction_token.source_location.row, instruction_token.source_location.row + 1);
+ }
+ }
+ assembler._program_counter.integer_value += static_cast<int32_t>(opcode_data.total_size); // the program counter advances whether or not data was generated
+
+ return t; // first token after the instruction and its argument expressions
+}
+
+ }
+}
A => jasm/processor/z80/processor_z80.h +41 -0
@@ 0,0 1,41 @@
+#pragma once
+
+#include <processor/processor.h>
+#include <processor/z80/instructions_z80.h>
+
+namespace jasm
+{
+ class SyntaxParser;
+
+ namespace z80
+ {
+
+class ProcessorZ80: public Processor
+{
+public:
+ virtual void register_processor_keywords(std::vector<std::string> &keywords) override;
+ virtual void register_processor_instructions(bool pseudo_instructions) override;
+ virtual bool allow_processor_keyword_with_prim(uint64_t &keyword_hash) const override;
+
+ virtual std::string token_to_string(const Token &t) const override;
+ virtual const Token *parse_instruction(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *t, uint8_t /*InstructionType*/ instruction_index) const override; // syntax pass overload: reads tokenizer tokens and outputs an InstructionToken
+
+ virtual void generate_subroutine_instruction(Assembler &assembler, bool generate, int32_t address, const SourceLocation &source_location) const override; // outputs a Call instruction to the given address
+ virtual const SyntaxToken *parse_instruction(Assembler &assembler, bool generate, const SyntaxToken *t, bool export_enabled) const override; // assembly pass overload: consumes an InstructionToken and its argument expressions
+
+private:
+ /// Parse as much as needed to determine one argument type.
+ InstructionArgumentType try_parse_addressing_mode(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *&t, InstructionType instruction, bool &has_label, bool &global_label, SourceLocation &label_location, uint64_t &label_hash) const;
+
+ /// Try to parse as much as needed to determine the argument types.
+ /// The token stream is left untouched after the call.
+ /// @return The first and second argument types.
+ AddressingModeArguments try_parse_addressing_modes(SyntaxParser &parser, const std::vector<std::string> &source_files, const Token *t, InstructionType instruction, bool has_label[2], bool global_label[2], SourceLocation label_location[2], uint64_t label_hash[2]) const;
+
+ /// Parse and output the instruction argument of the given type.
+ const Token *parse_and_output_instruction_argument(SyntaxParser &parser, const Token *t, InstructionArgumentType arg, bool argument_has_label) const;
+};
+
+
+ }
+}
M jasm/strings/string_conversions.cpp +1 -1
@@ 1,4 1,4 @@
-#include <pch.h>
+#include "pch.h"
#include <array>
#include <core/collections/array_helper.h>
M jasm/strings/string_locale.cpp +1 -1
@@ 1,4 1,4 @@
-#include <pch.h>
+#include "pch.h"
#include <array>
#include <core/collections/array_helper.h>
A => jasm/syntax/chain_index_scope.cpp +23 -0
@@ 0,0 1,23 @@
+#include "pch.h"
+
+#include <syntax/chain_index_scope.h>
+#include <syntax/syntax_parser.h>
+
+namespace jasm
+{
+
+ChainIndexScope::ChainIndexScope(SyntaxParser &parser)
+ : _parser(parser)
+{
+ _chain_index = _parser._output_chain_index; // remember the chain index that is current at construction
+}
+
+ChainIndexScope::~ChainIndexScope()
+{
+ // restore the chain index that was saved at construction
+ _parser._output_chain_index = _chain_index;
+ // the vector may have reallocated its contents so the current chain pointer needs to be refetched
+ _parser._output_chain = _parser._output[_parser._output_chain_index].get();
+}
+
+}
A => jasm/syntax/chain_index_scope.h +28 -0
@@ 0,0 1,28 @@
+#pragma once
+
+namespace jasm
+{
+
+class SyntaxParser;
+
+/// @addtogroup syntax
+/// @{
+
+/// A scope class that saves the parser's current chain index on construction and restores it on destruction.
+class ChainIndexScope
+{
+public:
+ explicit ChainIndexScope(SyntaxParser &parser);
+ ~ChainIndexScope();
+
+ ChainIndexScope &operator=(const ChainIndexScope &) = delete;
+ ChainIndexScope &operator=(const ChainIndexScope &&) = delete; // NOTE(review): a move assignment operator conventionally takes a non-const rvalue reference
+
+private:
+ SyntaxParser &_parser; ///< The parser whose chain index is saved and restored.
+ size_t _chain_index; ///< The saved chain index.
+};
+
+/// @}
+
+}
A => jasm/syntax/operators.cpp +14 -0
@@ 0,0 1,14 @@
+#include "pch.h"
+
+#include <syntax/operators.h>
+
+namespace jasm
+{
+
+OperatorInfo::OperatorInfo(OperatorPrecedence p, OperatorAssociativity a, uint32_t args) // stores the three operator properties as given
+ : precendence(p) // the member name is spelled "precendence" in the struct declaration
+ , associativity(a)
+ , num_args(args)
+{}
+
+}
A => jasm/syntax/operators.h +46 -0
@@ 0,0 1,46 @@
+#pragma once
+
+namespace jasm
+{
+
+/// @addtogroup syntax
+/// @{
+
+enum class OperatorPrecedence : uint8_t // NOTE(review): enumerators appear to be listed from tightest to loosest binding - confirm
+{
+ Function,
+ UnaryOperator,
+ Shift,
+ MultiplyDivide,
+ PlusMinus,
+ BitwiseAnd,
+ BitwiseXor,
+ BitwiseOr,
+ Comparison,
+ Equality,
+ BooleanAnd,
+ BooleanOr,
+ Assignment,
+};
+
+enum class OperatorAssociativity : uint8_t // the two associativity kinds an OperatorInfo can carry
+{
+ LeftToRight,
+ RightToLeft,
+};
+
+struct OperatorInfo // precedence, associativity and argument count of one operator
+{
+ OperatorInfo() = default; // leaves the members without initial values
+ OperatorInfo(OperatorPrecedence p, OperatorAssociativity a, uint32_t args);
+
+ static constexpr uint32_t variable_args = 0xffffffff; ///< Value of num_args if the number of args is variable.
+
+ OperatorPrecedence precendence; ///< Operator precedence (note the spelling of the member name).
+ OperatorAssociativity associativity; ///< Operator associativity.
+ uint32_t num_args; ///< Number of arguments, or variable_args.
+};
+
+/// @}
+
+}
M jasm/parsing/section.h => jasm/syntax/section.h +1 -1
@@ 1,6 1,6 @@
#pragma once
-#include <parsing/source_location.h>
+#include <tokenize/source_location.h>
namespace jasm {
M jasm/parsing/storage_type.h => jasm/syntax/storage_type.h +0 -0
A => jasm/syntax/syntax_parse.cpp +26 -0
@@ 0,0 1,26 @@
+#include "pch.h"
+
+#include <core/debug/timer.h>
+#include <syntax/syntax_parse.h>
+#include <syntax/syntax_parser.h>
+
+namespace jasm
+{
+
+void parse_syntax(
+ const TokenChain &input,
+ std::vector<std::unique_ptr<TokenChain>> &output,
+ const ProcessorCatalogue &catalogue,
+ ProcessorType default_processor,
+ const StringRepository &strings,
+ HashArrayRepository &hash_arrays,
+ const std::vector<std::string> &source_files
+)
+{
+ core::TimerScope timer("Syntax parser"); // presumably times the whole function under this label - confirm against TimerScope
+
+ SyntaxParser parser(input, output, catalogue, default_processor, strings, hash_arrays, source_files); // all arguments are forwarded unchanged
+ parser.parse();
+}
+
+}
A => jasm/syntax/syntax_parse.h +36 -0
@@ 0,0 1,36 @@
+#pragma once
+
+#include <processor/processor.h>
+
+namespace jasm
+{
+
+class HashArrayRepository;
+class ProcessorCatalogue;
+class StringRepository;
+class TokenChain;
+
+/// @addtogroup syntax
+/// @{
+
+/// Take the tokenized data and verify the syntax. Produce a new token chain suitable for assembly.
+/// @param input A chain of tokenized tokens.
+/// @param output A list of token chains that new chains will be added to.
+/// @param catalogue Catalogue of processor implementations.
+/// @param default_processor The processor inherited from the include source. This can be unspecified.
+/// @param strings A reverse lookup from string hashes to strings.
+/// @param hash_arrays A repository for hash arrays to make it possible to store references in constant size memory.
+/// @param source_files An array of files used to look up the source file name when printing error messages.
+void parse_syntax(
+ const TokenChain &input,
+ std::vector<std::unique_ptr<TokenChain>> &output,
+ const ProcessorCatalogue &catalogue,
+ ProcessorType default_processor,
+ const StringRepository &strings,
+ HashArrayRepository &hash_arrays,
+ const std::vector<std::string> &source_files
+);
+
+/// @}
+
+} // namespace jasm
M jasm/parsing/syntax_parser.cpp => jasm/syntax/syntax_parser.cpp +151 -1117
@@ 6,411 6,47 @@
#include <core/strings/murmur_hash.h>
#include <exceptions/assembly_exception.h>
#include <io/data_reader.h>
-#include <parsing/hasharray_repository.h>
-#include <parsing/tokenizer.h>
-#include <parsing/syntax_parser.h>
-#include <parsing/token_chain.h>
-#include <parsing/token_print.h>
+#include <processor/processor.h>
#include <strings/string_repository.h>
#include <sstream>
-
+#include <syntax/chain_index_scope.h>
+#include <syntax/syntax_parser.h>
+#include <tokenize/tokenizer.h>
+#include <utility/hasharray_repository.h>
+#include <utility/token_chain.h>
namespace jasm
{
-using namespace core;
-
/// @addtogroup syntax
/// @{
-#if SUPPORTS(M6502)
- namespace
- {
- enum
- {
- Imp = 1 << static_cast<int>(AddressingModeType::Implied),
- Imm = 1 << static_cast<int>(AddressingModeType::Immediate),
- Zp = 1 << static_cast<int>(AddressingModeType::ZeroPageAddr),
- Zpx = 1 << static_cast<int>(AddressingModeType::ZeroPageIndexX),
- Zpy = 1 << static_cast<int>(AddressingModeType::ZeroPageIndexY),
- Abs = 1 << static_cast<int>(AddressingModeType::AbsoluteAddr),
- AbsX = 1 << static_cast<int>(AddressingModeType::AbsoluteIndexX),
- AbsY = 1 << static_cast<int>(AddressingModeType::AbsoluteIndexY),
- Rel = 1 << static_cast<int>(AddressingModeType::RelativeAddr),
- Ind = 1 << static_cast<int>(AddressingModeType::IndirectAddr),
- IndX = 1 << static_cast<int>(AddressingModeType::IndirectIndexX),
- IndY = 1 << static_cast<int>(AddressingModeType::IndirectIndexY),
- };
- }
-#endif
-
constexpr uint32_t TokenChainMainBufferSize = 4096;
constexpr uint32_t TokenChainMacroBufferSize = 256;
-enum class OperatorPrecedence : uint8_t
-{
- Function,
- UnaryOperator,
- Shift,
- MultiplyDivide,
- PlusMinus,
- BitwiseAnd,
- BitwiseXor,
- BitwiseOr,
- Comparison,
- Equality,
- BooleanAnd,
- BooleanOr,
- Assignment,
-};
-
-enum class OperatorAssociativity : uint8_t
-{
- LeftToRight,
- RightToLeft,
-};
-
-struct OperatorInfo
-{
- OperatorInfo() = default;
- OperatorInfo(OperatorPrecedence p, OperatorAssociativity a, uint32_t args) : precendence(p), associativity(a), num_args(args) {}
-
- static constexpr uint32_t variable_args = 0xffffffff; ///< Value of num_args if the number of args is variable.
-
- OperatorPrecedence precendence;
- OperatorAssociativity associativity;
- uint32_t num_args;
-};
-
/// An enumeration of all optimizer flags that can be enabled or disabled.
enum class OptimizerFlags
{
NumFlags, ///< Only enumerator, so its value is 0. NOTE(review): presumably the count sentinel for the flags; confirm.
};
-/// This class keeps the state of the syntax analysis that persists between statements.
-struct SyntaxState
-{
- SyntaxState()
- : macro_depth(0)
- , subroutine_depth(0)
- {
- }
-
- std::set<uint64_t> sections; ///< Set with string hashes for section names, used to detect duplicate names.
- int macro_depth; ///< Number of nested macro definitions.
- int subroutine_depth; ///< Number of nested subroutine definitions (only one allowed).
-};
-
-class SyntaxParser;
-
-/// A scope class used to save and restore the current chain index.
-class ChainIndexScope
-{
-public:
- explicit ChainIndexScope(SyntaxParser &parser);
- ~ChainIndexScope();
-
- ChainIndexScope &operator=(const ChainIndexScope &) = delete;
- ChainIndexScope &operator=(const ChainIndexScope &&) = delete;
-
-private:
- SyntaxParser &_parser;
- size_t _chain_index;
-};
-
-class SyntaxParser
-{
- friend class ChainIndexScope;
-
-public:
- SyntaxParser(const TokenChain &input, std::vector<TokenChain> &output, const StringRepository &strings, HashArrayRepository &hash_arrays, const std::vector<std::string> &source_files, DataReader &data_reader);
-
- SyntaxParser &operator=(const SyntaxParser &) = delete;
-
- void parse();
-
-private:
- void setup_operator_precedence();
-
- OperatorPrecedence operator_precedence(OperatorType type) const
- {
- assert(type < OperatorType::NumOperatorFunctions);
- return _operator_info[static_cast<int>(type)].precendence;
- }
-
- OperatorAssociativity operator_associativity(OperatorType type) const
- {
- assert(type < OperatorType::NumOperatorFunctions);
- return _operator_info[static_cast<int>(type)].associativity;
- }
-
- /// Parse the inner part of a scope and return when reaching the ending curly bracket.
- const Token *parse_inner_scope(const Token *t);
-
- /// Parse a scope with curly brackets and output using ScopeBegin and ScopeEnd tokens.
- const Token *parse_scope(const Token *t);
-
- const Token *next_token()
- {
- return static_cast<const Token *>(_input_reader.next_token());
- }
-
- void skip_token(const Token *t)
- {
- _input_reader.advance_read(sizeof(Token) + t->payload_size);
- }
-
- const Token *consume_next_token()
- {
- const Token *token = static_cast<const Token *>(_input_reader.next_token());
- skip_token(token);
- return token;
- }
-
- const Token *peek_next_token()
- {
- return static_cast<const Token *>(_input_reader.next_token());
- }
-
- /// Return the next token after skipping zero or more whitespaces and newlines.
- const Token *skip_whitespaces(const Token *token)
- {
- while (token->type == TokenType::Whitespace || token->type == TokenType::Newline)
- token = consume_next_token();
-
- return token;
- }
-
- /// Return the next token after skipping zero or more whitespaces but not newlines.
- const Token *skip_spaces_and_tabs(const Token *token)
- {
- while (token->type == TokenType::Whitespace)
- token = consume_next_token();
-
- return token;
- }
-
- /// Return true if the token matches a specific operator.
- inline bool is_operator(const Token *t, OperatorType type) const
- {
- return t->type == TokenType::Operator && t->operator_index == type;
- }
-
- /// Return true if the token matches a specific operator.
- inline bool is_keyword(const Token *t, KeywordType type) const
- {
- return t->type == TokenType::Keyword && t->keyword_index == type;
- }
-
- /// Return true if the token matches a specific operator.
- inline bool is_operator(const ExpressionComponent &t, OperatorType type) const
- {
- return t.type == ExpressionComponentType::Operator && t.operator_type == type;
- }
-
- /// Checks if the tokens following are a local or global symbol and a colon.
- /// The token stream is left untouched after the call so this is a non-destructive peek.
- /// Whitespaces must have been skipped prior to this call.
- bool label_definition_follows(const Token *t);
-
- /// Checks if the tokens following are a variable declaration and 'in'.
- /// The token stream is left untouched after the call so this is a non-destructive peek.
- /// The 'for' keyword and a left parenthesis must have been parsed already.
- /// Whitespaces must have been skipped prior to this call.
- bool range_for_loop_follows(const Token *t);
-
- #if SUPPORTS(M6502)
- /// Try to parse as much as needed to determine all possible addressing modes.
- /// @return A mask with possible addressing modes.
- uint16_t try_parse_addressing_mode(const Token *t);
-
- /// Output add addressing modes in the mask to the output stream.
- static void print_addressing_modes(std::stringstream &ss, uint16_t addressing_mode_mask);
- #endif
-
- #if SUPPORTS(Z80)
- /// Parse as much as needed to determine one argument type.
- InstructionArgumentType try_parse_addressing_mode(const Token *&t, InstructionType instruction, bool &has_label, bool &global_label, SourceLocation &label_location, uint64_t &label_hash);
-
- /// Try to parse as much as needed to determine the argument types.
- /// The token stream is left untouched after the call.
- /// @return A tuple with first and second argument types.
- AddressingModeArguments try_parse_addressing_modes(const Token *t, InstructionType instruction, bool has_label[2], bool global_label[2], SourceLocation label_location[2], uint64_t label_hash[2]);
-
- /// Parse and output the instruction argument.
- const Token *parse_and_output_instruction_argument(const Token *t, InstructionArgumentType arg, bool argument_has_label);
- #endif
-
- /// Parse a type reference and fill in the information about it in @a reference.
- const Token *parse_type_reference(const Token *t, TypeReference &reference, bool array_size_is_required);
-
- OptimizerType string_to_optimizer_type(const StringToken *t);
-
- const Token *parse_align(const Token *t);
- const Token *parse_instruction(const Token *t, InstructionType instruction);
- const Token *parse_using_statement(const Token *t);
- const Token *parse_section_mapping(const Token *t);
- const Token *parse_section_part(const Token *t, uint64_t section_name_hash, const SourceLocation &source_location);
- const Token *parse_section_statement(const Token *t);
- const Token *parse_optimizer_statement(const Token *t);
- const Token *parse_if_statement(const Token *t);
- const Token *parse_macro_statement(const Token *t);
- const Token *parse_return_statement(const Token *t);
- const Token *parse_subroutine_statement(const Token *t);
- const Token *parse_struct_definition(const Token *t);
- const Token *parse_define_statement(const Token *t);
- const Token *parse_reserve_statement(const Token *t);
- const Token *parse_enum_definition(const Token *t);
- const Token *parse_range_for_statement(const Token *t);
- const Token *parse_for_statement(const Token *t);
- const Token *parse_repeat_statement(const Token *t);
- const Token *parse_incbin(const Token *t);
- const Token *parse_namespace(const Token *t);
- const Token *parse_module(const Token *t);
- const Token *parse_export(const Token *t);
- #if defined(_DEBUG)
- const Token *parse_debug(const Token *t);
- #endif
-
- /// Parse a global symbol without namespaces and set @a symbol to its hash.
- /// @param symbol Set to the symbol hash.
- /// @return The token after the symbol name;
- const Token *parse_global_symbol_definition(const Token *t, uint64_t &symbol);
-
- /// Parse a local or global symbol without namespace and updates @a global to true or false.
- /// @param global Set to true if the symbol was a local symbol.
- /// @param symbol Set to the symbol hash.
- /// @return The token after the symbol name;
- const Token *parse_symbol_definition(const Token *t, bool &global, uint64_t &symbol);
-
- /// Parse a local symbol without namespace or a global symbol with optional namespace.
- /// @a _temp_hash_array is filled with the local symbol or namespace and global symbol hashes.
- /// @return The token after the symbol.
- const Token *parse_symbol_reference(const Token *t, bool use_auto_symbols, bool &global);
-
- /// Parse an expected operator.
- const Token *parse_operator(const Token *t, OperatorType expected_operator);
-
- /// Parse an expected keyword.
- const Token *parse_keyword(const Token *t, KeywordType expected_keyword);
-
- /// Parse a keyword statement.
- const Token *parse_keyword(const Token *t);
-
- /// Prepare the operator stack for a new operator.
- /// This involves moving operators with lower precedence into the expression array.
- void prepare_operator_stack_for_operator(OperatorType type);
-
- /// Parse an expected value or unary operator (beginning or after binary operator).
- const Token *parse_value_in_expression(const Token *token, bool &next_is_value, bool end_at_newline);
-
- /// @return True if a call operator, left parenthesis or left bracket was found and false if the stack became empty.
- bool pop_operators_until_left_bracket_or_parenthesis(OperatorType &type_found);
-
- /// Parse an expected binary operator after a value or assume the expression is over if
- /// an operator isn't found.
- const Token *parse_after_value_in_expression(const Token *token, bool &end_of_expression, bool &next_is_operator, bool end_at_unmatched_right_parenthesis, bool end_at_newline);
-
- /// Apply the operator on the top of _operator_stack on the nodes in _expression_stack and moves the operator to the expression stack.
- /// This basically creates a subtree with the operator on top and the nodes as branches.
- void apply_operator_on_expression_stack();
-
- /// Copy the tree in @a _component_storage with the root in index @a storage_index to the end of @a _expression_output.
- void copy_component_tree(size_t storage_index);
-
- /// Parse expression and put components in @a _component_storage with the trunk being the index of the single entry
- /// in @a _expression_stack.
- /// @param end_at_unmatched_right_parenthesis If true, the parsing will end on an unmatched right parenthesis.
- /// This will make it possible to parse parts of an expression. If false, an exception will be thrown.
- /// @param end_at_newline If true, the parsing will end on a newline. This is used in instruction parsing.
- const Token *parse_expression_unordered(const Token *token, bool end_at_unmatched_right_parenthesis, bool end_at_newline);
-
- /// Parse expression and fill @a _expression_output with the components as a flat tree structure.
- /// @param end_at_unmatched_right_parenthesis If true, the parsing will end on an unmatched right parenthesis.
- /// This will make it possible to parse parts of an expression. If false, an exception will be thrown.
- /// @param end_at_newline If true, the parsing will end on a newline. This is used in instruction parsing.
- const Token *parse_expression(const Token *t, bool end_at_unmatched_right_parenthesis, bool end_at_newline);
-
- /// Output an expression using the @a components argument.
- /// Start is the token where the expression starts to be able to store
- /// the location in the expression token.
- void output_expression(const Token *start, const std::vector<ExpressionComponent> &components);
-
- /// Parse expression and store the result as a symbol token in the output.
- /// @param end_at_unmatched_right_parenthesis If true, the parsing will end on an unmatched right parenthesis.
- /// This will make it possible to parse parts of an expression. If false, an exception will be thrown.
- /// @param end_at_newline If true, the parsing will end on a newline. This is used in instruction parsing.
- const Token *parse_and_output_expression(const Token *t, bool end_at_unmatched_right_parenthesis, bool end_at_newline);
-
- /// Parse expression that is expected to be enclosed in parenthesis and store the result as a symbol token in the output.
- const Token *parse_and_output_expression_within_parenthesis(const Token *t);
-
- /// Parse name declaration.
- const Token *parse_name_declaration(const Token *t);
-
- /// Parse assignment after const or var keyword.
- /// @param t Token after the const or var keyword.
- /// @param result_token Optional pointer to pointer that will be updated with a pointer to the created token.
- const Token *parse_variable_definition(const Token *t, StorageType storage_type, const SymbolDefinitionToken **result_token = nullptr);
-
- /// Parse a label definition.
- /// @param symbol The symbol after skipping initial operator for local symbols.
- /// @param global True for a global symbol and false for a local symbol.
- const Token *parse_label_definition(const SymbolToken *symbol, bool global);
-
- /// Parse a label with a colon or a statement.
- const Token *parse_label_or_statement(const Token *t);
-
- // Call these to register use of scope variables and have them be generated in the assemble pass.
- void mark_use_of_loop_variable();
- void mark_use_of_continue_variable();
-
- // input from tokenizer
- std::vector<TokenChain> &_output;
- const StringRepository &_strings;
- HashArrayRepository &_hash_arrays;
- const std::vector<std::string> &_source_files;
- DataReader &_data_reader;
-
- // syntax storage
- TokenReader _input_reader; ///< Keeps the read state from the _input.
- TokenChain *_output_chain; ///< Current token chain to output to. Each function and macro has its own chain for easy lookup.
- size_t _output_chain_index; ///< Index of the current output chain in the @a _output vector. This is used to lookup a new pointer when the number of chains has increased and possibly been reallocated elsewhere.
- std::vector<uint64_t> _temp_hash_array; ///< Temporary namespace storage while parsing.
- std::vector<DefineGroupToken *> _temp_pointer_array; ///< Temporary array of pointers to track group sizes.
- std::vector<uint32_t> _operator_stack; ///< Temporary expression operator stack for expression evaluations. This stores indices into _component_storage.
- std::vector<uint32_t> _expression_stack; ///< Temporary expression node output for expression evaluations. This stores indices into _component_storage.
- std::vector<ExpressionComponent> _component_storage; ///< Temporary storage for values and operators used in the syntax tree.
- std::vector<ExpressionComponent> _expression_output; ///< The output from the expression parser. This is a syntax tree. With the first element as root.
- OperatorInfo _operator_info[static_cast<int>(OperatorType::NumOperatorFunctions)]; ///< Operator precendence for each type of operator in OperatorType.
- std::vector<ScopeBeginToken *> _scope_stack; ///< Stack of entered scopes. This is used to mark scopes as using loop and continue variables.
- SyntaxState _state;
-
- // options
- static const size_t max_addressing_mode_printout_lines = 11; ///< Longer lists than this will not be printed at all.
-};
-
-ChainIndexScope::ChainIndexScope(SyntaxParser &parser)
- : _parser(parser)
-{
- _chain_index = _parser._output_chain_index;
-}
-
-ChainIndexScope::~ChainIndexScope()
-{
- // restore the previous chain
- _parser._output_chain_index = _chain_index;
- // the vector may have reallocated its contents so the current chain pointer needs to be refetched
- _parser._output_chain = &_parser._output[_parser._output_chain_index];
-}
-
-SyntaxParser::SyntaxParser(const TokenChain &input, std::vector<TokenChain> &output, const StringRepository &strings, HashArrayRepository &hash_arrays, const std::vector<std::string> &source_files, DataReader &data_reader)
+/// @}
+
+SyntaxParser::SyntaxParser(
+ const TokenChain &input,
+ std::vector<std::unique_ptr<TokenChain>> &output,
+ const ProcessorCatalogue &catalogue,
+ ProcessorType default_processor,
+ const StringRepository &strings,
+ HashArrayRepository &hash_arrays,
+ const std::vector<std::string> &source_files
+)
: _output(output)
+ , _catalogue(catalogue)
, _strings(strings)
, _hash_arrays(hash_arrays)
, _source_files(source_files)
- , _data_reader(data_reader)
+ , _processor(nullptr)
, _input_reader(input)
{
_temp_hash_array.reserve(16);
@@ 422,10 58,13 @@ SyntaxParser::SyntaxParser(const TokenCh
setup_operator_precedence();
+ _processor = _catalogue.processor(default_processor);
+ _processor_stack.push_back(_processor);
+
// setup current chain pointer and index
_output_chain_index = _output.size();
- _output.push_back(TokenChain(TokenChainMainBufferSize));
- _output_chain = &_output.back();
+ _output.push_back(std::make_unique<TokenChain>(TokenChainMainBufferSize));
+ _output_chain = _output.back().get();
}
void SyntaxParser::setup_operator_precedence()
@@ 492,7 131,7 @@ const Token *SyntaxParser::parse_global_
t = skip_whitespaces(t);
if (t->type != TokenType::Symbol || is_instruction(*t)) {
std::stringstream ss;
- ss << "Expected symbol name but got " << *t;
+ ss << "Expected symbol name but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedSymbolAfterNamespaceKeyword, ss.str());
}
symbol = static_cast<const SymbolToken *>(t)->symbol_hash;
@@ 522,7 161,7 @@ const Token *SyntaxParser::parse_symbol_
bool is_symbol = t->type == TokenType::Symbol && !(global && is_instruction(*t));
if (!is_symbol) {
std::stringstream ss;
- ss << "Expected symbol name but got " << *t;
+ ss << "Expected symbol name but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedLocalOrGlobalSymbol, ss.str());
}
@@ 553,7 192,7 @@ const Token *SyntaxParser::parse_symbol_
if (t->type != TokenType::Symbol) {
std::stringstream ss;
- ss << "Expected local symbol name but got " << *t;
+ ss << "Expected local symbol name but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedLocalOrGlobalSymbol, ss.str());
}
@@ 577,7 216,7 @@ const Token *SyntaxParser::parse_symbol_
if (t->type != TokenType::Symbol) {
std::stringstream ss;
- ss << "Expected namespace or symbol after " << to_string(OperatorType::Namespace) << " but got " << *t;
+ ss << "Expected namespace or symbol after " << to_string(OperatorType::Namespace) << " but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedSymbolOrNamespaceAfterColon, ss.str());
}
}
@@ 595,14 234,14 @@ const Token *SyntaxParser::parse_symbol_
t = consume_next_token();
if (t->type != TokenType::Symbol || is_instruction(*t)) {
std::stringstream ss;
- ss << "Expected namespace or symbol after " << to_string(OperatorType::Namespace) << " but got " << *t;
+ ss << "Expected namespace or symbol after " << to_string(OperatorType::Namespace) << " but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedSymbolOrNamespaceAfterColon, ss.str());
}
// we got a symbol so we loop and see what follows
}
} else {
std::stringstream ss;
- ss << "Expected local or global symbol but got " << *t;
+ ss << "Expected local or global symbol but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedLocalOrGlobalSymbol, ss.str());
}
@@ 617,7 256,7 @@ const Token *SyntaxParser::parse_operato
return consume_next_token();
std::stringstream ss;
- ss << "Expected operator " << to_string(expected_operator) << " but got " << *t;
+ ss << "Expected operator " << to_string(expected_operator) << " but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedOperator, ss.str());
}
@@ 629,7 268,7 @@ const Token *SyntaxParser::parse_keyword
return consume_next_token();
std::stringstream ss;
- ss << "Expected keyword " << to_string(expected_keyword) << " but got " << *t;
+ ss << "Expected keyword " << to_string(expected_keyword) << " but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedKeyword, ss.str());
}
@@ 726,6 365,26 @@ const Token *SyntaxParser::parse_export(
return t;
}
+/// Handle a processor token: emit a Processor syntax token and switch the active processor.
+/// A specific processor type is looked up in the catalogue and pushed on the processor stack.
+/// ProcessorType::Unspecified pops the stack and makes the previous processor active again.
+/// @param t The token carrying the processor type.
+/// @return The token returned by consume_next_token().
+const Token *SyntaxParser::parse_processor(const Token *t)
+{
+	const ProcessorType processor = t->processor;
+
+	// record the processor change in the output chain
+	SyntaxToken &token = _output_chain->reserve<SyntaxToken>();
+	token.type = SyntaxTokenType::Processor;
+	token.processor = processor;
+	token.size = sizeof(SyntaxToken);
+
+	if (processor == ProcessorType::Unspecified) {
+		// The constructor pushes the default processor as the bottom entry. It must never be
+		// popped, otherwise back() below would be called on an empty vector (undefined behavior).
+		assert(_processor_stack.size() > 1);
+		if (_processor_stack.size() > 1)
+			_processor_stack.pop_back();
+		_processor = _processor_stack.back();
+	} else {
+		_processor = _catalogue.processor(processor);
+		_processor_stack.push_back(_processor);
+	}
+
+	return consume_next_token();
+}
+
void SyntaxParser::prepare_operator_stack_for_operator(OperatorType type)
{
while (!_operator_stack.empty()) {
@@ 814,14 473,14 @@ const Token *SyntaxParser::parse_value_i
if (UNLIKELY(auto_symbol)) {
bool allowed = false;
uint64_t source_auto_symbols[] = {
- hash_constant(0xcfcf631033a8ce0bULL, "loop"),
- hash_constant(0xe3346b77faff7b33ULL, "continue"),
- hash_constant(0x27de6b5e0ecaf3bdULL, "i"),
+ core::hash_constant(0xcfcf631033a8ce0bULL, "loop"),
+ core::hash_constant(0xe3346b77faff7b33ULL, "continue"),
+ core::hash_constant(0x27de6b5e0ecaf3bdULL, "i"),
};
uint64_t target_auto_symbols[] = {
- hash_constant(0xdb831a5e32f85dcfULL, "@loop"),
- hash_constant(0x232e8dde60eefef3ULL, "@continue"),
- hash_constant(0x2d8619a103210bb8ULL, "@i"),
+ core::hash_constant(0xdb831a5e32f85dcfULL, "@loop"),
+ core::hash_constant(0x232e8dde60eefef3ULL, "@continue"),
+ core::hash_constant(0x2d8619a103210bb8ULL, "@i"),
};
// find and convert names to symbol name including the @ character
for(size_t i = 0; i < sizeof(source_auto_symbols)/sizeof(source_auto_symbols[0]); ++i) {
@@ 837,9 496,9 @@ const Token *SyntaxParser::parse_value_i
throw AssemblyException(_source_files, symbol_start->source_location, AssemblyErrorCodes::UnsupportedAutoLabel, ss.str());
}
- if (name == hash_constant(0xdb831a5e32f85dcfULL, "@loop")) {
+ if (name == core::hash_constant(0xdb831a5e32f85dcfULL, "@loop")) {
mark_use_of_loop_variable();
- } else if (name == hash_constant(0x232e8dde60eefef3ULL, "@continue")) {
+ } else if (name == core::hash_constant(0x232e8dde60eefef3ULL, "@continue")) {
mark_use_of_continue_variable();
}
}
@@ 926,7 585,7 @@ const Token *SyntaxParser::parse_value_i
} else {
std::stringstream ss;
- ss << "Expected expression value but got " << *token;
+ ss << "Expected expression value but got " << _processor->token_to_string(*token);
throw AssemblyException(_source_files, token->source_location, AssemblyErrorCodes::ExpectedExpressionValue, ss.str());
}
@@ 1367,675 1026,16 @@ bool SyntaxParser::range_for_loop_follow
}
// the 'in' word
- bool in_matches = t->type == TokenType::Symbol && static_cast<const SymbolToken *>(t)->symbol_hash == hash_constant(0x6844b03e1cbc50b0ULL, "in");
+ bool in_matches = t->type == TokenType::Symbol && static_cast<const SymbolToken *>(t)->symbol_hash == core::hash_constant(0x6844b03e1cbc50b0ULL, "in");
return in_matches;
}
-#if SUPPORTS(M6502)
-
-uint16_t SyntaxParser::try_parse_addressing_mode(const Token *t)
-{
- // save read state to restore later since we are reading ahead.
- TokenChainScope rewind_scope(_input_reader);
-
- if (is_operator(t, OperatorType::Hash)) {
- // Immediate
- return 1 << static_cast<int>(AddressingModeType::Immediate);
-
- }
- if (is_operator(t, OperatorType::LeftParenthesis)) {
- // (<something>
- // possibly indirect addressing mode but could also be an expression beginning with parenthesis
- // ZeroPageAddr
- // ZeroPageIndexX
- // ZeroPageIndexY
- // AbsoluteAddr
- // AbsoluteIndexX
- // AbsoluteIndexY
- // RelativeAddr
- // IndirectAddr
- // IndirectIndexX
- // IndirectIndexY
-
- // Try to parse an expression, ignoring the first parenthesis. That makes it possible to see
- // if there is a comma within the parenthesis. This is not optimal for performance because
- // it does more than it needs to for this.
- const Token *next;
- {
- constexpr bool end_at_unmatched_right_parenthesis = true;
- constexpr bool end_at_newline = true;
- next = parse_expression(consume_next_token(), end_at_unmatched_right_parenthesis, end_at_newline);
- }
- if (is_operator(next, OperatorType::Comma)) {
- // (<expression>,
- // IndirectIndexX
-
- // verify that "x" follows
- next = skip_spaces_and_tabs(consume_next_token());
- if (next->type != TokenType::ProcessorKeyword || next->processor_keyword_index != ProcessorKeywordType::X) {
- std::stringstream ss;
- ss << "Expected x for indirect addressing mode, but got " << *next;
- throw AssemblyException(_source_files, next->source_location, AssemblyErrorCodes::InvalidIndexRegisterInAddressingMode, ss.str());
- }
- // (<expression>,x
-
- // verify that right parenthesis follows
- next = skip_spaces_and_tabs(consume_next_token());
- if (next->type != TokenType::Operator || next->operator_index != OperatorType::RightParenthesis) {
- std::stringstream ss;
- ss << "Expected closing parenthesis in indirect addressing mode, but got " << *next;
- throw AssemblyException(_source_files, next->source_location, AssemblyErrorCodes::ExpectedEndingParenthesisInIndirectAddressingMode, ss.str());
- }
- // (<expression>,x)
-
- return 1 << static_cast<int>(AddressingModeType::IndirectIndexX);
-
- }
-
- if (is_operator(next, OperatorType::RightParenthesis)) {
- // (<expression>)
- // ZeroPageAddr
- // ZeroPageIndexX
- // ZeroPageIndexY
- // AbsoluteAddr
- // AbsoluteIndexX
- // AbsoluteIndexY
- // RelativeAddr
- // IndirectAddr
- // IndirectIndexY
- next = skip_spaces_and_tabs(consume_next_token());
-
- if (next->type == TokenType::Operator && next->operator_index < OperatorType::NumOperatorFunctions) {
- // (<expression>)<operator>
-
- // go back and parse the whole thing to get to the end of the expression and check if it ends
- // with ,x or ,y. This is not optimal from a performance perspective. I could write specialized
- // code to skip an expression.
- rewind_scope.rewind();
- {
- constexpr bool end_at_unmatched_right_parenthesis = false;
- constexpr bool end_at_newline = true;
- next = parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
- }
-
- if (is_operator(next, OperatorType::Comma)) {
- // <expression>,
- // ZeroPageIndexX
- // AbsoluteIndexX
- // ZeroPageIndexY
- // AbsoluteIndexY
- next = skip_spaces_and_tabs(consume_next_token());
- if (next->type == TokenType::ProcessorKeyword && next->processor_keyword_index == ProcessorKeywordType::X) {
- // <expression>,x
- // ZeroPageIndexX
- // AbsoluteIndexX
- return 1 << static_cast<int>(AddressingModeType::ZeroPageIndexX) |
- 1 << static_cast<int>(AddressingModeType::AbsoluteIndexX);
- }
- if (next->type == TokenType::ProcessorKeyword && next->processor_keyword_index == ProcessorKeywordType::Y) {
- // <expression>,y
- // ZeroPageIndexY
- // AbsoluteIndexY
- return 1 << static_cast<int>(AddressingModeType::ZeroPageIndexY) |
- 1 << static_cast<int>(AddressingModeType::AbsoluteIndexY);
- }
- // the index register is invalid
- std::stringstream ss;
- ss << "Invalid index register in addressing mode. Expected x or y but got " << *next;
- throw AssemblyException(_source_files, next->source_location, AssemblyErrorCodes::InvalidIndexRegisterInAddressingMode, ss.str());
- }
- // (<expression>)<operator><expression>
- // ZeroPageAddr
- // AbsoluteAddr
- // RelativeAddr
- return 1 << static_cast<int>(AddressingModeType::ZeroPageAddr) |
- 1 << static_cast<int>(AddressingModeType::AbsoluteAddr) |
- 1 << static_cast<int>(AddressingModeType::RelativeAddr);
- }
- if (is_operator(next, OperatorType::Comma)) {
- // (<expression>),
- next = skip_spaces_and_tabs(consume_next_token());
- if (next->type == TokenType::ProcessorKeyword && next->processor_keyword_index == ProcessorKeywordType::Y) {
- // (<expression>),y
- // IndirectIndexY
- return 1 << static_cast<int>(AddressingModeType::IndirectIndexY);
- }
- // the index register is invalid
- std::stringstream ss;
- ss << "Invalid index register in addressing mode. Expected y but got " << *next;
- throw AssemblyException(_source_files, next->source_location, AssemblyErrorCodes::InvalidIndexRegisterInAddressingMode, ss.str());
- }
- // (<expression>)
- // IndirectAddr
- return 1 << static_cast<int>(AddressingModeType::IndirectAddr);
-
- }
-
- // no matching parenthesis was found
- std::stringstream ss;
- ss << "Unmatched left parenthesis in expression";
- throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::UnmatchedLeftParenthesis, ss.str());
-
- }
-
- if (t->type == TokenType::Newline || is_operator(t, OperatorType::Semicolon)) {
- // Implied
- return 1 << static_cast<int>(AddressingModeType::Implied);
-
- }
-
- // ZeroPageAddr
- // ZeroPageIndexX
- // ZeroPageIndexY
- // AbsoluteAddr
- // AbsoluteIndexX
- // AbsoluteIndexY
- // RelativeAddr
- const Token *next;
- {
- constexpr bool end_at_unmatched_right_parenthesis = false;
- constexpr bool end_at_newline = true;
- next = parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
- }
-
- if (is_operator(next, OperatorType::Comma)) {
- // <expression>,
- // ZeroPageIndexX
- // AbsoluteIndexX
- // ZeroPageIndexY
- // AbsoluteIndexY
- next = skip_spaces_and_tabs(consume_next_token());
- if (next->type == TokenType::ProcessorKeyword && next->processor_keyword_index == ProcessorKeywordType::X) {
- // <expression>,x
- // ZeroPageIndexX
- // AbsoluteIndexX
- return 1 << static_cast<int>(AddressingModeType::ZeroPageIndexX) |
- 1 << static_cast<int>(AddressingModeType::AbsoluteIndexX);
- }
- if (next->type == TokenType::ProcessorKeyword && next->processor_keyword_index == ProcessorKeywordType::Y) {
- // <expression>,y
- // ZeroPageIndexY
- // AbsoluteIndexY
- return 1 << static_cast<int>(AddressingModeType::ZeroPageIndexY) |
- 1 << static_cast<int>(AddressingModeType::AbsoluteIndexY);
- }
- // the index register is invalid
- std::stringstream ss;
- ss << "Invalid index register in addressing mode. Expected x or y but got " << *next;
- throw AssemblyException(_source_files, next->source_location, AssemblyErrorCodes::InvalidIndexRegisterInAddressingMode, ss.str());
- }
- // <expression>
- // ZeroPageAddr
- // AbsoluteAddr
- // RelativeAddr
- return 1 << static_cast<int>(AddressingModeType::ZeroPageAddr) |
- 1 << static_cast<int>(AddressingModeType::AbsoluteAddr) |
- 1 << static_cast<int>(AddressingModeType::RelativeAddr);
-}
-
-void SyntaxParser::print_addressing_modes(std::stringstream &ss, uint16_t addressing_mode_mask)
-{
- for (int i = 0; i < static_cast<int>(AddressingModeType::NumAddressingModes); ++i) {
- if (((1 << i) & addressing_mode_mask) != 0)
- ss << "\n " << to_string(static_cast<AddressingModeType>(i));
- }
-}
-
-const Token *SyntaxParser::parse_instruction(const Token *t, InstructionType instruction)
-{
- const Token *begin_token = t;
-
- t = consume_next_token(); // instruction token
- t = skip_spaces_and_tabs(t);
-
- // check for optional label to define at the instruction argument address
- bool has_label_definition = false;
- bool global_label = false;
- uint64_t symbol_hash_label = 0;
- const Token *label_token = t;
- if (label_definition_follows(t)) {
- has_label_definition = true;
-
- t = parse_symbol_definition(t, global_label, symbol_hash_label);
- t = consume_next_token(); // the colon
- t = skip_spaces_and_tabs(t);
- }
-
- // determine possible used addressing modes and compare with existing
- uint16_t parsed_addressing_modes = try_parse_addressing_mode(t);
- uint16_t possible_addressing_modes = addressing_modes(instruction);
- uint16_t selected_addressing_modes = parsed_addressing_modes & possible_addressing_modes;
- if (selected_addressing_modes == 0) {
- std::stringstream ss;
- ss << "Invalid addressing mode used. Code indicates one of the following:";
- print_addressing_modes(ss, parsed_addressing_modes);
- ss << "\nbut possible addressing modes for " << to_string(instruction) << " are:";
- print_addressing_modes(ss, possible_addressing_modes);
- throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::InvalidAddressingMode, ss.str());
- }
-
- // store the instruction with addressing mode mask in the output
- InstructionToken &instruction_token = _output_chain->reserve<InstructionToken>();
- instruction_token.type = SyntaxTokenType::Instruction;
- instruction_token.size = sizeof(InstructionToken);
- instruction_token.instruction = instruction;
- instruction_token.addressing_modes = selected_addressing_modes;
- instruction_token.source_location = begin_token->source_location;
- instruction_token.has_instruction_data_label = has_label_definition;
- instruction_token.global_data_label = global_label;
- instruction_token.data_label_symbol_hash = symbol_hash_label;
- instruction_token.address_label_location = label_token->source_location;
-
- // now we should be able to parse the operand of the instruction
- if (selected_addressing_modes == Imp) {
- if (has_label_definition) {
- std::stringstream ss;
- ss << "Implied addressing modes cannot have label to instruction data. Add a newline or a semicolon before the label to resolve this.";
- throw AssemblyException(_source_files, label_token->source_location, AssemblyErrorCodes::AddressingModeCannotHaveDataLabel, ss.str());
- }
- return t;
- }
- if (selected_addressing_modes == Imm) {
- assert(is_operator(t, OperatorType::Hash));
- constexpr bool end_at_unmatched_parenthesis = false;
- constexpr bool end_at_newline = true;
- return parse_and_output_expression(consume_next_token(), end_at_unmatched_parenthesis, end_at_newline);
- }
- if ((selected_addressing_modes & (Ind | IndX | IndY)) != 0) {
- // an indirect mode
- if ((selected_addressing_modes & IndX) != 0) {
- // skip parenthesis, parse address expression, skip comma and x
- assert(is_operator(t, OperatorType::LeftParenthesis));
- constexpr bool end_at_unmatched_parenthesis = false;
- constexpr bool end_at_newline = true;
- t = parse_and_output_expression(consume_next_token(), end_at_unmatched_parenthesis, end_at_newline);
- assert(is_operator(t, OperatorType::Comma));
- t = skip_spaces_and_tabs(consume_next_token());
- assert(t->type == TokenType::ProcessorKeyword && t->processor_keyword_index == ProcessorKeywordType::X);
- t = skip_spaces_and_tabs(consume_next_token());
- assert(is_operator(t, OperatorType::RightParenthesis));
- return consume_next_token();
- }
- // parse address expression
- constexpr bool end_at_unmatched_parenthesis = false;
- constexpr bool end_at_newline = true;
- t = parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
-
- if ((selected_addressing_modes & IndY) != 0) {
- // skip comma and y
- assert(is_operator(t, OperatorType::Comma));
- t = skip_spaces_and_tabs(consume_next_token());
- assert(t->type == TokenType::ProcessorKeyword && t->processor_keyword_index == ProcessorKeywordType::Y);
- t = skip_spaces_and_tabs(consume_next_token());
- }
- return t;
- }
-
- // parse address expression
- constexpr bool end_at_unmatched_parenthesis = false;
- constexpr bool end_at_newline = true;
- t = parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
-
- if ((selected_addressing_modes & (Zpx | AbsX)) != 0) {
- // skip comma and x
- assert(is_operator(t, OperatorType::Comma));
- t = skip_spaces_and_tabs(consume_next_token());
- assert(t->type == TokenType::ProcessorKeyword && t->processor_keyword_index == ProcessorKeywordType::X);
- t = skip_spaces_and_tabs(consume_next_token());
- }
- if ((selected_addressing_modes & (Zpy | AbsY)) != 0) {
- // skip comma and y
- assert(is_operator(t, OperatorType::Comma));
- t = skip_spaces_and_tabs(consume_next_token());
- assert(t->type == TokenType::ProcessorKeyword && t->processor_keyword_index == ProcessorKeywordType::Y);
- t = skip_spaces_and_tabs(consume_next_token());
- }
-
- return t;
-}
-
-#endif // SUPPORTS(M6502)
-
-#if SUPPORTS(Z80)
-
-InstructionArgumentType SyntaxParser::try_parse_addressing_mode(const Token *&t, InstructionType instruction, bool &has_label, bool &global_label, SourceLocation &label_location, uint64_t &label_hash)
-{
- InstructionArgumentType a = InstructionArgumentType::None;
-
- t = skip_spaces_and_tabs(t);
-
- // check for optional label to define at the instruction argument address
- has_label = false;
- global_label = false;
- label_hash = 0;
- if (UNLIKELY(label_definition_follows(t))) {
- has_label = true;
- label_location = t->source_location;
- t = parse_symbol_definition(t, global_label, label_hash);
- t = consume_next_token(); // the colon
- t = skip_spaces_and_tabs(t);
- }
-
- // save read state to restore later since we are reading ahead.
- auto start_position = _input_reader.position_value();
- const Token *start_token = t;
-
- if (t->type == TokenType::ProcessorKeyword) {
- // can only be that keyword and nothing else
- a = keyword_to_instruction_argument(instruction, t->processor_keyword_index);
- t = consume_next_token();
- } else if (is_operator(t, OperatorType::LeftParenthesis)) {
- // could be
- // (<register>)
- // (<register>+/-<expression>)
- // (<expression>)
- // (<expression>)<operator><expression>
- t = consume_next_token(); // left parenthesis
- t = skip_spaces_and_tabs(t);
- if (t->type == TokenType::ProcessorKeyword) {
- ProcessorKeywordType keyword = t->processor_keyword_index;
- if (is_valid_indirect_keyword(keyword)) {
- // could be
- // (BC), (DE), (HL), (SP), (C), (IX), (IY), (IX+d), (IY+d)
- t = consume_next_token(); // register
- if (can_have_indexed_offset(keyword)) {
- // could be
- // (IX), (IY), (IX+d), (IY+d)
- t = skip_spaces_and_tabs(t);
- if (is_operator(t, OperatorType::RightParenthesis)) {
- // could be
- // (IX), (IY)
- a = keyword_to_indirect_instruction_argument(keyword);
-
- } else if (is_operator(t, OperatorType::Plus) || is_operator(t, OperatorType::Minus)) {
- // could be
- // (IX+d), (IY+d)
- a = keyword_to_indexed_instruction_argument(keyword);
-
- // skip past the index expression
- constexpr bool end_at_unmatched_right_parenthesis = true;
- constexpr bool end_at_newline = true;
- t = parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
- }
- } else {
- // could be
- // (BC), (DE), (HL), (SP), (C)
- a = keyword_to_indirect_instruction_argument(keyword);
- }
- } else {
- // invalid register or keyword
- std::stringstream ss;
- ss << to_string(keyword) << " cannot be used for indirect addessing.";
- throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::KeywordCannotBeUsedForIndirectAddressing, ss.str());
- }
- t = skip_spaces_and_tabs(t);
- t = parse_operator(t, OperatorType::RightParenthesis);
-
- } else {
- // could be
- // (<expression>)
- // (<expression>)<operator><expression>
-
- {
- constexpr bool end_at_unmatched_right_parenthesis = true;
- constexpr bool end_at_newline = true;
- t = parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
- }
-
- t = parse_operator(t, OperatorType::RightParenthesis);
- t = skip_spaces_and_tabs(t);
-
- if (t->type == TokenType::Operator && t->operator_index < OperatorType::NumOperatorFunctions) {
- // could be
- // (<expression>)<operator><expression>
- a = InstructionArgumentType::Number;
-
- // Rewind and reparse the expression fully, otherwise we may not get calls or indexing right
- // if first part is generating an object that is processed in the later part.
- t = start_token;
- _input_reader.set_position_value(start_position);
-
- constexpr bool end_at_unmatched_right_parenthesis = false;
- constexpr bool end_at_newline = true;
- t = parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
-
- } else {
- // could be
- // (<expression>)
- a = InstructionArgumentType::IndirectAddress;
- }
-
- }
- } else if (t->type == TokenType::Newline) {
- // none
- } else {
- // could be
- // <expression>
- a = InstructionArgumentType::Number;
-
- constexpr bool end_at_unmatched_right_parenthesis = false;
- constexpr bool end_at_newline = true;
- t = parse_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
- }
-
- t = skip_spaces_and_tabs(t);
-
- return a;
-}
-
-AddressingModeArguments SyntaxParser::try_parse_addressing_modes(const Token *t, InstructionType instruction, bool has_label[2], bool global_label[2], SourceLocation label_location[2], uint64_t label_hash[2])
-{
- // save read state to restore later since we are reading ahead.
- TokenChainScope rewind_scope(_input_reader);
-
- InstructionArgumentType arg1 = try_parse_addressing_mode(t, instruction, has_label[0], global_label[0], label_location[0], label_hash[0]);
- InstructionArgumentType arg2 = InstructionArgumentType::None;
- if (arg1 != InstructionArgumentType::None && is_operator(t, OperatorType::Comma)) {
- t = consume_next_token(); // the comma
- arg2 = try_parse_addressing_mode(t, instruction, has_label[1], global_label[1], label_location[1], label_hash[1]);
- }
- return AddressingModeArguments{arg1, arg2};
-}
-
-bool find_addressing_mode(AddressingModeArguments args, const core::StaticArray<AddressingMode> &available, size_t &index)
-{
- auto it = std::find_if(available.begin(), available.end(), [args](auto mode){ return args == mode.simplified; });
- if (it == available.end()) {
- index = 0;
- return false;
- }
- index = static_cast<size_t>(it - available.begin());
- return true;
-}
-
-void print_addressing_mode(std::stringstream &ss, InstructionType instruction, AddressingModeArguments args)
-{
- ss << to_string(instruction);
- if (args.arg1 != InstructionArgumentType::None) {
- ss << ' ' << to_string(args.arg1);
- if (args.arg2 != InstructionArgumentType::None) {
- ss << ", " << to_string(args.arg2);
- }
- }
- ss << '\n';
-}
-
-const Token *SyntaxParser::parse_and_output_instruction_argument(const Token *t, InstructionArgumentType arg, bool argument_has_label)
-{
- if (UNLIKELY(argument_has_label)) {
- // just skip the label definition
- bool global = false;
- uint64_t symbol = 0;
- t = parse_symbol_definition(t, global, symbol);
- t = parse_operator(t, OperatorType::Colon);
- }
-
- switch (arg)
- {
- case InstructionArgumentType::None:
- break;
-
- case InstructionArgumentType::Number: // this is the generic "don't know the type yet"
- case InstructionArgumentType::ByteValue:
- case InstructionArgumentType::WordValue:
- case InstructionArgumentType::RelativeAddress:
- case InstructionArgumentType::Bit:
- case InstructionArgumentType::PageZeroAddressing:
- case InstructionArgumentType::InterruptNumber:
- {
- constexpr bool end_at_unmatched_parenthesis = false;
- constexpr bool end_at_newline = true;
- t = parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
-
- break;
- }
-
- case InstructionArgumentType::IndirectAddress:
- case InstructionArgumentType::IndirectByteValue:
- case InstructionArgumentType::IndirectWordValue:
- {
- t = skip_spaces_and_tabs(t);
- t = parse_operator(t, OperatorType::LeftParenthesis);
-
- constexpr bool end_at_unmatched_parenthesis = true;
- constexpr bool end_at_newline = true;
- t = parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
-
- t = parse_operator(t, OperatorType::RightParenthesis);
- break;
- }
-
- case InstructionArgumentType::IndirectRegisterC:
- case InstructionArgumentType::IndirectRegisterBC:
- case InstructionArgumentType::IndirectRegisterDE:
- case InstructionArgumentType::IndirectRegisterHL:
- case InstructionArgumentType::IndirectRegisterIX:
- case InstructionArgumentType::IndirectRegisterIY:
- case InstructionArgumentType::IndirectRegisterSP:
- {
- t = skip_spaces_and_tabs(t);
- t = parse_operator(t, OperatorType::LeftParenthesis);
-
- t = skip_spaces_and_tabs(t);
- assert(t->type == TokenType::ProcessorKeyword);
- t = consume_next_token(); // processor keyword
-
- t = skip_spaces_and_tabs(t);
- t = parse_operator(t, OperatorType::RightParenthesis);
- break;
- }
-
- case InstructionArgumentType::IndirectIndexedRegisterIX:
- case InstructionArgumentType::IndirectIndexedRegisterIY:
- {
- t = skip_spaces_and_tabs(t);
- t = parse_operator(t, OperatorType::LeftParenthesis);
-
- t = skip_spaces_and_tabs(t);
- assert(t->type == TokenType::ProcessorKeyword);
- t = consume_next_token(); // processor keyword
-
- constexpr bool end_at_unmatched_parenthesis = true;
- constexpr bool end_at_newline = true;
- t = parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
-
- t = parse_operator(t, OperatorType::RightParenthesis);
- break;
- }
-
- case InstructionArgumentType::RegisterA:
- case InstructionArgumentType::RegisterB:
- case InstructionArgumentType::RegisterC:
- case InstructionArgumentType::RegisterD:
- case InstructionArgumentType::RegisterE:
- case InstructionArgumentType::RegisterH:
- case InstructionArgumentType::RegisterL:
- case InstructionArgumentType::RegisterI:
- case InstructionArgumentType::RegisterR:
- case InstructionArgumentType::RegisterAF:
- case InstructionArgumentType::RegisterBC:
- case InstructionArgumentType::RegisterDE:
- case InstructionArgumentType::RegisterHL:
- case InstructionArgumentType::RegisterIX:
- case InstructionArgumentType::RegisterIY:
- case InstructionArgumentType::RegisterSP:
- case InstructionArgumentType::RegisterAFPrim:
- case InstructionArgumentType::ConditionC:
- case InstructionArgumentType::ConditionM:
- case InstructionArgumentType::ConditionNC:
- case InstructionArgumentType::ConditionNZ:
- case InstructionArgumentType::ConditionP:
- case InstructionArgumentType::ConditionPE:
- case InstructionArgumentType::ConditionPO:
- case InstructionArgumentType::ConditionZ:
- {
- t = skip_spaces_and_tabs(t);
- assert(t->type == TokenType::ProcessorKeyword);
- t = consume_next_token(); // processor keyword
- break;
- }
-
- case InstructionArgumentType::NumTypes:
- assert(false);
- throw AssemblyException("Internal error");
- };
- return t;
-}
-
-const Token *SyntaxParser::parse_instruction(const Token *t, InstructionType instruction)
-{
- const Token *begin_token = t;
-
- t = consume_next_token(); // instruction token
- t = skip_spaces_and_tabs(t);
-
- InstructionToken &instruction_token = _output_chain->reserve<InstructionToken>();
- instruction_token.type = SyntaxTokenType::Instruction;
- instruction_token.size = sizeof(InstructionToken);
- instruction_token.instruction = instruction;
-
- // determine used addressing mode
- AddressingModeArguments addressing_mode = try_parse_addressing_modes(t, instruction, instruction_token.has_instruction_data_label, instruction_token.global_data_label, instruction_token.address_label_location, instruction_token.data_label_symbol_hash);
-
- // search through the array of possible addressing modes for the instruction
- const core::StaticArray<AddressingMode> &available_modes = addressing_modes(instruction);
- size_t found_index;
- if (UNLIKELY(!find_addressing_mode(addressing_mode, available_modes, found_index))) {
- std::stringstream ss;
- ss << "Invalid addressing mode used. Code indicates:\n";
- print_addressing_mode(ss, instruction, addressing_mode);
- if (available_modes.size() <= max_addressing_mode_printout_lines) {
- ss << "but possible addressing modes are:\n";
- for(auto mode : available_modes) {
- print_addressing_mode(ss, instruction, mode.detailed);
- }
- }
- throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::InvalidAddressingMode, ss.str());
- }
-
- // store the instruction with addressing mode mask in the output
- instruction_token.addressing_mode_index = static_cast<uint8_t>(found_index);
- instruction_token.source_location = begin_token->source_location;
-
- // now we need to parse the expressions required by the arguments
- t = parse_and_output_instruction_argument(t, addressing_mode.arg1, instruction_token.has_instruction_data_label[0]);
- if (addressing_mode.arg2 != InstructionArgumentType::None) {
- t = skip_spaces_and_tabs(t);
- t = parse_operator(t, OperatorType::Comma);
- t = parse_and_output_instruction_argument(t, addressing_mode.arg2, instruction_token.has_instruction_data_label[1]);
- }
-
- return t;
-}
-
-#endif // SUPPORTS
-
const Token *SyntaxParser::parse_using_statement(const Token *t)
{
t = skip_whitespaces(t);
if (t->type != TokenType::Keyword || t->keyword_index != KeywordType::Namespace) {
std::stringstream ss;
- ss << "Expected namespace keyword but got " << *t;
+ ss << "Expected namespace keyword but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedNamespaceKeyword, ss.str());
}
@@ 2046,7 1046,7 @@ const Token *SyntaxParser::parse_using_s
bool is_symbol = t->type == TokenType::Symbol && !is_instruction(*t);
if (!is_symbol && !is_operator(t, OperatorType::Namespace)) {
std::stringstream ss;
- ss << "Namespace expected in using statement, but got " << *t;
+ ss << "Namespace expected in using statement, but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, symbol_start->source_location, AssemblyErrorCodes::ExpectedNamespaceInUsingStatement, ss.str());
}
@@ 2165,18 1165,18 @@ const Token *SyntaxParser::parse_section
SectionType section_type = SectionType::None;
bool is_symbol = t->type == TokenType::Symbol;
bool section_part = false;
- if (is_symbol && static_cast<const SymbolToken *>(t)->symbol_hash == hash_constant(0xe8443b5d4326e23aULL, "code")) {
+ if (is_symbol && static_cast<const SymbolToken *>(t)->symbol_hash == core::hash_constant(0xe8443b5d4326e23aULL, "code")) {
section_type = SectionType::Code;
- } else if (is_symbol && static_cast<const SymbolToken *>(t)->symbol_hash == hash_constant(0x1a160cdedb256990ULL, "bss")) {
+ } else if (is_symbol && static_cast<const SymbolToken *>(t)->symbol_hash == core::hash_constant(0x1a160cdedb256990ULL, "bss")) {
section_type = SectionType::Bss;
- } else if (is_symbol && static_cast<const SymbolToken *>(t)->symbol_hash == hash_constant(0x839745e36a8ea16cULL, "part")) {
+ } else if (is_symbol && static_cast<const SymbolToken *>(t)->symbol_hash == core::hash_constant(0x839745e36a8ea16cULL, "part")) {
section_part = true;
- } else if (is_symbol && static_cast<const SymbolToken *>(t)->symbol_hash == hash_constant(0x4def7bac3187bbd3ULL, "mapping")) {
+ } else if (is_symbol && static_cast<const SymbolToken *>(t)->symbol_hash == core::hash_constant(0x4def7bac3187bbd3ULL, "mapping")) {
t = consume_next_token();
return parse_section_mapping(t);
} else {
std::stringstream ss;
- ss << "Expected code, bss, part or mapping keyword, but got " << *t;
+ ss << "Expected code, bss, part or mapping keyword, but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedCodeOrBssAfterSection, ss.str());
}
@@ 2187,13 1187,13 @@ const Token *SyntaxParser::parse_section
if (t->type != TokenType::String) {
std::stringstream ss;
- ss << "Expected section name as a quoted string, but got " << *t;
+ ss << "Expected section name as a quoted string, but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedSectionNameAsAQuotedString, ss.str());
}
uint64_t name_hash = static_cast<const StringToken *>(t)->value;
// detect empty section names
- if (name_hash == hash_constant(0, "")) {
+ if (name_hash == core::hash_constant(0, "")) {
std::stringstream ss;
ss << "Empty section name";
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::EmptySectionName, ss.str());
@@ 2245,10 1245,10 @@ const Token *SyntaxParser::parse_section
OptimizerType SyntaxParser::string_to_optimizer_type(const StringToken *t)
{
static uint64_t optimizer_strings[] = {
- hash_constant(0xa13435ad5787c1f6ULL, "zero compare"),
- hash_constant(0x768c31d8545cb539ULL, "tail recursion"),
- hash_constant(0x9f67cc59c2c44a7aULL, "double load"),
- hash_constant(0x8a155feb2a9d2ddeULL, "unused function"),
+ core::hash_constant(0xa13435ad5787c1f6ULL, "zero compare"),
+ core::hash_constant(0x768c31d8545cb539ULL, "tail recursion"),
+ core::hash_constant(0x9f67cc59c2c44a7aULL, "double load"),
+ core::hash_constant(0x8a155feb2a9d2ddeULL, "unused function"),
};
static_assert(sizeof(optimizer_strings) / sizeof(uint64_t) == static_cast<int>(OptimizerType::NumTypes), "Number of optimizer modes doesn't match number of strings");
@@ 2267,8 1267,8 @@ const Token *SyntaxParser::parse_optimiz
t = skip_whitespaces(t);
// check for complete on or off first
- uint64_t on_symbol = hash_constant(0x2dc975ff398a1ab9ULL, "on");
- uint64_t off_symbol = hash_constant(0x674313aceb22ee1dULL, "off");
+ uint64_t on_symbol = core::hash_constant(0x2dc975ff398a1ab9ULL, "on");
+ uint64_t off_symbol = core::hash_constant(0x674313aceb22ee1dULL, "off");
if (t->type == TokenType::Symbol) {
uint64_t symbol_hash = static_cast<const SymbolToken *>(t)->symbol_hash;
if (symbol_hash == on_symbol || symbol_hash == off_symbol) {
@@ 2285,7 1285,7 @@ const Token *SyntaxParser::parse_optimiz
// not global on or off, so we are looking for an optimizer string now
if (t->type != TokenType::String) {
std::stringstream ss;
- ss << "Expected optimizer string or on/off keyword but got " << *t;
+ ss << "Expected optimizer string or on/off keyword but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedOptimizerString, ss.str());
}
@@ 2304,7 1304,7 @@ const Token *SyntaxParser::parse_optimiz
optimizer_token.optimizer_type = optimizer_type;
} else {
std::stringstream ss;
- ss << "Expected on or off keyword but got " << *t;
+ ss << "Expected on or off keyword but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedOnOrOffOptimizerKeyword, ss.str());
}
@@ 2371,13 1371,13 @@ const Token *SyntaxParser::parse_macro_s
const Token *symbol_token = t;
if (t->type != TokenType::Operator || t->operator_index != OperatorType::Period) {
std::stringstream ss;
- ss << "Expected macro argument local symbol but got " << *t;
+ ss << "Expected macro argument local symbol but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedMacroArgumentLocalSymbol, ss.str());
}
t = consume_next_token();
if (t->type != TokenType::Symbol) {
std::stringstream ss;
- ss << "Expected macro argument local symbol but got a period and " << *t;
+ ss << "Expected macro argument local symbol but got a period and " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedMacroArgumentLocalSymbol, ss.str());
}
// Linear search amongst the other arguments.
@@ 2413,8 1413,8 @@ const Token *SyntaxParser::parse_macro_s
ChainIndexScope cis(*this); // make sure the chain index is restored afterwards
_output_chain_index = _output.size();
- _output.push_back(TokenChain(TokenChainMacroBufferSize));
- _output_chain = &_output[_output_chain_index];
+ _output.push_back(std::make_unique<TokenChain>(TokenChainMacroBufferSize));
+ _output_chain = _output[_output_chain_index].get();
MacroDefinitionToken &def_token = _output_chain->reserve<MacroDefinitionToken>();
def_token.type = SyntaxTokenType::MacroDef;
@@ 2525,7 1525,7 @@ const Token *SyntaxParser::parse_type_re
} else {
std::stringstream ss;
- ss << "Expected type but got " << *t;
+ ss << "Expected type but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedTypeInStruct, ss.str());
}
@@ 2590,7 1590,7 @@ const Token *SyntaxParser::parse_struct_
if (t->type != TokenType::Symbol) {
std::stringstream ss;
- ss << "Expected struct member name but got " << *t;
+ ss << "Expected struct member name but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedStructMemberName, ss.str());
}
@@ 2678,6 1678,15 @@ const Token *SyntaxParser::parse_define_
constexpr bool end_at_newline = false;
t = parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
+ define_token.end_source_location = t->source_location;
+ // attempt to adjust to avoid including next row because the next token is most
+ // probably on a new row
+ if (define_token.source_location.file_index == define_token.end_source_location.file_index &&
+ define_token.source_location.row < define_token.end_source_location.row)
+ {
+ --define_token.end_source_location.row;
+ }
+
// increase the current group size with this element
if (in_group)
++_temp_pointer_array.back()->num_elements;
@@ 2706,6 1715,9 @@ const Token *SyntaxParser::parse_define_
// flag a repeating pattern for this group since it ends with ellipsis
_temp_pointer_array.back()->repeated_pattern = true;
+ t = skip_whitespaces(t);
+ define_token.end_source_location = t->source_location;
+
// after an ellipsis the group must be ended
t = parse_operator(t, OperatorType::RightCurly);
_temp_pointer_array.pop_back();
@@ 2717,6 1729,9 @@ const Token *SyntaxParser::parse_define_
// no comma, so if the depth is non-zero we must get a closing bracket
if (!_temp_pointer_array.empty()) {
+ t = skip_whitespaces(t);
+ define_token.end_source_location = t->source_location;
+
t = parse_operator(t, OperatorType::RightCurly);
_temp_pointer_array.pop_back();
@@ 2898,7 1913,7 @@ const Token *SyntaxParser::parse_range_f
loop_begin_token.key_location.clear();
}
- assert(t->type == TokenType::Symbol && static_cast<const SymbolToken *>(t)->symbol_hash == hash_constant(0x6844b03e1cbc50b0ULL, "in"));
+ assert(t->type == TokenType::Symbol && static_cast<const SymbolToken *>(t)->symbol_hash == core::hash_constant(0x6844b03e1cbc50b0ULL, "in"));
t = consume_next_token(); // 'in'
constexpr bool end_at_unmatched_right_parenthesis = true;
@@ 3072,19 2087,16 @@ const Token *SyntaxParser::parse_incbin(
t = consume_next_token();
t = skip_whitespaces(t);
- // tokenizer already checked that a string follows
- assert(t->type == TokenType::String);
- const StringToken *string_token = static_cast<const StringToken *>(t);
-
- // redo the queuing to get the handle again (no harm)
- output_token.load_handle = _data_reader.queue_load(_strings.get(string_token->value));
- output_token.source_location = string_token->source_location;
+ output_token.source_location = t->source_location;
output_token.has_start_offset = false;
output_token.has_max_size = false;
- t = consume_next_token(); // the string
+ constexpr bool end_at_unmatched_parenthesis = false;
+ constexpr bool end_at_newline = false;
+ t = parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
t = skip_whitespaces(t);
+
if (!is_operator(t, OperatorType::Comma))
return t;
@@ 3092,8 2104,6 @@ const Token *SyntaxParser::parse_incbin(
// parse start offset
output_token.has_start_offset = true;
- constexpr bool end_at_unmatched_parenthesis = false;
- constexpr bool end_at_newline = false;
t = parse_and_output_expression(t, end_at_unmatched_parenthesis, end_at_newline);
t = skip_whitespaces(t);
@@ 3109,6 2119,32 @@ const Token *SyntaxParser::parse_incbin(
return t;
}
+/// Parse an include statement. Emits a SyntaxTokenType::Include token followed by the
+/// expression that comes after the include keyword.
+/// @param t The include keyword token; its source location is used in error reports.
+/// @return The token returned by parse_and_output_expression for the include argument.
+/// @throws AssemblyException if the include is located inside a macro or a subroutine.
+const Token *SyntaxParser::parse_include(const Token *t)
+{
+    // includes are only accepted outside of macros and subroutines
+    if (_state.macro_depth > 0) {
+        std::stringstream ss;
+        ss << "Includes are not allowed within macros";
+        throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::IncludeNotAllowedInMacros, ss.str());
+    }
+    if (_state.subroutine_depth > 0) {
+        std::stringstream ss;
+        ss << "Includes are not allowed within subroutines.";
+        throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::IncludeNotAllowedInSubroutine, ss.str());
+    }
+
+    // the include marker is stored first and the expression tokens are written after it
+    SyntaxToken &output_token = _output_chain->reserve<SyntaxToken>();
+    output_token.type = SyntaxTokenType::Include;
+    output_token.size = sizeof(SyntaxToken);
+
+    t = consume_next_token(); // the include keyword
+
+    // the argument is parsed as a general expression and is not terminated by a newline
+    constexpr bool end_at_unmatched_right_parenthesis = false;
+    constexpr bool end_at_newline = false;
+    t = parse_and_output_expression(t, end_at_unmatched_right_parenthesis, end_at_newline);
+
+    return t;
+}
+
#if defined(_DEBUG)
const Token *SyntaxParser::parse_debug(const Token *t)
{
@@ 3208,15 2244,22 @@ const Token *SyntaxParser::parse_keyword
t = parse_incbin(t);
break;
+ case KeywordType::Include:
+ t = parse_include(t);
+ break;
+
+ case KeywordType::Processor:
+ t = parse_processor(t);
+ break;
+
case KeywordType::Else:
case KeywordType::Elif:
- case KeywordType::Include:
case KeywordType::Address:
case KeywordType::Dynamic:
case KeywordType::Import:
{
std::stringstream ss;
- ss << "Unexpected " << *t;
+ ss << "Unexpected " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::UnexpectedKeyword, ss.str());
}
@@ 3225,7 2268,7 @@ const Token *SyntaxParser::parse_keyword
case KeywordType::NumTypes:
{
std::stringstream ss;
- ss << "Keyword " << *t << " not implemented";
+ ss << "Keyword " << _processor->token_to_string(*t) << " not implemented";
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::NotImplemented, ss.str());
}
@@ 3250,7 2293,7 @@ const Token *SyntaxParser::parse_name_de
if (!is_operator(t, OperatorType::Period)) {
std::stringstream ss;
- ss << "Expected local symbol name but got " << *t;
+ ss << "Expected local symbol name but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedLocalSymbolName, ss.str());
}
@@ 3381,7 2424,7 @@ const Token *SyntaxParser::parse_label_o
symbol = consume_next_token();
if (symbol->type != TokenType::Symbol) {
std::stringstream ss;
- ss << "Local symbol expected, but got a punction mark followed by " << *symbol;
+ ss << "Local symbol expected, but got a punction mark followed by " << _processor->token_to_string(*symbol);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::LocalSymbolExpected, ss.str());
}
}
@@ 3413,9 2456,9 @@ const Token *SyntaxParser::parse_inner_s
case TokenType::Symbol:
{
- InstructionType instruction = InstructionType::NumTypes;
+ uint8_t instruction = invalid_instruction;
if (is_instruction(*t, instruction)) {
- t = parse_instruction(t, instruction);
+ t = _processor->parse_instruction(*this, _source_files, t, instruction);
} else {
t = parse_label_or_statement(t);
}
@@ 3472,6 2515,10 @@ const Token *SyntaxParser::parse_inner_s
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::UnexpectedProcessorKeyword, ss.str());
}
+ case TokenType::Processor:
+ t = parse_processor(t);
+ break;
+
case TokenType::Whitespace:
case TokenType::Newline:
// this should never happen since whitespace has already been skipped
@@ 3539,7 2586,7 @@ void SyntaxParser::parse()
if (t->type != TokenType::End) {
std::stringstream ss;
- ss << "Expected end of file but got " << *t;
+ ss << "Expected end of file but got " << _processor->token_to_string(*t);
throw AssemblyException(_source_files, t->source_location, AssemblyErrorCodes::ExpectedEndOfFile, ss.str());
}
@@ 3549,17 2596,4 @@ void SyntaxParser::parse()
end_token.size = sizeof(SyntaxToken);
}
-std::vector<TokenChain> parse_syntax(const TokenChain &input, const StringRepository &strings, HashArrayRepository &hash_arrays, const std::vector<std::string> &source_files, DataReader &data_reader)
-{
- TimerScope timer("Syntax parser");
-
- std::vector<TokenChain> output;
- output.reserve(64);
- SyntaxParser parser(input, output, strings, hash_arrays, source_files, data_reader);
- parser.parse();
- return output;
-}
-
-/// @}
-
} // namespace jasm
M jasm/parsing/syntax_parser.h => jasm/syntax/syntax_parser.h +291 -10
@@ 1,24 1,305 @@
#pragma once
-#include <parsing/syntax_tokens.h>
+#include <syntax/operators.h>
+#include <syntax/syntax_state.h>
+#include <syntax/syntax_tokens.h>
+#include <tokenize/operators.h>
+#include <tokenize/tokens.h>
+#include <utility/token_chain_scope.h>
namespace jasm
{
-class DataReader;
+class HashArrayRepository;
+class ProcessorCatalogue;
class StringRepository;
-class HashArrayRepository;
/// @addtogroup syntax
/// @{
-/// Take the tokenized data and make verify the syntax. Produce a new token chain suitable for assembly.
-/// @param input A chain of tokenized tokens.
-/// @param strings A reverse lookup from string hashes to strings.
-/// @param hash_arrays A repository for hash arrays to make it possible to store references in constant size memory.
-/// @param source_files An array of files used to lookup source file name when printing error messages.
-/// @param data_reader An asynchronous data reader for incbin files.
-std::vector<TokenChain> parse_syntax(const TokenChain &input, const StringRepository &strings, HashArrayRepository &hash_arrays, const std::vector<std::string> &source_files, DataReader &data_reader);
+class SyntaxParser // Parses a tokenized input chain and writes syntax tokens to output token chains.
+{
+ friend class ChainIndexScope;
+
+public:
+ SyntaxParser(
+ const TokenChain &input,
+ std::vector<std::unique_ptr<TokenChain>> &output,
+ const ProcessorCatalogue &catalogue,
+ ProcessorType default_processor,
+ const StringRepository &strings,
+ HashArrayRepository &hash_arrays,
+ const std::vector<std::string> &source_files
+ );
+
+ SyntaxParser &operator=(const SyntaxParser &) = delete;
+
+ void parse();
+
+ const Token *consume_next_token()
+ {
+ const Token *token = static_cast<const Token *>(_input_reader.next_token());
+ skip_token(token); // advance the reader past the token that is returned
+ return token;
+ }
+
+ const Token *peek_next_token()
+ {
+ return static_cast<const Token *>(_input_reader.next_token());
+ }
+
+ /// Return the next token after skipping zero or more whitespaces and newlines.
+ const Token *skip_whitespaces(const Token *token)
+ {
+ while (token->type == TokenType::Whitespace || token->type == TokenType::Newline)
+ token = consume_next_token();
+
+ return token;
+ }
+
+ /// Return the next token after skipping zero or more whitespaces but not newlines.
+ const Token *skip_spaces_and_tabs(const Token *token)
+ {
+ while (token->type == TokenType::Whitespace)
+ token = consume_next_token();
+
+ return token;
+ }
+
+ /// Return true if the token matches a specific operator.
+ inline bool is_operator(const Token *t, OperatorType type) const
+ {
+ return t->type == TokenType::Operator && t->operator_index == type;
+ }
+
+ /// Reserve space for a token of type T in the current output chain.
+ /// This is called from the processor implementations.
+ /// @return A reference to the reserved space where it is safe to write the token.
+ template<typename T>
+ T &reserve_token_space()
+ {
+ return _output_chain->reserve<T>();
+ }
+
+ TokenChainScope create_rewind_scope()
+ {
+ return TokenChainScope(_input_reader);
+ }
+
+ /// Get the current token input read position.
+ TokenReadPosition get_read_position()
+ {
+ return _input_reader.position_value();
+ }
+
+ /// Set the current token input read position.
+ void set_read_position(TokenReadPosition position)
+ {
+ _input_reader.set_position_value(position);
+ }
+
+ /// Checks if the tokens following are a local or global symbol and a colon.
+ /// The token stream is left untouched after the call so this is a non-destructive peek.
+ /// Whitespaces must have been skipped prior to this call.
+ bool label_definition_follows(const Token *t);
+
+ /// Parse a global symbol without namespaces and set @a symbol to its hash.
+ /// @param symbol Set to the symbol hash.
+ /// @return The token after the symbol name.
+ const Token *parse_global_symbol_definition(const Token *t, uint64_t &symbol);
+
+ /// Parse a local or global symbol without namespace and update @a global accordingly.
+ /// @param global Set to true if the symbol was a global symbol (false for a local one) - TODO confirm, the earlier wording said "local".
+ /// @param symbol Set to the symbol hash.
+ /// @return The token after the symbol name.
+ const Token *parse_symbol_definition(const Token *t, bool &global, uint64_t &symbol);
+
+ /// Parse a local symbol without namespace or a global symbol with optional namespace.
+ /// @a _temp_hash_array is filled with the local symbol or namespace and global symbol hashes.
+ /// @return The token after the symbol.
+ const Token *parse_symbol_reference(const Token *t, bool use_auto_symbols, bool &global);
+
+ /// Parse expression and fill @a _expression_output with the components as a flat tree structure.
+ /// @param end_at_unmatched_right_parenthesis If true, the parsing will end on an unmatched right parenthesis.
+ /// This will make it possible to parse parts of an expression. If false, an exception will be thrown.
+ /// @param end_at_newline If true, the parsing will end on a newline. This is used in instruction parsing.
+ const Token *parse_expression(const Token *t, bool end_at_unmatched_right_parenthesis, bool end_at_newline);
+
+ /// Parse expression and store the result as a symbol token in the output.
+ /// @param end_at_unmatched_right_parenthesis If true, the parsing will end on an unmatched right parenthesis.
+ /// This will make it possible to parse parts of an expression. If false, an exception will be thrown.
+ /// @param end_at_newline If true, the parsing will end on a newline. This is used in instruction parsing.
+ const Token *parse_and_output_expression(const Token *t, bool end_at_unmatched_right_parenthesis, bool end_at_newline);
+
+ /// Parse an expected operator.
+ const Token *parse_operator(const Token *t, OperatorType expected_operator);
+
+private:
+ void setup_operator_precedence();
+
+ OperatorPrecedence operator_precedence(OperatorType type) const
+ {
+ assert(type < OperatorType::NumOperatorFunctions);
+ return _operator_info[static_cast<int>(type)].precendence;
+ }
+
+ OperatorAssociativity operator_associativity(OperatorType type) const
+ {
+ assert(type < OperatorType::NumOperatorFunctions);
+ return _operator_info[static_cast<int>(type)].associativity;
+ }
+
+ /// Parse the inner part of a scope and return when reaching the ending curly bracket.
+ const Token *parse_inner_scope(const Token *t);
+
+ /// Parse a scope with curly brackets and output using ScopeBegin and ScopeEnd tokens.
+ const Token *parse_scope(const Token *t);
+
+ const Token *next_token() // same body as peek_next_token(); does not advance the reader
+ {
+ return static_cast<const Token *>(_input_reader.next_token());
+ }
+
+ void skip_token(const Token *t)
+ {
+ _input_reader.advance_read(sizeof(Token) + t->payload_size); // tokens are variable sized: header plus payload
+ }
+
+ /// Return true if the token matches a specific keyword.
+ inline bool is_keyword(const Token *t, KeywordType type) const
+ {
+ return t->type == TokenType::Keyword && t->keyword_index == type;
+ }
+
+ /// Return true if the expression component matches a specific operator.
+ inline bool is_operator(const ExpressionComponent &t, OperatorType type) const
+ {
+ return t.type == ExpressionComponentType::Operator && t.operator_type == type;
+ }
+
+ /// Checks if the tokens following are a variable declaration and 'in'.
+ /// The token stream is left untouched after the call so this is a non-destructive peek.
+ /// The 'for' keyword and a left parenthesis must have been parsed already.
+ /// Whitespaces must have been skipped prior to this call.
+ bool range_for_loop_follows(const Token *t);
+
+ /// Parse a type reference and fill in the information about it in @a reference.
+ const Token *parse_type_reference(const Token *t, TypeReference &reference, bool array_size_is_required);
+
+ OptimizerType string_to_optimizer_type(const StringToken *t);
+
+ const Token *parse_align(const Token *t);
+ const Token *parse_using_statement(const Token *t);
+ const Token *parse_section_mapping(const Token *t);
+ const Token *parse_section_part(const Token *t, uint64_t section_name_hash, const SourceLocation &source_location);
+ const Token *parse_section_statement(const Token *t);
+ const Token *parse_optimizer_statement(const Token *t);
+ const Token *parse_if_statement(const Token *t);
+ const Token *parse_macro_statement(const Token *t);
+ const Token *parse_return_statement(const Token *t);
+ const Token *parse_subroutine_statement(const Token *t);
+ const Token *parse_struct_definition(const Token *t);
+ const Token *parse_define_statement(const Token *t);
+ const Token *parse_reserve_statement(const Token *t);
+ const Token *parse_enum_definition(const Token *t);
+ const Token *parse_range_for_statement(const Token *t);
+ const Token *parse_for_statement(const Token *t);
+ const Token *parse_repeat_statement(const Token *t);
+ const Token *parse_incbin(const Token *t);
+ const Token *parse_include(const Token *t);
+ const Token *parse_namespace(const Token *t);
+ const Token *parse_module(const Token *t);
+ const Token *parse_export(const Token *t);
+ const Token *parse_processor(const Token *t);
+ #if defined(_DEBUG)
+ const Token *parse_debug(const Token *t);
+ #endif
+
+ /// Parse an expected keyword.
+ const Token *parse_keyword(const Token *t, KeywordType expected_keyword);
+
+ /// Parse a keyword statement.
+ const Token *parse_keyword(const Token *t);
+
+ /// Prepare the operator stack for a new operator.
+ /// This involves moving operators with lower precedence into the expression array.
+ void prepare_operator_stack_for_operator(OperatorType type);
+
+ /// Parse an expected value or unary operator (beginning or after binary operator).
+ const Token *parse_value_in_expression(const Token *token, bool &next_is_value, bool end_at_newline);
+
+ /// @return True if a call operator, left parenthesis or left bracket was found and false if the stack became empty.
+ bool pop_operators_until_left_bracket_or_parenthesis(OperatorType &type_found);
+
+ /// Parse an expected binary operator after a value or assume the expression is over if
+ /// an operator isn't found.
+ const Token *parse_after_value_in_expression(const Token *token, bool &end_of_expression, bool &next_is_operator, bool end_at_unmatched_right_parenthesis, bool end_at_newline);
+
+ /// Apply the operator on the top of _operator_stack on the nodes in _expression_stack and moves the operator to the expression stack.
+ /// This basically creates a subtree with the operator on top and the nodes as branches.
+ void apply_operator_on_expression_stack();
+
+ /// Copy the tree in @a _component_storage with the root in index @a storage_index to the end of @a _expression_output.
+ void copy_component_tree(size_t storage_index);
+
+ /// Parse expression and put components in @a _component_storage with the trunk being the index of the single entry
+ /// in @a _expression_stack.
+ /// @param end_at_unmatched_right_parenthesis If true, the parsing will end on an unmatched right parenthesis.
+ /// This will make it possible to parse parts of an expression. If false, an exception will be thrown.
+ /// @param end_at_newline If true, the parsing will end on a newline. This is used in instruction parsing.
+ const Token *parse_expression_unordered(const Token *token, bool end_at_unmatched_right_parenthesis, bool end_at_newline);
+
+ /// Output an expression using the @a components argument.
+ /// Start is the token where the expression starts to be able to store
+ /// the location in the expression token.
+ void output_expression(const Token *start, const std::vector<ExpressionComponent> &components);
+
+ /// Parse expression that is expected to be enclosed in parenthesis and store the result as a symbol token in the output.
+ const Token *parse_and_output_expression_within_parenthesis(const Token *t);
+
+ /// Parse name declaration.
+ const Token *parse_name_declaration(const Token *t);
+
+ /// Parse assignment after const or var keyword.
+ /// @param t Token after the const or var keyword.
+ /// @param result_token Optional pointer to pointer that will be updated with a pointer to the created token.
+ const Token *parse_variable_definition(const Token *t, StorageType storage_type, const SymbolDefinitionToken **result_token = nullptr);
+
+ /// Parse a label definition.
+ /// @param symbol The symbol after skipping initial operator for local symbols.
+ /// @param global True for a global symbol and false for a local symbol.
+ const Token *parse_label_definition(const SymbolToken *symbol, bool global);
+
+ /// Parse a label with a colon or a statement.
+ const Token *parse_label_or_statement(const Token *t);
+
+ // Call these to register use of scope variables and have them be generated in the assemble pass.
+ void mark_use_of_loop_variable();
+ void mark_use_of_continue_variable();
+
+ // references with the same types and names as the constructor parameters
+ std::vector<std::unique_ptr<TokenChain>> &_output;
+ const ProcessorCatalogue &_catalogue;
+ const StringRepository &_strings;
+ HashArrayRepository &_hash_arrays;
+ const std::vector<std::string> &_source_files;
+
+ // syntax storage
+ std::vector<const Processor *> _processor_stack; ///< Stack of processors. The top one is in use. This can't become empty.
+ const Processor *_processor; ///< Current used processor.
+ TokenReader _input_reader; ///< Keeps the read state from the _input.
+ TokenChain *_output_chain; ///< Current token chain to output to. Each function and macro has its own chain for easy lookup.
+ size_t _output_chain_index; ///< Index of the current output chain in the @a _output vector. This is used to lookup a new pointer when the number of chains has increased and possibly been reallocated elsewhere.
+ std::vector<uint64_t> _temp_hash_array; ///< Temporary namespace storage while parsing.
+ std::vector<DefineGroupToken *> _temp_pointer_array; ///< Temporary array of pointers to track group sizes.
+ std::vector<uint32_t> _operator_stack; ///< Temporary expression operator stack for expression evaluations. This stores indices into _component_storage.
+ std::vector<uint32_t> _expression_stack; ///< Temporary expression node output for expression evaluations. This stores indices into _component_storage.
+ std::vector<ExpressionComponent> _component_storage; ///< Temporary storage for values and operators used in the syntax tree.
+ std::vector<ExpressionComponent> _expression_output; ///< The output from the expression parser. This is a syntax tree with the first element as root.
+ OperatorInfo _operator_info[static_cast<int>(OperatorType::NumOperatorFunctions)]; ///< Operator precedence and associativity for each type of operator in OperatorType.
+ std::vector<ScopeBeginToken *> _scope_stack; ///< Stack of entered scopes. This is used to mark scopes as using loop and continue variables.
+ SyntaxState _state;
+};
/// @}
A => jasm/syntax/syntax_state.cpp +14 -0
@@ 0,0 1,14 @@
+#include "pch.h"
+
+#include <syntax/syntax_state.h>
+
+namespace jasm
+{
+
+SyntaxState::SyntaxState()
+ : macro_depth(0) // nesting counter starts at zero
+ , subroutine_depth(0) // nesting counter starts at zero
+{
+}
+
+}
A => jasm/syntax/syntax_state.h +21 -0
@@ 0,0 1,21 @@
+#pragma once
+
+namespace jasm
+{
+
+/// @addtogroup syntax
+/// @{
+
+/// State of the syntax analysis that persists between statements.
+struct SyntaxState
+{
+ SyntaxState();
+
+ std::set<uint64_t> sections; ///< Set with string hashes for section names, used to detect duplicate names.
+ int macro_depth; ///< Number of nested macro definitions.
+ int subroutine_depth; ///< Number of nested subroutine definitions (only one allowed).
+};
+
+/// @}
+
+}
M jasm/parsing/syntax_tokens.cpp => jasm/syntax/syntax_tokens.cpp +3 -3
@@ 1,11 1,11 @@
#include "pch.h"
-#include <parsing/syntax_tokens.h>
+#include <syntax/syntax_tokens.h>
namespace jasm
{
-const std::string_view to_string(SectionType type)
+std::string_view to_string(SectionType type)
{
static const std::string_view names[] = {
std::string_view("none"),
@@ 18,7 18,7 @@ const std::string_view to_string(Section
return names[static_cast<size_t>(type)];
}
-const std::string_view to_string(ExpressionComponentType type)
+std::string_view to_string(ExpressionComponentType type)
{
static const std::string_view names[] = {
std::string_view("bool"),
M jasm/parsing/syntax_tokens.h => jasm/syntax/syntax_tokens.h +15 -53
@@ 1,11 1,12 @@
#pragma once
-#include <assembling/instructions.h>
-#include <parsing/operators.h>
-#include <parsing/section.h>
-#include <parsing/source_location.h>
-#include <parsing/storage_type.h>
-#include <parsing/types.h>
+#include <processor/instructions.h>
+#include <processor/processor.h>
+#include <syntax/section.h>
+#include <syntax/storage_type.h>
+#include <tokenize/operators.h>
+#include <tokenize/source_location.h>
+#include <tokenize/types.h>
namespace jasm
{
@@ 24,6 25,7 @@ enum class SyntaxTokenType : uint8_t
Expression,
Align,
Using,
+ Processor,
Instruction,
Section,
SectionPart,
@@ 52,6 54,7 @@ enum class SyntaxTokenType : uint8_t
DefineGroup,
DefineEnd,
Reserve,
+ Include,
Incbin,
End,
#if defined(_DEBUG)
@@ 185,7 188,8 @@ struct ExpressionComponent
struct SyntaxToken
{
SyntaxTokenType type;
- uint8_t padding0[3];
+ ProcessorType processor; ///< Only used in Processor tokens. Undefined means pop the stack.
+ uint8_t padding0[2];
uint32_t size; ///< Size of the token.
// 8 byte aligned
};
@@ 230,7 234,6 @@ struct UsingToken : public SyntaxToken
SourceLocation source_location; ///< Location of the namespace in the source.
};
-
struct DeclareToken : public SyntaxToken
{
uint64_t name_hash; ///< Local name hash to predeclare.
@@ 262,45 265,6 @@ struct ExpressionTokenWithPayload : publ
ExpressionComponent components[1]; // a variable sized array really
};
-#if SUPPORTS(M6502)
-
-struct InstructionToken : public SyntaxToken
-{
- InstructionType instruction;
- uint8_t padding1;
- uint16_t addressing_modes; ///< Mask with InstructionType bits set for each possible addressing mode.
- SourceLocation source_location; ///< Source location to instruction.
- // 8 byte aligned
- bool has_instruction_data_label; ///< True if there is a label defined that points to the instruction data.
- bool global_data_label; ///< True if the label is global.
- uint8_t padding2[2];
- SourceLocation address_label_location; ///< Source location to address label, if existing.
- // 8 byte aligned
- uint64_t data_label_symbol_hash; ///< Symbol to define as the data label.
- // 8 byte aligned
-};
-
-#elif SUPPORTS(Z80)
-
-struct InstructionToken : public SyntaxToken
-{
- InstructionType instruction;
- uint8_t addressing_mode_index; ///< The selected addressing mode index used to lookup the instruction data using @a opcode().
- uint8_t padding1[2];
- SourceLocation source_location; ///< Source location to instruction.
- // 8 byte aligned
- bool has_instruction_data_label[2]; ///< True if there is a label defined that points to the instruction data.
- bool global_data_label[2]; ///< True if the label is global.
- uint32_t padding2;
- // 8 byte aligned
- SourceLocation address_label_location[2]; ///< Source location to address label, if existing.
- // 8 byte aligned
- uint64_t data_label_symbol_hash[2]; ///< Symbol to define as the data label.
- // 8 byte aligned
-};
-
-#endif // SUPPORTS
-
struct SectionToken : public SyntaxToken
{
uint64_t name_hash; ///< The hash of the section name. This is used as a filename in case code sections overlap.
@@ 495,7 459,7 @@ struct DefineToken : public SyntaxToken
SourceLocation keyword_location; ///< Define keyword location.
// 8 byte aligned
SourceLocation source_location; ///< Symbol definition location.
- uint32_t padding2;
+ SourceLocation end_source_location; ///< Location of the right curly bracket or end of expression.
// 8 byte aligned
uint64_t name_hash; ///< Name of the label to reference the data with.
// 8 byte aligned
@@ 530,10 494,8 @@ struct ReserveToken : public SyntaxToken
struct IncBinToken : public SyntaxToken
{
- uint64_t load_handle; ///< Handle to the background loaded file.
- // 8 byte aligned
SourceLocation keyword_location; ///< Keyword location.
- SourceLocation source_location; ///< Symbol definition location.
+ SourceLocation source_location; ///< Filename expression location.
// 8 byte aligned
bool has_start_offset; ///< True if an expression follows with the start offset to start reading from.
bool has_max_size; ///< True if an expression follows with the max file size to read.
@@ 541,8 503,8 @@ struct IncBinToken : public SyntaxToken
// 8 byte aligned
};
-const std::string_view to_string(SectionType type);
-const std::string_view to_string(ExpressionComponentType type);
+std::string_view to_string(SectionType type);
+std::string_view to_string(ExpressionComponentType type);
/// @}
M jasm/parsing/keyword_finder.cpp => jasm/tokenize/keyword_finder.cpp +3 -3
@@ 3,7 3,7 @@
#include <algorithm>
#include <core/strings/murmur_hash.h>
#include <core/strings/utf8.h>
-#include <parsing/keyword_finder.h>
+#include <tokenize/keyword_finder.h>
namespace jasm
{
@@ 87,7 87,7 @@ void KeywordFinder::place_keyword(std::w
}
}
-bool KeywordFinder::match(std::wstring_view str, uint64_t &hash)
+bool KeywordFinder::match(std::wstring_view str, uint64_t &hash) const
{
uint32_t node_index = 1; // zero is reserved as end marker
while (!str.empty() && node_index != 0) {
@@ 113,7 113,7 @@ bool KeywordFinder::match(std::wstring_v
return false;
}
-bool KeywordFinder::match_beginning(std::wstring_view str, uint64_t &hash)
+bool KeywordFinder::match_beginning(std::wstring_view str, uint64_t &hash) const
{
uint32_t node_index = 1; // zero is reserved as end marker
bool matched = false;
M jasm/parsing/keyword_finder.h => jasm/tokenize/keyword_finder.h +2 -2
@@ 18,13 18,13 @@ public:
/// @param str String to match keyword in.
/// @param hash Murmur hash of the found keyword.
/// @return true if a keyword matched.
- bool match(std::wstring_view str, uint64_t &hash);
+ bool match(std::wstring_view str, uint64_t &hash) const;
/// Check for a keyword match in a string view.
/// This match is greedy and will ignore that there may be more characters
/// unmatched after the match. "++hello" will match "++".
/// @param str beginning of string to match.
- bool match_beginning(std::wstring_view str, uint64_t &hash);
+ bool match_beginning(std::wstring_view str, uint64_t &hash) const;
private:
struct Node
M jasm/parsing/keywords.cpp => jasm/tokenize/keywords.cpp +4 -3
@@ 1,10 1,10 @@
#include "pch.h"
-#include <parsing/keywords.h>
+#include <tokenize/keywords.h>
namespace jasm {
-const std::string_view to_string(KeywordType type)
+std::string_view to_string(KeywordType type)
{
static const std::string_view names[] = {
std::string_view("include"),
@@ 36,6 36,7 @@ const std::string_view to_string(Keyword
std::string_view("dynamic"),
std::string_view("address"),
std::string_view("repeat"),
+ std::string_view("processor"),
#if defined(_DEBUG)
std::string_view("_debug_"),
#endif
@@ 46,7 47,7 @@ const std::string_view to_string(Keyword
return names[static_cast<size_t>(type)];
}
-const std::string_view to_string(BooleanType type)
+std::string_view to_string(BooleanType type)
{
static const std::string_view names[] = {
std::string_view("false"),
M jasm/parsing/keywords.h => jasm/tokenize/keywords.h +3 -2
@@ 36,6 36,7 @@ enum class KeywordType : uint8_t
Dynamic,
Address,
Repeat,
+ Processor,
#if defined(_DEBUG)
Debug,
#endif
@@ 49,8 50,8 @@ enum class BooleanType : uint8_t
NumTypes,
};
-const std::string_view to_string(KeywordType type);
-const std::string_view to_string(BooleanType type);
+std::string_view to_string(KeywordType type);
+std::string_view to_string(BooleanType type);
/// @}
M jasm/parsing/operators.cpp => jasm/tokenize/operators.cpp +2 -2
@@ 1,6 1,6 @@
#include "pch.h"
-#include <parsing/operators.h>
+#include <tokenize/operators.h>
namespace jasm {
@@ 51,7 51,7 @@ static OperatorDesc operators[] = {
};
-const std::string_view to_string(OperatorType type)
+std::string_view to_string(OperatorType type)
{
static const std::string_view names[] = {
std::string_view("()"), // token not used in tokenizer but used to order functions in syntax parser
M jasm/parsing/operators.h => jasm/tokenize/operators.h +1 -1
@@ 119,7 119,7 @@ inline bool is_assignment(OperatorType t
const OperatorDesc &operator_desc(OperatorType type);
-const std::string_view to_string(OperatorType type);
+std::string_view to_string(OperatorType type);
/// @}
A => jasm/tokenize/position_tracker.cpp +24 -0
@@ 0,0 1,24 @@
+#include "pch.h"
+
+#include <tokenize/position_tracker.h>
+
+namespace jasm
+{
+
+PositionTracker::PositionTracker(const wchar_t *text_, MaskType *masks_, std::vector<size_t> *row_locations)
+ : column(1)
+ , row(1)
+ , text(text_)
+ , masks(masks_)
+ , _text_start(text_)
+ , _row_locations(row_locations)
+{
+ if (row_locations != nullptr) {
+ // rows are counted from 1, so index 0 is filled with a dummy entry
+ row_locations->push_back(0);
+ // the first row starts at character offset zero
+ row_locations->push_back(0);
+ }
+}
+
+}
A => jasm/tokenize/position_tracker.h +138 -0
@@ 0,0 1,138 @@
+#pragma once
+
+#include <core/math/sign.h>
+#include <locale>
+
+namespace jasm
+{
+
+using MaskType = std::ctype<wchar_t>::mask;
+
+/// @addtogroup tokenize
+/// @{
+
+/// Keeps track of column and row when fetching characters from a char array.
+class PositionTracker
+{
+public:
+ PositionTracker(const wchar_t *text_, MaskType *masks_, std::vector<size_t> *row_locations);
+
+ inline wchar_t peek_char(int offset = 0) const
+ {
+ return text[offset];
+ }
+ inline MaskType peek_mask(int offset = 0) const
+ {
+ return masks[offset];
+ }
+ inline bool is_end(int offset = 0) const
+ {
+ return text[offset] == 0; // the text is terminated by a zero character
+ }
+ inline bool is_newline(int offset = 0) const
+ {
+ return text[offset] == 10 || text[offset] == 13; // LF or CR
+ }
+ inline bool is_space(int offset = 0) const // the mask bit tested differs per compiler: blank on MSVC, space on GCC compatible compilers
+ {
+ #if defined(_MSC_VER)
+ return (masks[offset] & std::ctype<wchar_t>::blank) != 0;
+ #elif defined(__GNUC__)
+ return (masks[offset] & std::ctype<wchar_t>::space) != 0;
+ #else
+ #error "Compiler not supported"
+ #endif
+ }
+ inline bool is_alpha(int offset = 0) const
+ {
+ return (masks[offset] & std::ctype<wchar_t>::alpha) != 0;
+ }
+ inline bool is_alpha_numeric(int offset = 0) const
+ {
+ return (masks[offset] & std::ctype<wchar_t>::alnum) != 0;
+ }
+ inline bool is_hex(int offset = 0) const
+ {
+ return (masks[offset] & std::ctype<wchar_t>::xdigit) != 0;
+ }
+ inline bool is_digit(int offset = 0) const
+ {
+ return (masks[offset] & std::ctype<wchar_t>::digit) != 0;
+ }
+ inline bool is_symbol_start(int offset = 0) const
+ {
+ return is_alpha(offset) || text[offset] == L'_';
+ }
+ inline bool is_symbol_content(int offset = 0) const
+ {
+ return is_alpha_numeric(offset) || text[offset] == L'_';
+ }
+
+ /// Consume one character and keep track of column and row.
+ /// A CR LF pair is consumed as a single newline; a lone CR or LF also counts as one newline.
+ inline void consume()
+ {
+ if (text[0] == L'\r' && text[1] == L'\n') {
+ column = 1;
+ ++row;
+ text += 2;
+ masks += 2;
+ if (_row_locations != nullptr) {
+ _row_locations->push_back(core::unsign_cast(text - _text_start)); // offset of the first character in the new row
+ }
+
+ } else if (text[0] == L'\r' || text[0] == L'\n') {
+ column = 1;
+ ++row;
+ ++text;
+ ++masks;
+ if (_row_locations != nullptr) {
+ _row_locations->push_back(core::unsign_cast(text - _text_start)); // offset of the first character in the new row
+ }
+
+ } else {
+ ++column;
+ ++text;
+ ++masks;
+ }
+ }
+
+ /// Consume n characters and keep track of column and row.
+ /// Newlines cannot be part of consumed characters since the row is not updated here.
+ inline void consume(uint32_t size)
+ {
+ column += size;
+ text += size;
+ masks += size;
+ }
+
+ /// Consume one character and keep track of column and row. This can only be used if the
+ /// peeked character isn't a newline.
+ inline void consume_single()
+ {
+ ++column;
+ ++text;
+ ++masks;
+ }
+
+ /// Record the current text offset as the start of one extra row. Call this at the end of parsing so the row table also holds the end offset.
+ inline void add_last_line()
+ {
+ if (_row_locations != nullptr) {
+ _row_locations->push_back(core::unsign_cast(text - _text_start));
+ }
+ }
+
+ uint32_t column; ///< Current column. Starts at 1 and is reset to 1 after each consumed newline.
+ uint32_t row; ///< Current row. Starts at 1 and is incremented for each consumed newline.
+ const wchar_t *text; ///< Pointer to the next character to consume.
+ MaskType *masks; ///< Pointer to the character class mask of the next character. Advanced in step with @a text.
+
+private:
+ const wchar_t *_text_start; ///< Start of the text, used to compute row start offsets.
+ std::vector<size_t> *_row_locations; ///< Receives the start offset of each row, or nullptr if offsets are not collected.
+};
+
+/// @}
+
+}
M jasm/parsing/source_location.h => jasm/tokenize/source_location.h +0 -0
A => jasm/tokenize/token_type.h +31 -0
@@ 0,0 1,31 @@
+#pragma once
+
+namespace jasm
+{
+
+/// @addtogroup tokenize
+/// @{
+
+enum class TokenType : uint8_t
+{
+ Whitespace, // newlines are not included, they have their own type
+ Newline,
+ Boolean,
+ Char,
+ Integer,
+ Float,
+ String,
+ Operator,
+ Keyword,
+ Typename,
+ Symbol,
+ End, // the syntax parser expects this type at the end of the file
+ ProcessorKeyword,
+ Processor, // handled by SyntaxParser::parse_processor in the syntax parser
+
+ NumTypes, // presumably the number of token types and must stay last - TODO confirm
+};
+
+/// @}
+
+}
A => jasm/tokenize/tokenize.cpp +30 -0
@@ 0,0 1,30 @@
+#include "pch.h"
+
+#include <core/debug/timer.h>
+#include <tokenize/tokenize.h>
+#include <tokenize/tokenizer.h>
+#include <utility/token_chain.h>
+
+namespace jasm
+{
+
+TokenChain tokenize(
+ uint32_t file_index,
+ const std::string &filename,
+ const std::string &file_path,
+ const ProcessorCatalogue &catalogue,
+ ProcessorType default_processor,
+ StringRepository &strings,
+ std::vector<size_t> *file_row_locations,
+ std::wstring *file_contents
+)
+{
+ core::TimerScope timer("Tokenizer"); // NOTE(review): presumably measures the time spent in this function - confirm TimerScope semantics
+ Tokenizer t(catalogue, file_row_locations, file_contents);
+ TokenChain tc(4096); // NOTE(review): 4096 is presumably the chain block size in bytes - confirm against TokenChain
+ t.init();
+ t.tokenize(file_index, filename, file_path, tc, default_processor, strings); // the tokens are written into tc
+ return tc;
+}
+} // namespace jasm
A => jasm/tokenize/tokenize.h +36 -0
@@ 0,0 1,36 @@
+#pragma once
+
+#include <processor/processor.h>
+
+namespace jasm
+{
+
+class TokenChain;
+class ProcessorCatalogue;
+class StringRepository;
+
+/// @addtogroup tokenize
+/// @{
+
+/// Tokenize the source file to produce a stream of tokens.
+/// The tokens have information about where in the source they are located to allow error
+/// messages to refer back to the source code lines. The token chain is a list of memory
+/// blocks which can be read in order by the syntax parser.
+/// @param row_locations For each line (the index), a character index that points to the first character in the line is stored. Or nullptr if the tokenizer shouldn't collect the row locations.
+/// @param contents Pointer to a string with the file contents of each source file, or nullptr if the tokenizer shouldn't keep the data.
+/// @throw FileException is thrown if a file operation failed.
+/// @throw AssemblyException is thrown if assembly failed.
+TokenChain tokenize(
+ uint32_t file_index,
+ const std::string &filename,
+ const std::string &file_path,
+ const ProcessorCatalogue &catalogue,
+ ProcessorType default_processor,
+ StringRepository &strings,
+ std::vector<size_t> *row_locations,
+ std::wstring *contents
+);
+
+/// @}
+
+} // namespace jasm
M jasm/parsing/tokenizer.cpp => jasm/tokenize/tokenizer.cpp +173 -541
@@ 7,328 7,46 @@
#include <core/io/file_helpers.h>
#include <core/io/file_id.h>
#include <core/io/text_reader.h>
+#include <core/math/sign.h>
#include <core/strings/murmur_hash.h>
#include <core/strings/utf8.h>
#include <exceptions/assembly_exception.h>
#include <io/data_reader.h>
#include <locale>
-#include <parsing/keyword_finder.h>
-#include <parsing/token_chain.h>
-#include <parsing/tokenizer.h>
+#include <processor/instructions.h>
#include <sstream>
#include <strings/string_repository.h>
+#include <tokenize/keyword_finder.h>
+#include <tokenize/position_tracker.h>
+#include <tokenize/tokenizer.h>
+#include <tokenize/tokens.h>
+#include <utility/token_chain.h>
namespace jasm
{
-using namespace core;
-
using MaskType = std::ctype<wchar_t>::mask;
-/// Keeps track of column and row when fetching characters from a char array.
-class PositionTracker
-{
-public:
- PositionTracker(const wchar_t *text_, MaskType *masks_) : column(1), row(1), text(text_), masks(masks_) {}
-
- inline wchar_t peek_char(int offset = 0) const
- {
- return text[offset];
- }
- inline MaskType peek_mask(int offset = 0) const
- {
- return masks[offset];
- }
- inline bool is_end(int offset = 0) const
- {
- return text[offset] == 0;
- }
- inline bool is_newline(int offset = 0) const
- {
- return text[offset] == 10 || text[offset] == 13;
- }
- inline bool is_space(int offset = 0) const
- {
- #if defined(_MSC_VER)
- return (masks[offset] & std::ctype<wchar_t>::blank) != 0;
- #elif defined(__GNUC__)
- return (masks[offset] & std::ctype<wchar_t>::space) != 0;
- #else
- #error "Compiler not supported"
- #endif
- }
- inline bool is_alpha(int offset = 0) const
- {
- return (masks[offset] & std::ctype<wchar_t>::alpha) != 0;
- }
- inline bool is_alpha_numeric(int offset = 0) const
- {
- return (masks[offset] & std::ctype<wchar_t>::alnum) != 0;
- }
- inline bool is_hex(int offset = 0) const
- {
- return (masks[offset] & std::ctype<wchar_t>::xdigit) != 0;
- }
- inline bool is_digit(int offset = 0) const
- {
- return (masks[offset] & std::ctype<wchar_t>::digit) != 0;
- }
- inline bool is_symbol_start(int offset = 0) const
- {
- return is_alpha(offset) || text[offset] == L'_';
- }
- inline bool is_symbol_content(int offset = 0) const
- {
- return is_alpha_numeric(offset) || text[offset] == L'_';
- }
-
- /// Consume one character and keep track of column and row.
- /// Newlines are guaranteed to be returned as one character, either newline or return.
- inline void consume()
- {
- if (text[0] == L'\r' && text[1] == L'\n') {
- column = 1;
- ++row;
- ++text;
- ++masks;
- } else if (text[0] == L'\r' || text[0] == L'\n') {
- column = 1;
- ++row;
- } else
- ++column;
-
- ++text;
- ++masks;
- }
-
- /// Consume n characters and keep track of column and row.
- /// Newlines cannot be part of consumed characters.
- inline void consume(uint32_t size)
- {
- column += size;
- text += size;
- masks += size;
- }
-
- /// Consume one character and keep track of column and row. This can only be used if the
- /// peeked character isn't a newline.
- inline void consume_single()
- {
- ++column;
- ++text;
- ++masks;
- }
-
- uint32_t column;
- uint32_t row;
- const wchar_t *text;
- MaskType *masks;
-};
-
-class Tokenizer
+void Tokenizer::init()
{
- Tokenizer &operator=(const Tokenizer &) = delete;
-
-public:
- struct TokenData
- {
- std::string name;
- TokenType token_type;
- uint8_t token_type_index;
- };
-
- Tokenizer(bool pseudo_instructions, const std::vector<std::string> &include_dirs, std::vector<std::string> &used_files, DataReader &data_reader)
- : _pseudo_instructions(pseudo_instructions)
- , _include_dirs(include_dirs)
- , _used_files(used_files)
- , _data_reader(data_reader)
- {
- }
-
- void init(const std::string &file);
-
- void tokenize(const size_t file_index, TokenChain &token_chain, StringRepository &strings);
-
-private:
- template<typename T>
- void add_type_tokens(std::vector<std::string> &names, TokenType type) {
- for (uint8_t i = 0; i < static_cast<uint8_t>(T::NumTypes); ++i) {
- TokenData token { std::string(to_string(static_cast<T>(i))), type, i };
- names.push_back(token.name);
- _hash_to_token.insert(murmur_hash3_string_x64_64(token.name)) = token;
- }
- }
-
- /// Turn backslashes into frontslashes in a string.
- static std::string to_front_slashes(const std::string &path);
-
- /// Add a file to the used list and return its index.
- size_t add_used_file(const std::string &file);
-
- /// Tokenize either an int or a floating point number. Use temp as string storage and read from tracker.
- void tokenize_int_or_float(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain);
- /// Tokenize a floating point number. Use temp as string storage and read from tracker.
- void tokenize_float(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain, uint32_t column, uint32_t row);
-
- void tokenize_binary(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain);
- void tokenize_hex(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain);
- void tokenize_char(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain);
- void tokenize_string(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain, StringRepository &strings);
- OperatorType tokenize_operator(const size_t file_index, uint64_t hash, PositionTracker &tracker, TokenChain &token_chain);
- void tokenize_symbol_or_keyword(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain, StringRepository &strings, bool allow_keyword);
-
- void parse_include(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain, StringRepository &strings);
- void preparse_incbin(const size_t file_index, PositionTracker tracker);
-
- /// Parse quoted string into _temp_wstring.
- void parse_quoted_string(const size_t file_index, PositionTracker &tracker);
- /// Convert _temp_wstring into a string.
- std::string temp_string()
- {
- _temp_string = core::wide_to_utf8(std::wstring_view(_temp_wstring.data(), _temp_wstring.size()));
- return _temp_string;
- }
- std::wstring_view temp_wview()
- {
- return std::wstring_view(_temp_wstring.data(), _temp_wstring.size());
- }
-
- /// Parse one character in a string or character literal. Handles newline, tab, return, quote, single quote and backslash.
- /// \return The parsed character.
- wchar_t parse_next_string_character(PositionTracker &tracker);
-
- /// Temporary string used when parsing.
- std::vector<wchar_t> _temp_wstring;
- /// Temporary string used when parsing.
- std::string _temp_string;
- /// This is used to find a keyword in a string.
- KeywordFinder _keyword_finder;
- /// This is used to find an operator in a string.
- KeywordFinder _operator_finder;
- /// A map to look up token details based on a hash of the token name.
- HashMap<uint64_t, TokenData, NullHashCompare<uint64_t>> _hash_to_token;
- /// An array of include file identifiers used to determine file recursion.
- std::vector<FileId> _include_id_history;
- /// An array of include files in the same order as @a _include_id_history to be able to print the history in error messages.
- std::vector<std::string> _include_file_history;
-
- /// True if extended instructions are used.
- bool _pseudo_instructions;
- /// An array of include directories we must search in to find include files.
- const std::vector<std::string> &_include_dirs;
- /// This array is filled with the included filenames and is used outside to map tokens to filenames.
- std::vector<std::string> &_used_files;
- /// This is used to trigger file loads in the background.
- DataReader &_data_reader;
- /// Lookup table from hashed instruction name to instruction type.
- core::HashMap<uint64_t, InstructionType, core::NullHashCompare<uint64_t>> _instructions;
-};
-
-
-void Tokenizer::init(const std::string &file)
-{
- FileId fid;
- if (!file_id(file, fid)) {
- std::stringstream ss;
- ss << "Failed to open '" << file << "'";
- throw FileException(ss.str());
- }
-
- _include_id_history.push_back(fid);
- _include_file_history.push_back(file);
- add_used_file(file);
-
_temp_wstring.reserve(256);
-
- // add token data for keywords
- std::vector<std::string> keywords;
- add_type_tokens<KeywordType>(keywords, TokenType::Keyword);
- add_type_tokens<TypenameType>(keywords, TokenType::Typename);
- add_type_tokens<BooleanType>(keywords, TokenType::Boolean);
- add_type_tokens<ProcessorKeywordType>(keywords, TokenType::ProcessorKeyword);
- _keyword_finder.set_keywords(keywords);
-
- // the extra operators not used in tokenizer is not included here!
- OperatorType operators_types[] = {
- OperatorType::Period,
- OperatorType::BooleanNot,
- OperatorType::BitwiseNot,
- OperatorType::Multiply,
- OperatorType::Divide,
- OperatorType::Plus,
- OperatorType::Minus,
- OperatorType::LeftShift,
- OperatorType::RightShift,
- OperatorType::Less,
- OperatorType::Greater,
- OperatorType::LessOrEqual,
- OperatorType::GreaterOrEqual,
- OperatorType::Equal,
- OperatorType::NotEqual,
- OperatorType::BitwiseAnd,
- OperatorType::BitwiseXor,
- OperatorType::BitwiseOr,
- OperatorType::BooleanAnd,
- OperatorType::BooleanOr,
- OperatorType::Assignment,
- OperatorType::AssignmentAdd,
- OperatorType::AssignmentSubtract,
- OperatorType::AssignmentMultiply,
- OperatorType::AssignmentDivide,
- OperatorType::AssignmentBooleanAnd,
- OperatorType::AssignmentBooleanOr,
- OperatorType::AssignmentBitwiseAnd,
- OperatorType::AssignmentBitwiseOr,
- OperatorType::AssignmentBitwiseXor,
- OperatorType::AssignmentLeftShift,
- OperatorType::AssignmentRightShift,
- OperatorType::Colon,
- OperatorType::Namespace,
- OperatorType::Semicolon,
- OperatorType::Comma,
- OperatorType::Hash,
- OperatorType::Percent,
- OperatorType::LeftParenthesis,
- OperatorType::RightParenthesis,
- OperatorType::LeftBracket,
- OperatorType::RightBracket,
- OperatorType::LeftCurly,
- OperatorType::RightCurly,
- OperatorType::Increment,
- OperatorType::Decrement,
- OperatorType::At,
- OperatorType::Ellipsis,
- };
-
- constexpr int num_generated_operators = 10;
- static_assert(sizeof(operators_types) / sizeof(operators_types[0]) + num_generated_operators == static_cast<int>(OperatorType::NumTypes), "Number of types doesn't match number of definitions");
-
- std::vector<std::string> operators;
- for (size_t i = 0; i < sizeof(operators_types) / sizeof(operators_types[0]); ++i) {
- TokenData token { std::string(to_string(operators_types[i])), TokenType::Operator, static_cast<uint8_t>(operators_types[i]) };
- operators.push_back(token.name);
- _hash_to_token.insert(murmur_hash3_string_x64_64(token.name)) = token;
- }
-
- _operator_finder.set_keywords(operators);
-
- // generate instruction lookup
- uint8_t num_instructions = static_cast<uint8_t>(_pseudo_instructions ? InstructionType::NumTypes : InstructionType::NumStandard);
- for (uint8_t i = 0; i < num_instructions; ++i) {
- InstructionType type = static_cast<InstructionType>(i);
- const std::string_view name = to_string(type);
- _instructions.insert(murmur_hash3_string_x64_64(name)) = type;
- }
}
-void Tokenizer::tokenize(const size_t file_index, TokenChain &token_chain, StringRepository &strings)
+void Tokenizer::tokenize(uint32_t file_index, const std::string &filename, const std::string &file_path, TokenChain &token_chain, ProcessorType default_processor, StringRepository &strings)
{
+ _file_index = file_index;
+ _filename = filename;
+
+ _processor = _catalogue.processor(default_processor);
+ _processor_stack.clear();
+ _processor_stack.push_back(_processor);
+
std::string contents;
try {
- contents = load_file(_used_files[file_index]);
- } catch (Exception &e) {
+ contents = core::load_file(file_path);
+ } catch (core::Exception &e) {
std::stringstream ss;
- ss << e.message << "\nwhile loading '" << _used_files[file_index] << "'";
+ ss << e.message << "\nwhile loading '" << _filename << "'";
e.message = ss.str();
throw e;
}
@@ 337,8 55,8 @@ void Tokenizer::tokenize(const size_t fi
std::wstring wide_contents;
try {
wide_contents = core::utf8_to_wide(contents);
- } catch (Exception &) {
- throw FileException("File contents isn't utf8 encoded: " + _used_files[file_index]);
+ } catch (core::Exception &) {
+ throw core::FileException("File contents isn't utf8 encoded: " + _filename);
}
// classify all characters
@@ 349,7 67,7 @@ void Tokenizer::tokenize(const size_t fi
std::locale loc("en_US.utf8");
std::use_facet<std::ctype<wchar_t>>(loc).is(wide_contents.data(), wide_contents.data() + wide_contents.size(), char_masks.data());
// categorize the null termination as "whitespace" to simplify parsing.
- char_masks[contents.size()] =
+ char_masks[wide_contents.size()] =
#if defined(_MSC_VER)
std::ctype<wchar_t>::blank;
#elif defined(__GNUC__)
@@ 358,7 76,7 @@ void Tokenizer::tokenize(const size_t fi
#error "Compiler not supported"
#endif
- PositionTracker tracker(wide_contents.c_str(), char_masks.data());
+ PositionTracker tracker(wide_contents.c_str(), char_masks.data(), _row_locations);
uint64_t hash; // used as out parameter for finder::match
while (tracker.peek_char() != 0) {
if (tracker.is_space()) {
@@ 409,71 127,72 @@ void Tokenizer::tokenize(const size_t fi
if (depth != 0) {
std::stringstream ss;
ss << "Multiline comment was not terminated.";
- throw AssemblyException(_used_files[file_index], row, column, AssemblyErrorCodes::MultilineCommentWasNotTerminated, ss.str());
+ throw AssemblyException(_filename, row, column, AssemblyErrorCodes::MultilineCommentWasNotTerminated, ss.str());
}
} else if (tracker.is_digit()) {
// matched either an int or a floating point number
_temp_wstring.clear();
- tokenize_int_or_float(file_index, tracker, token_chain);
+ tokenize_int_or_float(tracker, token_chain);
} else if (tracker.peek_char() == L'.' && tracker.is_digit(1)) {
// matched a floating point number
_temp_wstring.clear();
- tokenize_float(file_index, tracker, token_chain, tracker.column, tracker.row);
+ tokenize_float(tracker, token_chain, tracker.column, tracker.row);
} else if (tracker.peek_char() == L'$') {
// matched a hex number
- tokenize_hex(file_index, tracker, token_chain);
+ tokenize_hex(tracker, token_chain);
} else if (tracker.peek_char() == L'%' && tracker.is_digit(1)) {
// matched a binary number
- tokenize_binary(file_index, tracker, token_chain);
+ tokenize_binary(tracker, token_chain);
- } else if (_operator_finder.match_beginning(tracker.text, hash)) {
+ } else if (_processor->operators().match_beginning(tracker.text, hash)) {
// matched an operator
- OperatorType op = tokenize_operator(file_index, hash, tracker, token_chain);
+ OperatorType op = tokenize_operator(hash, tracker, token_chain);
// Special case here to treat variables with operator prefixes to not be parsed as
// keywords. .a should be possible to use on Z80 for example or @i.
if (op == OperatorType::Period || op == OperatorType::At) {
if (tracker.is_symbol_start()) {
bool allow_keyword = false;
- tokenize_symbol_or_keyword(file_index, tracker, token_chain, strings, allow_keyword);
+ tokenize_symbol_or_keyword(tracker, token_chain, strings, allow_keyword);
}
}
} else if (tracker.is_symbol_start()) {
// match symbol or keyword
- bool allow_keyword = true;
- tokenize_symbol_or_keyword(file_index, tracker, token_chain, strings, allow_keyword);
+ constexpr bool allow_keyword = true;
+ tokenize_symbol_or_keyword(tracker, token_chain, strings, allow_keyword);
} else if (tracker.peek_char() == L'\'') {
// char literal
- tokenize_char(file_index, tracker, token_chain);
+ tokenize_char(tracker, token_chain);
} else if (tracker.peek_char() == L'\"') {
// string literal
- tokenize_string(file_index, tracker, token_chain, strings);
+ tokenize_string(tracker, token_chain, strings);
} else {
std::stringstream ss;
std::string narrow_character;
core::wide_to_utf8(tracker.peek_char(), narrow_character);
ss << "Unexpected character '" << narrow_character << "'";
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedCharacter, ss.str());
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::UnexpectedCharacter, ss.str());
}
}
- // if this is the main file, add end token to stream
- if (_include_file_history.size() == 1) {
- // add end token to simplify syntax parsing
- Token &t = token_chain.reserve<Token>();
- t.type = TokenType::End;
- t.source_location.file_index = static_cast<uint32_t>(file_index);
- t.source_location.column = tracker.column;
- t.source_location.row = tracker.row;
- t.generic_index = 0;
- t.payload_size = 0;
+ // add end token to simplify syntax parsing
+ Token &t = token_chain.reserve<Token>();
+ t.type = TokenType::End;
+ t.source_location.file_index = static_cast<uint32_t>(file_index);
+ t.source_location.column = tracker.column;
+ t.source_location.row = tracker.row;
+ t.generic_index = 0;
+ t.payload_size = 0;
+
+ if (_contents != nullptr) {
+ *_contents = std::move(wide_contents);
}
}
@@ 484,14 203,6 @@ std::string Tokenizer::to_front_slashes(
return front_slash_path;
}
-size_t Tokenizer::add_used_file(const std::string &file)
-{
- size_t index = _used_files.size();
- // make sure that the file has front slashes to get the output from linux and pc unit tests match
- _used_files.push_back(to_front_slashes(file));
- return index;
-}
-
wchar_t Tokenizer::parse_next_string_character(PositionTracker &tracker)
{
wchar_t value = tracker.peek_char();
@@ 523,7 234,7 @@ wchar_t Tokenizer::parse_next_string_cha
return value;
}
-void Tokenizer::tokenize_int_or_float(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain)
+void Tokenizer::tokenize_int_or_float(PositionTracker &tracker, TokenChain &token_chain)
{
uint32_t column = tracker.column;
uint32_t row = tracker.row;
@@ 533,7 244,7 @@ void Tokenizer::tokenize_int_or_float(co
tracker.consume();
}
if (tracker.peek_char() == L'.' || tracker.peek_char() == L'e') {
- tokenize_float(file_index, tracker, token_chain, column, row);
+ tokenize_float(tracker, token_chain, column, row);
return;
}
@@ 541,7 252,7 @@ void Tokenizer::tokenize_int_or_float(co
if (tracker.is_alpha()) {
std::stringstream ss;
ss << "Letter follows directly after integer number " << temp_string();
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::AlphaFollowingNumberLiteral, ss.str());
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::AlphaFollowingNumberLiteral, ss.str());
}
try {
@@ 549,7 260,7 @@ void Tokenizer::tokenize_int_or_float(co
IntegerToken &t = token_chain.reserve<IntegerToken>();
t.type = TokenType::Integer;
- t.source_location.file_index = static_cast<uint32_t>(file_index);
+ t.source_location.file_index = _file_index;
t.source_location.column = column;
t.source_location.row = row;
t.generic_index = 0;
@@ 559,11 270,11 @@ void Tokenizer::tokenize_int_or_float(co
catch (std::out_of_range &) {
std::stringstream ss;
ss << "Integer number " << temp_string() << " out of range";
- throw AssemblyException(_used_files[file_index], row, column, AssemblyErrorCodes::IntegerOutOfRange, ss.str());
+ throw AssemblyException(_filename, row, column, AssemblyErrorCodes::IntegerOutOfRange, ss.str());
}
}
-void Tokenizer::tokenize_float(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain, uint32_t column, uint32_t row)
+void Tokenizer::tokenize_float(PositionTracker &tracker, TokenChain &token_chain, uint32_t column, uint32_t row)
{
// matched a floating point number
// at this point we know that the next character is a period or an 'e'
@@ 596,7 307,7 @@ void Tokenizer::tokenize_float(const siz
if (!tracker.is_digit()) {
std::stringstream ss;
ss << "Floating point number " << temp_string() << " is missing exponent";
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::MissingExponentInFloatingPoint, ss.str());
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::MissingExponentInFloatingPoint, ss.str());
}
while (tracker.is_digit()) {
@@ 609,7 320,7 @@ void Tokenizer::tokenize_float(const siz
if (tracker.is_alpha()) {
std::stringstream ss;
ss << "Letter follows directly after floating point number " << temp_string();
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::AlphaFollowingNumberLiteral, ss.str());
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::AlphaFollowingNumberLiteral, ss.str());
}
try {
@@ 617,7 328,7 @@ void Tokenizer::tokenize_float(const siz
FloatToken &t = token_chain.reserve<FloatToken>();
t.type = TokenType::Float;
- t.source_location.file_index = static_cast<uint32_t>(file_index);
+ t.source_location.file_index = _file_index;
t.source_location.column = column;
t.source_location.row = row;
t.generic_index = 0;
@@ 627,11 338,11 @@ void Tokenizer::tokenize_float(const siz
catch (std::out_of_range &) {
std::stringstream ss;
ss << "Floating point number " << temp_string() << " out of range";
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::FloatOutOfRange, ss.str());
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::FloatOutOfRange, ss.str());
}
}
-void Tokenizer::tokenize_binary(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain)
+void Tokenizer::tokenize_binary(PositionTracker &tracker, TokenChain &token_chain)
{
uint32_t column = tracker.column;
uint32_t row = tracker.row;
@@ 651,14 362,14 @@ void Tokenizer::tokenize_binary(const si
std::string narrow_c;
core::wide_to_utf8(c, narrow_c);
ss << "Illegal character '" << narrow_c << "' in binary constant";
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::IllegalCharacterInBinaryConstant, ss.str());
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::IllegalCharacterInBinaryConstant, ss.str());
}
tracker.consume();
}
IntegerToken &t = token_chain.reserve<IntegerToken>();
t.type = TokenType::Integer;
- t.source_location.file_index = static_cast<uint32_t>(file_index);
+ t.source_location.file_index = _file_index;
t.source_location.column = column;
t.source_location.row = row;
t.generic_index = 0;
@@ 666,7 377,7 @@ void Tokenizer::tokenize_binary(const si
t.value = value;
}
-void Tokenizer::tokenize_hex(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain)
+void Tokenizer::tokenize_hex(PositionTracker &tracker, TokenChain &token_chain)
{
uint32_t column = tracker.column;
uint32_t row = tracker.row;
@@ 682,7 393,7 @@ void Tokenizer::tokenize_hex(const size_
std::string narrow_c;
core::wide_to_utf8(c, narrow_c);
ss << "Illegal character '" << narrow_c << "' in hexadecimal constant";
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::IllegalCharacterInHexConstant, ss.str());
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::IllegalCharacterInHexConstant, ss.str());
}
value <<= 4;
@@ 697,7 408,7 @@ void Tokenizer::tokenize_hex(const size_
IntegerToken &t = token_chain.reserve<IntegerToken>();
t.type = TokenType::Integer;
- t.source_location.file_index = static_cast<uint32_t>(file_index);
+ t.source_location.file_index = _file_index;
t.source_location.column = column;
t.source_location.row = row;
t.generic_index = 0;
@@ 705,7 416,7 @@ void Tokenizer::tokenize_hex(const size_
t.value = value;
}
-void Tokenizer::tokenize_char(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain)
+void Tokenizer::tokenize_char(PositionTracker &tracker, TokenChain &token_chain)
{
// store source location
uint32_t column = tracker.column;
@@ 713,17 424,17 @@ void Tokenizer::tokenize_char(const size
tracker.consume();
if (tracker.is_newline() || tracker.is_end() || tracker.peek_char() == L'\'')
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::MissingCharacterConstant, "Missing character in character constant");
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::MissingCharacterConstant, "Missing character in character constant");
wchar_t value = parse_next_string_character(tracker);
if (tracker.peek_char() != L'\'')
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::TooLongCharacterConstant, "Character constant longer than one character.");
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::TooLongCharacterConstant, "Character constant longer than one character.");
tracker.consume();
CharToken &t = token_chain.reserve<CharToken>();
t.type = TokenType::Char;
- t.source_location.file_index = static_cast<uint32_t>(file_index);
+ t.source_location.file_index = _file_index;
t.source_location.column = column;
t.source_location.row = row;
t.generic_index = 0;
@@ 731,23 442,23 @@ void Tokenizer::tokenize_char(const size
t.value = static_cast<int32_t>(value);
}
-void Tokenizer::tokenize_string(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain, StringRepository &strings)
+void Tokenizer::tokenize_string(PositionTracker &tracker, TokenChain &token_chain, StringRepository &strings)
{
// store source location
uint32_t column = tracker.column;
uint32_t row = tracker.row;
- parse_quoted_string(file_index, tracker);
+ parse_quoted_string(tracker);
std::string string = temp_string();
// hash characters
- uint64_t string_hash = murmur_hash3_string_x64_64(string);
+ uint64_t string_hash = core::murmur_hash3_string_x64_64(string);
// add string
strings.add(string_hash, string);
StringToken &t = token_chain.reserve<StringToken>();
t.type = TokenType::String;
- t.source_location.file_index = static_cast<uint32_t>(file_index);
+ t.source_location.file_index = _file_index;
t.source_location.column = column;
t.source_location.row = row;
t.generic_index = 0;
@@ 755,14 466,13 @@ void Tokenizer::tokenize_string(const si
t.value = string_hash;
}
-OperatorType Tokenizer::tokenize_operator(const size_t file_index, uint64_t hash, PositionTracker &tracker, TokenChain &token_chain)
+OperatorType Tokenizer::tokenize_operator(uint64_t hash, PositionTracker &tracker, TokenChain &token_chain)
{
- assert(_hash_to_token.find(hash) != _hash_to_token.end());
- const TokenData &td = _hash_to_token[hash];
+ const TokenData &td = _processor->hash_to_token(hash);
Token &t = token_chain.reserve<Token>();
t.type = td.token_type;
- t.source_location.file_index = static_cast<uint32_t>(file_index);
+ t.source_location.file_index = _file_index;
t.source_location.column = tracker.column;
t.source_location.row = tracker.row;
t.generic_index = td.token_type_index; // index is valid for any token type
@@ 772,76 482,40 @@ OperatorType Tokenizer::tokenize_operato
return static_cast<OperatorType>(td.token_type_index);
}
-void Tokenizer::tokenize_symbol_or_keyword(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain, StringRepository &strings, bool allow_keyword)
+void Tokenizer::tokenize_symbol_or_keyword(PositionTracker &tracker, TokenChain &token_chain, StringRepository &strings, bool allow_keyword)
{
// store source location
uint32_t column = tracker.column;
uint32_t row = tracker.row;
- _temp_wstring.clear();
- _temp_wstring.push_back(tracker.peek_char());
- tracker.consume();
-
- while (tracker.is_symbol_content()) {
- _temp_wstring.push_back(tracker.peek_char());
- tracker.consume();
- }
-
+ parse_symbol_name_to_temp(tracker);
+
// check for matching keyword
uint64_t hash;
- if (allow_keyword && _keyword_finder.match(temp_wview(), hash)) {
+ if (allow_keyword && _processor->keywords().match(temp_wview(), hash)) {
// matched an instruction or keyword
- assert(_hash_to_token.find(hash) != _hash_to_token.end());
- const TokenData *td = &_hash_to_token[hash];
+ const TokenData *td = &_processor->hash_to_token(hash);
- if (td->token_type == TokenType::Keyword && td->token_type_index == static_cast<uint8_t>(KeywordType::Incbin)) {
- // preparse the include name and start background loading of the file
- preparse_incbin(file_index, tracker);
- }
+ if (td->token_type == TokenType::Keyword && td->token_type_index == static_cast<uint8_t>(KeywordType::Processor)) {
+ // handle processor statement
+ parse_processor(tracker, token_chain);
+ } else {
- #if SUPPORTS(Z80)
// Special case for z80 registers which can have a prim after a register name.
// The symbol parsing doesn't handle the prim character and I don't want to add
// it there because it will eat up unexpected combinations of characters. It is
// better to handle this special case here and translate the tokens if needed.
if (td->token_type == TokenType::ProcessorKeyword && tracker.peek_char() == L'\'') {
- if (hash == hash_constant(0x85555565f6597889ULL, "a")) {
- hash = hash_constant(0x45a6060b75dcb28bULL, "a'");
- tracker.consume();
- } else if (hash == hash_constant(0x3265a8a124914099ULL, "af")) {
- hash = hash_constant(0x75f0ca5c1761bc10, "af'");
- tracker.consume();
- } else if (hash == hash_constant(0x7a98a957b1d3d1ee, "b")) {
- hash = hash_constant(0x4e710923ab8a5de3, "b'");
- tracker.consume();
- } else if (hash == hash_constant(0x8e38df6c4a1f74d7, "c")) {
- hash = hash_constant(0x51f9b2208ed849be, "c'");
- tracker.consume();
- } else if (hash == hash_constant(0xcb72f2cd8447f776, "d")) {
- hash = hash_constant(0x516d90c3787d85ce, "d'");
+ if (_processor->allow_processor_keyword_with_prim(hash)) {
tracker.consume();
- } else if (hash == hash_constant(0xc5b69249a3d5e994, "e")) {
- hash = hash_constant(0x14115437bd14d165, "e'");
- tracker.consume();
- } else if (hash == hash_constant(0xd6fcb2bb61cb4523, "h")) {
- hash = hash_constant(0x3a55eb6b0fcaef4f, "h'");
- tracker.consume();
- } else if (hash == hash_constant(0xf539fdab7bdf9f62, "l")) {
- hash = hash_constant(0x2729f07f03d07daa, "l'");
- tracker.consume();
+ td = &_processor->hash_to_token(hash);
}
- td = &_hash_to_token[hash];
}
- #endif
- if (td->token_type == TokenType::Keyword && td->token_type_index == static_cast<uint8_t>(KeywordType::Include)) {
- // handle include statement
- parse_include(file_index, tracker, token_chain, strings);
- } else {
// just store the token
Token &t = token_chain.reserve<Token>();
t.type = td->token_type;
- t.source_location.file_index = static_cast<uint32_t>(file_index);
+ t.source_location.file_index = _file_index;
t.source_location.column = column;
t.source_location.row = row;
t.generic_index = td->token_type_index; // index is valid for any token type
@@ 851,13 525,13 @@ void Tokenizer::tokenize_symbol_or_keywo
} else {
std::string symbol = temp_string();
// store string in string repository
- uint64_t symbol_hash = murmur_hash3_string_x64_64(symbol);
+ uint64_t symbol_hash = core::murmur_hash3_string_x64_64(symbol);
strings.add(symbol_hash, symbol);
// write token
SymbolToken &t = token_chain.reserve<SymbolToken>();
t.type = TokenType::Symbol;
- t.source_location.file_index = static_cast<uint32_t>(file_index);
+ t.source_location.file_index = _file_index;
t.source_location.column = column;
t.source_location.row = row;
t.payload_size = sizeof(uint64_t);
@@ 865,17 539,95 @@ void Tokenizer::tokenize_symbol_or_keywo
if (allow_keyword) {
// check if the symbol could be an instruction (since z80 has colliding names)
- auto it = _instructions.find(symbol_hash);
- if (it == _instructions.end()) {
- t.instruction_index = InstructionType::NumTypes;
- } else {
- t.instruction_index = it->second;
+ if (!_processor->is_instruction(symbol_hash, t.instruction_index)) {
+ t.instruction_index = invalid_instruction;
}
}
}
}
-void Tokenizer::parse_quoted_string(const size_t file_index, PositionTracker &tracker)
+void Tokenizer::parse_symbol_name_to_temp(PositionTracker &tracker) // Collects one symbol name from the tracker into _temp_wstring.
+{
+ _temp_wstring.clear(); // start from an empty buffer; whatever an earlier parse left behind is discarded
+ _temp_wstring.push_back(tracker.peek_char()); // the first character is stored without a check of its own; the caller is expected to have verified it starts a symbol
+ tracker.consume();
+
+ while (tracker.is_symbol_content()) { // keep appending for as long as the tracker reports symbol content
+ _temp_wstring.push_back(tracker.peek_char());
+ tracker.consume();
+ }
+}
+
+void Tokenizer::parse_processor(PositionTracker &tracker, TokenChain &token_chain) // Parses the argument of a processor statement: a quoted name selects a processor, the word pop restores the previous one. Throws AssemblyException on any other input.
+{
+ // we are past the processor keyword
+ while (tracker.peek_char() != 0 && tracker.is_space()) { // skip spaces in front of the argument, stopping at a zero character
+ tracker.consume();
+ }
+
+ if (tracker.peek_char() == L'\"') {
+ uint32_t row = tracker.row; // remember where the argument starts; both the error and the token below use this position
+ uint32_t col = tracker.column;
+
+ parse_quoted_string(tracker);
+
+ std::string processor_name = temp_string();
+ ProcessorType processor;
+ if (!is_processor(processor_name, processor)) {
+ std::stringstream ss; // builds the error text listing every processor type from index 1 upwards
+ ss << "Unsupported processor type \"" << processor_name << "\". Supported types are " << to_string(static_cast<ProcessorType>(1));
+ for(uint32_t i = 2; i < static_cast<uint32_t>(ProcessorType::NumProcessors); ++i) { // NOTE(review): names from index 2 onward are quoted while the first name (index 1, streamed above) is not; this also assumes index 0 is not a selectable processor - TODO confirm both are intended
+ ss << ", \"" << to_string(static_cast<ProcessorType>(i)) << '"';
+ }
+ throw AssemblyException(_filename, row, col, AssemblyErrorCodes::InvalidProcessorName, ss.str());
+ }
+
+ _processor = _catalogue.processor(processor); // switch the processor used for the rest of the tokenizing
+ _processor_stack.push_back(_processor); // and record it on the stack so that a later pop can undo the switch
+
+ Token &t = token_chain.reserve<Token>(); // emit a Processor token carrying the selected type
+ t.type = TokenType::Processor;
+ t.processor = processor;
+ t.source_location.file_index = _file_index;
+ t.source_location.column = col;
+ t.source_location.row = row;
+ t.payload_size = 0; // no payload is written after this token
+
+ } else if (tracker.is_symbol_start()) {
+ uint32_t row = tracker.row; // position of the word, used for errors and for the token
+ uint32_t col = tracker.column;
+ parse_symbol_name_to_temp(tracker);
+ if (temp_wview() != L"pop") { // pop is the only bare word accepted after the processor keyword
+ std::stringstream ss;
+ ss << "Expected processor string or 'pop' keyword.";
+ throw AssemblyException(_filename, row, col, AssemblyErrorCodes::ExpectedProcessorNameOrPop, ss.str());
+ }
+
+ if (_processor_stack.size() == 1) { // the last remaining entry is never popped; NOTE(review): an empty stack is not rejected here, so this relies on the stack always holding at least one entry, presumably pushed by tokenize - confirm
+ std::stringstream ss;
+ ss << "Unmatched processor pop statement.";
+ throw AssemblyException(_filename, row, col, AssemblyErrorCodes::UnmatchedProcessorPop, ss.str());
+ }
+
+ _processor_stack.pop_back(); // drop the current processor
+ _processor = _processor_stack.back(); // and continue with the one that was active before it
+
+ Token &t = token_chain.reserve<Token>(); // emit a Processor token that stands for the pop
+ t.type = TokenType::Processor;
+ t.processor = ProcessorType::Unspecified; // Unspecified is the marker for a pop, not a processor selection
+ t.source_location.file_index = _file_index;
+ t.source_location.column = col;
+ t.source_location.row = row;
+ t.payload_size = 0;
+
+ } else { // neither a quoted string nor the start of a symbol; reported at the current tracker position
+ std::stringstream ss;
+ ss << "Expected processor string or 'pop' keyword.";
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::ExpectedProcessorNameOrPop, ss.str());
+ }
+}
+
+void Tokenizer::parse_quoted_string(PositionTracker &tracker)
{
// tracker is assumed to be pointing to the first quote in the string
_temp_wstring.clear();
@@ 885,7 637,7 @@ void Tokenizer::parse_quoted_string(cons
while (tracker.peek_char() != L'\"') {
if (tracker.is_newline() || tracker.is_end())
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::MissingClosingStringQuote, "Missing closing string quote.");
+ throw AssemblyException(_filename, tracker.row, tracker.column, AssemblyErrorCodes::MissingClosingStringQuote, "Missing closing string quote.");
wchar_t c = parse_next_string_character(tracker);
_temp_wstring.push_back(c);
@@ 894,124 646,4 @@ void Tokenizer::parse_quoted_string(cons
tracker.consume();
}
-void Tokenizer::parse_include(const size_t file_index, PositionTracker &tracker, TokenChain &token_chain, StringRepository &strings)
-{
- // we are past the include keyword and should parse a path string
- while (tracker.peek_char() != 0 && tracker.is_space())
- tracker.consume();
-
- if (tracker.peek_char() != L'\"') {
- std::stringstream ss;
- ss << "Expected path string after include keyword.";
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::ExpectedPathString, ss.str());
- }
-
- uint32_t column = tracker.column;
- uint32_t row = tracker.row;
-
- parse_quoted_string(file_index, tracker);
-
- FileId fid;
- std::string file_path;
- std::string include_file(temp_string());
- if (!match_include_dir_and_file(include_file, _include_dirs, file_path)) {
- std::stringstream ss;
- ss << "Failed to find include file '" << include_file << "'";
- throw AssemblyException(_used_files[file_index], row, column, AssemblyErrorCodes::CantFindIncludeFile, ss.str());
- }
-
- if (!file_id(file_path, fid)) {
- std::stringstream ss;
- ss << "Failed to open '" << temp_string() << "'";
- throw FileException(ss.str());
- }
- if (std::find(_include_id_history.begin(), _include_id_history.end(), fid) != _include_id_history.end()) {
- std::stringstream ss;
- ss << "Include file recursion. '" << _used_files[file_index] << "' is included twice from:";
- for (auto it = _include_file_history.rbegin(); it != _include_file_history.rend(); ++it)
- ss << "\n " << to_front_slashes(*it);
- throw AssemblyException(_used_files[file_index], 1, 1, AssemblyErrorCodes::RecursiveIncludes, ss.str());
- }
-
- _include_id_history.push_back(fid);
- _include_file_history.push_back(include_file);
-
- size_t include_index = add_used_file(file_path);
-
- // recurse!
- tokenize(include_index, token_chain, strings);
-
- _include_file_history.pop_back();
- _include_id_history.pop_back();
-}
-
-void Tokenizer::preparse_incbin(const size_t file_index, PositionTracker tracker)
-{
- // we are past the include keyword and should parse a path string
- while (tracker.peek_char() != 0 && tracker.is_space())
- tracker.consume();
-
- if (tracker.peek_char() != L'\"') {
- std::stringstream ss;
- ss << "Expected path string after incbin keyword.";
- throw AssemblyException(_used_files[file_index], tracker.row, tracker.column, AssemblyErrorCodes::ExpectedPathString, ss.str());
- }
-
- parse_quoted_string(file_index, tracker);
-
- _data_reader.queue_load(temp_string());
-}
-
-const std::string_view to_string(TokenType type)
-{
- static const std::string_view names[] = {
- std::string_view("whitespace"),
- std::string_view("newline"),
- std::string_view("boolean"),
- std::string_view("char"),
- std::string_view("integer"),
- std::string_view("float"),
- std::string_view("string"),
- std::string_view("operator"),
- std::string_view("keyword"),
- std::string_view("type"),
- std::string_view("symbol"),
- std::string_view("end of file"),
- std::string_view("processor keyword"),
- };
- static_assert(sizeof(names) / sizeof(names[0]) == static_cast<size_t>(TokenType::NumTypes), "Number of tokens doesn't match number of strings");
-
- assert(type < TokenType::NumTypes);
- return names[static_cast<size_t>(type)];
-}
-
-bool is_instruction(const Token &t)
-{
- return t.type == TokenType::Symbol && t.instruction_index != InstructionType::NumTypes;
-}
-
-bool is_instruction(const Token &t, InstructionType &type)
-{
- if (t.type != TokenType::Symbol) {
- return false;
- }
-
- if (t.instruction_index == InstructionType::NumTypes) {
- return false;
- }
-
- type = t.instruction_index;
- return true;
-}
-
-TokenChain tokenize(bool pseudo_instructions, const std::string &input_file, StringRepository &strings, const std::vector<std::string> &include_dirs, std::vector<std::string> &used_files, DataReader &data_reader)
-{
- TimerScope timer("Tokenizer");
- Tokenizer t(pseudo_instructions, include_dirs, used_files, data_reader);
- TokenChain tc(4096);
- t.init(input_file);
- t.tokenize(0, tc, strings);
- return tc;
-}
-
} // namespace jasm
M jasm/parsing/tokenizer.h => jasm/tokenize/tokenizer.h +84 -79
@@ 1,100 1,105 @@
#pragma once
-#include <assembling/instructions.h>
-#include <core/collections/hash_map.h>
-#include <parsing/source_location.h>
-#include <parsing/keywords.h>
-#include <parsing/processor_keywords_6502.h>
-#include <parsing/processor_keywords_z80.h>
-#include <parsing/operators.h>
-#include <parsing/types.h>
+#include <core/strings/utf8.h>
+#include <processor/processor.h>
namespace jasm
{
-class DataReader;
+class PositionTracker;
+class StringRepository;
class TokenChain;
-class StringRepository;
/// @addtogroup tokenize
/// @{
-enum class TokenType : uint8_t
+class Tokenizer
{
- Whitespace,
- Newline,
- Boolean,
- Char,
- Integer,
- Float,
- String,
- Operator,
- Keyword,
- Typename,
- Symbol,
- End,
- ProcessorKeyword,
+ Tokenizer &operator=(const Tokenizer &) = delete;
- NumTypes,
-};
+public:
+ Tokenizer( // Stores the catalogue reference and the two optional output pointers; no tokenizing happens here.
+ const ProcessorCatalogue &catalogue,
+ std::vector<size_t> *row_locations,
+ std::wstring *contents
+ )
+ : _catalogue(catalogue) // held by reference, so the catalogue has to outlive this object
+ , _processor(nullptr) // no processor is selected at construction time
+ , _row_locations(row_locations)
+ , _contents(contents)
+ { // NOTE(review): _file_index is not initialised by this constructor - presumably tokenize assigns it before use, confirm
+ }
+
+ void init();
-struct Token
-{
- TokenType type;
- union
- {
- OperatorType operator_index;
- InstructionType instruction_index; // this is set for symbol tokens
- KeywordType keyword_index;
- ProcessorKeywordType processor_keyword_index;
- TypenameType typename_index;
- BooleanType boolean_index;
- uint8_t generic_index;
- };
- uint16_t payload_size;
- SourceLocation source_location;
- // 8 byte aligned
- /// ... payload following ...
-};
+ /// @param file_index File index to write in SourceLocations for this file.
+ /// @param filename Used for printouts.
+ /// @param file_path Used to load the file contents.
+ void tokenize(
+ uint32_t file_index,
+ const std::string &filename,
+ const std::string &file_path,
+ TokenChain &token_chain,
+ ProcessorType default_processor,
+ StringRepository &strings
+ );
+
+private:
+ /// Turn backslashes into frontslashes in a string.
+ static std::string to_front_slashes(const std::string &path);
+
+ /// Tokenize either an int or a floating point number. Use temp as string storage and read from tracker.
+ void tokenize_int_or_float(PositionTracker &tracker, TokenChain &token_chain);
+ /// Tokenize a floating point number. Use temp as string storage and read from tracker.
+ void tokenize_float(PositionTracker &tracker, TokenChain &token_chain, uint32_t column, uint32_t row);
-struct SymbolToken : public Token
-{
- uint64_t symbol_hash;
-};
+ void tokenize_binary(PositionTracker &tracker, TokenChain &token_chain);
+ void tokenize_hex(PositionTracker &tracker, TokenChain &token_chain);
+ void tokenize_char(PositionTracker &tracker, TokenChain &token_chain);
+ void tokenize_string(PositionTracker &tracker, TokenChain &token_chain, StringRepository &strings);
+ OperatorType tokenize_operator(uint64_t hash, PositionTracker &tracker, TokenChain &token_chain);
+ void tokenize_symbol_or_keyword(PositionTracker &tracker, TokenChain &token_chain, StringRepository &strings, bool allow_keyword);
+ /// Assuming next character is the beginning of a symbol, parse it to _temp_wstring.
+ void parse_symbol_name_to_temp(PositionTracker &tracker);
-struct IntegerToken : public Token
-{
- int32_t value;
-};
-
-struct FloatToken : public Token
-{
- double value;
-};
-
-struct CharToken : public IntegerToken
-{
-};
+ void parse_processor(PositionTracker &tracker, TokenChain &token_chain);
+
+ /// Parse quoted string into _temp_wstring.
+ void parse_quoted_string(PositionTracker &tracker);
+ /// Convert _temp_wstring into a string.
+ std::string temp_string() // Converts the current _temp_wstring contents and returns the result by value.
+ {
+ _temp_string = core::wide_to_utf8(std::wstring_view(_temp_wstring.data(), _temp_wstring.size())); // the converted text is also kept in the _temp_string member; presumably UTF-8, going by the helper name
+ return _temp_string; // the caller receives a copy, the member keeps its own
+ }
+ std::wstring_view temp_wview() // Returns a view of the current _temp_wstring contents without converting or copying them.
+ {
+ return std::wstring_view(_temp_wstring.data(), _temp_wstring.size()); // the view refers to the storage of _temp_wstring and owns nothing itself
+ }
-struct StringToken : public Token
-{
- uint64_t value;
-};
-
-/// Return true if the token is an instruction.
-bool is_instruction(const Token &t);
-/// Return true if the token is an instruction.
-bool is_instruction(const Token &t, InstructionType &type);
+ /// Parse one character in a string or character literal. Handles newline, tab, return, quote, single quote and backslash.
+ /// \return The parsed character.
+ wchar_t parse_next_string_character(PositionTracker &tracker);
-const std::string_view to_string(TokenType type);
-
-/// Tokenize the source file and any included files to produce one single stream of tokens.
-/// The tokens have information about where in the source they are located to allow error
-/// messages to refer back to the source code lines. The token chain is a list of memory
-/// blocks which can be read in order by the syntax parser.
-/// @throw FileException is thrown if a file operation failed.
-/// @throw AssemblyException is thrown if assembly failed.
-TokenChain tokenize(bool pseudo_instructions, const std::string &input_file, StringRepository &strings, const std::vector<std::string> &include_dirs, std::vector<std::string> &used_files, DataReader &data_reader);
+ /// Temporary string used when parsing.
+ std::vector<wchar_t> _temp_wstring;
+ /// Temporary string used when parsing.
+ std::string _temp_string;
+ /// Catalogue with all processor types.
+ const ProcessorCatalogue &_catalogue;
+ /// Stack of processor scopes. The top one is the current.
+ std::vector<const Processor *> _processor_stack;
+ /// Current processor in use.
+ const Processor *_processor;
+ /// This is the index of the file to write into SourceLocation instances.
+ uint32_t _file_index;
+ /// This is the name of the file we're tokenizing.
+ std::string _filename;
+ /// Optional structure to collect indices into the file contents for all rows.
+ std::vector<size_t> *_row_locations;
+ /// Optional string to collect the file contents.
+ std::wstring *_contents;
+};
/// @}
A => jasm/tokenize/tokens.cpp +52 -0
@@ 0,0 1,52 @@
+#include "pch.h"
+
+#include <processor/instructions.h>
+#include <tokenize/tokens.h>
+
+namespace jasm
+{
+
+std::string_view to_string(TokenType type) // Returns the printable name of a token type.
+{
+ static const std::string_view names[] = { // looked up by the numeric value of the enumerator; presumably listed in enumerator order - confirm against the TokenType declaration
+ std::string_view("whitespace"),
+ std::string_view("newline"),
+ std::string_view("boolean"),
+ std::string_view("char"),
+ std::string_view("integer"),
+ std::string_view("float"),
+ std::string_view("string"),
+ std::string_view("operator"),
+ std::string_view("keyword"),
+ std::string_view("type"),
+ std::string_view("symbol"),
+ std::string_view("end of file"),
+ std::string_view("processor keyword"),
+ std::string_view("processor specifier"),
+ };
+ static_assert(sizeof(names) / sizeof(names[0]) == static_cast<size_t>(TokenType::NumTypes), "Number of tokens doesn't match number of strings"); // compile time guard: the table must have exactly NumTypes entries
+
+ assert(type < TokenType::NumTypes); // NumTypes itself has no name; this is only checked where assert is active
+ return names[static_cast<size_t>(type)];
+}
+
+bool is_instruction(const Token &t) // True when t is a symbol token whose instruction index is valid.
+{
+ return t.type == TokenType::Symbol && t.instruction_index != invalid_instruction; // the type is tested first because instruction_index is a union member that is only set for symbol tokens
+}
+
+bool is_instruction(const Token &t, uint8_t &type) // Same test as the one argument overload, additionally handing back the instruction index through type.
+{
+ if (t.type != TokenType::Symbol) { // only symbol tokens carry an instruction index
+ return false;
+ }
+
+ if (t.instruction_index == invalid_instruction) { // a symbol that did not match any instruction
+ return false;
+ }
+
+ type = t.instruction_index; // type is written only on the path that returns true
+ return true;
+}
+
+}
A => jasm/tokenize/tokens.h +69 -0
@@ 0,0 1,69 @@
+#pragma once
+
+#include <processor/processor.h>
+#include <tokenize/keywords.h>
+#include <tokenize/operators.h>
+#include <tokenize/source_location.h>
+#include <tokenize/token_type.h>
+#include <tokenize/types.h>
+
+namespace jasm
+{
+
+/// @addtogroup tokenize
+/// @{
+
+struct Token // Header common to every token; the more specific token structs derive from it and append their payload.
+{
+ TokenType type; // kind of token; decides which member of the union below is meaningful
+ union
+ {
+ OperatorType operator_index;
+ uint8_t instruction_index; // this is set for symbol tokens (InstructionType)
+ KeywordType keyword_index;
+ uint8_t processor_keyword_index; // ProcessorKeywordType under a processor namespace
+ TypenameType typename_index;
+ BooleanType boolean_index;
+ ProcessorType processor; // ProcessorType::Unspecified means 'pop'
+ uint8_t generic_index; // untyped access to the stored index, used when the concrete kind does not matter
+ };
+ uint16_t payload_size; // size of the data following this header; zero when the token has none
+ SourceLocation source_location; // file index, row and column the token was read from
+ // 8 byte aligned
+ /// ... payload following ...
+};
+
+struct SymbolToken : public Token // Token for a symbol, with one 64 bit value as payload.
+{
+ uint64_t symbol_hash; // presumably the hash under which the symbol name was added to the string repository - confirm in the tokenizer
+};
+
+struct IntegerToken : public Token // Token with a signed 32 bit integer as payload.
+{
+ int32_t value; // the integer carried by the token
+};
+
+struct FloatToken : public Token // Token with a double precision number as payload.
+{
+ double value; // the floating point number carried by the token
+};
+
+struct CharToken : public IntegerToken // Adds no fields of its own; the value lives in the inherited IntegerToken payload.
+{
+};
+
+struct StringToken : public Token // Token for a string, with one 64 bit value as payload.
+{
+ uint64_t value; // NOTE(review): presumably a hash key into the StringRepository rather than the text itself - confirm
+};
+
+/// Return true if the token is an instruction.
+bool is_instruction(const Token &t);
+/// Return true if the token is an instruction.
+bool is_instruction(const Token &t, uint8_t &type);
+
+std::string_view to_string(TokenType type);
+
+/// @}
+
+} // namespace jasm
M jasm/parsing/types.cpp => jasm/tokenize/types.cpp +2 -2
@@ 1,10 1,10 @@
#include "pch.h"
-#include <parsing/types.h>
+#include <tokenize/types.h>
namespace jasm {
-const std::string_view to_string(TypenameType type)
+std::string_view to_string(TypenameType type)
{
static const std::string_view names[] = {
std::string_view("byte"),
M jasm/parsing/types.h => jasm/tokenize/types.h +1 -1
@@ 13,7 13,7 @@ enum class TypenameType : uint8_t
NumTypes,
};
-const std::string_view to_string(TypenameType type);
+std::string_view to_string(TypenameType type);
/// @}
M jasm/unit_test.py +17 -11
@@ 3,15 3,15 @@ import re
import subprocess
-def find_exe(processor_str):
+def find_exe():
"Find exe to use in unit tests or return None if none was found."
sources = [
- "../x64/debug-jasm-%s/jasm-%s.exe" % (processor_str, processor_str),
- "../x64/release-jasm-%s/jasm-%s.exe" % (processor_str, processor_str),
- "bin/debug/jasm-%s" % processor_str,
- "bin/release/jasm-%s" % processor_str,
- "../build/jasm-%s/jasm-%s" % (processor_str, processor_str)
+ "../x64/debug-jasm/jasm.exe",
+ "../x64/release-jasm/jasm.exe",
+ "bin/debug/jasm",
+ "bin/release/jasm",
+ "../build/jasm/jasm"
]
for exe in sources:
@@ 21,7 21,7 @@ def find_exe(processor_str):
return None
-def run_test(input_path, stdout_path, stderr_path, binary_path, exes):
+def run_test(input_path, stdout_path, stderr_path, binary_path, exe):
"""Run one test and store output in files.
- exes: dictionary from processor name to jasm executable path
+ exe: path to the jasm executable, or None if no executable was found
returns: True if the test was run and False if the processor was
@@ 40,14 40,20 @@ def run_test(input_path, stdout_path, st
processor = match.group(1)
command_line_arguments = match.group(2).split(" ")
- if processor not in exes:
- raise Exception("Unsupported processor %s in unit test %s" % (processor, input_path))
- exe = exes[processor]
if exe == None:
return False
+ processors = ["6502", "z80", "unspecified"]
+ if not processor in processors:
+ raise Exception("Unsupported processor %s." % processor)
+
+ if processor == "unspecified":
+ processor_arguments = []
+ else:
+ processor_arguments = ["-p", processor]
+
# construct command line
- command_line = [exe] + command_line_arguments + [input_path, binary_path]
+ command_line = [exe] + processor_arguments + command_line_arguments + [input_path, binary_path]
process = subprocess.Popen(command_line, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = process.communicate()
error_code = process.returncode
M jasm/unit_test_generate.py +6 -9
@@ 13,15 13,12 @@ for path in os.listdir("unit_tests"):
input_files_to_run.append(os.path.join("unit_tests", path))
-# collect exe files to use for all processors
+# find the single jasm exe that is used for all tests
-processors = ["6502", "z80"]
exes = {}
-for processor in processors:
- exe = unit_test.find_exe(processor)
- exes[processor] = exe
- if exe != None:
- print("Processor %s uses %s" % (processor, exe))
- else:
- print("Processor %s lacks compiled exe! Tests will not run." % processor)
+exe = unit_test.find_exe()
+if exe != None:
+ print("Test uses %s" % exe)
+else:
+ print("Test lacks compiled exe! Tests will not run.")
for input_file in input_files_to_run:
(dirname, filename) = os.path.split(input_file)
@@ 30,6 27,6 @@ for input_file in input_files_to_run:
stderr_file = os.path.join(dirname, "results", base_name + ".stderr")
output_file = os.path.join(dirname, "results", base_name + ".bin")
- test_ran = unit_test.run_test(input_file, stdout_file, stderr_file, output_file, exes)
+ test_ran = unit_test.run_test(input_file, stdout_file, stderr_file, output_file, exe)
if test_ran:
print("Generating unit test for " + input_file)
M jasm/unit_test_run.py +6 -11
@@ 54,16 54,11 @@ for path in os.listdir("unit_tests"):
input_files_to_run.sort()
-# collect exe files to use for all processors
-processors = ["6502", "z80"]
-exes = {}
-for processor in processors:
- exe = unit_test.find_exe(processor)
- exes[processor] = exe
- if exe != None:
- print("Processor %s uses %s" % (processor, exe))
- else:
- print("Processor %s lacks compiled exe! Tests will not run." % processor)
+exe = unit_test.find_exe()
+if exe != None:
+ print("Test uses %s" % exe)
+else:
+ print("Test lacks compiled exe! Tests will not run.")
num_tests = 0
num_failed = 0
@@ 81,7 76,7 @@ for input_file in input_files_to_run:
stderr_test = os.path.join(temp_dir, "output.stderr")
output_test = os.path.join(temp_dir, "output.bin")
- test_ran = unit_test.run_test(input_file, stdout_test, stderr_test, output_test, exes)
+ test_ran = unit_test.run_test(input_file, stdout_test, stderr_test, output_test, exe)
if not test_ran:
continue
A => jasm/unit_tests/code_6502.jasm +1 -0
A => jasm/unit_tests/code_z80_with_processor.jasm +2 -0
@@ 0,0 1,2 @@
+processor "z80"
+ret
M jasm/unit_tests/results/test_expected_include_string_error.stdout +2 -1
@@ 1,1 1,2 @@
-unit_tests/test_expected_include_string_error.asm(2,9) : Error 1013 : Expected path string after include keyword.
+unit_tests/test_expected_include_string_error.asm(2,9) : Error 3004 : Reference to undefined symbol nothing
+Assembly ended with errors.
M jasm/unit_tests/results/test_incbin_requires_string_argument.stdout +2 -1
@@ 1,1 1,2 @@
-unit_tests/test_incbin_requires_string_argument.asm(4,9) : Error 1013 : Expected path string after incbin keyword.
+unit_tests/test_incbin_requires_string_argument.asm(4,9) : Error 3048 : Incbin expects a filename string but got integer.
+Assembly ended with errors.
M jasm/unit_tests/results/test_incbin_throws_on_missing_file.stdout +1 -0
@@ 1,1 1,2 @@
unit_tests/test_incbin_throws_on_missing_file.asm(4,9) : Error 3047 : Failed to find include file 'missing'
+Assembly ended with errors.
A => jasm/unit_tests/results/test_incbin_with_constructed_filename.bin +1 -0
@@ 0,0 1,1 @@
+ABCD0123
No newline at end of file
A => jasm/unit_tests/results/test_incbin_with_missing_unimportant_file.bin +1 -0
@@ 0,0 1,1 @@
+`
No newline at end of file
A => jasm/unit_tests/results/test_include_requires_string_argument.stdout +2 -0
@@ 0,0 1,2 @@
+unit_tests/test_include_requires_string_argument.asm(5,10) : Error 3048 : Include expects a filename string but got integer.
+Assembly ended with errors.
A => jasm/unit_tests/results/test_include_with_constructed_filename.bin +1 -0
@@ 0,0 1,1 @@
+`
No newline at end of file
A => jasm/unit_tests/results/test_include_with_missing_unimportant_file.bin +1 -0
@@ 0,0 1,1 @@
+`
No newline at end of file
A => jasm/unit_tests/results/test_include_with_valid_file.bin +1 -0
@@ 0,0 1,1 @@
+`
No newline at end of file
A => jasm/unit_tests/results/test_include_within_macro.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_include_within_macro.asm(7,3) : Error 2054 : Includes are not allowed within macros
A => jasm/unit_tests/results/test_include_within_subroutine.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_include_within_subroutine.asm(7,3) : Error 2055 : Includes are not allowed withing subroutines.
M jasm/unit_tests/results/test_missing_include_file_error.stdout +2 -1
@@ 1,1 1,2 @@
-unit_tests/test_missing_include_file_error.asm(2,9) : Error 1011 : Failed to find include file 'missing.asm'
+unit_tests/test_missing_include_file_error.asm(2,9) : Error 1011 : Failed to open 'missing.asm'
+Assembly ended with errors.
A => jasm/unit_tests/results/test_processor_change_without_pop.bin +1 -0
@@ 0,0 1,1 @@
+`
No newline at end of file
A => jasm/unit_tests/results/test_processor_is_inherited_in_included_files.bin +1 -0
@@ 0,0 1,1 @@
+`
No newline at end of file
A => jasm/unit_tests/results/test_processor_is_unspecified_after_last_pop.stdout +2 -0
@@ 0,0 1,2 @@
+unit_tests/test_processor_is_unspecified_after_last_pop.asm(8,2) : Error 3004 : Reference to undefined symbol rts
+Assembly ended with errors.
A => jasm/unit_tests/results/test_processor_isnt_affecting_includer.bin +1 -0
@@ 0,0 1,1 @@
+`
No newline at end of file
A => jasm/unit_tests/results/test_processor_pop_brings_back_previous_processor.bin +1 -0
@@ 0,0 1,1 @@
+``
No newline at end of file
A => jasm/unit_tests/results/test_processor_pop_without_push.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_processor_pop_without_push.asm(3,11) : Error 1012 : Unmatched processor pop statement.
A => jasm/unit_tests/results/test_processor_pop_without_push_with_default_processor.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_processor_pop_without_push_with_default_processor.asm(3,11) : Error 1012 : Unmatched processor pop statement.
A => jasm/unit_tests/results/test_processor_with_invalid_keyword.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_processor_with_invalid_keyword.asm(3,11) : Error 1014 : Expected processor string or 'pop' keyword.
A => jasm/unit_tests/results/test_processor_with_invalid_name.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_processor_with_invalid_name.asm(3,11) : Error 1013 : Unsupported processor type "test". Supported types are 6502, "6510", "8502", "z80"
A => jasm/unit_tests/results/test_processor_with_invalid_type.stdout +1 -0
@@ 0,0 1,1 @@
+unit_tests/test_processor_with_invalid_type.asm(3,11) : Error 1014 : Expected processor string or 'pop' keyword.
M jasm/unit_tests/results/test_recursive_includes_error.stdout +1 -1
@@ 1,2 1,2 @@
-unit_tests/test_recursive_includes_error.asm(1,1) : Error 1012 : Include file recursion. 'unit_tests/test_recursive_includes_error.asm' is included twice from:
+unit_tests/test_recursive_includes_error.asm(1,1) : Error 3111 : Include file recursion. 'unit_tests/test_recursive_includes_error.asm' is included twice from:
unit_tests/test_recursive_includes_error.asm
M jasm/unit_tests/test_incbin_requires_string_argument.asm +1 -1
@@ 1,5 1,5 @@
// assembler command line arguments: 6502 [-v0 -hla]
section code, "main", $1000, $2000 {
- incbin .
+ incbin 5
}
A => jasm/unit_tests/test_incbin_with_constructed_filename.asm +6 -0
@@ 0,0 1,6 @@
+// assembler command line arguments: 6502 [-v0]
+
+section code, "main", $8000
+{
+ incbin "unit_tests/" + "test_binary.bin"
+}
A => jasm/unit_tests/test_incbin_with_missing_unimportant_file.asm +10 -0
@@ 0,0 1,10 @@
+// assembler command line arguments: 6502 [-v0]
+
+section code, "main", $8000
+{
+ if (no) {
+ incbin "todelo"
+ }
+ const no = false
+ rts
+}
A => jasm/unit_tests/test_include_requires_string_argument.asm +6 -0
@@ 0,0 1,6 @@
+// assembler command line arguments: 6502 [-v0]
+
+section code, "main", $8000
+{
+ include 5
+}
A => jasm/unit_tests/test_include_with_constructed_filename.asm +7 -0
@@ 0,0 1,7 @@
+// assembler command line arguments: 6502 [-v0]
+
+section code, "main", $8000
+{
+ nop
+ include "unit_tests/" + "code_6502.jasm"
+}
A => jasm/unit_tests/test_include_with_missing_unimportant_file.asm +10 -0
@@ 0,0 1,10 @@
+// assembler command line arguments: 6502 [-v0]
+
+section code, "main", $8000
+{
+ if (no) {
+ include "todelo"
+ }
+ const no = false
+ rts
+}
A => jasm/unit_tests/test_include_with_valid_file.asm +6 -0
@@ 0,0 1,6 @@
+// assembler command line arguments: 6502 [-v0]
+
+section code, "main", $8000
+{
+ include "unit_tests/code_6502.jasm"
+}
A => jasm/unit_tests/test_include_within_macro.asm +9 -0
@@ 0,0 1,9 @@
+// assembler command line arguments: 6502 [-v0]
+
+section code, "main", $8000
+{
+ macro m()
+ {
+ include "unit_tests/code_6502.jasm"
+ }
+}
A => jasm/unit_tests/test_include_within_subroutine.asm +9 -0
@@ 0,0 1,9 @@
+// assembler command line arguments: 6502 [-v0]
+
+section code, "main", $8000
+{
+ subroutine m
+ {
+ include "unit_tests/code_6502.jasm"
+ }
+}
A => jasm/unit_tests/test_processor_change_without_pop.asm +9 -0
@@ 0,0 1,9 @@
+// assembler command line arguments: unspecified [-v0]
+
+section code, "main", 0
+{
+ processor "6502"
+ rts
+ processor "z80"
+ ret
+}
A => jasm/unit_tests/test_processor_is_inherited_in_included_files.asm +8 -0
@@ 0,0 1,8 @@
+// assembler command line arguments: unspecified [-v0]
+
+section code, "main", 0
+{
+ processor "6502"
+ include "unit_tests/code_6502.jasm"
+ processor pop
+}
A => jasm/unit_tests/test_processor_is_unspecified_after_last_pop.asm +9 -0
@@ 0,0 1,9 @@
+// assembler command line arguments: unspecified [-v0]
+
+section code, "main", 0
+{
+ processor "6502"
+ rts
+ processor pop
+ rts
+}
A => jasm/unit_tests/test_processor_isnt_affecting_includer.asm +9 -0
@@ 0,0 1,9 @@
+// assembler command line arguments: unspecified [-v0]
+
+section code, "main", 0
+{
+ processor "6502"
+ include "unit_tests/code_z80_with_processor.jasm"
+ rts
+ processor pop
+}
A => jasm/unit_tests/test_processor_pop_brings_back_previous_processor.asm +12 -0
@@ 0,0 1,12 @@
+// assembler command line arguments: unspecified [-v0]
+
+section code, "main", 0
+{
+ processor "6502"
+ rts
+ processor "z80"
+ ret
+ processor pop
+ rts
+ processor pop
+}
A => jasm/unit_tests/test_processor_pop_without_push.asm +8 -0
@@ 0,0 1,8 @@
+// assembler command line arguments: unspecified [-v0]
+
+processor pop
+
+section code, "main", 0
+{
+
+}
A => jasm/unit_tests/test_processor_pop_without_push_with_default_processor.asm +8 -0
@@ 0,0 1,8 @@
+// assembler command line arguments: z80 [-v0]
+
+processor pop
+
+section code, "main", 0
+{
+
+}
A => jasm/unit_tests/test_processor_with_invalid_keyword.asm +8 -0
@@ 0,0 1,8 @@
+// assembler command line arguments: unspecified [-v0]
+
+processor bop
+
+section code, "main", 0
+{
+
+}
A => jasm/unit_tests/test_processor_with_invalid_name.asm +8 -0
@@ 0,0 1,8 @@
+// assembler command line arguments: unspecified [-v0]
+
+processor "test"
+
+section code, "main", 0
+{
+
+}
A => jasm/unit_tests/test_processor_with_invalid_type.asm +8 -0
@@ 0,0 1,8 @@
+// assembler command line arguments: unspecified [-v0]
+
+processor 4
+
+section code, "main", 0
+{
+
+}
M jasm/parsing/hasharray_repository.cpp => jasm/utility/hasharray_repository.cpp +2 -1
@@ 1,6 1,7 @@
#include "pch.h"
+
#include <core/strings/murmur_hash.h>
-#include <parsing/hasharray_repository.h>
+#include <utility/hasharray_repository.h>
// This implementation is dead simple and efficient if there isn't
// tons of namespace references everywhere and no 'using' statements.
M jasm/parsing/hasharray_repository.h => jasm/utility/hasharray_repository.h +1 -1
@@ 6,7 6,7 @@
namespace jasm
{
-/// @addtogroup syntax
+/// @addtogroup utility
/// @{
/// This class stores hash arrays and allows lookup using an integer value.
M jasm/parsing/token_chain.cpp => jasm/utility/token_chain.cpp +2 -69
@@ 1,26 1,12 @@
#include "pch.h"
-#include <algorithm>
#include <core/strings/murmur_hash.h>
#include <cstring>
-#include <parsing/token_chain.h>
+#include <utility/token_chain.h>
namespace jasm
{
-using namespace core;
-
-TokenChainScope::~TokenChainScope()
-{
- rewind();
-}
-
-void TokenChainScope::rewind()
-{
- if (_reader != nullptr)
- *_reader = _state;
-}
-
TokenChain::TokenChain(uint32_t buffer_size)
: _buffer_size(buffer_size)
{
@@ 59,7 45,7 @@ uint64_t TokenChain::hash() const
uint64_t hash = 0;
for (const auto &buffer : _buffers) {
if (LIKELY(!buffer.empty()))
- hash = murmur_hash3_x64_64(&*buffer.begin(), static_cast<int>(buffer.size()), hash);
+ hash = core::murmur_hash3_x64_64(&*buffer.begin(), static_cast<int>(buffer.size()), hash);
}
return hash;
}
@@ 72,57 58,4 @@ void TokenChain::allocate_buffer(uint32_
}
-void TokenReader::reset_read()
-{
- _read_position.u.parts.buffer_index = 0;
- _read_position.u.parts.buffer_position = 0;
-}
-
-
-const void *TokenReader::next_token() const
-{
- const TokenChain::Buffer &current_buffer = _chain->_buffers[_read_position.u.parts.buffer_index];
- bool at_last_buffer = _read_position.u.parts.buffer_index == _chain->_buffers.size() - 1;
- bool at_end_of_buffer = _read_position.u.parts.buffer_position >= current_buffer.size();
-
- // check for end of tokens
- if (at_last_buffer && at_end_of_buffer)
- return nullptr;
-
- const unsigned char *result = &current_buffer[_read_position.u.parts.buffer_position];
- return static_cast<const void *>(result);
-}
-
-const void *TokenReader::random_access_token(TokenReadPosition p) const
-{
- TokenReadPosition corrected = end_of_buffer_corrected_position(p);
- const TokenChain::Buffer &buffer = _chain->_buffers[corrected.u.parts.buffer_index];
- const unsigned char *result = &buffer[corrected.u.parts.buffer_position];
- return static_cast<const void *>(result);
-}
-
-void TokenReader::set_position_value(TokenReadPosition handle)
-{
- _read_position = end_of_buffer_corrected_position(handle);
-}
-
-void TokenReader::advance_read(uint32_t size)
-{
- _read_position.u.parts.buffer_position += TokenChain::align(size);
-
- // move to next buffer if at buffer end
- const TokenChain::Buffer &current_buffer = _chain->_buffers[_read_position.u.parts.buffer_index];
- bool at_end_of_buffer = _read_position.u.parts.buffer_position >= current_buffer.size();
- if (at_end_of_buffer) {
- bool at_last_buffer = _read_position.u.parts.buffer_index == _chain->_buffers.size() - 1;
- if (at_last_buffer) {
- // set pointer to end (possibly not moving at all)
- _read_position.u.parts.buffer_position = static_cast<uint32_t>(current_buffer.size());
- return;
- }
- ++_read_position.u.parts.buffer_index;
- _read_position.u.parts.buffer_position = 0;
- }
-}
-
} // namespace jasm
M jasm/parsing/token_chain.h => jasm/utility/token_chain.h +3 -129
@@ 1,33 1,13 @@
#pragma once
+#include <utility/token_read_position.h>
+
namespace jasm
{
-/// @addtogroup tokenize
+/// @addtogroup utility
/// @{
-/// This simply stores a chain index and position value that can be stored in a 64-bit int.
-struct TokenReadPosition
-{
- TokenReadPosition() {
- u.parts.buffer_index = 0;
- u.parts.buffer_position = 0;
- }
- TokenReadPosition(uint32_t index, uint32_t position) {
- u.parts.buffer_index = index;
- u.parts.buffer_position = position;
- }
-
- union
- {
- struct Parts {
- uint32_t buffer_index;
- uint32_t buffer_position;
- } parts;
- uint64_t value;
- } u;
-};
-
/// This class stores an array of token buffers and provides an interface
/// to append tokens to the chain. The data in the chain isn't reallocated
/// so pointers are valid during its entire lifetime.
@@ 95,112 75,6 @@ private:
std::vector<Buffer> _buffers;
};
-/// A TokenReader is used to read tokens from a TokenChain.
-/// The reader keeps a pointer to the token chain so it is not allowed to
-/// reallocate the token chain while reading. The assembler should not need
-/// that since the read happens after the write is complete and the chain
-/// is stable.
-class TokenReader
-{
-public:
- TokenReader() : _chain(nullptr) {}
- TokenReader(const TokenChain &chain) : _chain(&chain) {}
-
- /// Start reading from the beginning of the token chain.
- void reset_read();
-
- /// Set the position based on a single handle value.
- void set_position_value(TokenReadPosition handle);
-
- /// Convert the location to a single handle value.
- TokenReadPosition position_value() const
- {
- return _read_position;
- }
-
- /// After a read, you must advance the read head with the size of the token.
- void advance_read(uint32_t size);
-
- /// Get a pointer to the next token without moving the read position.
- /// You need to call @a advance_head to update the read position.
- /// @return A pointer to a token or nullptr if no more token exists.
- const void *next_token() const;
-
- /// Get a pointer to next token and static cast it to a specific type.
- template<typename T>
- const T *next_type() const {
- return static_cast<const T *>(next_token());
- }
-
- /// Random access a token and static cast it to a specific type.
- /// The token position must be valid and not at the end of the stream.
- template<typename T>
- const T &next_type(TokenReadPosition p) const {
- return *static_cast<const T *>(random_access_token(p));
- }
-
-private:
- /// Handle the case where the position is fetched, data is added and the
- /// data ends up in a new buffer so the position points to the wrong
- /// buffer.
- inline TokenReadPosition end_of_buffer_corrected_position(TokenReadPosition &p) const
- {
- const TokenChain::Buffer &buffer = _chain->_buffers[p.u.parts.buffer_index];
- bool at_end_of_buffer = p.u.parts.buffer_position >= buffer.size();
-
- if (at_end_of_buffer)
- return TokenReadPosition(p.u.parts.buffer_index + 1, 0);
- else
- return p;
- }
-
- /// Random access read in the token stream. If the position points
- /// outside the contents, the returned pointer is undefined.
- const void *random_access_token(TokenReadPosition p) const;
-
- const TokenChain *_chain; ///< A pointer to the token chain to read from.
- TokenReadPosition _read_position; ///< Buffer index and position.
-};
-
-/// A scope for keeping a read pointer and restoring it when going out of scope.
-class TokenChainScope
-{
-public:
- TokenChainScope(TokenChainScope &) = delete;
- TokenChainScope &operator=(TokenChainScope &) = delete;
-
- TokenChainScope(TokenReader &reader)
- : _reader(&reader)
- , _state(reader)
- {
- }
-
- TokenChainScope(TokenChainScope &&other)
- : _reader(other._reader)
- , _state(other._state)
- {
- other._reader = nullptr;
- }
-
- TokenChainScope &operator=(TokenChainScope &&other)
- {
- _reader = other._reader;
- _state = other._state;
-
- other._reader = nullptr;
- return *this;
- }
-
- ~TokenChainScope();
-
- /// Manual rewind of the state.
- void rewind();
-
-private:
- TokenReader *_reader;
- TokenReader _state;
-};
-
/// @}
} // namespace jasm
A => jasm/utility/token_chain_scope.cpp +19 -0
@@ 0,0 1,19 @@
+#include "pch.h"
+
+#include <utility/token_chain_scope.h>
+
+namespace jasm
+{
+
+TokenChainScope::~TokenChainScope()
+{
+ rewind();
+}
+
+void TokenChainScope::rewind()
+{
+ if (_reader != nullptr)
+ *_reader = _state;
+}
+
+} // namespace jasm
A => jasm/utility/token_chain_scope.h +52 -0
@@ 0,0 1,52 @@
+#pragma once
+
+#include <utility/token_reader.h>
+
+namespace jasm
+{
+
+/// @addtogroup utility
+/// @{
+
+/// A scope for keeping a read pointer and restoring it when going out of scope.
+class TokenChainScope
+{
+public:
+ TokenChainScope(TokenChainScope &) = delete;
+ TokenChainScope &operator=(TokenChainScope &) = delete;
+
+ TokenChainScope(TokenReader &reader)
+ : _reader(&reader)
+ , _state(reader)
+ {
+ }
+
+ TokenChainScope(TokenChainScope &&other)
+ : _reader(other._reader)
+ , _state(other._state)
+ {
+ other._reader = nullptr;
+ }
+
+ TokenChainScope &operator=(TokenChainScope &&other)
+ {
+ _reader = other._reader;
+ _state = other._state;
+
+ other._reader = nullptr;
+ return *this;
+ }
+
+ ~TokenChainScope();
+
+ /// Manual rewind of the state.
+ void rewind();
+
+private:
+ TokenReader *_reader;
+ TokenReader _state;
+};
+
+/// @}
+
+} // namespace jasm
A => jasm/utility/token_read_position.h +33 -0
@@ 0,0 1,33 @@
+#pragma once
+
+namespace jasm
+{
+
+/// @addtogroup utility
+/// @{
+
+/// This simply stores a chain index and position value that can be stored in a 64-bit int.
+struct TokenReadPosition
+{
+ TokenReadPosition() {
+ u.parts.buffer_index = 0;
+ u.parts.buffer_position = 0;
+ }
+ TokenReadPosition(uint32_t index, uint32_t position) {
+ u.parts.buffer_index = index;
+ u.parts.buffer_position = position;
+ }
+
+ union
+ {
+ struct Parts {
+ uint32_t buffer_index;
+ uint32_t buffer_position;
+ } parts;
+ uint64_t value;
+ } u;
+};
+
+/// @}
+
+} // namespace jasm
A => jasm/utility/token_reader.cpp +60 -0
@@ 0,0 1,60 @@
+#include "pch.h"
+
+#include <utility/token_reader.h>
+
+namespace jasm
+{
+
+void TokenReader::reset_read()
+{
+ _read_position.u.parts.buffer_index = 0;
+ _read_position.u.parts.buffer_position = 0;
+}
+
+const void *TokenReader::next_token() const
+{
+ const TokenChain::Buffer &current_buffer = _chain->_buffers[_read_position.u.parts.buffer_index];
+ bool at_last_buffer = _read_position.u.parts.buffer_index == _chain->_buffers.size() - 1;
+ bool at_end_of_buffer = _read_position.u.parts.buffer_position >= current_buffer.size();
+
+ // check for end of tokens
+ if (at_last_buffer && at_end_of_buffer)
+ return nullptr;
+
+ const unsigned char *result = &current_buffer[_read_position.u.parts.buffer_position];
+ return static_cast<const void *>(result);
+}
+
+const void *TokenReader::random_access_token(TokenReadPosition p) const
+{
+ TokenReadPosition corrected = end_of_buffer_corrected_position(p);
+ const TokenChain::Buffer &buffer = _chain->_buffers[corrected.u.parts.buffer_index];
+ const unsigned char *result = &buffer[corrected.u.parts.buffer_position];
+ return static_cast<const void *>(result);
+}
+
+void TokenReader::set_position_value(TokenReadPosition handle)
+{
+ _read_position = end_of_buffer_corrected_position(handle);
+}
+
+void TokenReader::advance_read(uint32_t size)
+{
+ _read_position.u.parts.buffer_position += TokenChain::align(size);
+
+ // move to next buffer if at buffer end
+ const TokenChain::Buffer &current_buffer = _chain->_buffers[_read_position.u.parts.buffer_index];
+ bool at_end_of_buffer = _read_position.u.parts.buffer_position >= current_buffer.size();
+ if (at_end_of_buffer) {
+ bool at_last_buffer = _read_position.u.parts.buffer_index == _chain->_buffers.size() - 1;
+ if (at_last_buffer) {
+ // set pointer to end (possibly not moving at all)
+ _read_position.u.parts.buffer_position = static_cast<uint32_t>(current_buffer.size());
+ return;
+ }
+ ++_read_position.u.parts.buffer_index;
+ _read_position.u.parts.buffer_position = 0;
+ }
+}
+
+} // namespace jasm
A => jasm/utility/token_reader.h +81 -0
@@ 0,0 1,81 @@
+#pragma once
+
+#include <utility/token_chain.h>
+#include <utility/token_read_position.h>
+
+namespace jasm
+{
+
+/// @addtogroup utility
+/// @{
+
+/// A TokenReader is used to read tokens from a TokenChain.
+/// The reader keeps a pointer to the token chain so it is not allowed to
+/// reallocate the token chain while reading. The assembler should not need
+/// that since the read happens after the write is complete and the chain
+/// is stable.
+class TokenReader
+{
+public:
+ TokenReader() : _chain(nullptr) {}
+ TokenReader(const TokenChain &chain) : _chain(&chain) {}
+
+ /// Start reading from the beginning of the token chain.
+ void reset_read();
+
+ /// Set the position based on a single handle value.
+ void set_position_value(TokenReadPosition handle);
+
+ /// Convert the location to a single handle value.
+ TokenReadPosition position_value() const
+ {
+ return _read_position;
+ }
+
+ /// After a read, you must advance the read head with the size of the token.
+ void advance_read(uint32_t size);
+
+ /// Get a pointer to the next token without moving the read position.
+ /// You need to call @a advance_read to update the read position.
+ /// @return A pointer to a token or nullptr if no more token exists.
+ const void *next_token() const;
+
+ /// Get a pointer to next token and static cast it to a specific type.
+ template<typename T>
+ const T *next_type() const {
+ return static_cast<const T *>(next_token());
+ }
+
+ /// Random access a token and static cast it to a specific type.
+ /// The token position must be valid and not at the end of the stream.
+ template<typename T>
+ const T &next_type(TokenReadPosition p) const {
+ return *static_cast<const T *>(random_access_token(p));
+ }
+
+private:
+ /// Handle the case where the position is fetched, data is added and the
+ /// data ends up in a new buffer so the position points to the wrong
+ /// buffer.
+ inline TokenReadPosition end_of_buffer_corrected_position(TokenReadPosition &p) const
+ {
+ const TokenChain::Buffer &buffer = _chain->_buffers[p.u.parts.buffer_index];
+ bool at_end_of_buffer = p.u.parts.buffer_position >= buffer.size();
+
+ if (at_end_of_buffer)
+ return TokenReadPosition(p.u.parts.buffer_index + 1, 0);
+ else
+ return p;
+ }
+
+ /// Random access read in the token stream. If the position points
+ /// outside the contents, the returned pointer is undefined.
+ const void *random_access_token(TokenReadPosition p) const;
+
+ const TokenChain *_chain; ///< A pointer to the token chain to read from.
+ TokenReadPosition _read_position; ///< Buffer index and position.
+};
+
+/// @}
+
+} // namespace jasm
M jasm/version.h +1 -1
@@ 1,1 1,1 @@
-1,24
+1,25
M jasm/website/site/docs/index.html +214 -81
@@ 4,6 4,8 @@
<title>jAsm Documentation</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="description" content="This is the documentation for the jAsm assembler.">
+ <meta name="keywords" content="jAsm,6502,z80,assembler,asm,cross-assembler">
+ <meta name="author" content="Jonas Hultén">
<link rel="shortcut icon" href="images/favicon.ico">
<link href="jasm.css" rel="stylesheet">
</head>
@@ 58,27 60,29 @@
<ul>
<li><a href="#fetching-source-code">Fetching Source Code</a></li>
<li><a href="#compiling-using-cmake">Compiling Using CMake</a></li>
-<li><a href="#compiling-using-vs">Compiling Using Visual Studio</a></li>
</ul></li>
<li><a href="#starting-jasm">Starting jAsm</a>
<ul>
<li><a href="#bank-mode">Bank Mode</a></li>
<li><a href="#predefined-constants">Predefined Constants</a></li>
<li><a href="#symbol-dumps">Symbol Dumps</a></li>
+<li><a href="#hex-output">Hex Output</a></li>
<li><a href="#binary-header">Binary Header</a></li>
<li><a href="#include-paths">Include Paths</a></li>
<li><a href="#max-errors">Max Errors</a></li>
<li><a href="#output-files-and-sections">Output Files and Sections</a></li>
-<li><a href="#verboseness">Verboseness</a></li>
+<li><a href="#default-processor">Default Processor</a></li>
<li><a href="#pseudo-instructions">Pseudo Instructions</a>
<ul>
<li><a href="#6502-pseudo-instructions">6502 Pseudo Instructions</a></li>
<li><a href="#z80-pseudo-instructions">Z80 Pseudo Instructions</a></li>
</ul></li>
+<li><a href="#verboseness">Verboseness</a></li>
<li><a href="#return-codes">Return Codes</a></li>
</ul></li>
<li><a href="#language-reference">Language Reference</a>
<ul>
+<li><a href="#selecting-processor">Selecting Processor</a></li>
<li><a href="#input-format">Input Format</a></li>
<li><a href="#comments">Comments</a></li>
<li><a href="#assembler-instruction-syntax">Assembler Instruction Syntax</a></li>
@@ 154,6 158,11 @@
<span class="instruction">sta</span> <span class="literal">$d020</span>
</code></pre>
+<p>Due to the large amount of source code with upper case instruction keywords, a python script is provided to convert upper case keywords in all .asm files in a directory. Run that like this.</p>
+
+<pre><code>python3 tools/convert_6502_keyword_case.py &lt;my_source_directory&gt;
+</code></pre>
+
<div id="z80"></div>
<h2>Z80</h2>
@@ 164,9 173,9 @@
<span class="instruction">ld</span> <span class="special">(</span>hl<span class="special">)</span>, a
</code></pre>
-<p>Due to the large amount of source code with upper case instruction keywords, a python script is provided to convert upper case keywords in all .asm files in a directory. Run that like this.</p>
-
-<pre><code>python3 jasm-z80/convert_z80_keyword_case.py &lt;my_source_directory&gt;
+<p>There's also a script to convert Z80 uppercase keywords to lowercase. Run that like this.</p>
+
+<pre><code>python3 tools/convert_z80_keyword_case.py &lt;my_source_directory&gt;
</code></pre>
<div id="starter-guide"></div>
@@ 177,7 186,9 @@
<p>We'll start by creating a small program in a text file.</p>
-<pre><code><span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, <span class="literal">$8000</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, <span class="literal">$8000</span>
<span class="special">{</span>
<span class="instruction">inc</span> <span class="literal">$d020</span>
<span class="instruction">rts</span>
@@ 186,7 197,7 @@
<p>Save this to a file named main.jasm. Use utf-8 format, because this is what jAsm expects. 7-bit ASCII is also ok since that is compatible with the utf-8 format. Now we'll assemble it into a binary. Open a command line window and change the current directory to where the main.jasm file is. Type this on the command line.</p>
-<pre><code>jasm-6502 -hla main.jasm main.prg
+<pre><code>jasm -hla main.jasm main.prg
</code></pre>
<p>Now you have a program that changes the border color on a Commodore 64. Load it into an emulator or onto a real machine.</p>
@@ 207,7 218,9 @@
<p>If you want to start it on a Commodore 64 with a BASIC line, you need to add the necessary data to produce a SYS line at the BASIC start. This is specific to the Commodore BASIC v2. This example shows how to do that in jAsm.</p>
-<pre><code><span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, <span class="literal">$0801</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, <span class="literal">$0801</span>
<span class="special">{</span>
<span class="keyword">define</span> <span class="keyword">word</span> <span class="operator">=</span> .next_basic_line <span class="comment">// next BASIC line</span>
<span class="keyword">define</span> <span class="keyword">word</span> <span class="operator">=</span> <span class="literal">2016</span> <span class="comment">// line number</span>
@@ 235,7 248,9 @@
<p>This BASIC line thing will be used a lot in programs since almost all programs loaded from disk will need it. Let's break out this code into a handy macro that we can reuse. The macro will need two arguments, one is the line number and one is the address to start the program from.</p>
-<pre><code><span class="keyword">macro</span> basic_sys_line<span class="special">(</span>.line_number, .sys_address<span class="special">)</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">macro</span> basic_sys_line<span class="special">(</span>.line_number, .sys_address<span class="special">)</span>
<span class="special">{</span>
<span class="keyword">define</span> <span class="keyword">word</span> <span class="operator">=</span> .next_basic_line <span class="comment">// next BASIC line</span>
<span class="keyword">define</span> <span class="keyword">word</span> <span class="operator">=</span> .line_number
@@ 266,7 281,9 @@
<p>Move the macro code into a file called macros.jasm and place it where main.jasm lies. We can now include the macros in main.jasm.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, <span class="literal">$0801</span>
<span class="special">{</span>
@@ 284,7 301,9 @@
<p>The border color changing address isn't exactly self explanatory. The BASIC start address is also a naked constant that isn't exactly self explained. Let's make this a bit better.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">const</span> BASIC_START <span class="operator">=</span> <span class="literal">$0801</span>
<span class="keyword">const</span> BORDER_COLOR <span class="operator">=</span> <span class="literal">$d020</span>
@@ 301,7 320,9 @@
<p>I use uppercase characters for fixed address constants (basically any naked constant) to make it easy to identify them. <code>BASIC_START</code> and <code>BORDER_COLOR</code> can now be used instead of the naked constants. Let's move the constants out into their own file as well. Call this c64.jasm since they describe constants specific to Commodore 64. We'll include this as well in the program.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, BASIC_START
@@ 326,7 347,9 @@
<p>Now, what we need is a way to include either the c64.jasm or vic20.jasm file based on an option somewhere. Let's add the selection first.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">if</span> <span class="special">(</span>C64_BUILD<span class="special">)</span> <span class="special">{</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="special">}</span> <span class="keyword">else</span> <span class="special">{</span>
@@ 349,8 372,8 @@
<p>The <code><span class="keyword">if</span></code> statement wants a boolean expression within the parentheses and if true the first block of code is used, otherwise the second block is used. We can feed constants from the command line to solve this. The command line option is <code>-d</code> and it needs to be followed by an assignment. In this case we want to assign <code>C64_BUILD</code> to <code><span class="literal">true</span></code> or <code><span class="literal">false</span></code>.</p>
-<pre><code>jasm-6502 -d C64_BUILD=true main.jasm main.prg
-jasm-6502 -d C64_BUILD=false main.jasm main.prg
+<pre><code>jasm -d C64_BUILD=true main.jasm main.prg
+jasm -d C64_BUILD=false main.jasm main.prg
</code></pre>
<div id="starter-guide-definining-data"></div>
@@ 364,7 387,9 @@ jasm-6502 -d C64_BUILD=false main.jasm m
<p>Now we'll add the loop to print the text.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, BASIC_START
@@ 393,7 418,9 @@ jasm-6502 -d C64_BUILD=false main.jasm m
<p>This works but is hard to read. It isn't obvious where the loop starts and ends unless we read the instructions. Let's improve it using indentation.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, BASIC_START
@@ 420,7 447,9 @@ jasm-6502 -d C64_BUILD=false main.jasm m
<p>This is better but can be improved further. jAsm supports an automatic <code>@loop</code> label at the beginning of a scope defined by curly braces.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, BASIC_START
@@ 450,7 479,9 @@ jasm-6502 -d C64_BUILD=false main.jasm m
<p>If we want to print more text we need to move the loop into a subroutine which can be called with a jsr instruction and some parameters in registers.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, BASIC_START
@@ 493,10 524,10 @@ jasm-6502 -d C64_BUILD=false main.jasm m
<p><code><span class="operator">*</span></code> in the subroutine represents the current program counter. <code><span class="operator">*</span> <span class="operator">+</span> <span class="literal">1</span></code> points one byte into the next instruction, which is where the instruction argument is. All is well, except that it doesn't assemble!</p>
-<pre><code>main.jasm(23,7) : Error 3004 : Reference to undefined symbol .addr
-main.jasm(24,7) : Error 3004 : Reference to undefined symbol .addr
-main.jasm(24,13) : Error 3000 : Operator + is not defined for left hand side unknown type.
-main.jasm(25,7) : Error 3004 : Reference to undefined symbol .size
+<pre><code>main.jasm(25,7) : Error 3004 : Reference to undefined symbol .addr
+main.jasm(26,7) : Error 3004 : Reference to undefined symbol .addr
+main.jasm(26,13) : Error 3000 : Operator + is not defined for left hand side unknown type.
+main.jasm(27,7) : Error 3004 : Reference to undefined symbol .size
</code></pre>
<div id="starter-guide-declaring-symbols"></div>
@@ 507,7 538,9 @@ main.jasm(25,7) : Error 3004 : Reference
<p>To solve this we can declare the symbol names in the subroutine scope but define the constants inside the loop. This is the working subroutine.</p>
-<pre><code><span class="comment">// -> xa: address to text</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="comment">// -> xa: address to text</span>
<span class="comment">// -> y: size of text</span>
<span class="keyword">subroutine</span> print_text
<span class="special">{</span>
@@ 536,7 569,9 @@ main.jasm(25,7) : Error 3004 : Reference
<p>There is a more intuitive way to declare the <code>.addr</code> and <code>.size</code> addresses. Instruction data labels can point directly to the instruction argument by placing a label definition between the instruction and the argument.</p>
-<pre><code><span class="comment">// -> xa: address to text</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="comment">// -> xa: address to text</span>
<span class="comment">// -> y: size of text</span>
<span class="keyword">subroutine</span> print_text
<span class="special">{</span>
@@ 563,7 598,9 @@ main.jasm(25,7) : Error 3004 : Reference
<p>This subroutine can be reused so let's move it to its own file. Name a new file screen_io.jasm and paste the subroutine into it. Now we'll modify the main file to include this new file. Note that we now must include the file inside the section because otherwise generated code or data would lie outside any section and that isn't allowed. Only code sections can contain code or data. The other include files only contain constant definitions and macros and they don't directly produce any code or data themselves. That's why they can be outside a section.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="keyword">section</span> <span class="keyword">code</span>, <span class="literal">"main"</span>, BASIC_START
@@ 589,7 626,9 @@ main.jasm(25,7) : Error 3004 : Reference
<p>Self modifying code is handy and can improve efficiency but it doesn't work if the code is in a cartridge ROM, because it can't be modified. Let's try modifying the code to use the zero page instead. To do this we need to reserve some space for variables in the zero page area. This is done with a bss section. BSS stands for "Block Started by Symbol" and means a static memory block that is part of the program, but without its content stored in the executable file. The bss section doesn't generate any code or data, it just reserves uninitialized space. I reserved the last 5 bytes in the zero page area from $fb to, but not including, $100.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="keyword">section</span> <span class="keyword">bss</span>, <span class="literal">"zero page"</span>, <span class="literal">$fb</span>, <span class="literal">$100</span>
@@ 645,7 684,9 @@ main.jasm(25,7) : Error 3004 : Reference
<p>It would also be nice to avoid having to specify the length of the string when printing it. The code became a bit kludgy when swapping registers. We can solve this by removing the need for the size argument. If we zero terminate the string we can get rid of it (or swap argument registers).</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="keyword">section</span> <span class="keyword">bss</span>, <span class="literal">"zero page"</span>, <span class="literal">$fb</span>, <span class="literal">$100</span>
@@ 699,7 740,9 @@ main.jasm(25,7) : Error 3004 : Reference
<p>This is what main.jasm looks like after the change.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="keyword">section</span> <span class="keyword">bss</span>, <span class="literal">"zero page"</span>, <span class="literal">$fb</span>, <span class="literal">$100</span>
@@ 800,7 843,9 @@ main.jasm(25,7) : Error 3004 : Reference
<p>If <code>print_text</code> is used a lot in one place it is also possible to specify that a namespace should be used in a scope. As long as other names don't start to collide, this is just as good.</p>
-<pre><code><span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+
+<span class="keyword">include</span> <span class="literal">"macros.jasm"</span>
<span class="keyword">include</span> <span class="literal">"c64.jasm"</span>
<span class="keyword">section</span> <span class="keyword">bss</span>, <span class="literal">"zero page"</span>, <span class="literal">$fb</span>, <span class="literal">$100</span>
@@ 871,7 916,7 @@ main.jasm(25,7) : Error 3004 : Reference
<p>jAsm can assist debugging in the VICE emulator by exporting the names of addresses for use in the emulator. Add <a href="#symbol-dumps">--dump-vice-symbols</a> and a filename to the command line arguments to export this information.</p>
-<pre><code>jasm-6502 --dump-vice-symbols main.vs main.jasm main.prg
+<pre><code>jasm --dump-vice-symbols main.vs main.jasm main.prg
</code></pre>
<p>Now, a symbol file will be created called <code>main.vs</code>. Let's start the emulator (install it first if you don't have it) and use the file.</p>
@@ 987,12 1032,12 @@ main.jasm(25,7) : Error 3004 : Reference
<h2>Fetching Source Code</h2>
-<p>You need to fetch the source code from BitBucket to get started. If you have a command line Mercurial client you can clone the repository like this.</p>
-
-<pre><code>hg clone ssh://hg@bitbucket.org/bjonte/jasm
+<p>You need to fetch the source code from SourceHut to get started. If you have a command line Mercurial client you can clone the repository like this.</p>
+
+<pre><code>hg clone https://hg.sr.ht/~bjonte/jasm
</code></pre>
-<p>jAsm compiles using CMake and Clang or using Code::Blocks or Visual Studio.</p>
+<p>jAsm compiles using CMake and Clang.</p>
<div id="compiling-using-cmake"></div>
@@ 1005,7 1050,7 @@ main.jasm(25,7) : Error 3004 : Reference
<p>Clone the repository into a directory called 'jasm' and build it like this.</p>
-<pre><code>hg clone ssh://hg@bitbucket.org/bjonte/jasm
+<pre><code>hg clone https://hg.sr.ht/~bjonte/jasm
cd jasm
export CXX=/usr/bin/clang++
mkdir build
@@ 1021,7 1066,7 @@ sudo make install
<p>Cross compile like this.</p>
-<pre><code>hg clone ssh://hg@bitbucket.org/bjonte/jasm
+<pre><code>hg clone https://hg.sr.ht/~bjonte/jasm
cd jasm
mkdir build
cd build
@@ 1029,20 1074,12 @@ cmake -DCMAKE_TOOLCHAIN_FILE=../win64_cr
make
</code></pre>
-<p>You will find the binaries in build/jasm-6502 and build/jasm-z80. You will also need the MingW dynamic link libraries found here in Linux Mint.</p>
+<p>You will find the binaries in build/jasm. You will also need the MinGW dynamic link libraries, which are found in these locations on Linux Mint.</p>
<pre><code>/usr/lib/gcc/x86_64-w64-mingw32/7.3-win32/libgcc_s_seh-1.dll
/usr/lib/gcc/x86_64-w64-mingw32/7.3-win32/libstdc++-6.dll
</code></pre>
-<div id="compiling-using-vs"></div>
-
-<h2>Compiling Using Visual Studio</h2>
-
-<p><i>The Visual Studio solution is no longer actively maintained and may not work. Cross compiling on Linux is the supported method to build Windows binaries.</i></p>
-
-<p>Download Visual Studio 2015 from www.microsoft.com and install it. Double click on the <code>jasm.sln</code> file to open the project. Select the <code>Release</code> configuration and build the solution. You will get a <code>jasm.exe</code> binary in <code>x64\Release</code>.</p>
-
<div id="starting-jasm"></div>
<h1>Starting jAsm</h1>
@@ 1051,12 1088,7 @@ make
<p>jAsm is a command line tool. It will print its arguments if started without any. Basically it needs an input file and an output file.</p>
-<pre><code>jasm-6502 input.jasm output.bin
-</code></pre>
-
-<p>If you are assembling for Z80, use that version of the assembler instead.</p>
-
-<pre><code>jasm-z80 input.jasm output.bin
+<pre><code>jasm input.jasm output.bin
</code></pre>
<p>There are some flags to tweak how the assembler behaves.</p>
@@ 1067,7 1099,7 @@ make
<p>When working with several memory banks it is handy to place them after each other in memory. That way it is possible to check which bank code or data belongs to just looking at the address. For example, cartridge bank 0 could be located at $08000-$0a000 and bank 1 at $18000-$1a000. However, jAsm will generate an error when trying to reference bank 1 in data definitions or instructions because the addresses exceeds 16 bits. This can be overridden with the <code>--bank-mode</code> flag, which automatically truncates long addresses.</p>
-<pre><code>jasm-6502 --bank-mode input.jasm output.bin
+<pre><code>jasm --bank-mode input.jasm output.bin
</code></pre>
<p><em>A shortcut alternative is <code>-bm</code>.</em></p>
@@ 1080,8 1112,8 @@ make
<p>You can instruct the assembler to create some initial constants that can be accessed in the source code with the <code>--define</code> flag.</p>
-<pre><code>jasm-6502 --define INFINITE_LIVES=true --define STARTING_LIVES=3 input.jasm output.bin
-jasm-6502 --define DEFAULT_NAME=bobo input.jasm output.bin
+<pre><code>jasm --define INFINITE_LIVES=true --define STARTING_LIVES=3 input.jasm output.bin
+jasm --define DEFAULT_NAME=bobo input.jasm output.bin
</code></pre>
<p>You can feed it with integers, booleans and strings, like in the examples above.</p>
@@ 1105,32 1137,68 @@ jasm-6502 --define DEFAULT_NAME=bobo inp
<p>Dump jAsm symbols like this.</p>
-<pre><code>jasm-6502 --dump-symbols symbols.txt input.jasm output.bin
+<pre><code>jasm --dump-symbols symbols.txt input.jasm output.bin
</code></pre>
<p><em>A shortcut alternative is <code>-ds</code>.</em></p>
<p>Dump VICE symbols like this.</p>
-<pre><code>jasm-6502 --dump-vice-symbols symbols.vs input.jasm output.bin
+<pre><code>jasm --dump-vice-symbols symbols.vs input.jasm output.bin
</code></pre>
<p><em>A shortcut alternative is <code>-dv</code>.</em></p>
<p>Dump No$GBA symbols like this.</p>
-<pre><code>jasm-6502 --dump-gba-symbols symbols.sym input.jasm output.bin
+<pre><code>jasm --dump-gba-symbols symbols.sym input.jasm output.bin
</code></pre>
<p><em>A shortcut alternative is <code>-dg</code>.</em></p>
+<div id="hex-output"></div>
+
+<h2>Hex Output</h2>
+
+<p>The assembled program can be written as a hex file, interleaved with the source lines that produced the output, to help you understand what the assembler generated.</p>
+
+<p>Write hex output like this.</p>
+
+<pre><code>jasm --dump-hex hex_output.txt input.jasm output.bin
+</code></pre>
+
+<p><em>A shortcut alternative is <code>-dh</code>.</em></p>
+
+<p>The file contains all source lines that generate data. The first column is the program counter, followed by up to four columns of generated bytes in hexadecimal. After that come the line number and the source code that produced the data.</p>
+
+<pre><code> ./source/main_loop.jasm
+--------------------------------------------------------------------------------
+ 0400: 20 17 04 7: jsr setup_cpu
+ 8:
+ 0403: 20 46 04 9: jsr blank_screen
+ 10:
+ 0406: 20 00 1f 11: jsr mmu::setup
+ 0409: 20 6b 04 12: jsr init_reset_vector
+</code></pre>
+
+<p>When the source file changes, the file name and a line with dashes will be added. In case there is a longer jump in line numbers or a jump backwards, a partially dashed line is printed.</p>
+
+<pre><code> 046b: ad 06 d5 51: lda MMURCR
+ 046e: 48 52: pha
+ -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+ 046f: ad 06 d5 67: lda MMURCR
+ 0472: 29 f7 68: and #~MMURCR_COMMON_TOP
+ 0474: 09 04 69: ora #MMURCR_COMMON_BOTTOM
+ 0476: 8d 06 d5 70: sta MMURCR
+</code></pre>
+
<div id="binary-header"></div>
<h2>Binary Header</h2>
<p>By default, jAsm outputs only the binary data without any header. To generate a program file for Commodore 64 that can be loaded from BASIC, a two byte header must be added containing the load address in little endian format. You can add this header using <code>--header-little-endian-address</code>.</p>
-<pre><code>jasm-6502 --header-little-endian-address input.jasm output.prg
+<pre><code>jasm --header-little-endian-address input.jasm output.prg
</code></pre>
<p><em>A shortcut alternative is <code>-hla</code>.</em></p>
@@ 1141,7 1209,7 @@ jasm-6502 --define DEFAULT_NAME=bobo inp
<p>You can add include paths using the <code>--include-dir</code> flag. jAsm will look in these for included files.</p>
-<pre><code>jasm-6502 --include-dir some/dir --include-dir other/dir input.jasm output.bin
+<pre><code>jasm --include-dir some/dir --include-dir other/dir input.jasm output.bin
</code></pre>
<p><em>A shortcut alternative is <code>-i</code>.</em></p>
@@ 1152,7 1220,7 @@ jasm-6502 --define DEFAULT_NAME=bobo inp
<p>With the <code>--max-errors</code> flag, you can specify the number of errors that will be printed before jAsm stops assembling.</p>
-<pre><code>jasm-6502 --max-errors 4 input.jasm output.bin
+<pre><code>jasm --max-errors 4 input.jasm output.bin
</code></pre>
<p><em>A shortcut alternative is <code>-me</code>.</em></p>
@@ 1163,39 1231,38 @@ jasm-6502 --define DEFAULT_NAME=bobo inp
<p>The default output mode will merge all code sections into one big binary and pad the inbetween space with zero. With the flag <code>--output-multiple-files</code>, this can be changed to store one file per section instead. Each file will be named after the output file but add the section name before the file extension.</p>
-<pre><code>jasm-6502 --output-multiple-files input.jasm output.bin
+<pre><code>jasm --output-multiple-files input.jasm output.bin
</code></pre>
<p><em>A shortcut alternative is <code>-om</code>.</em></p>
<p>You can choose to have jAsm name the files after the sections by not specifying an output file name.</p>
-<pre><code>jasm-6502 --output-multiple-files input.jasm
+<pre><code>jasm --output-multiple-files input.jasm
</code></pre>
<p>You may want to add an extension to the section names when using them as file names. Use the option <code>--file-extension</code> to do that.</p>
-<pre><code>jasm-6502 --output-multiple-files --file-extension prg input.jasm
+<pre><code>jasm --output-multiple-files --file-extension prg input.jasm
</code></pre>
<p><em>A shortcut alternative is <code>-ext</code>.</em></p>
-<div id="verboseness"></div>
-
-<h2>Verboseness</h2>
-
-<p>jAsm supports several levels of output during assembly. This is controlled by the <code>-v0</code>, <code>-v1</code>, <code>-v2</code> and <code>-v3</code> flags.</p>
-
-<pre><code>jasm-6502 -v2 input.jasm output.bin
+<div id="default-processor"></div>
+
+<h2>Default Processor</h2>
+
+<p>You can set the default processor to use when assembling the source code using the option <code>--processor</code>. If you do this you won't need to specify the processor in the source code, unless you need to switch it.</p>
+
+<pre><code>jasm --processor 6502 input.jasm output.bin
</code></pre>
-<table>
- <tr><th>Flag</th><th>Meaning</th></tr>
- <tr><td><code>-v0</code></td><td>Show errors</td></tr>
- <tr><td><code>-v1</code></td><td>Show errors and warnings</td></tr>
- <tr><td><code>-v2</code></td><td>Show errors, warnings, printouts and general information</td></tr>
- <tr><td><code>-v3</code></td><td>Show errors, warnings, general information and debugging information</td></tr>
-</table>
+<p>or</p>
+
+<pre><code>jasm --processor z80 input.jasm output.bin
+</code></pre>
+
+<p><em>A shortcut alternative is <code>-p</code>.</em></p>
<div id="pseudo-instructions"></div>
@@ 1233,6 1300,23 @@ jasm-6502 --define DEFAULT_NAME=bobo inp
<p>They are implemented using two instructions under the hood. First the high register part is loaded and then the low.</p>
+<div id="verboseness"></div>
+
+<h2>Verboseness</h2>
+
+<p>jAsm supports several levels of output during assembly. This is controlled by the <code>-v0</code>, <code>-v1</code>, <code>-v2</code> and <code>-v3</code> flags.</p>
+
+<pre><code>jasm -v2 input.jasm output.bin
+</code></pre>
+
+<table>
+ <tr><th>Flag</th><th>Meaning</th></tr>
+ <tr><td><code>-v0</code></td><td>Show errors</td></tr>
+ <tr><td><code>-v1</code></td><td>Show errors and warnings</td></tr>
+ <tr><td><code>-v2</code></td><td>Show errors, warnings, printouts and general information</td></tr>
+ <tr><td><code>-v3</code></td><td>Show errors, warnings, general information and debugging information</td></tr>
+</table>
+
<div id="return-codes"></div>
<h2>Return Codes</h2>
@@ 1247,6 1331,55 @@ jasm-6502 --define DEFAULT_NAME=bobo inp
<p>This section documents the entire syntax. Have a look at the starter guide first to get a grasp of the basics before digging into this.</p>
+<div id="selecting-processor"></div>
+
+<p>To assemble instructions jAsm needs to know what processor to target. This is done by either specifying the processor using <a href="#default-processor">command line flags</a> or by a keyword in the source code. Specify the processor in a source file like this.</p>
+
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+</code></pre>
+
+<p>After this statement, the assembler can handle 6502 processor instructions. You can switch processor in a source file several times.</p>
+
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+ <span class="instruction">rts</span>
+<span class="keyword">processor</span> <span class="literal">"z80"</span>
+ ret
+</code></pre>
+
+<p>It is also possible to momentarily change the processor and switch back to whatever it was before. The <code><span class="keyword">processor</span> <span class="keyword">pop</span></code> statement is used to change back to the previously set processor.</p>
+
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+ <span class="instruction">rts</span>
+ <span class="keyword">processor</span> <span class="literal">"z80"</span>
+ ret
+ <span class="keyword">processor</span> <span class="keyword">pop</span>
+ <span class="instruction">rts</span>
+<span class="keyword">processor</span> <span class="keyword">pop</span>
+</code></pre>
+
+<p>An included file inherits the processor from the file containing the include statement, but a processor set inside the included file does not affect the file that included it.</p>
+
+<p>Suppose we have a file named test.jasm:</p>
+
+<pre><code><span class="comment">// processor 6502 inherited from main.jasm</span>
+<span class="instruction">rts</span>
+
+<span class="keyword">processor</span> <span class="literal">"z80"</span>
+<span class="comment">// processor is now set to z80</span>
+<span class="instruction">ret</span>
+</code></pre>
+
+<p>and a file named main.jasm:</p>
+
+<pre><code><span class="keyword">processor</span> <span class="literal">"6502"</span>
+ <span class="comment">// processor is now set to 6502</span>
+ <span class="keyword">include</span> <span class="literal">"test.jasm"</span>
+ <span class="comment">// processor is still 6502</span>
+ <span class="instruction">lda</span> <span class="operator">#</span><span class="literal">0</span>
+</code></pre>
+
+<p>When including <code>test.jasm</code>, the <code><span class="instruction">rts</span></code> instruction is assembled using 6502 because it was inherited from <code>main.jasm</code>. The <code><span class="instruction">ret</span></code> instruction is assembled as z80 since the processor was changed in the included file before the instruction. After the included file the processor is 6502 since the included file won't affect the file it is included from.</p>
+
<div id="input-format"></div>
<h2>Input Format</h2>
@@ 2040,10 2173,10 @@ aa
<td><code><span class="keyword">var</span> aa <span class="operator">=</span> list<span class="special">(</span><span class="literal">1</span>, <span class="literal">2</span>, <span class="literal">3</span><span class="special">)</span></code><br/><code>aa.push<span class="special">(</span><span class="literal">4</span><span class="special">)</span> <span class="comment">// [1, 2, 3, 4]</span></code></td>
</tr>
<tr>
- <td><code>pop<span class="special">(</span><span class="special">)</span></code></td>
+ <td><code><span class="keyword">pop</span><span class="special">(</span><span class="special">)</span></code></td>
<td></td>
<td>Removes the last element in the list and returns the list.</td>
- <td><code><span class="keyword">var</span> aa <span class="operator">=</span> list<span class="special">(</span><span class="literal">1</span>, <span class="literal">2</span>, <span class="literal">3</span><span class="special">)</span></code><br/><code>aa.pop<span class="special">(</span><span class="special">)</span> <span class="comment">// [1, 2]</span></code></td>
+ <td><code><span class="keyword">var</span> aa <span class="operator">=</span> list<span class="special">(</span><span class="literal">1</span>, <span class="literal">2</span>, <span class="literal">3</span><span class="special">)</span></code><br/><code>aa.<span class="keyword">pop</span><span class="special">(</span><span class="special">)</span> <span class="comment">// [1, 2]</span></code></td>
</tr>
<tr>
<td><code>insert<span class="special">(</span>position, value<span class="special">)</span></code></td>
@@ 2147,7 2280,7 @@ aa
<pre><code><span class="keyword">const</span> aa <span class="operator">=</span> list<span class="special">(</span><span class="literal">1</span>, <span class="literal">2</span>, <span class="literal">3</span><span class="special">)</span>
<span class="keyword">var</span> bb <span class="operator">=</span> aa
-bb.pop<span class="special">(</span><span class="special">)</span>
+bb.<span class="keyword">pop</span><span class="special">(</span><span class="special">)</span>
<span class="function">print</span><span class="special">(</span><span class="literal">"{} {}\n"</span>, aa, bb<span class="special">)</span> <span class="comment">// [1, 2, 3] [1, 2]</span>
</code></pre>
M jasm/website/site/index.html +15 -4
@@ 4,7 4,8 @@
<title>jAsm</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="description" content="This is the official home for the jAsm assembler.">
- <meta name="keywords" content="jAsm, 6502, z80, assembler, asm, cross-assembler">
+ <meta name="keywords" content="jAsm,6502,z80,assembler,asm,cross-assembler">
+ <meta name="author" content="Jonas Hultén">
<link rel="shortcut icon" href="images/favicon.ico">
<style>
@@ 80,10 81,10 @@
<h1>The Binaries</h1>
<ul>
<li>
- <a href="binaries/jasm_1.24_linux64.7z">jAsm 1.24 for 64-bit Linux</a>
+ <a href="binaries/jasm_1.25_linux64.7z">jAsm 1.25 for 64-bit Linux</a>
</li>
<li>
- <a href="binaries/jasm_1.24_win64.7z">jAsm 1.24 for 64-bit Windows</a>
+ <a href="binaries/jasm_1.25_win64.7z">jAsm 1.25 for 64-bit Windows</a>
</li>
</ul>
<h1>The Source</h1>
@@ 91,13 92,23 @@
As a tribute to Matthew Dillon, who created <a href="http://dasm-dillon.sourceforge.net/">DAsm</a> and released the source code to the public, I am releasing the source code to jAsm as well. I have used DAsm for many years and I have made smaller adjustments to suit my needs over the years. That wouldn't have been possible without access to the source code. I am very grateful and I am giving back to the community by releasing the source for free.
</p>
<p>
- The source code is available on BitBucket at <a href="https://bitbucket.org/bjonte/jasm">https://bitbucket.org/bjonte/jasm</a>.
+ The source code is available on SourceHut at <a href="https://hg.sr.ht/~bjonte/jasm">https://hg.sr.ht/~bjonte/jasm</a>.
</p>
</div>
<div class="section even">
<h1>Version History</h1>
<ul>
<li>
+ 1.25
+ <ul>
+ <li>Fixed a serious memory overwrite bug when multibyte characters are used in the source code.</li>
+ <li>Unified all processors into one executable. The processor keyword is used to select the processor.</li>
+ <li>Include and incbin have moved from the tokenizer pass to the assemble pass to allow constructing include filenames programmatically.</li>
+ <li>Added optional hex output.</li>
+ <li>Project files for Code::Blocks and Visual Studio have been removed.</li>
+ </ul>
+ </li>
+ <li>
1.24
<ul>
<li>Added pseudo instructions for 6502 and Z80.</li>
M release.py +4 -12
@@ 50,12 50,8 @@ def build_mingw(version):
# create an archive and add the relevant files
print(" compressing")
os.makedirs("../jasm/website/site/binaries", exist_ok=True)
- os.chdir("jasm-6502")
- error_code = run(["7z", "a", "../../jasm/website/site/binaries/jasm_%s_win64.7z" % version, "jasm-6502.exe"])
- if error_code != 0:
- raise Exception("Failed to compress")
- os.chdir("../jasm-z80")
- error_code = run(["7z", "a", "../../jasm/website/site/binaries/jasm_%s_win64.7z" % version, "jasm-z80.exe"])
+ os.chdir("jasm")
+ error_code = run(["7z", "a", "../../jasm/website/site/binaries/jasm_%s_win64.7z" % version, "jasm.exe"])
if error_code != 0:
raise Exception("Failed to compress")
os.chdir("..")
@@ 95,12 91,8 @@ def build_linux(version):
# create an archive and add the relevant files
print(" compressing")
os.makedirs("../jasm/website/site/binaries", exist_ok=True)
- os.chdir("jasm-6502")
- error_code = run(["7z", "a", "../../jasm/website/site/binaries/jasm_%s_linux64.7z" % version, "jasm-6502"])
- if error_code != 0:
- raise Exception("Failed to compress")
- os.chdir("../jasm-z80")
- error_code = run(["7z", "u", "../../jasm/website/site/binaries/jasm_%s_linux64.7z" % version, "jasm-z80"])
+ os.chdir("jasm")
+ error_code = run(["7z", "a", "../../jasm/website/site/binaries/jasm_%s_linux64.7z" % version, "jasm"])
if error_code != 0:
raise Exception("Failed to compress")
finally:
M sublime/m6502/jAsm.sublime-syntax +1 -1
@@ 149,7 149,7 @@ contexts:
push: parenthesis
- match: \b(byte|word|long)\b
scope: storage.jasm
- - match: \b(address|align|basic|const|declare|define|dynamic|elif|else|enum|export|fill|for|function|if|import|incbin|include|macro|module|namespace|optimize|reserve|return|section|struct|subroutine|using|var)\b
+ - match: \b(address|align|basic|const|declare|define|dynamic|elif|else|enum|export|fill|for|function|if|import|incbin|include|macro|module|namespace|optimize|part|pop|processor|reserve|return|section|struct|subroutine|using|var)\b
scope: keyword.control.jasm
- match: \b(bss|code|mapping|part)\b
scope: keyword.control.jasm
M sublime/z80/jAsm.sublime-syntax +1 -1
@@ 151,7 151,7 @@ contexts:
scope: storage.jasm
- match: \b(a|b|c|d|e|h|l|i|r|bc|de|hl|sp|ix|iy|af|af\'|m|nc|nz|p|po|pe|z)\b
scope: support.class.jasm
- - match: \b(address|align|basic|const|declare|define|dynamic|elif|else|enum|export|fill|for|function|if|import|incbin|include|macro|module|namespace|optimize|reserve|return|section|struct|subroutine|using|var)\b
+ - match: \b(address|align|basic|const|declare|define|dynamic|elif|else|enum|export|fill|for|function|if|import|incbin|include|macro|module|namespace|optimize|part|pop|processor|reserve|return|section|struct|subroutine|using|var)\b
scope: keyword.control.jasm
- match: \b(bss|code|mapping|part)\b
scope: keyword.control.jasm
M jasm-6502/convert_6502_keyword_case.py => tools/convert_6502_keyword_case.py +0 -0
M jasm-z80/convert_z80_keyword_case.py => tools/convert_z80_keyword_case.py +0 -0