From 8f3b2dce5398434227c710f08ce7b719b240ac29 Mon Sep 17 00:00:00 2001 From: Spartan322 Date: Wed, 9 Apr 2025 06:04:01 -0400 Subject: [PATCH] Add MSVC support --- .github/workflows/ci.yml | 50 ++++ include/lauf/config.h | 18 +- include/lauf/runtime/builtin.h | 2 + include/lauf/runtime/memory.h | 1 - include/lauf/runtime/value.h | 7 +- src/CMakeLists.txt | 20 +- src/lauf/asm/builder.cpp | 6 +- src/lauf/asm/builder.hpp | 4 +- src/lauf/asm/instruction.hpp | 109 +++++++- src/lauf/backend/dump.cpp | 48 ++-- src/lauf/backend/qbe.cpp | 52 ++-- src/lauf/compiler_instrinsics.hpp | 374 ++++++++++++++++++++++++++++ src/lauf/lib/debug.cpp | 2 +- src/lauf/lib/int.cpp | 85 +++---- src/lauf/lib/memory.cpp | 3 +- src/lauf/runtime/process.cpp | 9 +- src/lauf/runtime/value.cpp | 3 +- src/lauf/support/align.hpp | 6 +- src/lauf/support/array_list.hpp | 5 +- src/lauf/support/page_allocator.cpp | 39 ++- src/lauf/vm_execute.cpp | 44 ++-- src/lauf/vm_execute.hpp | 11 +- src/lauf/writer.cpp | 6 +- src/lauf/writer.hpp | 6 +- tests/integration/CMakeLists.txt | 47 ++-- tests/integration/runtime.c | 10 + tests/lauf/asm/builder.cpp | 113 +++++---- tests/lauf/reader.cpp | 2 +- 28 files changed, 841 insertions(+), 241 deletions(-) create mode 100644 src/lauf/compiler_instrinsics.hpp diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 49994607..64eb666f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,3 +37,53 @@ jobs: working-directory: build/ run: ctest --output-on-failure + windows: + strategy: + fail-fast: false + matrix: + build_type: [Debug, Release] + + runs-on: windows-2022 + + steps: + - uses: actions/checkout@v2 + - name: Create Build Environment + run: cmake -E make_directory build + + - name: Install Chocolatey and YASM + uses: crazy-max/ghaction-chocolatey@v3 + with: + args: install yasm -y + - name: Verify YASM Installation + run: yasm --version + + - name: Set up MSYS2 + uses: msys2/setup-msys2@v2 + with: + msystem: MINGW64 + update: 
true + install: make mingw-w64-x86_64-gcc mingw-w64-x86_64-gdb + - name: Download QBE + shell: msys2 {0} + run: curl https://c9x.me/compile/release/qbe-1.0.tar.xz -o qbe.tar.xz && tar xf qbe.tar.xz + - name: Build QBE + working-directory: qbe-1.0/ + shell: msys2 {0} + run: | + make CC="cc" CFLAGS="-std=c99 -g -Wall -Wextra -Wpedantic -O2 -static" + mv qbe $GITHUB_WORKSPACE/qbe.exe + echo "$GITHUB_WORKSPACE" >> "$GITHUB_PATH" + - name: Verify QBE Outside MSYS2 + run: qbe.exe -h + + - name: Configure + shell: bash + working-directory: build/ + run: cmake $GITHUB_WORKSPACE -G"Visual Studio 17 2022" + - name: Build + working-directory: build/ + run: cmake --build . --config ${{matrix.build_type}} + - name: Test + working-directory: build/ + run: ctest -C ${{matrix.build_type}} --output-on-failure + diff --git a/include/lauf/config.h b/include/lauf/config.h index ce324cf2..179ef039 100644 --- a/include/lauf/config.h +++ b/include/lauf/config.h @@ -43,6 +43,9 @@ typedef uint64_t lauf_uint; # if defined(__has_cpp_attribute) # if __has_cpp_attribute(clang::musttail) # define LAUF_TAIL_CALL [[clang::musttail]] +# ifndef LAUF_HAS_TAIL_CALL_ELIMINATION +# define LAUF_HAS_TAIL_CALL_ELIMINATION 1 +# endif # elif defined(__clang__) # define LAUF_TAIL_CALL [[clang::musttail]] # else @@ -52,6 +55,13 @@ typedef uint64_t lauf_uint; # define LAUF_NOINLINE [[gnu::noinline]] # define LAUF_FORCE_INLINE [[gnu::always_inline]] inline # define LAUF_UNREACHABLE __builtin_unreachable() +#elif defined(_MSC_VER) +# define LAUF_LIKELY(Cond) (Cond) +# define LAUF_UNLIKELY(Cond) (Cond) +# define LAUF_TAIL_CALL +# define LAUF_NOINLINE __declspec(noinline) +# define LAUF_FORCE_INLINE __forceinline +# define LAUF_UNREACHABLE __assume(0) #endif //=== configurations ===// @@ -60,7 +70,7 @@ typedef uint64_t lauf_uint; #endif #ifndef LAUF_HAS_TAIL_CALL_ELIMINATION -# define LAUF_HAS_TAIL_CALL_ELIMINATION 1 +# define LAUF_HAS_TAIL_CALL_ELIMINATION 0 #endif //=== warnings ===// @@ -70,6 +80,12 @@ typedef 
uint64_t lauf_uint; _Pragma("GCC diagnostic ignored \"-Wconversion\""); \ __VA_ARGS__; \ _Pragma("GCC diagnostic pop") +#elif defined(_MSC_VER) +# define LAUF_BITFIELD_CONVERSION(...) \ + _Pragma("warning(push)"); \ + _Pragma("warning(disable : 4267)"); \ + __VA_ARGS__; \ + _Pragma("warning(pop)") #else # define LAUF_BITFIELD_CONVERSION(...) __VA_ARGS__ #endif diff --git a/include/lauf/runtime/builtin.h b/include/lauf/runtime/builtin.h index 5841367e..b60f5b87 100644 --- a/include/lauf/runtime/builtin.h +++ b/include/lauf/runtime/builtin.h @@ -16,6 +16,8 @@ LAUF_HEADER_START # endif #elif defined(__GNUC__) || defined(__GNUG__) # define LAUF_RUNTIME_BUILTIN_IMPL __attribute__((section(".text.lauf_builtin"), aligned(8))) +#elif defined(_MSC_VER) +# define LAUF_RUNTIME_BUILTIN_IMPL __declspec(code_seg(".text.lauf_builtin")) #else # define LAUF_RUNTIME_BUILTIN_IMPL #endif diff --git a/include/lauf/runtime/memory.h b/include/lauf/runtime/memory.h index defd1566..23bf4acd 100644 --- a/include/lauf/runtime/memory.h +++ b/include/lauf/runtime/memory.h @@ -162,4 +162,3 @@ bool lauf_runtime_undeclare_weak(lauf_runtime_process* p, lauf_runtime_address a LAUF_HEADER_END #endif // LAUF_RUNTIME_MEMORY_H_INCLUDED - diff --git a/include/lauf/runtime/value.h b/include/lauf/runtime/value.h index eb0e2aed..3ec026a1 100644 --- a/include/lauf/runtime/value.h +++ b/include/lauf/runtime/value.h @@ -14,9 +14,9 @@ typedef struct lauf_runtime_address // acess to allocation is an AND, acess to offset a SHIFT, access to generation SHIFT + AND // (which is the one only necessary for checks). In addition, treating it as an integer and e.g. // incrementing it changes allocation first, not offset. That way, bugs are caught earlier. 
- uint64_t allocation : 30; - uint64_t generation : 2; - uint64_t offset : 32; + uint32_t allocation : 30; + uint32_t generation : 2; + uint32_t offset; } lauf_runtime_address; static const lauf_runtime_address lauf_runtime_address_null = {0x3FFFFFFF, 0x3, 0xFFFFFFFF}; @@ -43,4 +43,3 @@ typedef union lauf_runtime_value LAUF_HEADER_END #endif // LAUF_RUNTIME_VALUE_H_INCLUDED - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c849d42b..801344f9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -34,18 +34,28 @@ target_include_directories(lauf_core SYSTEM INTERFACE ../include) target_include_directories(lauf_core PRIVATE ../include .) target_link_libraries(lauf_core PRIVATE lauf_warnings foonathan::lexy) -if(NOT LAUF_DISPATCH_JUMP_TABLE) +if(MSVC) + if(NOT LAUF_DISPATCH_JUMP_TABLE) + message(WARNING "MSVC does not support disabling of jump tables") + endif() +elseif(NOT LAUF_DISPATCH_JUMP_TABLE) target_compile_definitions(lauf_core PUBLIC LAUF_CONFIG_DISPATCH_JUMP_TABLE=0) target_compile_options(lauf_core PRIVATE -fno-jump-tables) endif() # Since we're using tail calls for dispatching, we don't want to add frame pointers, ever. # They would record all previously executed instructions in the call stack. 
-target_compile_options(lauf_core PRIVATE -fomit-frame-pointer) +if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") + target_compile_options(lauf_core PRIVATE -fomit-frame-pointer) +elseif(MSVC) + target_compile_options(lauf_core PRIVATE /Oy) +endif() if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") - if(CMAKE_BUILD_TYPE STREQUAL "Debug") - target_compile_definitions(lauf_core PUBLIC LAUF_HAS_TAIL_CALL_ELIMINATION=0) - endif() + # Lacking musttail, GCC will not optimize tail calls without optimizations enabled + target_compile_definitions(lauf_core PUBLIC "LAUF_HAS_TAIL_CALL_ELIMINATION=$<NOT:$<CONFIG:Debug>>") +elseif(MSVC) + # MSVC does not appear capable of properly optimizing tail calls + target_compile_definitions(lauf_core PUBLIC "LAUF_HAS_TAIL_CALL_ELIMINATION=0") endif() target_sources(lauf_core PUBLIC diff --git a/src/lauf/asm/builder.cpp b/src/lauf/asm/builder.cpp index ec3f970b..f4dd64bc 100644 --- a/src/lauf/asm/builder.cpp +++ b/src/lauf/asm/builder.cpp @@ -373,7 +373,7 @@ LAUF_NOINLINE lauf_asm_inst* emit_body(lauf_asm_inst* ip, lauf_asm_builder* b, assert(insts[dest->offset].op() == lauf::asm_op::block); auto dest_offset = dest->offset + 1; - LAUF_BITFIELD_CONVERSION(jump->jump.offset = std::int32_t(dest_offset - cur_offset)); + jump->jump.offset(std::int32_t(dest_offset - cur_offset)); } return ip; @@ -459,7 +459,7 @@ void emit_debug_location(lauf_asm_builder* b) bool lauf_asm_build_finish(lauf_asm_builder* b) { - constexpr auto context = LAUF_BUILD_ASSERT_CONTEXT; + static constexpr auto context = LAUF_BUILD_ASSERT_CONTEXT; auto insts = [&] { auto inst_count = estimate_inst_count(context, b); @@ -673,7 +673,7 @@ const lauf_asm_block* lauf_asm_inst_branch(lauf_asm_builder* b, const lauf_asm_b else if (!b->cur->insts.empty() && b->cur->insts.back().op() == lauf::asm_op::cc) { // Remove the cc instruction. 
- auto cc = b->cur->insts.back().cc.value; + auto cc = b->cur->insts.back().cc.value(); b->cur->insts.pop_back(); switch (cc) diff --git a/src/lauf/asm/builder.hpp b/src/lauf/asm/builder.hpp index fca9a9a8..151b48da 100644 --- a/src/lauf/asm/builder.hpp +++ b/src/lauf/asm/builder.hpp @@ -283,7 +283,7 @@ struct lauf_asm_builder : lauf::intrinsic_arena [&](const char* context, std::ptrdiff_t offset) { \ lauf_asm_inst result; \ LAUF_BITFIELD_CONVERSION(result.Name = {lauf::asm_op::Name, std::int32_t(offset)}); \ - if (result.Name.offset != offset) \ + if (result.Name.offset() != offset) \ b->error(context, "offset too big"); \ return result; \ }(LAUF_BUILD_ASSERT_CONTEXT, static_cast(Offset)) @@ -316,7 +316,7 @@ struct lauf_asm_builder : lauf::intrinsic_arena [&](const char* context, std::size_t value) { \ lauf_asm_inst result; \ LAUF_BITFIELD_CONVERSION(result.Name = {lauf::asm_op::Name, std::uint32_t(value)}); \ - if (value != result.Name.value) \ + if (value != result.Name.value()) \ b->error(context, "invalid value"); \ return result; \ }(LAUF_BUILD_ASSERT_CONTEXT, Value) diff --git a/src/lauf/asm/instruction.hpp b/src/lauf/asm/instruction.hpp index 2fbe3b97..5502d714 100644 --- a/src/lauf/asm/instruction.hpp +++ b/src/lauf/asm/instruction.hpp @@ -48,25 +48,60 @@ struct asm_inst_none asm_op op; }; +#pragma pack(1) struct asm_inst_offset { - asm_op op : 8; - std::int32_t offset : 24; + asm_op op : 8; + + constexpr std::int32_t offset() const + { +#if _MSC_VER + return (static_cast(static_cast(_offset[2])) << 16) | (_offset[1] << 8) + | _offset[0]; + +#else + return _offset; +#endif + } + constexpr std::int32_t offset(std::int32_t value) + { +#if _MSC_VER + _offset[0] = static_cast(value & 0xFF); + _offset[1] = static_cast((value >> 8) & 0xFF); + _offset[2] = static_cast((value >> 16) & 0xFF); + return value; +#else + LAUF_BITFIELD_CONVERSION(return _offset = value); +#endif + } + + asm_inst_offset() = default; + constexpr asm_inst_offset(asm_op op, std::int32_t 
offset) : op(op), _offset{} + { + this->offset(offset); + } + +private: +#if _MSC_VER + std::uint8_t _offset[3]; +#else + std::int32_t _offset : 24; +#endif }; +#pragma pack() template std::ptrdiff_t compress_pointer_offset(CurType* _cur, DestType* _dest) { - auto cur = (void*)(_cur); - auto dest = (void*)(_dest); - assert(is_aligned(cur, alignof(void*)) && is_aligned(dest, alignof(void*))); - return (void**)dest - (void**)cur; + auto cur = (char*)(_cur); + auto dest = (char*)(_dest); + return _dest ? dest - cur : 0; } template const DestType* uncompress_pointer_offset(CurType* cur, std::ptrdiff_t offset) { - return (const DestType*)(reinterpret_cast(cur) + offset); + return (DestType*)(reinterpret_cast(cur) + offset); } struct asm_inst_signature @@ -89,11 +124,66 @@ struct asm_inst_layout } }; +#pragma pack(1) struct asm_inst_value { - asm_op op : 8; - std::uint32_t value : 24; + asm_op op : 8; + + constexpr std::uint32_t value() const + { +#if _MSC_VER + if (__builtin_is_constant_evaluated()) + { + std::uint8_t array[sizeof(std::uint32_t)]{_value[0], _value[1], _value[2]}; + return __builtin_bit_cast(std::uint32_t, array); + } + else + { + return *reinterpret_cast(_value) + | (*reinterpret_cast(_value + 1) << 8); + } +#else + return _value; +#endif + } + constexpr std::uint32_t value(std::uint32_t value) + { +#if _MSC_VER + if (__builtin_is_constant_evaluated()) + { + struct array_type + { + std::uint8_t array[sizeof(value)]; + }; + auto array = __builtin_bit_cast(array_type, value); + _value[0] = array.array[0]; + _value[1] = array.array[1]; + _value[2] = array.array[2]; + return value; + } + else + { + return *reinterpret_cast(_value) = value; + } +#else + LAUF_BITFIELD_CONVERSION(return _value = value); +#endif + } + + asm_inst_value() = default; + constexpr asm_inst_value(asm_op op, std::uint32_t value) : op(op), _value{} + { + this->value(value); + } + +private: +#if _MSC_VER + std::uint8_t _value[3]; +#else + std::uint32_t _value : 24; +#endif }; +#pragma 
pack() struct asm_inst_stack_idx { @@ -127,4 +217,3 @@ union lauf_asm_inst }; #endif // SRC_LAUF_ASM_INSTRUCTION_HPP_INCLUDED - diff --git a/src/lauf/backend/dump.cpp b/src/lauf/backend/dump.cpp index f98b5bc0..43f39690 100644 --- a/src/lauf/backend/dump.cpp +++ b/src/lauf/backend/dump.cpp @@ -141,28 +141,28 @@ void dump_function(lauf_writer* writer, lauf_backend_dump_options opts, const la writer->write("return"); break; case lauf::asm_op::return_free: - writer->format("return_free %d", ip->return_free.value); + writer->format("return_free %d", ip->return_free.value()); break; case lauf::asm_op::jump: - writer->format("jump <%04zx>", ip + ip->jump.offset - fn->insts); + writer->format("jump <%04zx>", ip + ip->jump.offset() - fn->insts); break; case lauf::asm_op::branch_eq: - writer->format("branch.eq <%04zx>", ip + ip->branch_eq.offset - fn->insts); + writer->format("branch.eq <%04zx>", ip + ip->branch_eq.offset() - fn->insts); break; case lauf::asm_op::branch_ne: - writer->format("branch.ne <%04zx>", ip + ip->branch_ne.offset - fn->insts); + writer->format("branch.ne <%04zx>", ip + ip->branch_ne.offset() - fn->insts); break; case lauf::asm_op::branch_lt: - writer->format("branch.lt <%04zx>", ip + ip->branch_lt.offset - fn->insts); + writer->format("branch.lt <%04zx>", ip + ip->branch_lt.offset() - fn->insts); break; case lauf::asm_op::branch_le: - writer->format("branch.le <%04zx>", ip + ip->branch_le.offset - fn->insts); + writer->format("branch.le <%04zx>", ip + ip->branch_le.offset() - fn->insts); break; case lauf::asm_op::branch_ge: - writer->format("branch.ge <%04zx>", ip + ip->branch_ge.offset - fn->insts); + writer->format("branch.ge <%04zx>", ip + ip->branch_ge.offset() - fn->insts); break; case lauf::asm_op::branch_gt: - writer->format("branch.gt <%04zx>", ip + ip->branch_gt.offset - fn->insts); + writer->format("branch.gt <%04zx>", ip + ip->branch_gt.offset() - fn->insts); break; case lauf::asm_op::panic: writer->write("panic"); @@ -175,7 +175,7 @@ void 
dump_function(lauf_writer* writer, lauf_backend_dump_options opts, const la break; case lauf::asm_op::call: { - auto callee = lauf::uncompress_pointer_offset(fn, ip->call.offset); + auto callee = lauf::uncompress_pointer_offset(fn, ip->call.offset()); writer->format("call @'%s'", callee->name); break; } @@ -186,7 +186,7 @@ void dump_function(lauf_writer* writer, lauf_backend_dump_options opts, const la case lauf::asm_op::call_builtin: case lauf::asm_op::call_builtin_no_regs: { auto callee = lauf::uncompress_pointer_offset // - (&lauf_runtime_builtin_dispatch, ip->call_builtin.offset); + (&lauf_runtime_builtin_dispatch, ip->call_builtin.offset()); if (auto name = find_builtin_name(opts, callee); !name.empty()) writer->format("$'%s'", name.c_str()); else @@ -213,23 +213,24 @@ void dump_function(lauf_writer* writer, lauf_backend_dump_options opts, const la break; case lauf::asm_op::push: - writer->format("push 0x%X", ip->push.value); + writer->format("push 0x%X", ip->push.value()); break; case lauf::asm_op::push2: - writer->format("push2 0x%X", ip->push2.value); + writer->format("push2 0x%X", ip->push2.value()); break; case lauf::asm_op::push3: - writer->format("push3 0x%X", ip->push3.value); + writer->format("push3 0x%X", ip->push3.value()); break; case lauf::asm_op::pushn: - writer->format("pushn 0x%X", ip->pushn.value); + writer->format("pushn 0x%X", ip->pushn.value()); break; case lauf::asm_op::global_addr: { - writer->format("global_addr @%s", find_global_name(mod, ip->global_addr.value).c_str()); + writer->format("global_addr @%s", + find_global_name(mod, ip->global_addr.value()).c_str()); break; } case lauf::asm_op::function_addr: { - auto callee = lauf::uncompress_pointer_offset(fn, ip->call.offset); + auto callee = lauf::uncompress_pointer_offset(fn, ip->call.offset()); writer->format("function_addr @'%s'", callee->name); break; } @@ -239,7 +240,7 @@ void dump_function(lauf_writer* writer, lauf_backend_dump_options opts, const la break; } case 
lauf::asm_op::cc: { - switch (lauf_asm_inst_condition_code(ip->cc.value)) + switch (lauf_asm_inst_condition_code(ip->cc.value())) { case LAUF_ASM_INST_CC_EQ: writer->write("cc eq"); @@ -280,7 +281,7 @@ void dump_function(lauf_writer* writer, lauf_backend_dump_options opts, const la break; case lauf::asm_op::setup_local_alloc: - writer->format("setup_local_alloc %u", ip->setup_local_alloc.value); + writer->format("setup_local_alloc %u", ip->setup_local_alloc.value()); break; case lauf::asm_op::local_alloc: writer->format("local_alloc (%u, %zu)", ip->local_alloc.size, @@ -291,7 +292,7 @@ void dump_function(lauf_writer* writer, lauf_backend_dump_options opts, const la ip->local_alloc_aligned.alignment()); break; case lauf::asm_op::local_storage: - writer->format("local_storage (%u, 8)", ip->local_storage.value); + writer->format("local_storage (%u, 8)", ip->local_storage.value()); break; case lauf::asm_op::deref_const: writer->format("deref_const (%u, %zu)", ip->deref_const.size, @@ -301,10 +302,10 @@ void dump_function(lauf_writer* writer, lauf_backend_dump_options opts, const la writer->format("deref_mut (%u, %zu)", ip->deref_mut.size, ip->deref_mut.alignment()); break; case lauf::asm_op::array_element: - writer->format("array_element [%u]", ip->array_element.value); + writer->format("array_element [%u]", ip->array_element.value()); break; case lauf::asm_op::aggregate_member: - writer->format("aggregate_member %u", ip->aggregate_member.value); + writer->format("aggregate_member %u", ip->aggregate_member.value()); break; case lauf::asm_op::load_local_value: writer->format("load_local_value %u <%zx>", ip->load_local_value.index, @@ -316,11 +317,11 @@ void dump_function(lauf_writer* writer, lauf_backend_dump_options opts, const la break; case lauf::asm_op::load_global_value: writer->format("load_global_value @%s", - find_global_name(mod, ip->load_global_value.value).c_str()); + find_global_name(mod, ip->load_global_value.value()).c_str()); break; case 
lauf::asm_op::store_global_value: writer->format("store_global_value @%s", - find_global_name(mod, ip->store_global_value.value).c_str()); + find_global_name(mod, ip->store_global_value.value()).c_str()); break; case lauf::asm_op::count: @@ -369,4 +370,3 @@ void lauf_backend_dump_chunk(lauf_writer* writer, lauf_backend_dump_options opti dump_module_header(writer, mod); dump_function(writer, options, mod, chunk->fn); } - diff --git a/src/lauf/backend/qbe.cpp b/src/lauf/backend/qbe.cpp index 07c535eb..61e712cc 100644 --- a/src/lauf/backend/qbe.cpp +++ b/src/lauf/backend/qbe.cpp @@ -182,35 +182,35 @@ void codegen_function(lauf::qbe_writer& writer, const lauf_backend_qbe_options& break; case lauf::asm_op::jump: - writer.jmp(block_id(ip + ip->jump.offset)); + writer.jmp(block_id(ip + ip->jump.offset())); break; case lauf::asm_op::branch_eq: - writer.jnz(pop_reg(), block_id(ip + 1), block_id(ip + ip->branch_eq.offset)); + writer.jnz(pop_reg(), block_id(ip + 1), block_id(ip + ip->branch_eq.offset())); break; case lauf::asm_op::branch_ne: - writer.jnz(pop_reg(), block_id(ip + ip->branch_ne.offset), block_id(ip + 1)); + writer.jnz(pop_reg(), block_id(ip + ip->branch_ne.offset()), block_id(ip + 1)); break; case lauf::asm_op::branch_lt: writer.comparison(lauf::qbe_reg::tmp, lauf::qbe_cc::slt, lauf::qbe_type::value, pop_reg(), std::uintmax_t(0)); - writer.jnz(lauf::qbe_reg::tmp, block_id(ip + ip->branch_lt.offset), block_id(ip + 1)); + writer.jnz(lauf::qbe_reg::tmp, block_id(ip + ip->branch_lt.offset()), block_id(ip + 1)); break; case lauf::asm_op::branch_le: writer.comparison(lauf::qbe_reg::tmp, lauf::qbe_cc::sle, lauf::qbe_type::value, pop_reg(), std::uintmax_t(0)); - writer.jnz(lauf::qbe_reg::tmp, block_id(ip + ip->branch_le.offset), block_id(ip + 1)); + writer.jnz(lauf::qbe_reg::tmp, block_id(ip + ip->branch_le.offset()), block_id(ip + 1)); break; case lauf::asm_op::branch_ge: writer.comparison(lauf::qbe_reg::tmp, lauf::qbe_cc::sge, lauf::qbe_type::value, pop_reg(), 
std::uintmax_t(0)); - writer.jnz(lauf::qbe_reg::tmp, block_id(ip + ip->branch_ge.offset), block_id(ip + 1)); + writer.jnz(lauf::qbe_reg::tmp, block_id(ip + ip->branch_ge.offset()), block_id(ip + 1)); break; case lauf::asm_op::branch_gt: writer.comparison(lauf::qbe_reg::tmp, lauf::qbe_cc::sgt, lauf::qbe_type::value, pop_reg(), std::uintmax_t(0)); - writer.jnz(lauf::qbe_reg::tmp, block_id(ip + ip->branch_gt.offset), block_id(ip + 1)); + writer.jnz(lauf::qbe_reg::tmp, block_id(ip + ip->branch_gt.offset()), block_id(ip + 1)); break; case lauf::asm_op::panic: @@ -233,7 +233,7 @@ void codegen_function(lauf::qbe_writer& writer, const lauf_backend_qbe_options& } case lauf::asm_op::call: { - auto callee = lauf::uncompress_pointer_offset(fn, ip->call.offset); + auto callee = lauf::uncompress_pointer_offset(fn, ip->call.offset()); write_call(callee->name, callee->sig.input_count, callee->sig.output_count); break; } @@ -245,7 +245,7 @@ void codegen_function(lauf::qbe_writer& writer, const lauf_backend_qbe_options& case lauf::asm_op::call_builtin_no_regs: { assert(ip[1].op() == lauf::asm_op::call_builtin_sig); auto callee = lauf::uncompress_pointer_offset // - (&lauf_runtime_builtin_dispatch, ip->call_builtin.offset); + (&lauf_runtime_builtin_dispatch, ip->call_builtin.offset()); auto metadata = ip[1].call_builtin_sig; //=== VM directives ===// @@ -653,31 +653,31 @@ void codegen_function(lauf::qbe_writer& writer, const lauf_backend_qbe_options& break; case lauf::asm_op::push: { - auto value = std::uint64_t(ip->push.value); + auto value = std::uint64_t(ip->push.value()); if (ip[1].op() == lauf::asm_op::push2) { - value |= std::uint64_t(ip[1].push2.value) << 24; + value |= std::uint64_t(ip[1].push2.value()) << 24; if (ip[2].op() == lauf::asm_op::push3) - value |= std::uint64_t(ip[2].push3.value) << 48; + value |= std::uint64_t(ip[2].push3.value()) << 48; } else if (ip[1].op() == lauf::asm_op::push3) { - value |= std::uint64_t(ip[1].push3.value) << 48; + value |= 
std::uint64_t(ip[1].push3.value()) << 48; } writer.copy(push_reg(), lauf::qbe_type::value, value); break; } case lauf::asm_op::pushn: { - auto value = ~std::uint64_t(ip->pushn.value); + auto value = ~std::uint64_t(ip->pushn.value()); if (ip[1].op() == lauf::asm_op::push2) { - value |= std::uint64_t(ip[1].push2.value) << 24; + value |= std::uint64_t(ip[1].push2.value()) << 24; if (ip[2].op() == lauf::asm_op::push3) - value |= std::uint64_t(ip[2].push3.value) << 48; + value |= std::uint64_t(ip[2].push3.value()) << 48; } else if (ip[1].op() == lauf::asm_op::push3) { - value |= std::uint64_t(ip[1].push3.value) << 48; + value |= std::uint64_t(ip[1].push3.value()) << 48; } writer.copy(push_reg(), lauf::qbe_type::value, value); break; @@ -688,11 +688,12 @@ void codegen_function(lauf::qbe_writer& writer, const lauf_backend_qbe_options& break; case lauf::asm_op::global_addr: - writer.copy(push_reg(), lauf::qbe_type::value, lauf::qbe_data(ip->global_addr.value)); + writer.copy(push_reg(), lauf::qbe_type::value, lauf::qbe_data(ip->global_addr.value())); break; case lauf::asm_op::function_addr: { auto callee - = lauf::uncompress_pointer_offset(fn, ip->function_addr.offset); + = lauf::uncompress_pointer_offset(fn, + ip->function_addr.offset()); writer.copy(push_reg(), lauf::qbe_type::value, callee->name); break; } @@ -703,7 +704,7 @@ void codegen_function(lauf::qbe_writer& writer, const lauf_backend_qbe_options& case lauf::asm_op::cc: { auto top = pop_reg(); auto dest = push_reg(); - switch (lauf_asm_inst_condition_code(ip->cc.value)) + switch (lauf_asm_inst_condition_code(ip->cc.value())) { case LAUF_ASM_INST_CC_EQ: writer.comparison(dest, lauf::qbe_cc::ieq, lauf::qbe_type::value, top, @@ -805,20 +806,20 @@ void codegen_function(lauf::qbe_writer& writer, const lauf_backend_qbe_options& } break; case lauf::asm_op::local_storage: - writer.alloc8(next_alloc(), std::uintmax_t(ip->local_storage.value)); + writer.alloc8(next_alloc(), std::uintmax_t(ip->local_storage.value())); 
break; case lauf::asm_op::array_element: { auto index = pop_reg(); auto ptr = lauf::qbe_reg(vstack - 1); writer.binary_op(lauf::qbe_reg::tmp, lauf::qbe_type::value, "mul", - std::uintmax_t(ip->array_element.value), index); + std::uintmax_t(ip->array_element.value()), index); writer.binary_op(ptr, lauf::qbe_type::value, "add", ptr, lauf::qbe_reg::tmp); break; } case lauf::asm_op::aggregate_member: { auto ptr = lauf::qbe_reg(vstack - 1); writer.binary_op(ptr, lauf::qbe_type::value, "add", ptr, - std::uintmax_t(ip->aggregate_member.value)); + std::uintmax_t(ip->aggregate_member.value())); break; } case lauf::asm_op::deref_const: @@ -835,11 +836,11 @@ void codegen_function(lauf::qbe_writer& writer, const lauf_backend_qbe_options& break; case lauf::asm_op::load_global_value: writer.load(push_reg(), lauf::qbe_type::value, - lauf::qbe_data(ip->load_global_value.value)); + lauf::qbe_data(ip->load_global_value.value())); break; case lauf::asm_op::store_global_value: writer.store(lauf::qbe_type::value, pop_reg(), - lauf::qbe_data(ip->store_global_value.value)); + lauf::qbe_data(ip->store_global_value.value())); break; case lauf::asm_op::exit: @@ -866,4 +867,3 @@ void lauf_backend_qbe(lauf_writer* _writer, lauf_backend_qbe_options options, std::move(writer).finish(_writer); } - diff --git a/src/lauf/compiler_instrinsics.hpp b/src/lauf/compiler_instrinsics.hpp new file mode 100644 index 00000000..40891597 --- /dev/null +++ b/src/lauf/compiler_instrinsics.hpp @@ -0,0 +1,374 @@ +// Copyright (C) 2022-2023 Jonathan Müller and lauf contributors +// SPDX-License-Identifier: BSL-1.0 + +#ifndef SRC_LAUF_COMPILER_INTRINSICS_HPP_INCLUDED +#define SRC_LAUF_COMPILER_INTRINSICS_HPP_INCLUDED + +#include + +#if !defined(_MSC_VER) +template +LAUF_FORCE_INLINE static bool lauf_add_overflow(IntT a, IntT b, T* out) noexcept +{ + return __builtin_add_overflow(a, b, out); +} + +template +LAUF_FORCE_INLINE static bool lauf_sub_overflow(IntT a, IntT b, T* out) noexcept +{ + return 
__builtin_sub_overflow(a, b, out); +} + +template +LAUF_FORCE_INLINE static bool lauf_mul_overflow(IntT a, IntT b, T* out) noexcept +{ + return __builtin_mul_overflow(a, b, out); +} + +template +LAUF_FORCE_INLINE static int lauf_countr_zero(T a) noexcept +{ + return __builtin_ctzll(a); +} + +template +LAUF_FORCE_INLINE static constexpr int lauf_countr_zero_constexpr(const T val) noexcept +{ + return __builtin_ctzll(val); +} + +#else +# include +# include + +template +LAUF_FORCE_INLINE static bool lauf_add_overflow(IntT a, IntT b, T* out) +{ +# if defined(_M_IX86) || defined(_M_X64) + if constexpr (std::is_unsigned_v) + { + return _addcarry_u64(0, a, b, out); + } + else +# endif + { + *out = a + b; + return ((a ^ *out) & (b ^ *out)) < 0; + } +} + +template +LAUF_FORCE_INLINE static bool lauf_sub_overflow(IntT a, IntT b, T* out) +{ +# if defined(_M_IX86) || defined(_M_X64) + if constexpr (std::is_unsigned_v) + { + return _subborrow_u64(0, a, b, out); + } + else +# endif + { + *out = a - b; + return ((a ^ b) < 0) && ((a ^ *out) < 0); + } +} + +extern "C" +{ + extern int __isa_available; +} + +template +constexpr int lauf_digits = sizeof(T) * CHAR_BIT; + +template +constexpr int lauf_countl_zero_fallback(T val) noexcept +{ + T yx = 0; + + unsigned int nx = lauf_digits; + unsigned int cx = lauf_digits / 2; + do + { + yx = static_cast(val >> cx); + if (yx != 0) + { + nx -= cx; + val = yx; + } + cx >>= 1; + } while (cx != 0); + return static_cast(nx) - static_cast(val); +} + +# if (defined(_M_IX86) && !defined(_M_HYBRID_X86_ARM64)) \ + || (defined(_M_X64) && !defined(_M_ARM64EC)) +template +int lauf_countl_zero_lzcnt(const T val) noexcept +{ + constexpr int digits = lauf_digits; + + if constexpr (digits <= 16) + { + return static_cast(__lzcnt16(val) - (16 - digits)); + } + else if constexpr (digits == 32) + { + return static_cast(__lzcnt(val)); + } + else + { +# ifdef _M_IX86 + const unsigned int high = val >> 32; + const auto low = static_cast(val); + if (high == 0) + { 
+ return 32 + lauf_countl_zero_lzcnt(low); + } + else + { + return lauf_countl_zero_lzcnt(high); + } +# else // ^^^ defined(_M_IX86) / !defined(_M_IX86) vvv + return static_cast(__lzcnt64(val)); +# endif // ^^^ !defined(_M_IX86) ^^^ + } +} + +template +int lauf_countl_zero_bsr(const T val) noexcept +{ + constexpr int digits = lauf_digits; + + unsigned long result; + if constexpr (digits <= 32) + { + if (!_BitScanReverse(&result, val)) + { + return digits; + } + } + else + { +# ifdef _M_IX86 + const unsigned int high = val >> 32; + if (_BitScanReverse(&result, high)) + { + return static_cast(31 - result); + } + + const auto low = static_cast(val); + if (!_BitScanReverse(&result, low)) + { + return digits; + } +# else // ^^^ defined(_M_IX86) / !defined(_M_IX86) vvv + if (!_BitScanReverse64(&result, val)) + { + return digits; + } +# endif // ^^^ !defined(_M_IX86) ^^^ + } + return static_cast(digits - 1 - result); +} + +template +int lauf_checked_x86_x64_countl_zero(const T val) noexcept +{ +# ifdef __AVX2__ + return lauf_countl_zero_lzcnt(val); +# else // ^^^ defined(__AVX2__) / !defined(__AVX2__) vvv + constexpr int _isa_available_avx2 = 5; + const bool has_lzcnt = __isa_available >= _isa_available_avx2; + if (has_lzcnt) + { + return lauf_countl_zero_lzcnt(val); + } + else + { + return lauf_countl_zero_bsr(val); + } +# endif // ^^^ !defined(__AVX2__) ^^^ +} +# endif // (defined(_M_IX86) && !defined(_M_HYBRID_X86_ARM64)) || (defined(_M_X64) && + // !defined(_M_ARM64EC)) + +# if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_HYBRID_X86_ARM64) +template +int lauf_checked_arm_arm64_countl_zero(const T val) noexcept +{ + constexpr int digits = lauf_digits; + if (val == 0) + { + return digits; + } + + if constexpr (digits <= 32) + { + return static_cast(_CountLeadingZeros(val)) - (lauf_digits - digits); + } + else + { + return static_cast(_CountLeadingZeros64(val)); + } +} +# endif // defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) || 
+ // defined(_M_HYBRID_X86_ARM64) + +template +constexpr int lauf_countl_zero(const T val) noexcept +{ +# if (defined(_M_IX86) && !defined(_M_HYBRID_X86_ARM64)) \ + || (defined(_M_X64) && !defined(_M_ARM64EC)) + return lauf_checked_x86_x64_countl_zero(val); +# elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) \ + || defined(_M_HYBRID_X86_ARM64) + return lauf_checked_arm_arm64_countl_zero(val); +# endif // defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) || + // defined(_M_HYBRID_X86_ARM64) + + return lauf_countl_zero_fallback(val); +} + +template +LAUF_FORCE_INLINE static bool lauf_mul_overflow(IntT a, IntT b, T* out) +{ + *out = a * b; + // This test isnt exact, but avoids doing integer division + return ((lauf_countl_zero(a) + lauf_countl_zero(b)) < 64); +} + +template +constexpr int lauf_countr_zero_fallback(const T val) noexcept +{ + constexpr int digits = lauf_digits; + return digits + - lauf_countl_zero_fallback( + static_cast(static_cast(~val) & static_cast(val - 1))); +} + +# if ((defined(_M_IX86) && !defined(_M_HYBRID_X86_ARM64)) \ + || (defined(_M_X64) && !defined(_M_ARM64EC))) \ + && !defined(_M_CEE_PURE) && !defined(__CUDACC__) +# define _LAUF_HAS_TZCNT_BSF_INTRINSICS 1 +# else // ^^^ intrinsics available / intrinsics unavailable vvv +# define _LAUF_HAS_TZCNT_BSF_INTRINSICS 0 +# endif // ^^^ intrinsics unavailable ^^^ + +# if _LAUF_HAS_TZCNT_BSF_INTRINSICS +template +int lauf_countr_zero_tzcnt(const T val) noexcept +{ + constexpr int digits = lauf_digits; + constexpr T max = static_cast(-1); // equal to (numeric_limits::max)() + + if constexpr (digits <= 32) + { + // Intended widening to int. This operation means that a narrow 0 will widen + // to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros + // of the wider type. 
+ return static_cast(_tzcnt_u32(static_cast(~max | val))); + } + else + { +# ifdef _M_IX86 + const auto low = static_cast(val); + if (low == 0) + { + const unsigned int high = val >> 32; + return static_cast(32 + _tzcnt_u32(high)); + } + else + { + return static_cast(_tzcnt_u32(low)); + } +# else // ^^^ defined(_M_IX86) / !defined(_M_IX86) vvv + return static_cast(_tzcnt_u64(val)); +# endif // ^^^ !defined(_M_IX86) ^^^ + } +} + +template +int lauf_countr_zero_bsf(const T val) noexcept +{ + constexpr int digits = lauf_digits; + constexpr T max = static_cast(-1); // equal to (numeric_limits::max)() + + unsigned long result; + if constexpr (digits <= 32) + { + // Intended widening to int. This operation means that a narrow 0 will widen + // to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros + // of the wider type. + if (!_BitScanForward(&result, static_cast(~max | val))) + { + return digits; + } + } + else + { +# ifdef _M_IX86 + const auto low = static_cast(val); + if (_BitScanForward(&result, low)) + { + return static_cast(result); + } + + const unsigned int high = val >> 32; + if (!_BitScanForward(&result, high)) + { + return digits; + } + else + { + return static_cast(result + 32); + } +# else // ^^^ defined(_M_IX86) / !defined(_M_IX86) vvv + if (!_BitScanForward64(&result, val)) + { + return digits; + } +# endif // ^^^ !defined(_M_IX86) ^^^ + } + return static_cast(result); +} + +template +int lauf_checked_x86_x64_countr_zero(const T val) noexcept +{ +# ifdef __AVX2__ + return lauf_countr_zero_tzcnt(val); +# else // ^^^ defined(__AVX2__) / !defined(__AVX2__) vvv + constexpr int _isa_available_avx2 = 5; + const bool has_tzcnt = __isa_available >= _isa_available_avx2; + if (has_tzcnt) + { + return lauf_countr_zero_tzcnt(val); + } + else + { + return lauf_countr_zero_bsf(val); + } +# endif // ^^^ !defined(__AVX2__) ^^^ +} + +# endif // _LAUF_HAS_TZCNT_BSF_INTRINSICS + +template +LAUF_FORCE_INLINE static int lauf_countr_zero(const T val) 
noexcept
+{
+#    if _LAUF_HAS_TZCNT_BSF_INTRINSICS
+    return lauf_checked_x86_x64_countr_zero(val);
+#    endif // _LAUF_HAS_TZCNT_BSF_INTRINSICS
+    return lauf_countr_zero_fallback(val);
+}
+
+template
+LAUF_FORCE_INLINE static constexpr int lauf_countr_zero_constexpr(const T val) noexcept
+{
+    return lauf_countr_zero_fallback(val);
+}
+
+#endif
+
+#endif // SRC_LAUF_COMPILER_INTRINSICS_HPP_INCLUDED
\ No newline at end of file
diff --git a/src/lauf/lib/debug.cpp b/src/lauf/lib/debug.cpp
index 500c6842..a08166f8 100644
--- a/src/lauf/lib/debug.cpp
+++ b/src/lauf/lib/debug.cpp
@@ -53,7 +53,7 @@ void lauf::debug_print_cstack(lauf_runtime_process* process, const lauf_runtime_
     else
     {
         auto addr = lauf_asm_get_instruction_index(fn, ip);
-        std::fprintf(stderr, "    at <%04lx>\n", addr);
+        std::fprintf(stderr, "    at <%04zx>\n", addr);
     }
 
     ++index;
diff --git a/src/lauf/lib/int.cpp b/src/lauf/lib/int.cpp
index 572e3746..e706c0cd 100644
--- a/src/lauf/lib/int.cpp
+++ b/src/lauf/lib/int.cpp
@@ -4,6 +4,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -37,21 +38,21 @@ namespace
 {
 LAUF_RUNTIME_BUILTIN(sadd_flag, 2, 2, no_panic_flags, "sadd_flag", nullptr)
 {
-    auto overflow = __builtin_add_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint,
-                                           &vstack_ptr[1].as_sint);
+    auto overflow
+        = lauf_add_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint);
     vstack_ptr[0].as_uint = overflow ? 
1 : 0; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(sadd_wrap, 2, 1, no_panic_flags, "sadd_wrap", &sadd_flag) { - __builtin_add_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); + lauf_add_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); ++vstack_ptr; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(sadd_sat, 2, 1, no_panic_flags, "sadd_sat", &sadd_wrap) { - auto overflow = __builtin_add_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, - &vstack_ptr[1].as_sint); + auto overflow + = lauf_add_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); if (overflow) { if (vstack_ptr[0].as_sint < 0) @@ -64,8 +65,8 @@ LAUF_RUNTIME_BUILTIN(sadd_sat, 2, 1, no_panic_flags, "sadd_sat", &sadd_wrap) } LAUF_RUNTIME_BUILTIN(sadd_panic, 2, 1, panic_flags, "sadd_panic", &sadd_sat) { - auto overflow = __builtin_add_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, - &vstack_ptr[1].as_sint); + auto overflow + = lauf_add_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); if (overflow) LAUF_BUILTIN_RETURN(lauf_runtime_panic(process, "integer overflow")); ++vstack_ptr; @@ -79,21 +80,21 @@ namespace { LAUF_RUNTIME_BUILTIN(ssub_flag, 2, 2, no_panic_flags, "ssub_flag", &sadd_panic) { - auto overflow = __builtin_sub_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, - &vstack_ptr[1].as_sint); + auto overflow + = lauf_sub_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); vstack_ptr[0].as_uint = overflow ? 
1 : 0; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(ssub_wrap, 2, 1, no_panic_flags, "ssub_wrap", &ssub_flag) { - __builtin_sub_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); + lauf_sub_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); ++vstack_ptr; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(ssub_sat, 2, 1, no_panic_flags, "ssub_sat", &ssub_wrap) { - auto overflow = __builtin_sub_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, - &vstack_ptr[1].as_sint); + auto overflow + = lauf_sub_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); if (overflow) { if (vstack_ptr[0].as_sint < 0) @@ -106,8 +107,8 @@ LAUF_RUNTIME_BUILTIN(ssub_sat, 2, 1, no_panic_flags, "ssub_sat", &ssub_wrap) } LAUF_RUNTIME_BUILTIN(ssub_panic, 2, 1, panic_flags, "ssub_panic", &ssub_sat) { - auto overflow = __builtin_sub_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, - &vstack_ptr[1].as_sint); + auto overflow + = lauf_sub_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); if (overflow) LAUF_BUILTIN_RETURN(lauf_runtime_panic(process, "integer overflow")); ++vstack_ptr; @@ -121,22 +122,22 @@ namespace { LAUF_RUNTIME_BUILTIN(smul_flag, 2, 2, no_panic_flags, "smul_flag", &ssub_panic) { - auto overflow = __builtin_mul_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, - &vstack_ptr[1].as_sint); + auto overflow + = lauf_mul_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); vstack_ptr[0].as_uint = overflow ? 
1 : 0; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(smul_wrap, 2, 1, no_panic_flags, "smul_wrap", &smul_flag) { - __builtin_mul_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); + lauf_mul_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); ++vstack_ptr; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(smul_sat, 2, 1, no_panic_flags, "smul_sat", &smul_wrap) { auto different_signs = (vstack_ptr[1].as_sint < 0) != (vstack_ptr[0].as_sint < 0); - auto overflow = __builtin_mul_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, - &vstack_ptr[1].as_sint); + auto overflow + = lauf_mul_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); if (overflow) { if (different_signs) @@ -149,8 +150,8 @@ LAUF_RUNTIME_BUILTIN(smul_sat, 2, 1, no_panic_flags, "smul_sat", &smul_wrap) } LAUF_RUNTIME_BUILTIN(smul_panic, 2, 1, panic_flags, "smul_panic", &smul_sat) { - auto overflow = __builtin_mul_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, - &vstack_ptr[1].as_sint); + auto overflow + = lauf_mul_overflow(vstack_ptr[1].as_sint, vstack_ptr[0].as_sint, &vstack_ptr[1].as_sint); if (overflow) LAUF_BUILTIN_RETURN(lauf_runtime_panic(process, "integer overflow")); ++vstack_ptr; @@ -164,21 +165,21 @@ namespace { LAUF_RUNTIME_BUILTIN(uadd_flag, 2, 2, no_panic_flags, "uadd_flag", &smul_panic) { - auto overflow = __builtin_add_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, - &vstack_ptr[1].as_uint); + auto overflow + = lauf_add_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); vstack_ptr[0].as_uint = overflow ? 
1 : 0; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(uadd_wrap, 2, 1, no_panic_flags, "uadd_wrap", &uadd_flag) { - __builtin_add_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); + lauf_add_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); ++vstack_ptr; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(uadd_sat, 2, 1, no_panic_flags, "uadd_sat", &uadd_wrap) { - auto overflow = __builtin_add_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, - &vstack_ptr[1].as_uint); + auto overflow + = lauf_add_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); if (overflow) vstack_ptr[1].as_uint = UINT64_MAX; ++vstack_ptr; @@ -186,8 +187,8 @@ LAUF_RUNTIME_BUILTIN(uadd_sat, 2, 1, no_panic_flags, "uadd_sat", &uadd_wrap) } LAUF_RUNTIME_BUILTIN(uadd_panic, 2, 1, panic_flags, "uadd_panic", &uadd_sat) { - auto overflow = __builtin_add_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, - &vstack_ptr[1].as_uint); + auto overflow + = lauf_add_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); if (overflow) LAUF_BUILTIN_RETURN(lauf_runtime_panic(process, "integer overflow")); ++vstack_ptr; @@ -201,21 +202,21 @@ namespace { LAUF_RUNTIME_BUILTIN(usub_flag, 2, 2, no_panic_flags, "usub_flag", &uadd_panic) { - auto overflow = __builtin_sub_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, - &vstack_ptr[1].as_uint); + auto overflow + = lauf_sub_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); vstack_ptr[0].as_uint = overflow ? 
1 : 0; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(usub_wrap, 2, 1, no_panic_flags, "usub_wrap", &usub_flag) { - __builtin_sub_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); + lauf_sub_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); ++vstack_ptr; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(usub_sat, 2, 1, no_panic_flags, "usub_sat", &usub_wrap) { - auto overflow = __builtin_sub_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, - &vstack_ptr[1].as_uint); + auto overflow + = lauf_sub_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); if (overflow) vstack_ptr[1].as_uint = 0; ++vstack_ptr; @@ -223,8 +224,8 @@ LAUF_RUNTIME_BUILTIN(usub_sat, 2, 1, no_panic_flags, "usub_sat", &usub_wrap) } LAUF_RUNTIME_BUILTIN(usub_panic, 2, 1, panic_flags, "usub_panic", &usub_sat) { - auto overflow = __builtin_sub_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, - &vstack_ptr[1].as_uint); + auto overflow + = lauf_sub_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); if (overflow) LAUF_BUILTIN_RETURN(lauf_runtime_panic(process, "integer overflow")); ++vstack_ptr; @@ -238,21 +239,21 @@ namespace { LAUF_RUNTIME_BUILTIN(umul_flag, 2, 2, no_panic_flags, "umul_flag", &usub_panic) { - auto overflow = __builtin_mul_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, - &vstack_ptr[1].as_uint); + auto overflow + = lauf_mul_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); vstack_ptr[0].as_uint = overflow ? 
1 : 0; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(umul_wrap, 2, 1, no_panic_flags, "umul_wrap", &umul_flag) { - __builtin_mul_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); + lauf_mul_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); ++vstack_ptr; LAUF_RUNTIME_BUILTIN_DISPATCH; } LAUF_RUNTIME_BUILTIN(umul_sat, 2, 1, no_panic_flags, "umul_sat", &umul_wrap) { - auto overflow = __builtin_mul_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, - &vstack_ptr[1].as_uint); + auto overflow + = lauf_mul_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); if (overflow) vstack_ptr[1].as_uint = UINT64_MAX; ++vstack_ptr; @@ -260,8 +261,8 @@ LAUF_RUNTIME_BUILTIN(umul_sat, 2, 1, no_panic_flags, "umul_sat", &umul_wrap) } LAUF_RUNTIME_BUILTIN(umul_panic, 2, 1, panic_flags, "umul_panic", &umul_sat) { - auto overflow = __builtin_mul_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, - &vstack_ptr[1].as_uint); + auto overflow + = lauf_mul_overflow(vstack_ptr[1].as_uint, vstack_ptr[0].as_uint, &vstack_ptr[1].as_uint); if (overflow) LAUF_BUILTIN_RETURN(lauf_runtime_panic(process, "integer overflow")); ++vstack_ptr; diff --git a/src/lauf/lib/memory.cpp b/src/lauf/lib/memory.cpp index 0fcb3c59..d4720b66 100644 --- a/src/lauf/lib/memory.cpp +++ b/src/lauf/lib/memory.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -95,7 +96,7 @@ namespace std::uint32_t addr_offset(lauf_runtime_address addr, lauf_sint offset) { lauf_sint result; - auto overflow = __builtin_add_overflow(lauf_sint(addr.offset), offset, &result); + auto overflow = lauf_add_overflow(lauf_sint(addr.offset), offset, &result); if (LAUF_UNLIKELY(overflow || result < 0 || result > UINT32_MAX)) result = UINT32_MAX; diff --git a/src/lauf/runtime/process.cpp b/src/lauf/runtime/process.cpp index 650212cd..a7057e2d 100644 --- a/src/lauf/runtime/process.cpp +++ b/src/lauf/runtime/process.cpp @@ -330,10 
+330,11 @@ bool lauf_runtime_destroy_fiber(lauf_runtime_process* process, lauf_runtime_fibe for (auto frame_ptr = fiber->suspension_point.frame_ptr; frame_ptr != &fiber->trampoline_frame; frame_ptr = frame_ptr->prev) { - auto first_inst = frame_ptr->function->insts; - auto local_alloc_count = first_inst->op() == lauf::asm_op::setup_local_alloc - ? first_inst->setup_local_alloc.value - : 0u; + auto first_inst = frame_ptr->function->insts; + auto local_alloc_count + = first_inst->op() == lauf::asm_op::setup_local_alloc + ? static_cast(first_inst->setup_local_alloc.value()) + : 0u; for (auto i = 0u; i != local_alloc_count; ++i) { auto index = frame_ptr->first_local_alloc + i; diff --git a/src/lauf/runtime/value.cpp b/src/lauf/runtime/value.cpp index 6750ffb9..8476c9d3 100644 --- a/src/lauf/runtime/value.cpp +++ b/src/lauf/runtime/value.cpp @@ -3,5 +3,4 @@ #include -static_assert(sizeof(lauf_runtime_value) == sizeof(uint64_t)); - +static_assert(sizeof(lauf_runtime_value) == sizeof(uint64_t)); \ No newline at end of file diff --git a/src/lauf/support/align.hpp b/src/lauf/support/align.hpp index 99b84ac2..3b394f24 100644 --- a/src/lauf/support/align.hpp +++ b/src/lauf/support/align.hpp @@ -5,6 +5,9 @@ #define SRC_LAUF_SUPPORT_ALIGN_HPP_INCLUDED #include +#include +#include +#include #include namespace lauf @@ -17,7 +20,7 @@ constexpr bool is_valid_alignment(std::size_t alignment) noexcept constexpr std::uint8_t align_log2(std::size_t alignment) noexcept { assert(is_valid_alignment(alignment)); - return std::uint8_t(__builtin_ctzll(alignment)); + return std::uint8_t(lauf_countr_zero_constexpr(alignment)); } constexpr std::size_t align_offset(std::uintptr_t address, std::size_t alignment) noexcept @@ -49,4 +52,3 @@ constexpr std::size_t round_to_multiple_of_alignment(std::size_t size, } // namespace lauf #endif // SRC_LAUF_SUPPORT_ALIGN_HPP_INCLUDED - diff --git a/src/lauf/support/array_list.hpp b/src/lauf/support/array_list.hpp index 7d6b54d9..b30ac5ae 100644 --- 
a/src/lauf/support/array_list.hpp +++ b/src/lauf/support/array_list.hpp @@ -31,8 +31,8 @@ class array_list array_list() : _first_block(nullptr), _cur_block(nullptr), _next_idx(0), _block_count(0) {} // For simplicity for now. - array_list(const array_list&) = delete; - array_list& operator=(const array_list&) = delete; + array_list(const array_list&) = default; + array_list& operator=(const array_list&) = default; // Arena takes care of deallaction. ~array_list() = default; @@ -306,4 +306,3 @@ class array_list } // namespace lauf #endif // SRC_LAUF_SUPPORT_ARRAY_LIST_HPP_INCLUDED - diff --git a/src/lauf/support/page_allocator.cpp b/src/lauf/support/page_allocator.cpp index 8fb4b737..1433a32f 100644 --- a/src/lauf/support/page_allocator.cpp +++ b/src/lauf/support/page_allocator.cpp @@ -4,8 +4,19 @@ #include #include -#include -#include +#ifdef _WIN32 +# ifndef NOMINMAX +# define NOMINMAX +# endif +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# include +# define MAP_FAILED ((void*)-1) +#else +# include +# include +#endif // #define LAUF_PAGE_ALLOCATOR_LOG #ifdef LAUF_PAGE_ALLOCATOR_LOG @@ -39,12 +50,18 @@ struct lauf::page_allocator::free_list_node namespace { const auto real_page_size = [] { +#ifdef _WIN32 + auto info = SYSTEM_INFO{}; + ::GetSystemInfo(&info); + auto result = static_cast(info.dwPageSize); +#else auto result = static_cast(::sysconf(_SC_PAGE_SIZE)); +#endif assert(lauf::page_allocator::page_size <= result); assert(result % lauf::page_allocator::page_size == 0); return result; }(); -} +} // namespace lauf::page_block lauf::page_allocator::allocate(std::size_t size) { @@ -67,8 +84,12 @@ lauf::page_block lauf::page_allocator::allocate(std::size_t size) return {cur, cur->size}; } - // Allocate new set of pages. +// Allocate new set of pages. 
+#ifdef _WIN32 + auto pages = VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); +#else auto pages = ::mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +#endif assert(pages != MAP_FAILED); // NOLINT: macro _allocated_bytes += size; @@ -85,7 +106,12 @@ std::size_t lauf::page_allocator::try_extend(page_block block, std::size_t new_s new_size = round_to_multiple_of_alignment(new_size, real_page_size); +#if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) + // Windows and Mac does not support the functionality of mremap + auto ptr = MAP_FAILED; +#else auto ptr = ::mremap(block.ptr, block.size, new_size, 0); +#endif if (ptr == MAP_FAILED) // NOLINT: macro { LAUF_PAGE_ALLOCATOR_DO_LOG("try_extend({%p, %zu}, %zu): failed", block.ptr, block.size, @@ -127,7 +153,11 @@ std::size_t lauf::page_allocator::release() auto size = cur->size; auto next = cur->next; +#ifdef _WIN32 + VirtualFree(cur, 0, MEM_RELEASE); +#else ::munmap(cur, size); +#endif _allocated_bytes -= size; cur = next; @@ -135,4 +165,3 @@ std::size_t lauf::page_allocator::release() return _allocated_bytes; } - diff --git a/src/lauf/vm_execute.cpp b/src/lauf/vm_execute.cpp index 89081ce3..2d439a3f 100644 --- a/src/lauf/vm_execute.cpp +++ b/src/lauf/vm_execute.cpp @@ -123,8 +123,8 @@ LAUF_NOINLINE LAUF_BUILTIN_RETURN_TYPE call_undefined_function(const lauf_asm_in lauf_runtime_stack_frame* frame_ptr, lauf_runtime_process* process) { - auto callee - = lauf::uncompress_pointer_offset(frame_ptr->function, ip->call.offset); + auto callee = lauf::uncompress_pointer_offset(frame_ptr->function, + ip->call.offset()); assert(ip->op() == lauf::asm_op::call && callee->insts == nullptr); auto definition = [&] { @@ -225,7 +225,7 @@ LAUF_VM_EXECUTE(return_) } LAUF_VM_EXECUTE(return_free) { - for (auto i = 0u; i != ip->return_free.value; ++i) + for (auto i = 0u; i != ip->return_free.value(); ++i) { auto index = frame_ptr->first_local_alloc + i; auto& alloc = 
process->memory[index]; @@ -244,7 +244,7 @@ LAUF_VM_EXECUTE(return_free) LAUF_VM_EXECUTE(jump) { - ip += ip->jump.offset; + ip += ip->jump.offset(); LAUF_VM_DISPATCH; } @@ -255,7 +255,7 @@ LAUF_VM_EXECUTE(jump) ++vstack_ptr; \ \ if (condition Comp 0) \ - ip += ip->branch_##CC.offset; \ + ip += ip->branch_##CC.offset(); \ else \ ++ip; \ \ @@ -343,7 +343,7 @@ LAUF_VM_EXECUTE(call_builtin_no_regs) auto callee = lauf::uncompress_pointer_offset(&lauf_runtime_builtin_dispatch, ip->call_builtin_no_regs - .offset); + .offset()); #if !defined(__clang__) && (defined(__GNUC__) || defined(__GNUG__)) # define lauf_runtime_builtin_dispatch lauf_runtime_builtin_dispatch_inline #endif @@ -359,8 +359,8 @@ LAUF_VM_EXECUTE(call_builtin_sig) LAUF_VM_EXECUTE(call) { - auto callee - = lauf::uncompress_pointer_offset(frame_ptr->function, ip->call.offset); + auto callee = lauf::uncompress_pointer_offset(frame_ptr->function, + ip->call.offset()); // Call an extern implementation if necessary. if (LAUF_UNLIKELY(callee->insts == nullptr)) @@ -486,7 +486,7 @@ LAUF_VM_EXECUTE(fiber_suspend) LAUF_VM_EXECUTE(push) { --vstack_ptr; - vstack_ptr[0].as_uint = ip->push.value; + vstack_ptr[0].as_uint = ip->push.value(); ++ip; LAUF_VM_DISPATCH; @@ -495,7 +495,7 @@ LAUF_VM_EXECUTE(push) LAUF_VM_EXECUTE(pushn) { --vstack_ptr; - vstack_ptr[0].as_uint = ~lauf_uint(ip->push.value); + vstack_ptr[0].as_uint = ~lauf_uint(ip->push.value()); ++ip; LAUF_VM_DISPATCH; @@ -503,7 +503,7 @@ LAUF_VM_EXECUTE(pushn) LAUF_VM_EXECUTE(push2) { - vstack_ptr[0].as_uint |= lauf_uint(ip->push2.value) << 24; + vstack_ptr[0].as_uint |= lauf_uint(ip->push2.value()) << 24; ++ip; LAUF_VM_DISPATCH; @@ -511,7 +511,7 @@ LAUF_VM_EXECUTE(push2) LAUF_VM_EXECUTE(push3) { - vstack_ptr[0].as_uint |= lauf_uint(ip->push2.value) << 48; + vstack_ptr[0].as_uint |= lauf_uint(ip->push2.value()) << 48; ++ip; LAUF_VM_DISPATCH; @@ -522,8 +522,8 @@ LAUF_VM_EXECUTE(global_addr) --vstack_ptr; LAUF_BITFIELD_CONVERSION( - 
vstack_ptr[0].as_address.allocation - = get_global_allocation_idx(frame_ptr, process, ip->global_addr.value)); + vstack_ptr[0].as_address.allocation = static_cast( + get_global_allocation_idx(frame_ptr, process, ip->global_addr.value()))); vstack_ptr[0].as_address.offset = 0; vstack_ptr[0].as_address.generation = 0; // Always true for globals. @@ -534,7 +534,7 @@ LAUF_VM_EXECUTE(global_addr) LAUF_VM_EXECUTE(function_addr) { auto fn = lauf::uncompress_pointer_offset(frame_ptr->function, - ip->function_addr.offset); + ip->function_addr.offset()); --vstack_ptr; vstack_ptr[0].as_function_address.index = fn->function_idx; @@ -560,7 +560,7 @@ LAUF_VM_EXECUTE(local_addr) LAUF_VM_EXECUTE(cc) { - switch (lauf_asm_inst_condition_code(ip->cc.value)) + switch (lauf_asm_inst_condition_code(ip->cc.value())) { case LAUF_ASM_INST_CC_EQ: if (vstack_ptr[0].as_sint == 0) @@ -689,7 +689,7 @@ LAUF_VM_EXECUTE(select) LAUF_VM_EXECUTE(setup_local_alloc) { // If necessary, grow the allocation array - this will then tail call back here. - if (LAUF_UNLIKELY(process->memory.needs_to_grow(ip->setup_local_alloc.value))) + if (LAUF_UNLIKELY(process->memory.needs_to_grow(ip->setup_local_alloc.value()))) LAUF_TAIL_CALL return grow_allocation_array(ip, vstack_ptr, frame_ptr, process); // Setup the necessary metadata. 
@@ -732,7 +732,7 @@ LAUF_VM_EXECUTE(local_alloc_aligned) } LAUF_VM_EXECUTE(local_storage) { - frame_ptr->next_offset += ip->local_storage.value; + frame_ptr->next_offset += ip->local_storage.value(); ++ip; LAUF_VM_DISPATCH; @@ -791,7 +791,7 @@ LAUF_VM_EXECUTE(array_element) auto address = vstack_ptr[1].as_address; auto index = vstack_ptr[0].as_sint; - address.offset += static_cast(lauf_sint(ip->array_element.value) * index); + address.offset += static_cast(lauf_sint(ip->array_element.value()) * index); ++vstack_ptr; vstack_ptr[0].as_address = address; @@ -803,7 +803,7 @@ LAUF_VM_EXECUTE(array_element) LAUF_VM_EXECUTE(aggregate_member) { auto address = vstack_ptr[0].as_address; - address.offset += ip->aggregate_member.value; + address.offset += ip->aggregate_member.value(); vstack_ptr[0].as_address = address; ++ip; @@ -834,7 +834,7 @@ LAUF_VM_EXECUTE(store_local_value) LAUF_VM_EXECUTE(load_global_value) { - auto allocation = get_global_allocation_idx(frame_ptr, process, ip->load_global_value.value); + auto allocation = get_global_allocation_idx(frame_ptr, process, ip->load_global_value.value()); auto memory = process->memory[allocation].ptr; --vstack_ptr; @@ -846,7 +846,7 @@ LAUF_VM_EXECUTE(load_global_value) LAUF_VM_EXECUTE(store_global_value) { - auto allocation = get_global_allocation_idx(frame_ptr, process, ip->store_global_value.value); + auto allocation = get_global_allocation_idx(frame_ptr, process, ip->store_global_value.value()); auto memory = process->memory[allocation].ptr; *reinterpret_cast(memory) = vstack_ptr[0]; diff --git a/src/lauf/vm_execute.hpp b/src/lauf/vm_execute.hpp index 091df827..0fa2e0dd 100644 --- a/src/lauf/vm_execute.hpp +++ b/src/lauf/vm_execute.hpp @@ -51,8 +51,8 @@ constexpr lauf_asm_inst trampoline_code[3] = { [] { // We first want to call the function specified in the trampoline stack frame. 
lauf_asm_inst result; - result.call.op = lauf::asm_op::call; - result.call.offset = 0; + result.call.op = lauf::asm_op::call; + result.call.offset(0); return result; }(), [] { @@ -79,10 +79,9 @@ extern "C" [[gnu::always_inline]] inline LAUF_BUILTIN_RETURN_TYPE lauf_runtime_stack_frame* frame_ptr, lauf_runtime_process* process) #else -inline LAUF_BUILTIN_RETURN_TYPE lauf_runtime_builtin_dispatch(const lauf_asm_inst* ip, - lauf_runtime_value* vstack_ptr, - lauf_runtime_stack_frame* frame_ptr, - lauf_runtime_process* process) +LAUF_RUNTIME_BUILTIN_IMPL inline LAUF_BUILTIN_RETURN_TYPE lauf_runtime_builtin_dispatch( + const lauf_asm_inst* ip, lauf_runtime_value* vstack_ptr, lauf_runtime_stack_frame* frame_ptr, + lauf_runtime_process* process) #endif { diff --git a/src/lauf/writer.cpp b/src/lauf/writer.cpp index eb0846c4..1509f166 100644 --- a/src/lauf/writer.cpp +++ b/src/lauf/writer.cpp @@ -13,7 +13,10 @@ void lauf_writer::write(const char* str) write(str, std::strlen(str)); } -[[gnu::format(printf, 2, 3)]] void lauf_writer::format(const char* fmt, ...) +#if __has_cpp_attribute(gnu::format) +[[gnu::format(printf, 2, 3)]] +#endif +void lauf_writer::format(const char* fmt, ...) 
{ constexpr auto small_buffer = 1024; char buffer[small_buffer + 1]; @@ -108,4 +111,3 @@ lauf_writer* lauf_create_stdout_writer(void) { return new file_writer(stdout); } - diff --git a/src/lauf/writer.hpp b/src/lauf/writer.hpp index 44a01dd9..fa6573dc 100644 --- a/src/lauf/writer.hpp +++ b/src/lauf/writer.hpp @@ -17,8 +17,10 @@ struct lauf_writer void write(const char* str); - [[gnu::format(printf, 2, 3)]] void format(const char* fmt, ...); +#if __has_cpp_attribute(gnu::format) + [[gnu::format(printf, 2, 3)]] +#endif + void format(const char* fmt, ...); }; #endif // SRC_LAUF_WRITER_HPP_INCLUDED - diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 62ba6efc..25d153cb 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -3,25 +3,42 @@ add_custom_target(lauf_test_qbe ALL) +find_program(QBE_EXE NAMES qbe) + +unset(ASSEMBLER CACHE) + if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") - set(USE_COMPILER clang) -else(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") - set(USE_COMPILER gcc) +elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") +elseif(MSVC) + find_program(ASSEMBLER NAMES yasm) endif() -file(GLOB test_files CONFIGURE_DEPENDS "*.lauf") -foreach(file ${test_files}) - get_filename_component(name ${file} NAME) - add_test(NAME ${name} COMMAND lauf_tool_interpreter ${file}) +if(QBE_EXE) + file(GLOB test_files CONFIGURE_DEPENDS "*.lauf") + foreach(file ${test_files}) + get_filename_component(name ${file} NAME) + add_test(NAME ${name} COMMAND lauf_tool_interpreter ${file}) - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${name}.qbe COMMAND lauf_tool_qbe ${file} > ${name}.qbe DEPENDS ${file} lauf_tool_qbe) - add_custom_command(OUTPUT ${name}.s COMMAND qbe ${CMAKE_CURRENT_BINARY_DIR}/${name}.qbe -o ${name}.s DEPENDS ${name}.qbe) - add_custom_command(OUTPUT ${name}.exe COMMAND ${USE_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/runtime.c ${name}.s -o ${name}.exe - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/runtime.c 
${name}.s) + # MSVC crashes when running a generated QBE file for some alloc calls in heap and memory + if(ASSEMBLER AND name MATCHES "heap.lauf|memory.lauf") + continue() + endif() - add_custom_target(lauf_test_qbe_${name} DEPENDS ${name}.exe) - add_dependencies(lauf_test_qbe lauf_test_qbe_${name}) + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${name}.qbe COMMAND lauf_tool_qbe ${file} > ${name}.qbe DEPENDS ${file} lauf_tool_qbe) + add_custom_command(OUTPUT ${name}.s COMMAND ${QBE_EXE} -o ${name}.s ${CMAKE_CURRENT_BINARY_DIR}/${name}.qbe DEPENDS ${name}.qbe) + if(ASSEMBLER) + add_custom_command(OUTPUT ${name}.obj COMMAND ${ASSEMBLER} -f win64 -w -p gas ${name}.s -o ${name}.obj + DEPENDS ${name}.s) + add_custom_command(OUTPUT ${name}.exe COMMAND ${CMAKE_C_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/runtime.c ${name}.obj /nologo /MT /Fe:${name}.exe /link /FORCE:UNRESOLVED /DYNAMICBASE:NO + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/runtime.c ${name}.obj) + else() + add_custom_command(OUTPUT ${name}.exe COMMAND ${CMAKE_C_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/runtime.c ${name}.s -o ${name}.exe + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/runtime.c ${name}.s) + endif() - add_test(NAME ${name}.qbe COMMAND ${name}.exe) -endforeach() + add_custom_target(lauf_test_qbe_${name} DEPENDS ${name}.exe) + add_dependencies(lauf_test_qbe lauf_test_qbe_${name}) + add_test(NAME ${name}.qbe COMMAND ${name}.exe) + endforeach() +endif() diff --git a/tests/integration/runtime.c b/tests/integration/runtime.c index fa058c00..4b551135 100644 --- a/tests/integration/runtime.c +++ b/tests/integration/runtime.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -11,7 +12,12 @@ void lauf_panic(const char* msg) void* lauf_heap_alloc(uint64_t size, uint64_t alignment) { +#ifdef _MSC_VER + alignment = (alignment > 0) && ((alignment & (alignment - 1)) == 0) ? 
alignment : 16; + return _aligned_malloc(size, alignment); +#else return aligned_alloc(alignment, size); +#endif } void* lauf_heap_alloc_array(uint64_t count, uint64_t size, uint64_t alignment) @@ -23,7 +29,11 @@ void* lauf_heap_alloc_array(uint64_t count, uint64_t size, uint64_t alignment) void lauf_heap_free(void* ptr) { +#ifdef _MSC_VER + _aligned_free(ptr); +#else free(ptr); +#endif } uint64_t lauf_heap_gc(void) diff --git a/tests/lauf/asm/builder.cpp b/tests/lauf/asm/builder.cpp index 91143924..b6af8bc3 100644 --- a/tests/lauf/asm/builder.cpp +++ b/tests/lauf/asm/builder.cpp @@ -116,7 +116,7 @@ TEST_CASE("lauf_asm_inst_jump") }); REQUIRE(forward.size() >= 1); CHECK(forward[0].op() == lauf::asm_op::jump); - CHECK(forward[0].jump.offset == 4); + CHECK(forward[0].jump.offset() == 4); } TEST_CASE("lauf_asm_inst_branch2") @@ -133,7 +133,7 @@ TEST_CASE("lauf_asm_inst_branch2") }); REQUIRE(br_nop.size() >= 1); CHECK(br_nop[0].op() == lauf::asm_op::branch_eq); - CHECK(br_nop[0].branch_eq.offset == 4); + CHECK(br_nop[0].branch_eq.offset() == 4); auto br_jump = build({1, 0}, [](lauf_asm_module*, lauf_asm_builder* b) { auto if_false = lauf_asm_declare_block(b, 0); @@ -147,7 +147,7 @@ TEST_CASE("lauf_asm_inst_branch2") }); REQUIRE(br_jump.size() >= 1); CHECK(br_jump[0].op() == lauf::asm_op::branch_ne); - CHECK(br_jump[0].branch_ne.offset == 4); + CHECK(br_jump[0].branch_ne.offset() == 4); auto same = build({1, 0}, [](lauf_asm_module*, lauf_asm_builder* b) { auto block = lauf_asm_declare_block(b, 0); @@ -169,55 +169,55 @@ TEST_CASE("lauf_asm_inst_uint") auto zero = build_uint(0); REQUIRE(zero.size() == 1); CHECK(zero[0].op() == lauf::asm_op::push); - CHECK(zero[0].push.value == 0); + CHECK(zero[0].push.value() == 0); auto small = build_uint(0x12'3456); REQUIRE(small.size() == 1); CHECK(small[0].op() == lauf::asm_op::push); - CHECK(small[0].push.value == 0x12'3456); + CHECK(small[0].push.value() == 0x12'3456); auto max24 = build_uint(0xFF'FFFF); REQUIRE(max24.size() == 1); 
CHECK(max24[0].op() == lauf::asm_op::push); - CHECK(max24[0].push.value == 0xFF'FFFF); + CHECK(max24[0].push.value() == 0xFF'FFFF); auto bigger24 = build_uint(0xABFF'FFFF); REQUIRE(bigger24.size() == 2); CHECK(bigger24[0].op() == lauf::asm_op::push); - CHECK(bigger24[0].push.value == 0xFF'FFFF); + CHECK(bigger24[0].push.value() == 0xFF'FFFF); CHECK(bigger24[1].op() == lauf::asm_op::push2); - CHECK(bigger24[1].push2.value == 0xAB); + CHECK(bigger24[1].push2.value() == 0xAB); auto max48 = build_uint(0xFFFF'FFFF'FFFF); REQUIRE(max48.size() == 2); CHECK(max48[0].op() == lauf::asm_op::push); - CHECK(max48[0].push.value == 0xFF'FFFF); + CHECK(max48[0].push.value() == 0xFF'FFFF); CHECK(max48[1].op() == lauf::asm_op::push2); - CHECK(max48[1].push2.value == 0xFF'FFFF); + CHECK(max48[1].push2.value() == 0xFF'FFFF); auto bigger48 = build_uint(0x0123'4567'89AB'CDEF); REQUIRE(bigger48.size() == 3); CHECK(bigger48[0].op() == lauf::asm_op::push); - CHECK(bigger48[0].push.value == 0xAB'CDEF); + CHECK(bigger48[0].push.value() == 0xAB'CDEF); CHECK(bigger48[1].op() == lauf::asm_op::push2); - CHECK(bigger48[1].push2.value == 0x45'6789); + CHECK(bigger48[1].push2.value() == 0x45'6789); CHECK(bigger48[2].op() == lauf::asm_op::push3); - CHECK(bigger48[2].push2.value == 0x0123); + CHECK(bigger48[2].push2.value() == 0x0123); auto neg_zero = build_uint(0xFFFF'FFFF'FF00'0000); REQUIRE(neg_zero.size() == 1); CHECK(neg_zero[0].op() == lauf::asm_op::pushn); - CHECK(neg_zero[0].push.value == 0xFF'FFFF); + CHECK(neg_zero[0].push.value() == 0xFF'FFFF); auto neg_small = build_uint(0xFFFF'FFFF'FF12'3456); REQUIRE(neg_small.size() == 1); CHECK(neg_small[0].op() == lauf::asm_op::pushn); - CHECK(neg_small[0].push.value == 0xED'CBA9); + CHECK(neg_small[0].push.value() == 0xED'CBA9); auto neg_max = build_uint(0xFFFF'FFFF'FFFF'FFFF); REQUIRE(neg_max.size() == 1); CHECK(neg_max[0].op() == lauf::asm_op::pushn); - CHECK(neg_max[0].push.value == 0); + CHECK(neg_max[0].push.value() == 0); } 
TEST_CASE("lauf_asm_inst_sint") @@ -230,64 +230,64 @@ TEST_CASE("lauf_asm_inst_sint") auto zero = build_sint(0); REQUIRE(zero.size() == 1); CHECK(zero[0].op() == lauf::asm_op::push); - CHECK(zero[0].push.value == 0); + CHECK(zero[0].push.value() == 0); auto small = build_sint(0x12'3456); REQUIRE(small.size() == 1); CHECK(small[0].op() == lauf::asm_op::push); - CHECK(small[0].push.value == 0x12'3456); + CHECK(small[0].push.value() == 0x12'3456); auto max24 = build_sint(0xFF'FFFF); REQUIRE(max24.size() == 1); CHECK(max24[0].op() == lauf::asm_op::push); - CHECK(max24[0].push.value == 0xFF'FFFF); + CHECK(max24[0].push.value() == 0xFF'FFFF); auto bigger24 = build_sint(0xABFF'FFFF); REQUIRE(bigger24.size() == 2); CHECK(bigger24[0].op() == lauf::asm_op::push); - CHECK(bigger24[0].push.value == 0xFF'FFFF); + CHECK(bigger24[0].push.value() == 0xFF'FFFF); CHECK(bigger24[1].op() == lauf::asm_op::push2); - CHECK(bigger24[1].push2.value == 0xAB); + CHECK(bigger24[1].push2.value() == 0xAB); auto max48 = build_sint(0xFFFF'FFFF'FFFF); REQUIRE(max48.size() == 2); CHECK(max48[0].op() == lauf::asm_op::push); - CHECK(max48[0].push.value == 0xFF'FFFF); + CHECK(max48[0].push.value() == 0xFF'FFFF); CHECK(max48[1].op() == lauf::asm_op::push2); - CHECK(max48[1].push2.value == 0xFF'FFFF); + CHECK(max48[1].push2.value() == 0xFF'FFFF); auto bigger48 = build_sint(0x0123'4567'89AB'CDEF); REQUIRE(bigger48.size() == 3); CHECK(bigger48[0].op() == lauf::asm_op::push); - CHECK(bigger48[0].push.value == 0xAB'CDEF); + CHECK(bigger48[0].push.value() == 0xAB'CDEF); CHECK(bigger48[1].op() == lauf::asm_op::push2); - CHECK(bigger48[1].push2.value == 0x45'6789); + CHECK(bigger48[1].push2.value() == 0x45'6789); CHECK(bigger48[2].op() == lauf::asm_op::push3); - CHECK(bigger48[2].push2.value == 0x0123); + CHECK(bigger48[2].push2.value() == 0x0123); auto neg_one = build_sint(-1); REQUIRE(neg_one.size() == 1); CHECK(neg_one[0].op() == lauf::asm_op::pushn); - CHECK(neg_one[0].push.value == 0); + 
CHECK(neg_one[0].push.value() == 0); auto neg_small = build_sint(-0x12'3456); REQUIRE(neg_small.size() == 1); CHECK(neg_small[0].op() == lauf::asm_op::pushn); - CHECK(neg_small[0].push.value == 0x12'3455); + CHECK(neg_small[0].push.value() == 0x12'3455); auto neg_max24 = build_sint(-0x100'0000); REQUIRE(neg_max24.size() == 1); CHECK(neg_max24[0].op() == lauf::asm_op::pushn); - CHECK(neg_max24[0].push.value == 0xFF'FFFF); + CHECK(neg_max24[0].push.value() == 0xFF'FFFF); auto neg_bigger24 = build_sint(-0xFFFF'FFFFll); REQUIRE(neg_bigger24.size() == 3); CHECK(neg_bigger24[0].op() == lauf::asm_op::push); - CHECK(neg_bigger24[0].push.value == 0x00'0001); + CHECK(neg_bigger24[0].push.value() == 0x00'0001); CHECK(neg_bigger24[1].op() == lauf::asm_op::push2); - CHECK(neg_bigger24[1].push2.value == 0xFF'FF00); + CHECK(neg_bigger24[1].push2.value() == 0xFF'FF00); CHECK(neg_bigger24[2].op() == lauf::asm_op::push3); - CHECK(neg_bigger24[2].push3.value == 0xFFFF); + CHECK(neg_bigger24[2].push3.value() == 0xFFFF); } TEST_CASE("lauf_asm_inst_bytes") @@ -301,55 +301,55 @@ TEST_CASE("lauf_asm_inst_bytes") auto zero = build_bytes(0); REQUIRE(zero.size() == 1); CHECK(zero[0].op() == lauf::asm_op::push); - CHECK(zero[0].push.value == 0); + CHECK(zero[0].push.value() == 0); auto small = build_bytes(0x12'3456); REQUIRE(small.size() == 1); CHECK(small[0].op() == lauf::asm_op::push); - CHECK(small[0].push.value == 0x12'3456); + CHECK(small[0].push.value() == 0x12'3456); auto max24 = build_bytes(0xFF'FFFF); REQUIRE(max24.size() == 1); CHECK(max24[0].op() == lauf::asm_op::push); - CHECK(max24[0].push.value == 0xFF'FFFF); + CHECK(max24[0].push.value() == 0xFF'FFFF); auto bigger24 = build_bytes(0xABFF'FFFF); REQUIRE(bigger24.size() == 2); CHECK(bigger24[0].op() == lauf::asm_op::push); - CHECK(bigger24[0].push.value == 0xFF'FFFF); + CHECK(bigger24[0].push.value() == 0xFF'FFFF); CHECK(bigger24[1].op() == lauf::asm_op::push2); - CHECK(bigger24[1].push2.value == 0xAB); + 
CHECK(bigger24[1].push2.value() == 0xAB); auto max48 = build_bytes(0xFFFF'FFFF'FFFF); REQUIRE(max48.size() == 2); CHECK(max48[0].op() == lauf::asm_op::push); - CHECK(max48[0].push.value == 0xFF'FFFF); + CHECK(max48[0].push.value() == 0xFF'FFFF); CHECK(max48[1].op() == lauf::asm_op::push2); - CHECK(max48[1].push2.value == 0xFF'FFFF); + CHECK(max48[1].push2.value() == 0xFF'FFFF); auto bigger48 = build_bytes(0x0123'4567'89AB'CDEF); REQUIRE(bigger48.size() == 3); CHECK(bigger48[0].op() == lauf::asm_op::push); - CHECK(bigger48[0].push.value == 0xAB'CDEF); + CHECK(bigger48[0].push.value() == 0xAB'CDEF); CHECK(bigger48[1].op() == lauf::asm_op::push2); - CHECK(bigger48[1].push2.value == 0x45'6789); + CHECK(bigger48[1].push2.value() == 0x45'6789); CHECK(bigger48[2].op() == lauf::asm_op::push3); - CHECK(bigger48[2].push2.value == 0x0123); + CHECK(bigger48[2].push2.value() == 0x0123); auto neg_zero = build_bytes(0xFFFF'FFFF'FF00'0000); REQUIRE(neg_zero.size() == 1); CHECK(neg_zero[0].op() == lauf::asm_op::pushn); - CHECK(neg_zero[0].push.value == 0xFF'FFFF); + CHECK(neg_zero[0].push.value() == 0xFF'FFFF); auto neg_small = build_bytes(0xFFFF'FFFF'FF12'3456); REQUIRE(neg_small.size() == 1); CHECK(neg_small[0].op() == lauf::asm_op::pushn); - CHECK(neg_small[0].push.value == 0xED'CBA9); + CHECK(neg_small[0].push.value() == 0xED'CBA9); auto neg_max = build_bytes(0xFFFF'FFFF'FFFF'FFFF); REQUIRE(neg_max.size() == 1); CHECK(neg_max[0].op() == lauf::asm_op::pushn); - CHECK(neg_max[0].push.value == 0); + CHECK(neg_max[0].push.value() == 0); } TEST_CASE("lauf_asm_inst_null") @@ -358,7 +358,7 @@ TEST_CASE("lauf_asm_inst_null") = build({0, 1}, [](lauf_asm_module*, lauf_asm_builder* b) { lauf_asm_inst_null(b); }); REQUIRE(result.size() == 1); CHECK(result[0].op() == lauf::asm_op::pushn); - CHECK(result[0].push.value == 0); + CHECK(result[0].push.value() == 0); } TEST_CASE("lauf_asm_inst_global_addr") @@ -370,7 +370,7 @@ TEST_CASE("lauf_asm_inst_global_addr") }); REQUIRE(single.size() == 
1); CHECK(single[0].op() == lauf::asm_op::global_addr); - CHECK(single[0].global_addr.value == 0); + CHECK(single[0].global_addr.value() == 0); auto multiple = build({0, 1}, [](lauf_asm_module* mod, lauf_asm_builder* b) { lauf_asm_add_global(mod, LAUF_ASM_GLOBAL_READ_WRITE); @@ -380,7 +380,7 @@ TEST_CASE("lauf_asm_inst_global_addr") }); REQUIRE(multiple.size() == 1); CHECK(multiple[0].op() == lauf::asm_op::global_addr); - CHECK(multiple[0].global_addr.value == 1); + CHECK(multiple[0].global_addr.value() == 1); } TEST_CASE("lauf_asm_inst_local_addr") @@ -411,7 +411,7 @@ TEST_CASE("lauf_asm_inst_cc") }); REQUIRE(dynamic.size() == 1); CHECK(dynamic[0].op() == lauf::asm_op::cc); - CHECK(dynamic[0].cc.value == LAUF_ASM_INST_CC_EQ); + CHECK(dynamic[0].cc.value() == LAUF_ASM_INST_CC_EQ); auto constant = build({0, 1}, [](lauf_asm_module*, lauf_asm_builder* b) { lauf_asm_inst_sint(b, -1); @@ -419,7 +419,7 @@ TEST_CASE("lauf_asm_inst_cc") }); REQUIRE(constant.size() == 1); CHECK(constant[0].op() == lauf::asm_op::push); - CHECK(constant[0].push.value == 0); + CHECK(constant[0].push.value() == 0); } TEST_CASE("lauf_asm_inst_function_addr") @@ -602,7 +602,7 @@ TEST_CASE("lauf_asm_inst_call_builtin") }); REQUIRE(constant.size() == 1); CHECK(constant[0].op() == lauf::asm_op::push); - CHECK(constant[0].push.value == 3); + CHECK(constant[0].push.value() == 3); } TEST_CASE("lauf_asm_inst_array_element") @@ -612,14 +612,14 @@ TEST_CASE("lauf_asm_inst_array_element") }); REQUIRE(normal.size() == 1); CHECK(normal[0].op() == lauf::asm_op::array_element); - CHECK(normal[0].array_element.value == 8); + CHECK(normal[0].array_element.value() == 8); auto alignment = build({2, 1}, [](lauf_asm_module*, lauf_asm_builder* b) { lauf_asm_inst_array_element(b, {4, 8}); }); REQUIRE(alignment.size() == 1); CHECK(alignment[0].op() == lauf::asm_op::array_element); - CHECK(alignment[0].array_element.value == 8); + CHECK(alignment[0].array_element.value() == 8); auto constant_zero = build({1, 1}, 
[](lauf_asm_module*, lauf_asm_builder* b) { lauf_asm_inst_uint(b, 0); @@ -633,7 +633,7 @@ TEST_CASE("lauf_asm_inst_array_element") }); REQUIRE(constant.size() == 1); CHECK(constant[0].op() == lauf::asm_op::aggregate_member); - CHECK(constant[0].aggregate_member.value == 16); + CHECK(constant[0].aggregate_member.value() == 16); } TEST_CASE("lauf_asm_inst_aggregate_member") @@ -650,13 +650,12 @@ TEST_CASE("lauf_asm_inst_aggregate_member") }); REQUIRE(second.size() == 1); CHECK(second[0].op() == lauf::asm_op::aggregate_member); - CHECK(second[0].aggregate_member.value == 8); + CHECK(second[0].aggregate_member.value() == 8); auto third = build({1, 1}, [&](lauf_asm_module*, lauf_asm_builder* b) { lauf_asm_inst_aggregate_member(b, 2, agg, 3); }); REQUIRE(third.size() == 1); CHECK(third[0].op() == lauf::asm_op::aggregate_member); - CHECK(third[0].aggregate_member.value == 16); + CHECK(third[0].aggregate_member.value() == 16); } - diff --git a/tests/lauf/reader.cpp b/tests/lauf/reader.cpp index 0b1528ba..3b136873 100644 --- a/tests/lauf/reader.cpp +++ b/tests/lauf/reader.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -52,4 +53,3 @@ TEST_CASE("file reader") std::remove(test_path); } -