Initial Windows on ARM (AArch64) Support (#249)

* Conditionally include ntdll.dll

* Use text relocation instead of GOT

* Use FlushInstructionCache instead of clear_cache for arm64

* Load address in two stages (adrp, add)

* objc_msgSend.aarch64.S add comments

* Add seh directives

* Move .seh_proc into slow lookup section

* Comment out cfi directives

* Substitute raw .seh directives with macros

* Add documentation of SEH annotations

* Detect CPU Architecture with preprocessor

* Cleanup CMakeLists.txt

Co-authored-by: David Chisnall <davidchisnall@users.noreply.github.com>

* Remove line in objc_msgSend.aarch64.S

Co-authored-by: David Chisnall <davidchisnall@users.noreply.github.com>

* Change Test CMakeList to use ARCHITECTURE var

* Use existing clear cache macro

* Change _WIN64 to _WIN32 and reorder labels

* Remove macro and replace _WIN64 with _WIN32

* Remove argument from non-win32 macro

---------

Co-authored-by: David Chisnall <davidchisnall@users.noreply.github.com>
main
Hugo Melder 2 years ago committed by GitHub
parent d0d28b8f9d
commit ab23f14fd7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,12 @@
// detect_arch.c
//
// Compile-time architecture probe. This file is never meant to compile
// successfully: every branch ends in an #error whose message is the name of
// the target architecture, and the build system reads that name back out of
// the compiler's diagnostic output.
//
// Both the GNU-style (__aarch64__, ...) and the MSVC-style (_M_ARM64, ...)
// predefined macros are tested so that the probe gives the same answer
// regardless of which compiler front end is in use.
#if defined(__aarch64__) || defined(_M_ARM64)
#error aarch64
#elif defined(__arm__) || defined(_M_ARM)
#error arm
#elif defined(__i386__) || defined(_M_IX86)
#error i386
#elif defined(__x86_64__) || defined(_M_X64)
#error x86_64
#else
#error unknown
#endif

@ -34,6 +34,21 @@ if (MSVC)
set(objc_LINK_FLAGS "/DEBUG /INCREMENTAL:NO ${objc_LINK_FLAGS}") set(objc_LINK_FLAGS "/DEBUG /INCREMENTAL:NO ${objc_LINK_FLAGS}")
endif() endif()
# Get Architecture without relying on CMake.
#
# CMake/detect_arch.c is a probe that is expected to FAIL to compile: it
# contains one "#error <arch>" per supported architecture, so the name of the
# target architecture shows up in the compiler output captured below.
try_compile(
  COMPILE_SUCCESS
  ${CMAKE_BINARY_DIR}
  ${CMAKE_SOURCE_DIR}/CMake/detect_arch.c
  OUTPUT_VARIABLE COMPILE_OUTPUT
)
if(NOT COMPILE_SUCCESS)
  # The output is quoted so that it is always passed as exactly one argument:
  # unquoted, an empty string would vanish and make string() fail with too few
  # arguments, and any ';' in the output would split it into several pieces.
  # NOTE(review): MATCH returns the first hit anywhere in the output, so a
  # build path containing one of these tokens could shadow the #error text.
  string(REGEX MATCH "(aarch64|arm|i386|x86_64|unknown)" ARCHITECTURE "${COMPILE_OUTPUT}")
endif()
# Nothing recognised (or the probe unexpectedly compiled): fall back to the
# same token the probe itself uses for an unrecognised target.
if(NOT ARCHITECTURE)
  set(ARCHITECTURE "unknown")
endif()
set(ARCHITECTURE "${ARCHITECTURE}" CACHE STRING "Architecture Type")
message(STATUS "Architecture: ${ARCHITECTURE}")
# Build configuration # Build configuration
add_compile_definitions(GNUSTEP __OBJC_RUNTIME_INTERNAL__=1) add_compile_definitions(GNUSTEP __OBJC_RUNTIME_INTERNAL__=1)
@ -215,6 +230,10 @@ target_sources(objc PRIVATE ${libobjc_CXX_SRCS})
include(FindThreads) include(FindThreads)
target_link_libraries(objc Threads::Threads) target_link_libraries(objc Threads::Threads)
# Link against ntdll.dll for RtlRaiseException
# Only applied when targeting Windows (WIN32). The plain (keyword-less)
# signature is used to match the Threads::Threads link above; CMake does not
# allow plain and keyword signatures to be mixed on a single target.
if (WIN32)
target_link_libraries(objc ntdll.dll)
endif()
set_target_properties(objc PROPERTIES set_target_properties(objc PROPERTIES
@ -342,8 +361,6 @@ configure_file(
add_custom_target(uninstall add_custom_target(uninstall
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
if (TESTS) if (TESTS)
enable_testing() enable_testing()
add_subdirectory(Test) add_subdirectory(Test)

@ -80,7 +80,7 @@ if (ENABLE_ALL_OBJC_ARC_TESTS)
endif() endif()
# UnexpectedException test currently fails on ARM and needs to be fixed # UnexpectedException test currently fails on ARM and needs to be fixed
if (NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)") if(NOT ARCHITECTURE MATCHES "^(arm|aarch64)")
list(APPEND TESTS UnexpectedException.m) list(APPEND TESTS UnexpectedException.m)
endif() endif()

@ -12,6 +12,8 @@
#include <unistd.h> #include <unistd.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/mman.h> #include <sys/mman.h>
#else
#include "safewindows.h"
#endif #endif
#include "objc/runtime.h" #include "objc/runtime.h"
#include "objc/blocks_runtime.h" #include "objc/blocks_runtime.h"
@ -22,7 +24,13 @@
#ifndef __has_builtin #ifndef __has_builtin
#define __has_builtin(x) 0 #define __has_builtin(x) 0
#endif #endif
#if __has_builtin(__builtin___clear_cache)
#if defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
static inline void __clear_cache(void* start, void* end) {
FlushInstructionCache(GetCurrentProcess(), start, end - start);
}
#define clear_cache __clear_cache
#elif __has_builtin(__builtin___clear_cache)
#define clear_cache __builtin___clear_cache #define clear_cache __builtin___clear_cache
#else #else
void __clear_cache(void* start, void* end); void __clear_cache(void* start, void* end);
@ -36,7 +44,6 @@ void __clear_cache(void* start, void* end);
#endif #endif
#ifdef _WIN32 #ifdef _WIN32
#include "safewindows.h"
#if defined(WINAPI_FAMILY) && WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP && _WIN32_WINNT >= 0x0A00 #if defined(WINAPI_FAMILY) && WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP && _WIN32_WINNT >= 0x0A00
// Prefer the *FromApp versions when we're being built in a Windows Store App context on // Prefer the *FromApp versions when we're being built in a Windows Store App context on
// Windows >= 10. *FromApp require the application to be manifested for "codeGeneration". // Windows >= 10. *FromApp require the application to be manifested for "codeGeneration".
@ -178,6 +185,7 @@ static struct trampoline_set *alloc_trampolines(char *start, char *end)
metadata->buffers->headers[HEADERS_PER_PAGE-1].block = NULL; metadata->buffers->headers[HEADERS_PER_PAGE-1].block = NULL;
mprotect(metadata->buffers->rx_buffer, PAGE_SIZE, PROT_READ | PROT_EXEC); mprotect(metadata->buffers->rx_buffer, PAGE_SIZE, PROT_READ | PROT_EXEC);
clear_cache(metadata->buffers->rx_buffer, &metadata->buffers->rx_buffer[PAGE_SIZE]); clear_cache(metadata->buffers->rx_buffer, &metadata->buffers->rx_buffer[PAGE_SIZE]);
return metadata; return metadata;
} }

@ -1,15 +1,88 @@
#define ARGUMENT_SPILL_SIZE (8*10 + 8*16) #define ARGUMENT_SPILL_SIZE (8*10 + 8*16)
.macro MSGSEND receiver, sel
.cfi_startproc /* Windows ARM64 Exception Handling
cbz \receiver, 4f // Skip everything if the receiver is nil *
* Structured Exception Handling (SEH) on Windows ARM64 differs from the x64
* implementation. Functions consist of a single prologue and zero or more
* epilogues. Instead of using offsets for the .seh* directives to manipulate the
* stack frame, each directive corresponds to a single instruction.
*
* This presents a challenge for our objc_msgSend function, which only modifies
* the stack when a slow lookup is needed (see label "5").
*
* To address this, we move the directive marking the start of a function deep
* into the msgSend body to prevent marking every instruction as ".seh_nop."
*
* For Windows:
* - EH_START(x): Start of function (no effect on Windows)
* - EH_END(x): End of function (no effect on Windows)
* - EH_START_AT_OFFSET(x): Mark Start of function (Delayed)
* - EH_END_AT_OFFSET(x): Mark End of function (Delayed)
* - EH_END_PROLOGUE: End of function prologue
* - EH_START_EPILOGUE: Start of function epilogue
* - EH_END_EPILOGUE: End of function epilogue
* - EH_SAVE_FP_LR(x): Save Frame Pointer and Link Register
* - EH_STACK_ALLOC(x): Stack allocation (inside prologue)
* - EH_ADD_FP(x): Add to Frame Pointer
* - EH_NOP: Mark instruction with no unwinding relevance
*
* On non-Windows platforms, EH_START and EH_END emit the CFI start/end-of-function
* directives and all other macros expand to nothing, so they can be used without causing issues.
*/
#ifdef _WIN32
// Windows: the function start/end are marked late, through the *_AT_OFFSET
// variants, so EH_START and EH_END expand to nothing here.
# define EH_START
# define EH_END
// The SEH procedure is always registered under the objc_msgSend symbol.
# define EH_START_AT_OFFSET .seh_proc objc_msgSend
# define EH_END_AT_OFFSET .seh_endproc objc_msgSend
// Each remaining macro maps directly onto one .seh_* assembler directive.
# define EH_END_PROLOGUE .seh_endprologue
# define EH_START_EPILOGUE .seh_startepilogue
# define EH_END_EPILOGUE .seh_endepilogue
# define EH_SAVE_FP_LR(x) .seh_save_fplr x
# define EH_STACK_ALLOC(x) .seh_stackalloc x
# define EH_ADD_FP(x) .seh_add_fp x
# define EH_NOP .seh_nop
#else
// Marks the real start and end of the function
# define EH_START .cfi_startproc
# define EH_END .cfi_endproc
// The following directives are either not
// needed or not available with CFI
// (they expand to nothing, so call sites need no #ifdef of their own)
# define EH_START_AT_OFFSET
# define EH_END_AT_OFFSET
# define EH_END_PROLOGUE
# define EH_START_EPILOGUE
# define EH_END_EPILOGUE
# define EH_SAVE_FP_LR(x)
# define EH_STACK_ALLOC(x)
# define EH_ADD_FP(x)
# define EH_NOP
#endif
.globl CDECL(objc_msgSend_fpret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_fpret), %function)
.globl CDECL(objc_msgSend)
TYPE_DIRECTIVE(CDECL(objc_msgSend), %function)
.globl CDECL(objc_msgSend_stret)
TYPE_DIRECTIVE(CDECL(objc_msgSend_stret), %function)
CDECL(objc_msgSend):
CDECL(objc_msgSend_fpret):
CDECL(objc_msgSend_stret):
EH_START
cbz x0, 4f // Skip everything if the receiver is nil
// Jump to 6: if this is a small object // Jump to 6: if this is a small object
ubfx x9, \receiver, #0, #SMALLOBJ_BITS ubfx x9, x0, #0, #SMALLOBJ_BITS
cbnz x9, 6f cbnz x9, 6f
ldr x9, [\receiver] // Load class to x9 if not a small int ldr x9, [x0] // Load class to x9 if not a small int
1: 1:
ldr x9, [x9, #DTABLE_OFFSET] // Dtable -> x9 ldr x9, [x9, #DTABLE_OFFSET] // Dtable -> x9
ldr w10, [\sel] // selector->index -> x10 ldr w10, [x1] // selector->index -> x10
ldr w11, [x9, #SHIFT_OFFSET] // dtable->shift -> x11 ldr w11, [x9, #SHIFT_OFFSET] // dtable->shift -> x11
cmp x11, #8 // If this is a small dtable, jump to the cmp x11, #8 // If this is a small dtable, jump to the
@ -41,59 +114,114 @@
mov v0.d[1], x0 mov v0.d[1], x0
br lr br lr
5: // Slow lookup 5: // Slow lookup
EH_START_AT_OFFSET
// Save anything that will be clobbered by // Save anything that will be clobbered by
// the call // the call.
// Note that we pre-index (see "!"), meaning
// that we adjust the sp before storing the pair
// of registers.
stp x0, x1, [sp, #-(ARGUMENT_SPILL_SIZE)]! stp x0, x1, [sp, #-(ARGUMENT_SPILL_SIZE)]!
stp x2, x3, [sp, #16] // The order is arbitrary, except that EH_STACK_ALLOC((ARGUMENT_SPILL_SIZE))
stp x4, x5, [sp, #32] // fp and lr must be spilled together and
stp x6, x7, [sp, #48] // it's convenient if \receiver is spilled at sp stp x2, x3, [sp, #16]
EH_NOP // The following instructions can be ignored by SEH
stp x4, x5, [sp, #32]
EH_NOP
stp x6, x7, [sp, #48]
EH_NOP
stp q0, q1, [sp, #64] stp q0, q1, [sp, #64]
EH_NOP
stp q2, q3, [sp, #96] stp q2, q3, [sp, #96]
EH_NOP
stp q4, q5, [sp, #128] stp q4, q5, [sp, #128]
EH_NOP
stp q6, q7, [sp, #160] stp q6, q7, [sp, #160]
stp fp, lr, [sp, #192] EH_NOP
add fp, sp, 192 stp fp, lr, [sp, #192] // The order is arbitrary, except that
stp \receiver, x8, [sp, #-16]! EH_SAVE_FP_LR(192) // fp and lr must be spilled together
add fp, sp, 192 // Adjust frame pointer
EH_ADD_FP(192)
stp x0, x8, [sp, #-16]! // it's convenient if x0 is spilled at sp
EH_STACK_ALLOC(16) // stp performed pre-indexing by sp-16
EH_END_PROLOGUE
#ifndef _WIN32
.cfi_def_cfa fp, 16 .cfi_def_cfa fp, 16
.cfi_offset fp, -16 .cfi_offset fp, -16
.cfi_offset lr, -8 .cfi_offset lr, -8
#endif
// We now have all argument registers, the link // We now have all argument registers, the link
// register and the receiver spilled on the // register and the receiver spilled on the
// stack, with sp containing // stack, with sp containing
// the address of the receiver // the address of the receiver
mov x0, sp // &self, _cmd in arguments mov x0, sp // &self, _cmd in arguments
mov x1, \sel mov x1, x1
bl CDECL(slowMsgLookup) // This is the only place where the CFI directives bl CDECL(slowMsgLookup) // This is the only place where the EH directives
// have to be accurate... // have to be accurate...
mov x9, x0 // IMP -> x9 mov x9, x0 // IMP -> x9
// Epilogue of the slow-lookup path: restore every register spilled by the
// prologue, release the stack frame and tail-call the IMP held in x9.
// The prologue's final `stp x0, x8, [sp, #-16]!` lowered sp by a further 16
// bytes, so every reload offset below is the matching spill offset plus 16.
EH_START_EPILOGUE
ldp x0, x1, [sp, #16] // Reload spilled argument registers
EH_NOP
ldp x2, x3, [sp, #32]
EH_NOP
ldp x4, x5, [sp, #48] // Spilled at #32; #64 is the x6/x7 slot
EH_NOP
ldp x6, x7, [sp, #64]
EH_NOP
ldp q0, q1, [sp, #80]
EH_NOP
ldp q2, q3, [sp, #112]
EH_NOP
ldp q4, q5, [sp, #144]
EH_NOP
ldp q6, q7, [sp, #176]
EH_NOP
ldp fp, lr, [sp, #208]
EH_SAVE_FP_LR(208)
// Post-increment sp += ARGUMENT_SPILL_SIZE +16
// x0 is reloaded from the slot whose address was passed to slowMsgLookup.
ldp x0, x8, [sp], #(ARGUMENT_SPILL_SIZE + 16)
EH_STACK_ALLOC((ARGUMENT_SPILL_SIZE + 16))
EH_END_EPILOGUE
EH_END_AT_OFFSET
br x9 // Tail-call the looked-up IMP
6: 6:
adrp x10, :got:SmallObjectClasses // Load 63:12 of SmallObjectClasses address
ldr x10, [x10, :got_lo12:SmallObjectClasses] // We use the CDECL macro as Windows prefixes
// cdecl conforming symbols with "_".
adrp x10, CDECL(SmallObjectClasses) // The macro handles this transparently.
// Add lower 12-bits of SmallObjectClasses address to x10
add x10, x10, :lo12:CDECL(SmallObjectClasses)
ldr x9, [x10, x9, lsl #3] ldr x9, [x10, x9, lsl #3]
b 1b b 1b
.cfi_endproc EH_END
.endm
.globl CDECL(objc_msgSend_fpret) #ifdef _WIN32
TYPE_DIRECTIVE(CDECL(objc_msgSend_fpret), %function) .text
.globl CDECL(objc_msgSend) .def objc_msgSend;
TYPE_DIRECTIVE(CDECL(objc_msgSend), %function) .scl 2;
.globl CDECL(objc_msgSend_stret) .type 32;
TYPE_DIRECTIVE(CDECL(objc_msgSend_stret), %function) .endef
CDECL(objc_msgSend): .def objc_msgSend_fpret;
CDECL(objc_msgSend_fpret): .scl 2;
CDECL(objc_msgSend_stret): .type 32;
MSGSEND x0, x1 .endef
.def objc_msgSend_stret;
.scl 2;
.type 32;
.endef
.section .drectve,"yn"
.ascii " /EXPORT:objc_msgSend"
.ascii " /EXPORT:objc_msgSend_fpret"
.ascii " /EXPORT:objc_msgSend_stret"
#endif
Loading…
Cancel
Save