diff --git a/Test/BlockImpTest.m b/Test/BlockImpTest.m index 713ebcc..190779a 100644 --- a/Test/BlockImpTest.m +++ b/Test/BlockImpTest.m @@ -21,6 +21,7 @@ __attribute__((objc_root_class)) +(struct big)sret; @end + int main(void) { __block int b = 0; @@ -37,7 +38,10 @@ int main(void) assert(4 == [Foo count: 2]); assert(6 == [Foo count: 2]); assert(imp_getBlock(imp) == (blk)); - imp_removeBlock(blk); + IMP imp2 = imp_implementationWithBlock(blk); + assert(imp != imp2); + imp_removeBlock(imp); + assert(imp_getBlock(imp) != (blk)); blk = ^(id self) { struct big b = {1, 2, 3, 4, 5}; diff --git a/block_to_imp.c b/block_to_imp.c index 9fadea2..c927be1 100644 --- a/block_to_imp.c +++ b/block_to_imp.c @@ -35,136 +35,198 @@ void __clear_cache(void* start, void* end); #define PAGE_SIZE 4096 -static void *executeBuffer; -static void *writeBuffer; -static ptrdiff_t offset; -static mutex_t trampoline_lock; -#ifndef SHM_ANON -static char *tmpPattern; -static void initTmpFile(void) +struct block_header { - char *tmp = getenv("TMPDIR"); - if (NULL == tmp) - { - tmp = "/tmp/"; - } - if (0 > asprintf(&tmpPattern, "%s/objc_trampolinesXXXXXXXXXXX", tmp)) - { - abort(); - } -} -static int getAnonMemFd(void) + void *block; + void(*fnptr)(void); + /** + * On 64-bit platforms, we have 16 bytes for instructions, which ought to + * be enough without padding. On MIPS, we need + * Note: If we add too much padding, then we waste space but have no other + * ill effects. If we get this too small, then the assert in + * `init_trampolines` will fire on library load. + */ +#if defined(__i386__) || (defined(__mips__) && !defined(__mips_n64)) + uint64_t padding[3]; +#elif defined(__mips__) + uint64_t padding[2]; +#elif defined(__arm__) + uint64_t padding; +#endif +}; + +#define HEADERS_PER_PAGE (PAGE_SIZE/sizeof(struct block_header)) + +/** + * Structure containing a two pages of block trampolines. Each trampoline + * loads its block and target method address from the corresponding + * block_header (one page before the start of the block structure). + */ +struct trampoline_buffers { - const char *pattern = strdup(tmpPattern); - int fd = mkstemp(pattern); - unlink(pattern); - free(pattern); - return fd; -} -#else -static void initTmpFile(void) {} -static int getAnonMemFd(void) + struct block_header headers[HEADERS_PER_PAGE]; + char rx_buffer[PAGE_SIZE]; +}; +_Static_assert(__builtin_offsetof(struct trampoline_buffers, rx_buffer) == PAGE_SIZE, + "Incorrect offset for read-execute buffer"); +_Static_assert(sizeof(struct trampoline_buffers) == 2*PAGE_SIZE, + "Incorrect size for trampoline buffers"); + +struct trampoline_set { - return shm_open(SHM_ANON, O_CREAT | O_RDWR, 0); -} -#endif + struct trampoline_buffers *buffers; + struct trampoline_set *next; + int first_free; +}; + +static mutex_t trampoline_lock; struct wx_buffer { void *w; void *x; }; +extern char __objc_block_trampoline; +extern char __objc_block_trampoline_end; +extern char __objc_block_trampoline_sret; +extern char __objc_block_trampoline_end_sret; PRIVATE void init_trampolines(void) { + fprintf(stderr, "Trampoline size: %d, header size: %d\n", (int)(&__objc_block_trampoline_end - &__objc_block_trampoline), (int)sizeof(struct block_header)); + assert(&__objc_block_trampoline_end - &__objc_block_trampoline <= sizeof(struct block_header)); + assert(&__objc_block_trampoline_end_sret - &__objc_block_trampoline_sret <= sizeof(struct block_header)); INIT_LOCK(trampoline_lock); - initTmpFile(); } -static struct wx_buffer alloc_buffer(size_t size) +static id invalid(id self, SEL _cmd) { - LOCK_FOR_SCOPE(&trampoline_lock); - if ((0 == offset) || (offset + size >= PAGE_SIZE)) + fprintf(stderr, "Invalid block method called for [%s %s]\n", + class_getName(object_getClass(self)), sel_getName(_cmd)); + return nil; +} + +static struct trampoline_set *alloc_trampolines(char *start, char *end) +{ + struct trampoline_set *metadata = calloc(1, sizeof(struct trampoline_set)); + metadata->buffers = valloc(sizeof(struct trampoline_buffers)); + for (int i=0 ; ibuffers->headers[i].fnptr = (void(*)(void))invalid; + metadata->buffers->headers[i].block = &metadata->buffers->headers[i+1].block; + char *block = metadata->buffers->rx_buffer + (i * sizeof(struct trampoline_buffers)); + memcpy(block, start, end-start); } - struct wx_buffer b = { writeBuffer + offset, executeBuffer + offset }; - offset += size; - return b; + metadata->buffers->headers[HEADERS_PER_PAGE-1].block = NULL; + mprotect(metadata->buffers->rx_buffer, PAGE_SIZE, PROT_READ | PROT_EXEC); + clear_cache(metadata->buffers->rx_buffer, &metadata->buffers->rx_buffer[PAGE_SIZE]); + return metadata; } -extern void __objc_block_trampoline; -extern void __objc_block_trampoline_end; -extern void __objc_block_trampoline_sret; -extern void __objc_block_trampoline_end_sret; +static struct trampoline_set *sret_trampolines; +static struct trampoline_set *trampolines; IMP imp_implementationWithBlock(void *block) { struct Block_layout *b = block; void *start; void *end; + LOCK_FOR_SCOPE(&trampoline_lock); + struct trampoline_set **setptr; if ((b->flags & BLOCK_USE_SRET) == BLOCK_USE_SRET) { + setptr = &sret_trampolines; start = &__objc_block_trampoline_sret; end = &__objc_block_trampoline_end_sret; } else { + setptr = &trampolines; start = &__objc_block_trampoline; end = &__objc_block_trampoline_end; } - size_t trampolineSize = end - start; // If we don't have a trampoline intrinsic for this architecture, return a // null IMP. if (0 >= trampolineSize) { return 0; } + // Allocate some trampolines if this is the first time that we need to do this. + if (*setptr == NULL) + { + *setptr = alloc_trampolines(start, end); + } + for (struct trampoline_set *set=*setptr ; set!=NULL ; set=set->next) + { + if (set->first_free != -1) + { + int i = set->first_free; + struct block_header *h = &set->buffers->headers[i]; + struct block_header *next = h->block; + set->first_free = next ? (next - set->buffers->headers) : -1; + assert(set->first_free < HEADERS_PER_PAGE); + assert(set->first_free >= -1); + h->fnptr = (void(*)(void))b->invoke; + h->block = b; + fprintf(stderr, "Putting block %d invoke %p (block %p) at %p (block at %p)\n", i, b->invoke, &b, &h->fnptr, &h->block); + fprintf(stderr, "Returning imp: %p\n", &set->buffers->rx_buffer[i*sizeof(struct block_header)]); - struct wx_buffer buf = alloc_buffer(trampolineSize + 2*sizeof(void*)); - void **out = buf.w; - out[0] = (void*)b->invoke; - out[1] = Block_copy(b); - memcpy(&out[2], start, trampolineSize); - out = buf.x; - char *newIMP = (char*)&out[2]; - clear_cache(newIMP, newIMP+trampolineSize); - return (IMP)newIMP; + return (IMP)&set->buffers->rx_buffer[i*sizeof(struct block_header)]; + } + } + UNREACHABLE("Failed to allocate block"); } -static void* isBlockIMP(void *anIMP) +static int indexForIMP(IMP anIMP, struct trampoline_set **setptr) { - LOCK(&trampoline_lock); - void *e = executeBuffer; - void *w = writeBuffer; - UNLOCK(&trampoline_lock); - while (e) + for (struct trampoline_set *set=*setptr ; set!=NULL ; set=set->next) { - if ((anIMP > e) && (anIMP < e + PAGE_SIZE)) + if (((char*)anIMP >= set->buffers->rx_buffer) && + ((char*)anIMP < &set->buffers->rx_buffer[PAGE_SIZE])) { - return ((char*)w) + ((char*)anIMP - (char*)e); + *setptr = set; + ptrdiff_t offset = (char*)anIMP - set->buffers->rx_buffer; + return offset / sizeof(struct block_header); } - e = *(void**)e; - w = *(void**)w; } - return 0; + return -1; } void *imp_getBlock(IMP anImp) { - if (0 == isBlockIMP((void*)anImp)) { return 0; } - return *(((void**)anImp) - 1); + LOCK_FOR_SCOPE(&trampoline_lock); + struct trampoline_set *set = trampolines; + int idx = indexForIMP(anImp, &set); + if (idx == -1) + { + set = sret_trampolines; + indexForIMP(anImp, &set); + } + if (idx == -1) + { + return NULL; + } + return set->buffers->headers[idx].block; } + BOOL imp_removeBlock(IMP anImp) { - void *w = isBlockIMP((void*)anImp); - if (0 == w) { return NO; } - Block_release(((void**)anImp) - 1); + LOCK_FOR_SCOPE(&trampoline_lock); + struct trampoline_set *set = trampolines; + int idx = indexForIMP(anImp, &set); + if (idx == -1) + { + set = sret_trampolines; + indexForIMP(anImp, &set); + } + if (idx == -1) + { + return NO; + } + struct block_header *h = &set->buffers->headers[idx]; + Block_release(h->block); + h->fnptr = (void(*)(void))invalid; + h->block = set->first_free == -1 ? NULL : &set->buffers->headers[set->first_free]; + set->first_free = h - set->buffers->headers; return YES; } diff --git a/block_trampolines.S b/block_trampolines.S index f95d6a0..1ea96a8 100644 --- a/block_trampolines.S +++ b/block_trampolines.S @@ -42,74 +42,76 @@ TYPE_DIRECTIVE(CDECL(__objc_block_trampoline), @function) #define ARG2 %rdx #endif CDECL(__objc_block_trampoline): - mov -15(%rip), ARG1 # Load the block pointer into the second argument + mov -0x1007(%rip), ARG1 # Load the block pointer into the second argument xchg ARG1, ARG0 # Swap the first and second arguments - jmp *-32(%rip) # Call the block function + jmp *-0x1008(%rip) # Call the block function CDECL(__objc_block_trampoline_end): CDECL(__objc_block_trampoline_sret): - mov -15(%rip), ARG2 # Load the block pointer into the second argument + mov -0x1007(%rip), ARG2 # Load the block pointer into the second argument xchg ARG1, ARG2 # Swap the first and second arguments - jmp *-32(%rip) # Call the block function + jmp *-0x1008(%rip) # Call the block function CDECL(__objc_block_trampoline_end_sret): #elif __i386 CDECL(__objc_block_trampoline): - call next_line # Store the instruction pointer on the stack -next_line: + call Lnext_line # Store the instruction pointer on the stack +Lnext_line: pop %eax # Load the old instruction pointer mov 4(%esp), %ebx # Load the self parameter mov %ebx, 8(%esp) # Store self as the second argument - mov -9(%eax), %ebx # Load the block pointer to %ebx + mov -0x1005(%eax), %ebx # Load the block pointer to %ebx mov %ebx, 4(%esp) # Store the block pointer in the first argument - jmp *-13(%eax) # Call the block function + jmp *-0x1001(%eax) # Call the block function CDECL(__objc_block_trampoline_end): CDECL(__objc_block_trampoline_sret): - call next_line2 # Store the instruction pointer on the stack -next_line2: + call Lnext_line2 # Store the instruction pointer on the stack +Lnext_line2: pop %eax # Load the old instruction pointer mov 8(%esp), %ebx # Load the self parameter mov %ebx, 12(%esp) # Store self as the second argument - mov -9(%eax), %ebx # Load the block pointer to %ebx + mov -0x1005(%eax), %ebx # Load the block pointer to %ebx mov %ebx, 8(%esp) # Store the block pointer in the first argument - jmp *-13(%eax) # Call the block function + jmp *-0x1001(%eax) # Call the block function CDECL(__objc_block_trampoline_end_sret): #elif __mips__ # ifdef _ABI64 CDECL(__objc_block_trampoline): move $a1, $a0 - ld $a0, -16($25) - ld $25, -8($25) + ld $a0, -4096($25) + ld $25, -4088($25) jr $25 CDECL(__objc_block_trampoline_end): CDECL(__objc_block_trampoline_sret): move $a2, $a1 - ld $a1, -16($25) - ld $25, -8($25) + ld $a1, -4096($25) + ld $25, -4088($25) jr $25 CDECL(__objc_block_trampoline_end_sret): # else CDECL(__objc_block_trampoline): move $a1, $a0 - lw $a0, -8($25) - lw $25, -4($25) + lw $a0, -4096($25) + lw $25, -4092($25) jr $25 CDECL(__objc_block_trampoline_end): CDECL(__objc_block_trampoline_sret): move $a2, $a1 - lw $a1, -8($25) - lw $25, -4($25) + lw $a1, -4096($25) + lw $25, -4092($25) jr $25 CDECL(__objc_block_trampoline_end_sret): # endif #elif __arm__ CDECL(__objc_block_trampoline): + sub r12, pc, #4096 mov r1, r0 // Move self over _cmd - ldr r0, [pc, #-16] // Load the block pointer over self - ldr pc, [pc, #-24] // Jump to the block function + ldr r0, [r12, #-4] // Load the block pointer over self + ldr pc, [r12] // Jump to the block function CDECL(__objc_block_trampoline_end): CDECL(__objc_block_trampoline_sret): + sub r12, pc, #4096 mov r2, r1 // Move self over _cmd - ldr r1, [pc, #-16] // Load the block pointer over self - ldr pc, [pc, #-24] // Jump to the block function + ldr r0, [r12, #-4] // Load the block pointer over self + ldr pc, [r12] // Jump to the block function CDECL(__objc_block_trampoline_end_sret): #else #warning imp_implementationWithBlock() not implemented for your architecture