9 files changed, 3689 insertions, 0 deletions
diff --git a/libffi-3.0.4/src/x86/darwin.S b/libffi-3.0.4/src/x86/darwin.S
new file mode 100644
index 0000000..b0b8cdf
--- /dev/null
+++ b/libffi-3.0.4/src/x86/darwin.S
@@ -0,0 +1,443 @@
+/* -----------------------------------------------------------------------
+   darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005  Red Hat, Inc.
+	Copyright (C) 2008  Free Software Foundation, Inc.
+
+   X86 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
+	ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl _ffi_prep_args
+
+	.align 4
+.globl _ffi_call_SYSV
+/* ffi_call_SYSV: 8(%ebp)=prep callback, 12=ecif, 16=arg bytes, 20=return type code, 24=rvalue, 28=fn.  */
+_ffi_call_SYSV:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+        subl $8,%esp			/* NOTE(review): presumably padding that keeps %esp 16-byte aligned -- confirm */
+	/* Make room for all of the new args.  */
+	movl  16(%ebp),%ecx
+	subl  %ecx,%esp
+
+	movl  %esp,%eax			/* %eax = base of the argument area */
+
+	/* Place all of the ffi_prep_args in position  */
+	subl  $8,%esp			/* with the two pushes below: 16 bytes in total */
+	pushl 12(%ebp)			/* 2nd argument: ecif */
+	pushl %eax			/* 1st argument: the argument area */
+	call  *8(%ebp)
+
+	/* Return stack to previous state and call the function  */
+	addl  $16,%esp
+
+	call  *28(%ebp)
+
+	/* Load %ecx with the return type code  */
+	movl  20(%ebp),%ecx
+
+	/* Protect %esi.  We're going to pop it in the epilogue.  */
+	pushl %esi
+
+	/* If the return value pointer is NULL, assume no return value.  */
+	cmpl  $0,24(%ebp)
+	jne  0f
+	/* No result storage, but an x87 result (FLOAT..LONGDOUBLE, which
+	   are consecutive codes, see the table) must still be popped.  */
+	cmpl  $FFI_TYPE_FLOAT,%ecx
+	jb    noretval
+	cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
+	ja    noretval
+	fstp  %st(0)
+	jmp   epilogue
+0:
+	.align 4
+	call 1f				/* pushes the address of .Lstore_table */
+.Lstore_table:
+	.long   noretval-.Lstore_table		/* FFI_TYPE_VOID */
+	.long   retint-.Lstore_table		/* FFI_TYPE_INT */
+	.long   retfloat-.Lstore_table		/* FFI_TYPE_FLOAT */
+	.long   retdouble-.Lstore_table		/* FFI_TYPE_DOUBLE */
+	.long   retlongdouble-.Lstore_table     /* FFI_TYPE_LONGDOUBLE */
+	.long   retuint8-.Lstore_table		/* FFI_TYPE_UINT8 */
+	.long   retsint8-.Lstore_table		/* FFI_TYPE_SINT8 */
+	.long   retuint16-.Lstore_table		/* FFI_TYPE_UINT16 */
+	.long   retsint16-.Lstore_table		/* FFI_TYPE_SINT16 */
+	.long   retint-.Lstore_table		/* FFI_TYPE_UINT32 */
+	.long   retint-.Lstore_table		/* FFI_TYPE_SINT32 */
+	.long   retint64-.Lstore_table		/* FFI_TYPE_UINT64 */
+	.long   retint64-.Lstore_table		/* FFI_TYPE_SINT64 */
+	.long   retstruct-.Lstore_table		/* FFI_TYPE_STRUCT */
+	.long   retint-.Lstore_table		/* FFI_TYPE_POINTER */
+	.long   retstruct1b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long   retstruct2b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_2B */
+1:
+	pop  %esi			/* %esi = address of .Lstore_table */
+	add  (%esi, %ecx, 4), %esi	/* add the table-relative offset for this type code */
+	jmp  *%esi
+
+	/* Sign/zero extend as appropriate.  */
+retsint8:
+	movsbl  %al, %eax
+	jmp  retint			/* then store the widened value as a 32-bit int */
+
+retsint16:
+	movswl  %ax, %eax
+	jmp  retint
+
+retuint8:
+	movzbl  %al, %eax
+	jmp  retint
+
+retuint16:
+	movzwl  %ax, %eax
+	jmp  retint
+
+retfloat:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstps (%ecx)
+	jmp   epilogue
+
+retdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstpl (%ecx)
+	jmp   epilogue
+
+retlongdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstpt (%ecx)
+	jmp   epilogue
+
+retint64:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movl  %eax,0(%ecx)		/* low half */
+	movl  %edx,4(%ecx)		/* high half */
+	jmp   epilogue
+
+retstruct1b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movb  %al,0(%ecx)
+	jmp   epilogue
+
+retstruct2b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movw  %ax,0(%ecx)
+	jmp   epilogue
+
+retint:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movl  %eax,0(%ecx)
+
+retstruct:
+	/* Nothing to do!  */
+
+noretval:
+epilogue:
+	popl %esi			/* restore the %esi pushed after the call */
+	movl %ebp,%esp
+	popl %ebp
+	ret
+
+.LFE1:
+.ffi_call_SYSV_end:
+
+	.align	4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl _ffi_closure_SYSV
+/* Closure entry stub: calls ffi_closure_SYSV_inner, then loads the result into the return register(s) chosen by the type code it returned.  */
+_ffi_closure_SYSV:
+.LFB2:
+	pushl	%ebp
+.LCFI2:
+	movl	%esp, %ebp
+.LCFI3:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx		/* %edx = local result buffer */
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+	movl	%ebx, 8(%esp)		/* park %ebx in the frame across the call */
+.LCFI7:
+	call	L_ffi_closure_SYSV_inner$stub	/* %eax, untouched so far, carries the first (regparm) argument */
+	movl	8(%esp), %ebx
+	movl	-12(%ebp), %ecx		/* %ecx = resp, as possibly updated by the callee */
+	cmpl	$FFI_TYPE_INT, %eax	/* %eax = type code returned by the inner function */
+	je	.Lcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lcls_retint
+
+0:	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lcls_retllong
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, %eax
+	je	.Lcls_retstruct1b
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, %eax
+	je	.Lcls_retstruct2b
+	cmpl	$FFI_TYPE_STRUCT, %eax
+	je	.Lcls_retstruct
+.Lcls_epilogue:				/* any other code: nothing is loaded */
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retllong:
+	movl	(%ecx), %eax		/* low half */
+	movl	4(%ecx), %edx		/* high half */
+	jmp	.Lcls_epilogue
+.Lcls_retstruct1b:
+	movsbl	(%ecx), %eax		/* one byte, sign-extended */
+	jmp	.Lcls_epilogue
+.Lcls_retstruct2b:
+	movswl	(%ecx), %eax		/* two bytes, sign-extended */
+	jmp	.Lcls_epilogue
+.Lcls_retstruct:
+	lea -8(%ebp),%esp		/* NOTE(review): no lasting effect, %esp is reloaded on the next line */
+	movl	%ebp, %esp
+	popl	%ebp
+	ret $4				/* also discard the 4-byte hidden struct-return pointer */
+.LFE2:
+
+#if !FFI_NO_RAW_API
+/* Field offsets used below: cif, fun and user_data follow the trampoline, rounded up to 4 bytes.  */
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20	/* hard-coded; NOTE(review): presumably offsetof (ffi_cif, flags) -- confirm against ffi.h */
+
+	.align	4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl _ffi_closure_raw_SYSV
+/* Raw closure entry stub: calls closure->fun (cif, &res, raw_args, user_data) directly on the caller's stack image.  */
+_ffi_closure_raw_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	pushl	%esi
+.LCFI6:
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lrcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lrcls_retint
+0:
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lrcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lrcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lrcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lrcls_retllong
+.Lrcls_epilogue:				/* any other code: nothing is loaded */
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.Lrcls_retint:
+	movl	-24(%ebp), %eax		/* res lives at -24(%ebp) */
+	jmp	.Lrcls_epilogue
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax		/* low half */
+	movl	-20(%ebp), %edx		/* high half */
+	jmp	.Lrcls_epilogue
+.LFE3:
+#endif
+
+.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
+L_ffi_closure_SYSV_inner$stub:		/* 5-byte stub slot called from _ffi_closure_SYSV */
+	.indirect_symbol _ffi_closure_SYSV_inner
+	hlt ; hlt ; hlt ; hlt ; hlt	/* placeholder bytes; NOTE(review): presumably rewritten by the dynamic linker -- confirm */
+
+
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set	L$set$0,LECIE1-LSCIE1	/* CIE Length */
+	.long	L$set$0
+LSCIE1:
+	.long	0x0		/* CIE Identifier Tag */
+	.byte	0x1		/* CIE Version */
+	.ascii "zR\0"		/* CIE Augmentation */
+	.byte	0x1		/* uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c		/* sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8		/* CIE RA Column */
+	.byte	0x1		/* uleb128 0x1; Augmentation size */
+	.byte	0x10		/* FDE Encoding (pcrel) */
+	.byte	0xc		/* DW_CFA_def_cfa */
+	.byte	0x5		/* uleb128 0x5; CFA register number */
+	.byte	0x4		/* uleb128 0x4; CFA offset */
+	.byte	0x88		/* DW_CFA_offset, column 0x8 */
+	.byte	0x1		/* uleb128 0x1; saved at CFA-4 */
+	.align 2
+LECIE1:
+.globl _ffi_call_SYSV.eh
+_ffi_call_SYSV.eh:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1	/* FDE Length */
+	.long	L$set$1
+LASFDE1:
+	.long	LASFDE1-EH_frame1	/* FDE CIE offset */
+	.long	.LFB1-.			/* FDE initial location */
+	.set L$set$2,.LFE1-.LFB1	/* FDE address range */
+	.long L$set$2
+	.byte	0x0			/* Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set L$set$3,.LCFI0-.LFB1
+	.long L$set$3
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	0x8			/* uleb128 0x8 */
+	.byte	0x84			/* DW_CFA_offset, column 0x4 */
+	.byte	0x2			/* uleb128 0x2; saved at CFA-8 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set L$set$4,.LCFI1-.LCFI0
+	.long L$set$4
+	.byte	0xd			/* DW_CFA_def_cfa_register */
+	.byte	0x4			/* uleb128 0x4 */
+	.align 2
+LEFDE1:
+.globl _ffi_closure_SYSV.eh
+_ffi_closure_SYSV.eh:
+LSFDE2:
+	.set	L$set$5,LEFDE2-LASFDE2	/* FDE Length */
+	.long	L$set$5
+LASFDE2:
+	.long	LASFDE2-EH_frame1	/* FDE CIE offset */
+	.long	.LFB2-.			/* FDE initial location */
+	.set L$set$6,.LFE2-.LFB2	/* FDE address range */
+	.long L$set$6
+	.byte	0x0			/* Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set L$set$7,.LCFI2-.LFB2
+	.long L$set$7
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	0x8			/* uleb128 0x8 */
+	.byte	0x84			/* DW_CFA_offset, column 0x4 */
+	.byte	0x2			/* uleb128 0x2; saved at CFA-8 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set L$set$8,.LCFI3-.LCFI2
+	.long L$set$8
+	.byte	0xd			/* DW_CFA_def_cfa_register */
+	.byte	0x4			/* uleb128 0x4 */
+	.align 2
+LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+.globl _ffi_closure_raw_SYSV.eh
+_ffi_closure_raw_SYSV.eh:
+LSFDE3:
+	.set	L$set$10,LEFDE3-LASFDE3	/* FDE Length */
+	.long	L$set$10
+LASFDE3:
+	.long	LASFDE3-EH_frame1	/* FDE CIE offset */
+	.long	.LFB3-.			/* FDE initial location */
+	.set L$set$11,.LFE3-.LFB3	/* FDE address range */
+	.long L$set$11
+	.byte	0x0			/* Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set L$set$12,.LCFI4-.LFB3
+	.long L$set$12
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	0x8			/* uleb128 0x8 */
+	.byte	0x84			/* DW_CFA_offset, column 0x4 */
+	.byte	0x2			/* uleb128 0x2; saved at CFA-8 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set L$set$13,.LCFI5-.LCFI4
+	.long L$set$13
+	.byte	0xd			/* DW_CFA_def_cfa_register */
+	.byte	0x4			/* uleb128 0x4 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set L$set$14,.LCFI6-.LCFI5
+	.long L$set$14
+	.byte	0x85			/* DW_CFA_offset, column 0x5 */
+	.byte	0x3			/* uleb128 0x3; saved at CFA-12 */
+	.align 2
+LEFDE3:
+
+#endif
+
+#endif /* ifndef __x86_64__ */
diff --git a/libffi-3.0.4/src/x86/darwin64.S b/libffi-3.0.4/src/x86/darwin64.S
new file mode 100644
index 0000000..5ba0a5f
--- /dev/null
+++ b/libffi-3.0.4/src/x86/darwin64.S
@@ -0,0 +1,415 @@
+/* -----------------------------------------------------------------------
+   darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
+   derived from unix64.S
+
+   x86-64 Foreign Function Interface for Darwin.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+	.file "darwin64.S"
+.text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+		    void *raddr, void (*fnaddr)(), unsigned ssecount);
+
+   A bit of trickiness here -- ARGS+BYTES is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.  */
+
+	.align	3
+	.globl	_ffi_call_unix64
+/* In: %rdi=args %rsi=bytes %rdx=flags %rcx=raddr %r8=fnaddr %r9d=ssecount.  */
+_ffi_call_unix64:
+LUW0:
+	movq	(%rsp), %r10		/* Load return address.  */
+	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
+	movq	%rdx, (%rax)		/* Save flags.  */
+	movq	%rcx, 8(%rax)		/* Save raddr.  */
+	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
+	movq	%r10, 24(%rax)		/* Relocate return address.  */
+	movq	%rax, %rbp		/* Finalize local stack frame.  */
+LUW1:
+	movq	%rdi, %r10		/* Save a copy of the register area. */
+	movq	%r8, %r11		/* Save a copy of the target fn.  */
+	movl	%r9d, %eax		/* Set number of SSE registers.  */
+
+	/* Load up all argument registers.  */
+	movq	(%r10), %rdi
+	movq	8(%r10), %rsi
+	movq	16(%r10), %rdx
+	movq	24(%r10), %rcx
+	movq	32(%r10), %r8
+	movq	40(%r10), %r9
+	testl	%eax, %eax		/* Any SSE registers to load?  */
+	jnz	Lload_sse
+Lret_from_load_sse:
+
+	/* Deallocate the reg arg area.  */
+	leaq	176(%r10), %rsp		/* 176 = 6*8 GPR bytes + 8*16 SSE bytes.  */
+
+	/* Call the user function.  */
+	call	*%r11
+
+	/* Deallocate stack arg area; local stack frame in redzone.  */
+	leaq	24(%rbp), %rsp		/* %rsp -> the relocated return address.  */
+
+	movq	0(%rbp), %rcx		/* Reload flags.  */
+	movq	8(%rbp), %rdi		/* Reload raddr.  */
+	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
+LUW2:
+
+	/* The first byte of the flags contains the FFI_TYPE.  */
+	movzbl	%cl, %r10d
+	leaq	Lstore_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10	/* Entries are offsets from Lstore_table.  */
+	addq	%r11, %r10
+	jmp	*%r10
+
+Lstore_table:
+	.long	Lst_void-Lstore_table		/* FFI_TYPE_VOID */
+	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_INT */
+	.long	Lst_float-Lstore_table		/* FFI_TYPE_FLOAT */
+	.long	Lst_double-Lstore_table		/* FFI_TYPE_DOUBLE */
+	.long	Lst_ldouble-Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	Lst_uint8-Lstore_table		/* FFI_TYPE_UINT8 */
+	.long	Lst_sint8-Lstore_table		/* FFI_TYPE_SINT8 */
+	.long	Lst_uint16-Lstore_table		/* FFI_TYPE_UINT16 */
+	.long	Lst_sint16-Lstore_table		/* FFI_TYPE_SINT16 */
+	.long	Lst_uint32-Lstore_table		/* FFI_TYPE_UINT32 */
+	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_SINT32 */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_UINT64 */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_SINT64 */
+	.long	Lst_struct-Lstore_table		/* FFI_TYPE_STRUCT */
+	.long	Lst_int64-Lstore_table		/* FFI_TYPE_POINTER */
+
+	.text				/* Store handlers: %rdi = raddr, %ecx = flags; each returns for ffi_call_unix64.  */
+	.align	3
+Lst_void:
+	ret
+	.align	3
+Lst_uint8:
+	movzbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_sint8:
+	movsbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_uint16:
+	movzwq	%ax, %rax
+	movq	%rax, (%rdi)
+	ret				/* Must not fall into Lst_sint16, which would sign-extend.  */
+	.align	3
+Lst_sint16:
+	movswq	%ax, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_uint32:
+	movl	%eax, %eax		/* 32-bit move zero-extends into %rax.  */
+	movq	%rax, (%rdi)
+	ret				/* Must not fall into Lst_sint32, which would sign-extend.  */
+	.align	3
+Lst_sint32:
+	cltq
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_int64:
+	movq	%rax, (%rdi)
+	ret
+	.align	3
+Lst_float:
+	movss	%xmm0, (%rdi)
+	ret
+	.align	3
+Lst_double:
+	movsd	%xmm0, (%rdi)
+	ret
+Lst_ldouble:
+	fstpt	(%rdi)
+	ret
+	.align	3
+Lst_struct:
+	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */
+	/* We have to locate the values now, and since we don't want to
+	   write too much data into the user's return value, we spill the
+	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
+	   control where the values are located.  Only one of the three
+	   bits will be set; see ffi_prep_cif_machdep for the pattern.  */
+	movd	%xmm0, %r10
+	movd	%xmm1, %r11
+	testl	$0x100, %ecx		/* Bit 8: words are %xmm0, %rax.  */
+	cmovnz	%rax, %rdx
+	cmovnz	%r10, %rax
+	testl	$0x200, %ecx		/* Bit 9: words are %rax, %xmm0.  */
+	cmovnz	%r10, %rdx
+	testl	$0x400, %ecx		/* Bit 10: words are %xmm0, %xmm1.  */
+	cmovnz	%r10, %rax
+	cmovnz	%r11, %rdx
+	movq	%rax, (%rsi)		/* First word of the scratch copy.  */
+	movq	%rdx, 8(%rsi)		/* Second word.  */
+	/* Bits 12-31 contain the true size of the structure.  Copy from
+	   the scratch area to the true destination.  */
+	shrl	$12, %ecx
+	rep movsb			/* %ecx bytes from (%rsi) to (%rdi).  */
+	ret
+
+	/* Many times we can avoid loading any SSE registers at all.
+	   It's not worth an indirect jump to load the exact set of
+	   SSE registers needed; zero or all is a good compromise.  */
+	.align	3
+LUW3:
+Lload_sse:
+	movdqa	48(%r10), %xmm0		/* SSE slots start after the six 8-byte GPR slots.  */
+	movdqa	64(%r10), %xmm1
+	movdqa	80(%r10), %xmm2
+	movdqa	96(%r10), %xmm3
+	movdqa	112(%r10), %xmm4
+	movdqa	128(%r10), %xmm5
+	movdqa	144(%r10), %xmm6
+	movdqa	160(%r10), %xmm7
+	jmp	Lret_from_load_sse
+
+LUW4:					/* End of the ffi_call_unix64 unwind range.  */
+	.align	3
+	.globl	_ffi_closure_unix64
+/* Closure entry stub: saves the argument registers, calls ffi_closure_unix64_inner, then loads the result it describes.  */
+_ffi_closure_unix64:
+LUW5:
+	/* The carry flag is set by the trampoline iff SSE registers
+	   are used.  Don't clobber it before the branch instruction.  */
+	leaq    -200(%rsp), %rsp	/* leaq, unlike subq, leaves the flags alone.  */
+LUW6:
+	movq	%rdi, (%rsp)
+	movq    %rsi, 8(%rsp)
+	movq    %rdx, 16(%rsp)
+	movq    %rcx, 24(%rsp)
+	movq    %r8, 32(%rsp)
+	movq    %r9, 40(%rsp)
+	jc      Lsave_sse
+Lret_from_save_sse:
+
+	movq	%r10, %rdi		/* Arg 1: %r10 as left by the trampoline; NOTE(review): presumably the closure -- confirm.  */
+	leaq	176(%rsp), %rsi		/* Arg 2: 24-byte return value area at the top of the frame.  */
+	movq	%rsp, %rdx		/* Arg 3: saved register image (GPRs at 0, SSE at 48).  */
+	leaq	208(%rsp), %rcx		/* Arg 4: caller's stack arguments (200-byte frame + return address).  */
+	call	_ffi_closure_unix64_inner
+
+	/* Deallocate stack frame early; return value is now in redzone.  */
+	addq	$200, %rsp		/* The value area is now at -24(%rsp).  */
+LUW7:
+
+	/* The first byte of the return value contains the FFI_TYPE.  */
+	movzbl	%al, %r10d
+	leaq	Lload_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10	/* Entries are offsets from Lload_table.  */
+	addq	%r11, %r10
+	jmp	*%r10
+
+Lload_table:
+	.long	Lld_void-Lload_table		/* FFI_TYPE_VOID */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_INT */
+	.long	Lld_float-Lload_table		/* FFI_TYPE_FLOAT */
+	.long	Lld_double-Lload_table		/* FFI_TYPE_DOUBLE */
+	.long	Lld_ldouble-Lload_table		/* FFI_TYPE_LONGDOUBLE */
+	.long	Lld_int8-Lload_table		/* FFI_TYPE_UINT8 */
+	.long	Lld_int8-Lload_table		/* FFI_TYPE_SINT8 */
+	.long	Lld_int16-Lload_table		/* FFI_TYPE_UINT16 */
+	.long	Lld_int16-Lload_table		/* FFI_TYPE_SINT16 */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_UINT32 */
+	.long	Lld_int32-Lload_table		/* FFI_TYPE_SINT32 */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_UINT64 */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_SINT64 */
+	.long	Lld_struct-Lload_table		/* FFI_TYPE_STRUCT */
+	.long	Lld_int64-Lload_table		/* FFI_TYPE_POINTER */
+
+	.text				/* Load handlers: the return value sits at -24(%rsp), in the red zone.  */
+	.align	3
+Lld_void:
+	ret
+	.align	3
+Lld_int8:
+	movzbl	-24(%rsp), %eax		/* Zero-extended for both UINT8 and SINT8 (see Lload_table).  */
+	ret
+	.align	3
+Lld_int16:
+	movzwl	-24(%rsp), %eax		/* Zero-extended for both UINT16 and SINT16.  */
+	ret
+	.align	3
+Lld_int32:
+	movl	-24(%rsp), %eax
+	ret
+	.align	3
+Lld_int64:
+	movq	-24(%rsp), %rax
+	ret
+	.align	3
+Lld_float:
+	movss	-24(%rsp), %xmm0
+	ret
+	.align	3
+Lld_double:
+	movsd	-24(%rsp), %xmm0
+	ret
+	.align	3
+Lld_ldouble:
+	fldt	-24(%rsp)
+	ret
+	.align	3
+Lld_struct:
+	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
+	   both rdx and xmm1 with the second word.  For the remaining,
+	   bit 8 set means xmm0 gets the second word, and bit 9 means
+	   that rax gets the second word.  */
+	movq	-24(%rsp), %rcx		/* First word.  */
+	movq	-16(%rsp), %rdx		/* Second word.  */
+	movq	-16(%rsp), %xmm1
+	testl	$0x100, %eax
+	cmovnz	%rdx, %rcx
+	movd	%rcx, %xmm0
+	testl	$0x200, %eax
+	movq	-24(%rsp), %rax		/* movq leaves the flags from testl intact.  */
+	cmovnz	%rdx, %rax
+	ret
+
+	/* See the comment above Lload_sse; the same logic applies here.  */
+	.align	3
+LUW8:
+Lsave_sse:
+	movdqa	%xmm0, 48(%rsp)		/* SSE slots start after the six 8-byte GPR slots.  */
+	movdqa	%xmm1, 64(%rsp)
+	movdqa	%xmm2, 80(%rsp)
+	movdqa	%xmm3, 96(%rsp)
+	movdqa	%xmm4, 112(%rsp)
+	movdqa	%xmm5, 128(%rsp)
+	movdqa	%xmm6, 144(%rsp)
+	movdqa	%xmm7, 160(%rsp)
+	jmp	Lret_from_save_sse
+
+LUW9:					/* End of the ffi_closure_unix64 unwind range.  */
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set	L$set$0,LECIE1-LSCIE1		/* CIE Length */
+	.long	L$set$0
+LSCIE1:
+	.long	0x0		/* CIE Identifier Tag */
+	.byte	0x1		/* CIE Version */
+	.ascii	"zR\0"		/* CIE Augmentation */
+	.byte	0x1		/* uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x78		/* sleb128 -8; CIE Data Alignment Factor */
+	.byte	0x10		/* CIE RA Column */
+	.byte	0x1		/* uleb128 0x1; Augmentation size */
+	.byte	0x10		/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc		/* DW_CFA_def_cfa, %rsp offset 8 */
+	.byte	0x7		/* uleb128 0x7 */
+	.byte	0x8		/* uleb128 0x8 */
+	.byte	0x90		/* DW_CFA_offset, column 0x10 */
+	.byte	0x1		/* uleb128 0x1; saved at CFA-8 */
+	.align	3
+LECIE1:
+	.globl _ffi_call_unix64.eh
+_ffi_call_unix64.eh:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1	/* FDE Length */
+	.long	L$set$1
+LASFDE1:
+	.long	LASFDE1-EH_frame1	/* FDE CIE offset */
+	.quad	LUW0-.			/* FDE initial location */
+	.set	L$set$2,LUW4-LUW0	/* FDE address range */
+	.quad	L$set$2
+	.byte	0x0			/* Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$3,LUW1-LUW0
+	.long	L$set$3
+
+	/* New stack frame based off rbp.  This is an itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-8, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
+	.byte	0x6			/* uleb128 0x6 */
+	.byte	0x20			/* uleb128 0x20 */
+	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
+	.byte	0x2			/* uleb128 0x2 */
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$4,LUW2-LUW1
+	.long	L$set$4
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.byte	0x7			/* uleb128 0x7 */
+	.byte	0x8			/* uleb128 0x8 */
+	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$5,LUW3-LUW2
+	.long	L$set$5
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align	3
+LEFDE1:
+	.globl _ffi_closure_unix64.eh
+_ffi_closure_unix64.eh:
+LSFDE3:
+	.set	L$set$6,LEFDE3-LASFDE3	/* FDE Length */
+	.long	L$set$6
+LASFDE3:
+	.long	LASFDE3-EH_frame1	/* FDE CIE offset */
+	.quad	LUW5-.			/* FDE initial location */
+	.set	L$set$7,LUW9-LUW5	/* FDE address range */
+	.quad	L$set$7
+	.byte	0x0			/* Augmentation size */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$8,LUW6-LUW5
+	.long	L$set$8
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	208,1			/* uleb128 208 */
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$9,LUW7-LUW6
+	.long	L$set$9
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	0x8			/* uleb128 0x8; frame already released */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.set	L$set$10,LUW8-LUW7
+	.long	L$set$10
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align	3
+LEFDE3:
+	.subsections_via_symbols
+
+#endif /* __x86_64__ */
diff --git a/libffi-3.0.4/src/x86/ffi.c b/libffi-3.0.4/src/x86/ffi.c
new file mode 100644
index 0000000..85059d0
--- /dev/null
+++ b/libffi-3.0.4/src/x86/ffi.c
@@ -0,0 +1,475 @@
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007  Red Hat, Inc.
+           Copyright (c) 2002  Ranjit Mathew
+           Copyright (c) 2002  Bo Thorsen
+           Copyright (c) 2002  Roger Sayle
+	   Copyright (C) 2008  Free Software Foundation, Inc.
+
+   x86 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* Invoked by the assembly call stub after it has reserved stack space:
+   writes the outgoing arguments, in order, starting at STACK.
+   ECIF supplies the cif, the argument values and the result address.  */
+
+void ffi_prep_args(char *stack, extended_cif *ecif)
+{
+  ffi_cif *cif = ecif->cif;
+  ffi_type **type_it = cif->arg_types;
+  void **value_it = ecif->avalue;
+  char *slot = stack;
+  unsigned int left;
+
+  /* A struct returned in memory takes a hidden leading argument: the
+     address the callee writes the result to.  */
+  if (cif->flags == FFI_TYPE_STRUCT)
+    {
+      *(void **) slot = ecif->rvalue;
+      slot += 4;
+    }
+
+  /* Walk the declared types and the caller's value pointers in step.  */
+  for (left = cif->nargs; left != 0; left--, type_it++, value_it++)
+    {
+      size_t width = (*type_it)->size;
+
+      /* Each argument starts on an int boundary.  */
+      if (((unsigned) slot & (sizeof(int) - 1)) != 0)
+	slot = (char *) ALIGN(slot, sizeof(int));
+
+      if (width >= sizeof(int))
+	{
+	  /* Word-sized or larger: copy the object as it is.  */
+	  memcpy(slot, *value_it, width);
+	}
+      else
+	{
+	  /* Narrower than int: widen into one full int slot.  */
+	  width = sizeof(int);
+
+	  switch ((*type_it)->type)
+	    {
+	    case FFI_TYPE_SINT8:
+	      *(signed int *) slot = (signed int) *(SINT8 *) *value_it;
+	      break;
+
+	    case FFI_TYPE_UINT8:
+	      *(unsigned int *) slot = (unsigned int) *(UINT8 *) *value_it;
+	      break;
+
+	    case FFI_TYPE_SINT16:
+	      *(signed int *) slot = (signed int) *(SINT16 *) *value_it;
+	      break;
+
+	    case FFI_TYPE_UINT16:
+	      *(unsigned int *) slot = (unsigned int) *(UINT16 *) *value_it;
+	      break;
+
+	    case FFI_TYPE_SINT32:
+	      *(signed int *) slot = (signed int) *(SINT32 *) *value_it;
+	      break;
+
+	    case FFI_TYPE_UINT32:
+	    case FFI_TYPE_STRUCT:
+	      /* Both load one 32-bit word from the source object.  */
+	      *(unsigned int *) slot = (unsigned int) *(UINT32 *) *value_it;
+	      break;
+
+	    default:
+	      /* Any other type reaching here is a caller error.  */
+	      FFI_ASSERT(0);
+	      break;
+	    }
+	}
+
+      /* Advance past the slot just written.  */
+      slot += width;
+    }
+}
+
+/* Machine-dependent cif setup: picks the return-type code kept in cif->flags and, on Darwin, pads cif->bytes.  Always returns FFI_OK.  */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  /* Set the return type flag */
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_VOID:
+#ifdef X86
+    case FFI_TYPE_STRUCT:
+#endif
+#if defined(X86) || defined(X86_DARWIN)
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_SINT16:
+#endif
+      /* All of the types in this group keep their own type code.  */
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+    case FFI_TYPE_LONGDOUBLE:
+      cif->flags = (unsigned) cif->rtype->type;
+      break;
+      /* Both 64-bit integer types share the SINT64 code.  */
+    case FFI_TYPE_UINT64:
+      cif->flags = FFI_TYPE_SINT64;
+      break;
+      /* When X86 is not defined, structs of 1, 2, 4 or 8 bytes get a size-specific code; others stay FFI_TYPE_STRUCT.  */
+#ifndef X86
+    case FFI_TYPE_STRUCT:
+      if (cif->rtype->size == 1)
+        {
+          cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */
+        }
+      else if (cif->rtype->size == 2)
+        {
+          cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */
+        }
+      else if (cif->rtype->size == 4)
+        {
+          cif->flags = FFI_TYPE_INT; /* same as int type */
+        }
+      else if (cif->rtype->size == 8)
+        {
+          cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
+        }
+      else
+        {
+          cif->flags = FFI_TYPE_STRUCT;
+        }
+      break;
+#endif
+      /* Every remaining type is given the FFI_TYPE_INT code.  */
+    default:
+      cif->flags = FFI_TYPE_INT;
+      break;
+    }
+  /* Darwin only: round the argument area up to a multiple of 16 bytes.  */
+#ifdef X86_DARWIN
+  cif->bytes = (cif->bytes + 15) & ~0xF;
+#endif
+
+  return FFI_OK;
+}
+
+/* Assembly entry points.  Arguments: prep callback, ecif, argument
+   bytes, return-type flags, result address, target function.  */
+extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
+			  unsigned, unsigned, unsigned *, void (*fn)());
+
+#ifdef X86_WIN32
+extern void ffi_call_STDCALL(void (*)(char *, extended_cif *), extended_cif *,
+			  unsigned, unsigned, unsigned *, void (*fn)());
+#endif /* X86_WIN32 */
+
+/* Call FN as described by CIF, reading argument values through AVALUE
+   and storing the result through RVALUE.  CIF must already have been
+   prepared.  */
+void ffi_call(ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
+{
+  extended_cif call_ctx;
+
+  call_ctx.cif = cif;
+  call_ctx.avalue = avalue;
+  call_ctx.rvalue = rvalue;
+
+  /* A struct returned in memory needs somewhere to land even when the
+     caller does not want the value, so supply a throwaway buffer.  */
+  if (rvalue == NULL && cif->flags == FFI_TYPE_STRUCT)
+    call_ctx.rvalue = alloca(cif->rtype->size);
+
+  /* Hand off to the assembly stub for the cif's ABI.  */
+  switch (cif->abi)
+    {
+    case FFI_SYSV:
+      ffi_call_SYSV(ffi_prep_args, &call_ctx, cif->bytes, cif->flags,
+		    call_ctx.rvalue, fn);
+      break;
+#ifdef X86_WIN32
+    case FFI_STDCALL:
+      ffi_call_STDCALL(ffi_prep_args, &call_ctx, cif->bytes, cif->flags,
+		       call_ctx.rvalue, fn);
+      break;
+#endif /* X86_WIN32 */
+    default:
+      /* Unsupported ABI.  */
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+
+/** private members: closure helpers and the assembly entry stubs **/
+/* regparm(1) passes the first argument in %eax, the register the trampolines load before transferring to a stub.  */
+static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
+					 void** args, ffi_cif* cif);
+void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
+     __attribute__ ((regparm(1)));
+unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
+     __attribute__ ((regparm(1)));
+void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
+     __attribute__ ((regparm(1)));
+#ifdef X86_WIN32
+void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
+     __attribute__ ((regparm(1)));
+#endif
+
+/* C half of the closure entry path.  The assembly stub calls this with
+   the closure, the address of its result pointer, and the address of
+   the caller's argument block; the value returned is the return-type
+   code the stub then dispatches on.  */
+
+unsigned int FFI_HIDDEN
+ffi_closure_SYSV_inner (closure, respp, args)
+     ffi_closure *closure;
+     void **respp;
+     void *args;
+{
+  ffi_cif *desc;
+  void **argv;
+
+  desc = closure->cif;
+  argv = (void **) alloca (desc->nargs * sizeof (void *));
+
+  /* Point each argv[] entry at its value in the caller's frame; for a
+     struct return this also redirects *RESPP to the caller's buffer.  */
+  ffi_prep_incoming_args_SYSV (args, respp, argv, desc);
+
+  /* Run the user's handler, handing it the result location *RESPP.  */
+  closure->fun (desc, *respp, argv, closure->user_data);
+
+  /* Tell the stub how the result is to be returned.  */
+  return desc->flags;
+}
+
+/* Build the AVALUE vector for a closure invocation.  STACK is the
+   caller's incoming argument block; entry I of AVALUE is set to the
+   address of argument I inside that block, so no values are copied.
+   When the cif returns a struct in memory, the hidden result pointer
+   that leads the block is stored in *RVALUE.
+   Only cif->flags, nargs and arg_types are consulted.  */
+
+static void
+ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
+			    ffi_cif *cif)
+{
+  char *cursor = stack;
+  unsigned int idx;
+
+  /* Consume the hidden struct-return pointer, if there is one.  */
+  if (cif->flags == FFI_TYPE_STRUCT)
+    {
+      *rvalue = *(void **) cursor;
+      cursor += 4;
+    }
+
+  for (idx = 0; idx < cif->nargs; idx++)
+    {
+      size_t width = cif->arg_types[idx]->size;
+
+      /* Arguments begin on int boundaries.  */
+      if (((unsigned) cursor & (sizeof(int) - 1)) != 0)
+	{
+	  cursor = (char *) ALIGN(cursor, sizeof(int));
+	}
+
+      /* Little endian: a value's address is its slot's address,
+	 whatever its width.  */
+      avalue[idx] = (void *) cursor;
+
+      /* Step over this argument.  */
+      cursor += width;
+    }
+}
+
+/* How to make a trampoline.  Derived from gcc/config/i386/i386.c. */
+/* Each macro writes code at TRAMP that loads CTX into %eax and transfers to FUN; the displacement is taken from CTX + 10.  */
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
+({ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10);	/* relative to the byte after the jmp */ \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[5] = 0xe9; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
+ })
+
+#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE)  \
+({ unsigned char *__tramp = (unsigned char*)(TRAMP); \
+   unsigned int  __fun = (unsigned int)(FUN); \
+   unsigned int  __ctx = (unsigned int)(CTX); \
+   unsigned int  __dis = __fun - (__ctx + 10); /* relative to the byte after the call */ \
+   unsigned short __size = (unsigned short)(SIZE); \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[5] = 0xe8; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* call __fun  */ \
+   *(unsigned char *)  &__tramp[10] = 0xc2; \
+   *(unsigned short*)  &__tramp[11] = __size; /* ret __size  */ \
+ })
+
+/* Initialise CLOSURE to invoke FUN (cif, result, args, USER_DATA); the
+   trampoline executes from CODELOC.  CIF must already be prepared.  */
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+		      ffi_cif* cif,
+		      void (*fun)(ffi_cif*,void*,void**,void*),
+		      void *user_data,
+		      void *codeloc)
+{
+  if (cif->abi == FFI_SYSV)
+    {
+      /* The jump displacement and the %eax context are relative to where
+         the trampoline runs, so pass CODELOC, not the writable alias.  */
+      FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc);
+    }
+#ifdef X86_WIN32
+  else if (cif->abi == FFI_STDCALL)
+    {
+      FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
+                                   &ffi_closure_STDCALL,
+                                   codeloc, cif->bytes);
+    }
+#endif
+  else
+    {
+      return FFI_BAD_ABI;
+    }
+
+  closure->cif  = cif;
+  closure->user_data = user_data;
+  closure->fun  = fun;
+
+  return FFI_OK;
+}
+
+/* ------- Native raw API support -------------------------------- */
+
+#if !FFI_NO_RAW_API
+
+ffi_status
+ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
+			  ffi_cif* cif,
+			  void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
+			  void *user_data,
+			  void *codeloc)
+{
+  int idx;
+
+  /* Set up CLOSURE as a raw closure for CIF whose trampoline will run
+     from CODELOC.  Only the SYSV ABI is handled.  */
+  if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
+  /* Struct and long double arguments are not supported by raw closures;
+     handling them would need per-argument processing in a separate
+     assembly routine, which is skipped for speed.  */
+  idx = cif->nargs - 1;
+  while (idx >= 0)
+    {
+      FFI_ASSERT (cif->arg_types[idx]->type != FFI_TYPE_STRUCT);
+      FFI_ASSERT (cif->arg_types[idx]->type != FFI_TYPE_LONGDOUBLE);
+      idx--;
+    }
+
+  /* The trampoline loads CODELOC into %eax and jumps to the raw stub.  */
+  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV, codeloc);
+
+  closure->fun = fun;
+  closure->user_data = user_data;
+  closure->cif = cif;
+
+  return FFI_OK;
+}
+
+static void
+ffi_prep_args_raw(char *stack, extended_cif *ecif)
+{
+  /* The raw argument vector is already a stack image; copy it verbatim.  */
+  memcpy (stack, (const void *) ecif->avalue, (size_t) ecif->cif->bytes);
+}
+/* We borrow this routine from libffi (it must be changed, though, to
+ * actually call the function passed in the first argument.  As of
+ * libffi-1.20, this is not the case.)
+ */
+/* The first parameter is the argument-preparation callback; ffi_raw_call passes ffi_prep_args_raw.  */
+extern void
+ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, unsigned, 
+	      unsigned, unsigned *, void (*fn)());
+
+#ifdef X86_WIN32
+extern void
+ffi_call_STDCALL(void (*)(char *, extended_cif *), extended_cif *, unsigned,
+		 unsigned, unsigned *, void (*fn)());
+#endif /* X86_WIN32 */
+
+void
+ffi_raw_call(ffi_cif *cif, void (*fn)(), void *rvalue, ffi_raw *fake_avalue)
+{
+  /* Raw-API variant of ffi_call: FAKE_AVALUE is the packed argument
+     image, which ffi_prep_args_raw copies straight onto the stack.  */
+  extended_cif call_ctx;
+
+  call_ctx.cif = cif;
+  call_ctx.avalue = (void **) fake_avalue;
+  call_ctx.rvalue = rvalue;
+
+  /* A struct return with no caller-supplied storage still needs a
+     buffer for the callee to write into.  */
+  if (rvalue == NULL && cif->rtype->type == FFI_TYPE_STRUCT)
+    {
+      call_ctx.rvalue = alloca(cif->rtype->size);
+    }
+
+  /* Hand off to the assembly stub for the cif's ABI.  */
+  switch (cif->abi)
+    {
+    case FFI_SYSV:
+      ffi_call_SYSV(ffi_prep_args_raw, &call_ctx, cif->bytes, cif->flags,
+		    call_ctx.rvalue, fn);
+      break;
+
+#ifdef X86_WIN32
+    case FFI_STDCALL:
+      ffi_call_STDCALL(ffi_prep_args_raw, &call_ctx, cif->bytes, cif->flags,
+		       call_ctx.rvalue, fn);
+      break;
+#endif /* X86_WIN32 */
+
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+#endif
+
+#endif /* !__x86_64__ */
diff --git a/libffi-3.0.4/src/x86/ffi64.c b/libffi-3.0.4/src/x86/ffi64.c
new file mode 100644
index 0000000..5162f69
--- /dev/null
+++ b/libffi-3.0.4/src/x86/ffi64.c
@@ -0,0 +1,571 @@
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 2002, 2007  Bo Thorsen <bo@suse.de>
+   
+   x86-64 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+#ifdef __x86_64__
+
+#define MAX_GPR_REGS 6	/* length of register_args.gpr[] */
+#define MAX_SSE_REGS 8	/* length of register_args.sse[] */
+
+struct register_args	/* register image placed at the start of the block that ffi_call builds */
+{
+  /* Registers for argument passing.  */
+  UINT64 gpr[MAX_GPR_REGS];	/* integer-class words, one 8-byte slot each */
+  __int128_t sse[MAX_SSE_REGS];	/* SSE-class words, one 16-byte slot each */
+};
+
+extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,	/* ffi_call passes a register_args image followed by the stack arguments, and its total size */
+			     void *raddr, void (*fnaddr)(), unsigned ssecount);	/* ssecount: number of SSE slots ffi_call filled */
+
+/* All reference to register classes here is identical to the code in
+   gcc/config/i386/i386.c. Do *not* change one without the other.  */
+
+/* Register class used for passing given 64bit part of the argument.
+   These represent classes as documented by the PS ABI, with the exception
+   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
+   use SF or DFmode move instead of DImode to avoid reformatting penalties.
+
+   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
+   whenever possible (upper half does contain padding).  */
+enum x86_64_reg_class
+  {
+    X86_64_NO_CLASS,
+    X86_64_INTEGER_CLASS,
+    X86_64_INTEGERSI_CLASS,	/* integer data ending within the low 4 bytes of the eightbyte */
+    X86_64_SSE_CLASS,		/* SSE_CLASS_P tests the range SSE_CLASS..SSEUP_CLASS: keep these four contiguous */
+    X86_64_SSESF_CLASS,		/* a float at offset 0 of its eightbyte */
+    X86_64_SSEDF_CLASS,		/* a double */
+    X86_64_SSEUP_CLASS,
+    X86_64_X87_CLASS,		/* first word of a long double */
+    X86_64_X87UP_CLASS,		/* second word of a long double */
+    X86_64_COMPLEX_X87_CLASS,
+    X86_64_MEMORY_CLASS
+  };
+
+#define MAX_CLASSES 4	/* Capacity of the classes[] arrays filled by classify_argument.  */
+/* Nonzero iff X is one of the SSE classes (SSE, SSESF, SSEDF, SSEUP); relies on their consecutive enum order.  Both uses of X are parenthesized so that expression arguments (e.g. a conditional) expand with the intended precedence.  */
+#define SSE_CLASS_P(X)	((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
+
+/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
+   of this code is to classify each 8bytes of incoming argument by the register
+   class and assign registers accordingly.  */
+
+/* Merge CLASS1 and CLASS2, the classes of two fields sharing one
+   eightbyte, into a single class.  See the x86-64 PS ABI for details.  */
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+  enum x86_64_reg_class side[2];
+  int k, any_int = 0, any_intsi = 0, any_sf = 0, any_x87 = 0;
+
+  /* Rule #1: equal classes merge to themselves.  */
+  if (class1 == class2)
+    return class1;
+
+  /* Rule #2: NO_CLASS always yields to the other operand.  */
+  if (class1 == X86_64_NO_CLASS || class2 == X86_64_NO_CLASS)
+    return class1 == X86_64_NO_CLASS ? class2 : class1;
+
+  /* Survey both operands once; the remaining rules are symmetric.  */
+  side[0] = class1;
+  side[1] = class2;
+  for (k = 0; k < 2; k++)
+    {
+      /* Rule #3: MEMORY on either side wins outright.  */
+      if (side[k] == X86_64_MEMORY_CLASS)
+	return X86_64_MEMORY_CLASS;
+      any_int |= side[k] == X86_64_INTEGER_CLASS;
+      any_intsi |= side[k] == X86_64_INTEGERSI_CLASS;
+      any_sf |= side[k] == X86_64_SSESF_CLASS;
+      any_x87 |= (side[k] == X86_64_X87_CLASS
+		  || side[k] == X86_64_X87UP_CLASS
+		  || side[k] == X86_64_COMPLEX_X87_CLASS);
+    }
+
+  /* Rule #4: an integer class on either side gives INTEGER, except
+     that INTEGERSI paired with SSESF stays INTEGERSI.  */
+  if (any_intsi && any_sf)
+    return X86_64_INTEGERSI_CLASS;
+  if (any_int || any_intsi)
+    return X86_64_INTEGER_CLASS;
+
+  /* Rule #5: any x87 flavour forces MEMORY.  Rule #6: otherwise SSE.  */
+  return any_x87 ? X86_64_MEMORY_CLASS : X86_64_SSE_CLASS;
+}
+
+/* Classify the argument of type TYPE, which starts BYTE_OFFSET bytes
+   into its enclosing eightbyte (examine_argument passes 0).  CLASSES
+   is filled with the register class used to pass each 8-byte word of
+   the operand and the number of words is returned; 0 is returned when
+   the operand should be passed in memory.  NOTE(review): no special
+   case for zero-sized structs is visible here -- WORDS is 0 for them,
+   so 0 ("memory") is returned; confirm callers never pass one.
+   See the x86-64 PS ABI for details.  */
+static int
+classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
+		   size_t byte_offset)
+{
+  switch (type->type)
+    {
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_POINTER:
+      if (byte_offset + type->size <= 4)	/* value ends within the low 4 bytes of the eightbyte */
+	classes[0] = X86_64_INTEGERSI_CLASS;
+      else
+	classes[0] = X86_64_INTEGER_CLASS;
+      return 1;
+    case FFI_TYPE_FLOAT:
+      if (byte_offset == 0)	/* a float not at offset 0 gets plain SSE_CLASS */
+	classes[0] = X86_64_SSESF_CLASS;
+      else
+	classes[0] = X86_64_SSE_CLASS;
+      return 1;
+    case FFI_TYPE_DOUBLE:
+      classes[0] = X86_64_SSEDF_CLASS;
+      return 1;
+    case FFI_TYPE_LONGDOUBLE:
+      classes[0] = X86_64_X87_CLASS;
+      classes[1] = X86_64_X87UP_CLASS;
+      return 2;	/* the only scalar that occupies two words */
+    case FFI_TYPE_STRUCT:
+      {
+	const int UNITS_PER_WORD = 8;
+	int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;	/* size rounded up to whole eightbytes */
+	ffi_type **ptr; 
+	int i;
+	enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+	/* If the struct is larger than 16 bytes, pass it on the stack.  */
+	if (type->size > 16)
+	  return 0;
+
+	for (i = 0; i < words; i++)
+	  classes[i] = X86_64_NO_CLASS;
+
+	/* Merge the fields of structure.  */
+	for (ptr = type->elements; *ptr != NULL; ptr++)	/* elements is walked up to its NULL terminator */
+	  {
+	    int num;
+
+	    byte_offset = ALIGN (byte_offset, (*ptr)->alignment);	/* advance to this field's aligned offset */
+
+	    num = classify_argument (*ptr, subclasses, byte_offset % 8);	/* recurse with the offset inside the field's eightbyte */
+	    if (num == 0)
+	      return 0;
+	    for (i = 0; i < num; i++)
+	      {
+		int pos = byte_offset / 8;	/* index of the eightbyte holding the field's start */
+		classes[i + pos] =
+		  merge_classes (subclasses[i], classes[i + pos]);
+	      }
+
+	    byte_offset += (*ptr)->size;
+	  }
+
+	/* Final merger cleanup.  */
+	for (i = 0; i < words; i++)
+	  {
+	    /* If one class is MEMORY, everything should be passed in
+	       memory.  */
+	    if (classes[i] == X86_64_MEMORY_CLASS)
+	      return 0;
+
+	    /* The X86_64_SSEUP_CLASS should be always preceded by
+	       X86_64_SSE_CLASS; a stray one is demoted to SSE_CLASS.  */
+	    if (classes[i] == X86_64_SSEUP_CLASS
+		&& (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
+	      classes[i] = X86_64_SSE_CLASS;
+
+	    /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
+	    if (classes[i] == X86_64_X87UP_CLASS
+		&& (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
+	      classes[i] = X86_64_SSE_CLASS;	/* a stray X87UP is likewise rewritten to SSE_CLASS */
+	  }
+	return words;
+      }
+
+    default:
+      FFI_ASSERT(0);	/* every type code handled by this function is listed above */
+    }
+  return 0; /* Never reached.  */
+}
+
+/* Classify TYPE and count the registers it occupies.  Returns 0 iff the
+   value must live in memory; otherwise returns the number of eightbytes
+   and stores the general-purpose and SSE register counts through PNGPR
+   and PNSSE.  For x87-class values the counts are left unwritten and
+   the result is 1 when IN_RETURN is set, 0 (memory) otherwise.  */
+
+static int
+examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
+		  _Bool in_return, int *pngpr, int *pnsse)
+{
+  int word, nwords;
+  int gprs = 0;
+  int sses = 0;
+
+  nwords = classify_argument (type, classes, 0);
+  if (nwords == 0)
+    return 0;
+
+  for (word = 0; word < nwords; word++)
+    {
+      const enum x86_64_reg_class cls = classes[word];
+
+      if (cls == X86_64_INTEGER_CLASS || cls == X86_64_INTEGERSI_CLASS)
+	gprs++;
+      else if (cls == X86_64_SSE_CLASS
+	       || cls == X86_64_SSESF_CLASS
+	       || cls == X86_64_SSEDF_CLASS)
+	sses++;
+      else if (cls == X86_64_NO_CLASS || cls == X86_64_SSEUP_CLASS)
+	continue;		/* Counts towards neither register total.  */
+      else if (cls == X86_64_X87_CLASS
+	       || cls == X86_64_X87UP_CLASS
+	       || cls == X86_64_COMPLEX_X87_CLASS)
+	return in_return ? 1 : 0;
+      else
+	abort ();		/* No other class is expected at this point.  */
+    }
+
+  *pngpr = gprs;
+  *pnsse = sses;
+
+  return nwords;
+}
+
+/* Perform machine dependent cif processing: compute cif->flags (return-value handling) and cif->bytes (stack bytes needed by memory arguments).  */
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  size_t bytes;
+
+  gprcount = ssecount = 0;
+
+  flags = cif->rtype->type;	/* flags starts out as the return type code */
+  if (flags != FFI_TYPE_VOID)
+    {
+      n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
+	{
+	  /* The return value is passed in memory.  A pointer to that
+	     memory is the first argument.  Allocate a register for it.  */
+	  gprcount++;
+	  /* We don't have to do anything in asm for the return.  */
+	  flags = FFI_TYPE_VOID;
+	}
+      else if (flags == FFI_TYPE_STRUCT)
+	{
+	  /* Mark which registers the result appears in.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+	  if (sse0 && !sse1)
+	    flags |= 1 << 8;	/* bit 8: only the first word is SSE class */
+	  else if (!sse0 && sse1)
+	    flags |= 1 << 9;	/* bit 9: only the second word is SSE class */
+	  else if (sse0 && sse1)
+	    flags |= 1 << 10;	/* bit 10: both words are SSE class */
+	  /* Mark the true size of the structure.  */
+	  flags |= cif->rtype->size << 12;	/* size occupies bits 12 and up */
+	}
+    }
+
+  /* Go over all arguments and determine the way they should be passed.
+     If it's in a register and there is space for it, let that be so. If
+     not, add its size to the stack byte count.  */
+  for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
+    {
+      if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = cif->arg_types[i]->alignment;
+
+	  if (align < 8)	/* stack slots are aligned to at least 8 bytes */
+	    align = 8;
+
+	  bytes = ALIGN(bytes, align);
+	  bytes += cif->arg_types[i]->size;
+	}
+      else
+	{
+	  gprcount += ngpr;	/* the argument fits entirely in registers */
+	  ssecount += nsse;
+	}
+    }
+  if (ssecount)
+    flags |= 1 << 11;	/* bit 11: at least one argument uses an SSE register */
+  cif->flags = flags;
+  cif->bytes = bytes;
+
+  return FFI_OK;
+}
+
+void	/* Call FN as described by CIF with the argument values AVALUE[], storing the result through RVALUE.  */
+ffi_call (ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
+{
+  enum x86_64_reg_class classes[MAX_CLASSES];
+  char *stack, *argp;
+  ffi_type **arg_types;
+  int gprcount, ssecount, ngpr, nsse, i, avn;
+  _Bool ret_in_memory;
+  struct register_args *reg_args;
+
+  /* Can't call 32-bit mode from 64-bit mode.  */
+  FFI_ASSERT (cif->abi == FFI_UNIX64);
+
+  /* If the return value is a struct and we don't have a return value
+     address then we need to make one.  Note the setting of flags to
+     VOID above in ffi_prep_cif_machdep.  */
+  ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
+		   && (cif->flags & 0xff) == FFI_TYPE_VOID);
+  if (rvalue == NULL && ret_in_memory)
+    rvalue = alloca (cif->rtype->size);
+
+  /* Allocate the space for the arguments, plus 4 words of temp space.  */
+  stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
+  reg_args = (struct register_args *) stack;	/* register image sits at the start of the block */
+  argp = stack + sizeof (struct register_args);	/* memory arguments follow the register image */
+
+  gprcount = ssecount = 0;
+
+  /* If the return value is passed in memory, add the pointer as the
+     first integer argument.  */
+  if (ret_in_memory)
+    reg_args->gpr[gprcount++] = (long) rvalue;
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  for (i = 0; i < avn; ++i)
+    {
+      size_t size = arg_types[i]->size;
+      int n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  memcpy (argp, avalue[i], size);
+	  argp += size;
+	}
+      else
+	{
+	  /* The argument is passed entirely in registers.  */
+	  char *a = (char *) avalue[i];
+	  int j;
+
+	  for (j = 0; j < n; j++, a += 8, size -= 8)	/* one pass per eightbyte; SIZE tracks the bytes still to copy */
+	    {
+	      switch (classes[j])
+		{
+		case X86_64_INTEGER_CLASS:
+		case X86_64_INTEGERSI_CLASS:
+		  reg_args->gpr[gprcount] = 0;	/* clear first: a short value leaves the upper bytes zero */
+		  memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+		  gprcount++;
+		  break;
+		case X86_64_SSE_CLASS:
+		case X86_64_SSEDF_CLASS:
+		  reg_args->sse[ssecount++] = *(UINT64 *) a;	/* 8 bytes widened into the 16-byte slot */
+		  break;
+		case X86_64_SSESF_CLASS:
+		  reg_args->sse[ssecount++] = *(UINT32 *) a;	/* 4 bytes widened into the 16-byte slot */
+		  break;
+		default:
+		  abort();	/* no other class is handled on the register path */
+		}
+	    }
+	}
+    }
+
+  ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
+		   cif->flags, rvalue, fn, ssecount);
+}
+
+
+extern void ffi_closure_unix64(void);	/* entry point the trampoline jumps to; defined outside this file */
+
+ffi_status	/* Fill in CLOSURE: write its 24-byte trampoline and record CIF, FUN and USER_DATA.  Always returns FFI_OK.  */
+ffi_prep_closure_loc (ffi_closure* closure,
+		      ffi_cif* cif,
+		      void (*fun)(ffi_cif*, void*, void**, void*),
+		      void *user_data,
+		      void *codeloc)
+{
+  volatile unsigned short *tramp;
+
+  tramp = (volatile unsigned short *) &closure->tramp[0];	/* the trampoline is written as twelve 16-bit units */
+
+  tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
+  *(void * volatile *) &tramp[1] = ffi_closure_unix64;	/* 8-byte immediate, occupying tramp[1..4] */
+  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
+  *(void * volatile *) &tramp[6] = codeloc;	/* 8-byte immediate, occupying tramp[6..9] */
+
+  /* Set the carry bit iff the function uses any sse registers.
+     This is clc or stc, together with the first byte of the jmp.  */
+  tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;	/* bit 11 is the SSE-argument flag set by ffi_prep_cif_machdep */
+
+  tramp[11] = 0xe3ff;			/* jmp *%r11    */
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+int	/* Build avalue[] from the saved registers and ARGP, invoke closure->fun, and return the return-type code for the assembly caller.  */
+ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
+			 struct register_args *reg_args, char *argp)
+{
+  ffi_cif *cif;
+  void **avalue;
+  ffi_type **arg_types;
+  long i, avn;
+  int gprcount, ssecount, ngpr, nsse;
+  int ret;
+
+  cif = closure->cif;
+  avalue = alloca(cif->nargs * sizeof(void *));	/* one pointer per declared argument */
+  gprcount = ssecount = 0;
+
+  ret = cif->rtype->type;
+  if (ret != FFI_TYPE_VOID)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+      if (n == 0)
+	{
+	  /* The return value goes in memory.  Arrange for the closure
+	     return value to go directly back to the original caller.  */
+	  rvalue = (void *) reg_args->gpr[gprcount++];	/* the caller's result pointer is the first integer argument */
+	  /* We don't have to do anything in asm for the return.  */
+	  ret = FFI_TYPE_VOID;
+	}
+      else if (ret == FFI_TYPE_STRUCT && n == 2)
+	{
+	  /* Mark which register the second word of the structure goes in.  */
+	  _Bool sse0 = SSE_CLASS_P (classes[0]);
+	  _Bool sse1 = SSE_CLASS_P (classes[1]);
+	  if (!sse0 && sse1)
+	    ret |= 1 << 8;	/* bit 8: integer first word, SSE second word */
+	  else if (sse0 && !sse1)
+	    ret |= 1 << 9;	/* bit 9: SSE first word, integer second word */
+	}
+    }
+
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+  
+  for (i = 0; i < avn; ++i)
+    {
+      enum x86_64_reg_class classes[MAX_CLASSES];
+      int n;
+
+      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
+      if (n == 0
+	  || gprcount + ngpr > MAX_GPR_REGS
+	  || ssecount + nsse > MAX_SSE_REGS)
+	{
+	  long align = arg_types[i]->alignment;
+
+	  /* Stack arguments are *always* at least 8 byte aligned.  */
+	  if (align < 8)
+	    align = 8;
+
+	  /* Pass this argument in memory.  */
+	  argp = (void *) ALIGN (argp, align);
+	  avalue[i] = argp;	/* point straight at the memory argument; nothing is copied */
+	  argp += arg_types[i]->size;
+	}
+      /* If the argument is in a single register, or two consecutive
+	 registers, then we can use that address directly.  */
+      else if (n == 1
+	       || (n == 2
+		   && SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
+	{
+	  /* One register, or two neighbouring slots of the same array.  */
+	  if (SSE_CLASS_P (classes[0]))
+	    {
+	      avalue[i] = &reg_args->sse[ssecount];
+	      ssecount += n;
+	    }
+	  else
+	    {
+	      avalue[i] = &reg_args->gpr[gprcount];
+	      gprcount += n;
+	    }
+	}
+      /* Otherwise, allocate space to make them consecutive.  */
+      else
+	{
+	  char *a = alloca (16);	/* two eightbytes, one from each register array */
+	  int j;
+
+	  avalue[i] = a;
+	  for (j = 0; j < n; j++, a += 8)
+	    {
+	      if (SSE_CLASS_P (classes[j]))
+		memcpy (a, &reg_args->sse[ssecount++], 8);	/* first 8 bytes of the 16-byte slot */
+	      else
+		memcpy (a, &reg_args->gpr[gprcount++], 8);
+	    }
+	}
+    }
+
+  /* Invoke the closure.  */
+  closure->fun (cif, rvalue, avalue, closure->user_data);
+
+  /* Tell assembly how to perform return type promotions.  */
+  return ret;
+}
+
+#endif /* __x86_64__ */
diff --git a/libffi-3.0.4/src/x86/ffitarget.h b/libffi-3.0.4/src/x86/ffitarget.h
new file mode 100644
index 0000000..8178d06
--- /dev/null
+++ b/libffi-3.0.4/src/x86/ffitarget.h
@@ -0,0 +1,90 @@
+/* -----------------------------------------------------------------*-C-*-
+   ffitarget.h - Copyright (c) 1996-2003  Red Hat, Inc.
+   Copyright (C) 2008  Free Software Foundation, Inc.
+
+   Target configuration macros for x86 and x86-64.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+
+   ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+/* ---- System specific configurations ----------------------------------- */
+
+#if defined (X86_64) && defined (__i386__)	/* X86_64 was configured but the compiler targets i386: switch to the X86 configuration */
+#undef X86_64
+#define X86
+#endif
+
+/* ---- Generic type definitions ----------------------------------------- */
+
+#ifndef LIBFFI_ASM	/* C-only declarations; skipped when an assembly file defines LIBFFI_ASM */
+typedef unsigned long          ffi_arg;
+typedef signed long            ffi_sarg;
+
+typedef enum ffi_abi {
+  FFI_FIRST_ABI = 0,
+
+  /* ---- Intel x86 Win32 ---------- */
+#ifdef X86_WIN32
+  FFI_SYSV,
+  FFI_STDCALL,
+  /* TODO: Add fastcall support for the sake of completeness */
+  FFI_DEFAULT_ABI = FFI_SYSV,
+#endif
+
+  /* ---- Intel x86 and AMD x86-64 - */
+#if !defined(X86_WIN32) && (defined(__i386__) || defined(__x86_64__))
+  FFI_SYSV,
+  FFI_UNIX64,   /* Unix variants all use the same ABI for x86-64  */
+#ifdef __i386__
+  FFI_DEFAULT_ABI = FFI_SYSV,
+#else
+  FFI_DEFAULT_ABI = FFI_UNIX64,
+#endif
+#endif
+
+  FFI_LAST_ABI = FFI_DEFAULT_ABI + 1	/* always one past the default ABI, not past the last enumerator */
+} ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)	/* extra return codes past FFI_TYPE_LAST, used by the x86 return dispatch */
+#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
+
+#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
+#define FFI_TRAMPOLINE_SIZE 24	/* bytes; the x86-64 ffi_prep_closure_loc writes twelve 16-bit units */
+#define FFI_NATIVE_RAW_API 0
+#else
+#ifdef X86_WIN32
+#define FFI_TRAMPOLINE_SIZE 13
+#else
+#define FFI_TRAMPOLINE_SIZE 10
+#endif
+#define FFI_NATIVE_RAW_API 1	/* x86 has native raw api support */
+#endif
+
+#endif
+
diff --git a/libffi-3.0.4/src/x86/freebsd.S b/libffi-3.0.4/src/x86/freebsd.S
new file mode 100644
index 0000000..afde513
--- /dev/null
+++ b/libffi-3.0.4/src/x86/freebsd.S
@@ -0,0 +1,458 @@
+/* -----------------------------------------------------------------------
+   freebsd.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005  Red Hat, Inc.
+	       Copyright (c) 2008  Björn König
+	
+   X86 Foreign Function Interface for FreeBSD
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl ffi_prep_args
+
+	.align 4
+.globl ffi_call_SYSV
+        .type    ffi_call_SYSV,@function
+/* ffi_call_SYSV(prep, ecif, bytes, flags, rvalue, fn): the six stack arguments are read at 8, 12, 16, 20, 24 and 28(%ebp).  */
+ffi_call_SYSV:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+	/* Make room for all of the new args.  */
+	movl  16(%ebp),%ecx	/* %ecx = bytes */
+	subl  %ecx,%esp
+
+	movl  %esp,%eax		/* %eax = start of the outgoing argument area */
+
+	/* Place all of the ffi_prep_args in position  */
+	pushl 12(%ebp)		/* second callback argument: ecif */
+	pushl %eax		/* first callback argument: the argument area */
+	call  *8(%ebp)		/* prep (stack, ecif) */
+
+	/* Return stack to previous state and call the function  */
+	addl  $8,%esp	
+
+	call  *28(%ebp)		/* fn, with %esp at the prepared arguments */
+
+	/* Load %ecx with the return type code  */
+	movl  20(%ebp),%ecx	
+
+	/* Protect %esi.  We're going to pop it in the epilogue.  */
+	pushl %esi
+
+	/* If the return value pointer is NULL, assume no return value.  */
+	cmpl  $0,24(%ebp)
+	jne  0f
+
+	/* Even with no result buffer, a float result is popped off the x87
+	   stack.  NOTE(review): only FFI_TYPE_FLOAT is tested; confirm for DOUBLE/LONGDOUBLE.  */
+	cmpl  $FFI_TYPE_FLOAT,%ecx
+	jne   noretval
+	fstp  %st(0)
+
+        jmp   epilogue
+
+0:
+	call  1f		/* pushes the address of .Lstore_table, popped into %esi at 1: */
+
+.Lstore_table:
+	.long	noretval-.Lstore_table	/* FFI_TYPE_VOID */
+	.long	retint-.Lstore_table	/* FFI_TYPE_INT */
+	.long	retfloat-.Lstore_table	/* FFI_TYPE_FLOAT */
+	.long	retdouble-.Lstore_table	/* FFI_TYPE_DOUBLE */
+	.long	retlongdouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	retuint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+	.long	retsint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+	.long	retuint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+	.long	retsint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_UINT32 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_SINT32 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_UINT64 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_SINT64 */
+	.long	retstruct-.Lstore_table	/* FFI_TYPE_STRUCT */
+	.long	retint-.Lstore_table	/* FFI_TYPE_POINTER */
+	.long   retstruct1b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_1B */
+	.long   retstruct2b-.Lstore_table	/* FFI_TYPE_SMALL_STRUCT_2B */
+
+1:
+	pop  %esi			/* %esi = address of .Lstore_table */
+	add  (%esi, %ecx, 4), %esi	/* add the table-relative offset selected by the type code */
+	jmp  *%esi
+
+	/* Sign/zero extend as appropriate.  */
+retsint8:
+	movsbl  %al, %eax
+	jmp  retint
+
+retsint16:
+	movswl  %ax, %eax
+	jmp  retint
+
+retuint8:
+	movzbl  %al, %eax
+	jmp  retint
+
+retuint16:
+	movzwl  %ax, %eax
+	jmp  retint
+
+retfloat:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstps (%ecx)
+	jmp   epilogue
+
+retdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstpl (%ecx)
+	jmp   epilogue
+
+retlongdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	fstpt (%ecx)
+	jmp   epilogue
+	
+retint64:	
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	movl  %eax,0(%ecx)	/* low half */
+	movl  %edx,4(%ecx)	/* high half */
+	jmp   epilogue
+	
+retstruct1b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movb  %al,0(%ecx)
+	jmp   epilogue
+
+retstruct2b:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movw  %ax,0(%ecx)
+	jmp   epilogue
+
+retint:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx	
+	movl  %eax,0(%ecx)
+
+retstruct:
+	/* Nothing to do!  */
+
+noretval:
+epilogue:
+        popl %esi		/* undo the pushl %esi made after the call */
+        movl %ebp,%esp
+        popl %ebp
+        ret
+.LFE1:
+.ffi_call_SYSV_end:
+        .size    ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
+
+	.align	4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl ffi_closure_SYSV
+	.type	ffi_closure_SYSV, @function
+/* Closure entry: calls ffi_closure_SYSV_inner with &resp and the incoming argument pointer, then loads the result it reports.  */
+ffi_closure_SYSV:
+.LFB2:
+	pushl	%ebp
+.LCFI2:
+	movl	%esp, %ebp
+.LCFI3:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx	/* result buffer starts at -24(%ebp) */
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
+	call	ffi_closure_SYSV_inner
+#else
+	movl	%ebx, 8(%esp)	/* save %ebx; it is about to hold the GOT address */
+.LCFI7:
+	call	1f
+1:	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+	call	ffi_closure_SYSV_inner@PLT
+	movl	8(%esp), %ebx	/* restore %ebx */
+#endif
+	movl	-12(%ebp), %ecx	/* %ecx = resp as left by the inner function */
+	cmpl	$FFI_TYPE_INT, %eax	/* %eax = type code returned by the inner function */
+	je	.Lcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lcls_retint
+	
+0:	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lcls_retllong
+	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, %eax
+	je	.Lcls_retstruct1b
+	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, %eax
+	je	.Lcls_retstruct2b
+	cmpl	$FFI_TYPE_STRUCT, %eax
+	je	.Lcls_retstruct
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+.Lcls_retstruct1b:
+	movsbl	(%ecx), %eax	/* 1-byte struct, sign-extended into %eax */
+	jmp	.Lcls_epilogue
+.Lcls_retstruct2b:
+	movswl	(%ecx), %eax	/* 2-byte struct, sign-extended into %eax */
+	jmp	.Lcls_epilogue
+.Lcls_retstruct:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret	$4		/* also pops 4 bytes of the caller's arguments */
+.LFE2:
+	.size	ffi_closure_SYSV, .-ffi_closure_SYSV
+
+#if !FFI_NO_RAW_API
+/* Field offsets used below: cif, fun and user_data follow the trampoline (rounded up to 4 bytes) inside the closure.  */
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+	.align	4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl ffi_closure_raw_SYSV
+	.type	ffi_closure_raw_SYSV, @function
+/* Raw closure entry.  Reads the closure through %eax on entry (presumably loaded by the trampoline -- confirm) and calls closure->fun (cif, &res, raw_args, user_data).  */
+ffi_closure_raw_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	pushl	%esi
+.LCFI6:
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lrcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lrcls_retint
+0:
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lrcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lrcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lrcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lrcls_retllong
+.Lrcls_epilogue:
+	addl	$36, %esp	/* drop the locals so that %esi and %ebp can be popped */
+	popl	%esi
+	popl	%ebp
+	ret
+.Lrcls_retint:
+	movl	-24(%ebp), %eax	/* the result buffer is at -24(%ebp) */
+	jmp	.Lrcls_epilogue
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+.LFE3:
+	.size	ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
+#endif
+
+	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
+.Lframe1:
+	.long	.LECIE1-.LSCIE1	/* Length of Common Information Entry */
+.LSCIE1:
+	.long	0x0	/* CIE Identifier Tag */
+	.byte	0x1	/* CIE Version */
+#ifdef __PIC__
+	.ascii "zR\0"	/* CIE Augmentation */
+#else
+	.ascii "\0"	/* CIE Augmentation */
+#endif
+	.byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8	/* CIE RA Column */
+#ifdef __PIC__
+	.byte	0x1	/* .uleb128 0x1; Augmentation size */
+	.byte	0x1b	/* FDE Encoding (pcrel sdata4) */
+#endif
+	.byte	0xc	/* DW_CFA_def_cfa */
+	.byte	0x4	/* .uleb128 0x4; CFA register is column 4 */
+	.byte	0x4	/* .uleb128 0x4; CFA offset is 4 */
+	.byte	0x88	/* DW_CFA_offset, column 0x8 */
+	.byte	0x1	/* .uleb128 0x1; factored offset, scaled by the -4 data alignment factor */
+	.align 4
+.LECIE1:
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB1-.	/* FDE initial location */
+#else
+	.long	.LFB1	/* FDE initial location */
+#endif
+	.long	.LFE1-.LFB1	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI0-.LFB1	/* to just after pushl %ebp in ffi_call_SYSV */
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI1-.LCFI0	/* to just after movl %esp,%ebp */
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.align 4
+.LEFDE1:
+.LSFDE2:
+	.long	.LEFDE2-.LASFDE2	/* FDE Length */
+.LASFDE2:
+	.long	.LASFDE2-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB2-.	/* FDE initial location */
+#else
+	.long	.LFB2	/* FDE initial location */
+#endif
+	.long	.LFE2-.LFB2	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI2-.LFB2	/* to just after pushl %ebp in ffi_closure_SYSV */
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI3-.LCFI2	/* to just after movl %esp, %ebp */
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI7-.LCFI3	/* to just after %ebx is saved at 8(%esp) */
+	.byte	0x83	/* DW_CFA_offset, column 0x3 */
+	.byte	0xa	/* .uleb128 0xa */
+#endif
+	.align 4
+.LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB3-.	/* FDE initial location */
+#else
+	.long	.LFB3	/* FDE initial location */
+#endif
+	.long	.LFE3-.LFB3	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI4-.LFB3	/* to just after pushl %ebp in ffi_closure_raw_SYSV */
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI5-.LCFI4	/* to just after movl %esp, %ebp */
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI6-.LCFI5	/* to just after pushl %esi */
+	.byte	0x86	/* DW_CFA_offset, column 0x6 */
+	.byte	0x3	/* .uleb128 0x3 */
+	.align 4
+.LEFDE3:
+
+#endif
+
+#endif /* ifndef __x86_64__ */
diff --git a/libffi-3.0.4/src/x86/sysv.S b/libffi-3.0.4/src/x86/sysv.S
new file mode 100644
index 0000000..de08209
--- /dev/null
+++ b/libffi-3.0.4/src/x86/sysv.S
@@ -0,0 +1,433 @@
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005  Red Hat, Inc.
+   
+   X86 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
+	ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+.globl ffi_prep_args
+
+	.align 4
+.globl ffi_call_SYSV
+        .type    ffi_call_SYSV,@function
+
+/* ffi_call_SYSV -- build an outgoing cdecl argument block, call the
+   target function and store its return value.
+
+   Stack arguments, addressed off %ebp after the prologue:
+      8(%ebp)  callback invoked as cb(stack_area, 12(%ebp)) to lay out
+               the outgoing arguments
+     12(%ebp)  value passed through to that callback
+     16(%ebp)  number of bytes to reserve for the outgoing arguments
+     20(%ebp)  return type code (FFI_TYPE_*, indexes .Lstore_table)
+     24(%ebp)  where to store the return value, or NULL to discard it
+     28(%ebp)  address of the function to call
+
+   %esi is pushed after the call and popped in the epilogue because it
+   is used as scratch by the position-independent table dispatch.  */
+ffi_call_SYSV:
+.LFB1:
+        pushl %ebp
+.LCFI0:
+        movl  %esp,%ebp
+.LCFI1:
+	/* Make room for all of the new args.  */
+	movl  16(%ebp),%ecx
+	subl  %ecx,%esp
+
+	movl  %esp,%eax
+
+	/* Place all of the ffi_prep_args in position  */
+	pushl 12(%ebp)
+	pushl %eax
+	call  *8(%ebp)
+
+	/* Return stack to previous state and call the function  */
+	addl  $8,%esp
+
+	call  *28(%ebp)
+
+	/* Load %ecx with the return type code  */
+	movl  20(%ebp),%ecx
+
+	/* Protect %esi.  We're going to pop it in the epilogue.  */
+	pushl %esi
+
+	/* If the return value pointer is NULL, assume no return value.  */
+	cmpl  $0,24(%ebp)
+	jne  0f
+
+	/* Even if there is no space for the return value, we are
+	   obliged to handle floating-point values.  float, double and
+	   long double are all returned in %st(0); if the value is not
+	   popped, every such call leaks one x87 register-stack slot.  */
+	cmpl  $FFI_TYPE_FLOAT,%ecx
+	je    .Lpop_x87
+	cmpl  $FFI_TYPE_DOUBLE,%ecx
+	je    .Lpop_x87
+	cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
+	jne   noretval
+.Lpop_x87:
+	fstp  %st(0)
+
+        jmp   epilogue
+
+0:
+	/* The call pushes the address of .Lstore_table, which label 1
+	   pops into %esi: a position-independent way to find the table.  */
+	call  1f
+
+.Lstore_table:
+	.long	noretval-.Lstore_table	/* FFI_TYPE_VOID */
+	.long	retint-.Lstore_table	/* FFI_TYPE_INT */
+	.long	retfloat-.Lstore_table	/* FFI_TYPE_FLOAT */
+	.long	retdouble-.Lstore_table	/* FFI_TYPE_DOUBLE */
+	.long	retlongdouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	retuint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+	.long	retsint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+	.long	retuint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+	.long	retsint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_UINT32 */
+	.long	retint-.Lstore_table	/* FFI_TYPE_SINT32 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_UINT64 */
+	.long	retint64-.Lstore_table	/* FFI_TYPE_SINT64 */
+	.long	retstruct-.Lstore_table	/* FFI_TYPE_STRUCT */
+	.long	retint-.Lstore_table	/* FFI_TYPE_POINTER */
+
+1:
+	/* %esi = &.Lstore_table; add the table-relative offset selected
+	   by the type code in %ecx and jump to the handler.  */
+	pop  %esi
+	add  (%esi, %ecx, 4), %esi
+	jmp  *%esi
+
+	/* Sign/zero extend as appropriate.  */
+retsint8:
+	movsbl  %al, %eax
+	jmp  retint
+
+retsint16:
+	movswl  %ax, %eax
+	jmp  retint
+
+retuint8:
+	movzbl  %al, %eax
+	jmp  retint
+
+retuint16:
+	movzwl  %ax, %eax
+	jmp  retint
+
+retfloat:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstps (%ecx)
+	jmp   epilogue
+
+retdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstpl (%ecx)
+	jmp   epilogue
+
+retlongdouble:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	fstpt (%ecx)
+	jmp   epilogue
+
+retint64:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movl  %eax,0(%ecx)
+	movl  %edx,4(%ecx)
+	jmp   epilogue
+
+retint:
+	/* Load %ecx with the pointer to storage for the return value  */
+	movl  24(%ebp),%ecx
+	movl  %eax,0(%ecx)
+
+retstruct:
+	/* Nothing to do!  */
+
+noretval:
+epilogue:
+	/* Restore the %esi pushed after the call, then drop the whole
+	   frame (including the argument area) by reloading %esp.  */
+        popl %esi
+        movl %ebp,%esp
+        popl %ebp
+        ret
+.LFE1:
+.ffi_call_SYSV_end:
+        .size    ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
+
+	.align	4
+FFI_HIDDEN (ffi_closure_SYSV)
+.globl ffi_closure_SYSV
+	.type	ffi_closure_SYSV, @function
+
+/* ffi_closure_SYSV -- closure entry point.  Sets up a 40-byte frame,
+   calls ffi_closure_SYSV_inner (&resp, args) and then loads the result
+   into the return register selected by the type code that the inner
+   function returns in %eax.
+
+   Frame layout relative to %ebp:
+     -24(%ebp)  default result buffer (resp initially points here)
+     -12(%ebp)  resp, the result pointer; re-read after the call
+       8(%ebp)  first byte above the return address, passed as args  */
+ffi_closure_SYSV:
+.LFB2:
+	pushl	%ebp
+.LCFI2:
+	movl	%esp, %ebp
+.LCFI3:
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
+	call	ffi_closure_SYSV_inner
+#else
+	/* PIC call through the PLT: %ebx must hold the GOT address, so
+	   save the caller's %ebx in the frame and restore it afterwards.  */
+	movl	%ebx, 8(%esp)
+.LCFI7:
+	call	1f
+1:	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+	call	ffi_closure_SYSV_inner@PLT
+	movl	8(%esp), %ebx
+#endif
+	movl	-12(%ebp), %ecx	/* %ecx = resp, %eax = return type code */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lcls_retint
+	
+0:	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lcls_retllong
+	cmpl	$FFI_TYPE_STRUCT, %eax
+	je	.Lcls_retstruct
+	/* Any other type code: return without loading a value.  */
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+.Lcls_retstruct:
+	/* Struct return: "ret $4" also removes 4 bytes of caller-pushed
+	   stack (presumably the hidden result pointer -- confirm
+	   against the calling convention in use).  */
+	movl	%ebp, %esp
+	popl	%ebp
+	ret	$4
+.LFE2:
+	.size	ffi_closure_SYSV, .-ffi_closure_SYSV
+
+#if !FFI_NO_RAW_API
+
+/* Byte offsets into the raw closure object addressed by %eax on entry.
+   The cif pointer follows the trampoline, whose size is rounded up to
+   a multiple of 4; fun and user_data follow at 4-byte steps.  */
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+/* NOTE(review): presumably offsetof (ffi_cif, flags); the hard-coded
+   value must be kept in sync with the C structure -- verify.  */
+#define CIF_FLAGS_OFFSET 20
+
+	.align	4
+FFI_HIDDEN (ffi_closure_raw_SYSV)
+.globl ffi_closure_raw_SYSV
+	.type	ffi_closure_raw_SYSV, @function
+
+/* ffi_closure_raw_SYSV -- raw closure entry point.  Expects the closure
+   pointer in %eax, calls closure->fun (cif, &res, raw_args, user_data)
+   and loads the result from the 16-byte-deep buffer at -24(%ebp) into
+   the return register selected by the type code read from the cif.
+   %esi (callee-saved) keeps the cif pointer alive across the call.  */
+ffi_closure_raw_SYSV:
+.LFB3:
+	pushl	%ebp
+.LCFI4:
+	movl	%esp, %ebp
+.LCFI5:
+	pushl	%esi
+.LCFI6:
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lrcls_retint
+
+	/* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
+	   FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32.  */
+	cmpl	$FFI_TYPE_UINT64, %eax
+	jge	0f
+	cmpl	$FFI_TYPE_UINT8, %eax
+	jge	.Lrcls_retint
+0:
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lrcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lrcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lrcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lrcls_retllong
+	/* Any other type code: return without loading a value.  */
+.Lrcls_epilogue:
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.Lrcls_retint:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+.LFE3:
+	.size	ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
+#endif
+
+/* Hand-written DWARF call-frame information for this file: one CIE
+   followed by one FDE per function (FDE1 = ffi_call_SYSV,
+   FDE2 = ffi_closure_SYSV, FDE3 = ffi_closure_raw_SYSV).  The
+   advance_loc operands are differences between the .LCFI* labels
+   placed in the code above, so those labels must stay attached to the
+   instructions they describe.  */
+	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
+.Lframe1:
+	.long	.LECIE1-.LSCIE1	/* Length of Common Information Entry */
+.LSCIE1:
+	.long	0x0	/* CIE Identifier Tag */
+	.byte	0x1	/* CIE Version */
+#ifdef __PIC__
+	.ascii "zR\0"	/* CIE Augmentation */
+#else
+	.ascii "\0"	/* CIE Augmentation */
+#endif
+	.byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8	/* CIE RA Column */
+#ifdef __PIC__
+	.byte	0x1	/* .uleb128 0x1; Augmentation size */
+	.byte	0x1b	/* FDE Encoding (pcrel sdata4) */
+#endif
+	/* Initial rule on function entry: CFA = column 4 + 4, return
+	   address (column 8) saved at CFA + 1 * -4.  */
+	.byte	0xc	/* DW_CFA_def_cfa */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x88	/* DW_CFA_offset, column 0x8 */
+	.byte	0x1	/* .uleb128 0x1 */
+	.align 4
+.LECIE1:
+/* FDE 1: ffi_call_SYSV (.LFB1 .. .LFE1).  */
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB1-.	/* FDE initial location */
+#else
+	.long	.LFB1	/* FDE initial location */
+#endif
+	.long	.LFE1-.LFB1	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	/* After "pushl %ebp": CFA offset 8, column 5 saved at CFA - 8.  */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI0-.LFB1
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	/* After "movl %esp,%ebp": CFA is computed from column 5.  */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI1-.LCFI0
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	.align 4
+.LEFDE1:
+/* FDE 2: ffi_closure_SYSV (.LFB2 .. .LFE2).  */
+.LSFDE2:
+	.long	.LEFDE2-.LASFDE2	/* FDE Length */
+.LASFDE2:
+	.long	.LASFDE2-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB2-.	/* FDE initial location */
+#else
+	.long	.LFB2	/* FDE initial location (absolute address) */
+#endif
+	.long	.LFE2-.LFB2	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI2-.LFB2
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI3-.LCFI2
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
+	/* PIC-through-PLT variant only: at .LCFI7 column 3 has been
+	   stored at 8(%esp), i.e. CFA + 10 * -4 with the 40-byte frame.  */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI7-.LCFI3
+	.byte	0x83	/* DW_CFA_offset, column 0x3 */
+	.byte	0xa	/* .uleb128 0xa */
+#endif
+	.align 4
+.LEFDE2:
+
+#if !FFI_NO_RAW_API
+
+/* FDE 3: ffi_closure_raw_SYSV (.LFB3 .. .LFE3).  */
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+#ifdef __PIC__
+	.long	.LFB3-.	/* FDE initial location */
+#else
+	.long	.LFB3	/* FDE initial location (absolute address) */
+#endif
+	.long	.LFE3-.LFB3	/* FDE address range */
+#ifdef __PIC__
+	.byte	0x0	/* .uleb128 0x0; Augmentation size */
+#endif
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI4-.LFB3
+	.byte	0xe	/* DW_CFA_def_cfa_offset */
+	.byte	0x8	/* .uleb128 0x8 */
+	.byte	0x85	/* DW_CFA_offset, column 0x5 */
+	.byte	0x2	/* .uleb128 0x2 */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI5-.LCFI4
+	.byte	0xd	/* DW_CFA_def_cfa_register */
+	.byte	0x5	/* .uleb128 0x5 */
+	/* After "pushl %esi": column 6 saved at CFA + 3 * -4.  */
+	.byte	0x4	/* DW_CFA_advance_loc4 */
+	.long	.LCFI6-.LCFI5
+	.byte	0x86	/* DW_CFA_offset, column 0x6 */
+	.byte	0x3	/* .uleb128 0x3 */
+	.align 4
+.LEFDE3:
+
+#endif
+
+#endif /* ifndef __x86_64__ */
diff --git a/libffi-3.0.4/src/x86/unix64.S b/libffi-3.0.4/src/x86/unix64.S
new file mode 100644
index 0000000..cdc065f
--- /dev/null
+++ b/libffi-3.0.4/src/x86/unix64.S
@@ -0,0 +1,413 @@
+/* -----------------------------------------------------------------------
+   unix64.S - Copyright (c) 2002  Bo Thorsen <bo@suse.de>
+
+   x86-64 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
+	ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifdef __x86_64__
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+
+/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
+	            void *raddr, void (*fnaddr)());
+
+   Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.  */
+
+	.align	2
+	.globl	ffi_call_unix64
+	.type	ffi_call_unix64,@function
+
+/* Register use on entry, as read by the code below:
+     %rdi  args: base of the register-argument area
+     %rsi  bytes: %rdi + %rsi is the base of the local frame
+     %rdx  flags (low byte = FFI_TYPE_* return code)
+     %rcx  raddr: where to store the return value
+     %r8   address of the function to call
+     %r9d  non-zero if the SSE argument registers must be loaded
+
+   Register-argument area layout (offsets from args): 0..40 hold the
+   six integer argument registers, 48..160 hold %xmm0-%xmm7, and the
+   stack-passed arguments start at offset 176.
+
+   Local frame at args+bytes: +0 flags, +8 raddr, +16 saved %rbp,
+   +24 relocated return address.  */
+ffi_call_unix64:
+.LUW0:
+	movq	(%rsp), %r10		/* Load return address.  */
+	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
+	movq	%rdx, (%rax)		/* Save flags.  */
+	movq	%rcx, 8(%rax)		/* Save raddr.  */
+	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
+	movq	%r10, 24(%rax)		/* Relocate return address.  */
+	movq	%rax, %rbp		/* Finalize local stack frame.  */
+.LUW1:
+	movq	%rdi, %r10		/* Save a copy of the register area. */
+	movq	%r8, %r11		/* Save a copy of the target fn.  */
+	movl	%r9d, %eax		/* Set number of SSE registers.  */
+
+	/* Load up all argument registers.  */
+	movq	(%r10), %rdi
+	movq	8(%r10), %rsi
+	movq	16(%r10), %rdx
+	movq	24(%r10), %rcx
+	movq	32(%r10), %r8
+	movq	40(%r10), %r9
+	testl	%eax, %eax
+	jnz	.Lload_sse
+.Lret_from_load_sse:
+
+	/* Deallocate the reg arg area.  */
+	leaq	176(%r10), %rsp
+
+	/* Call the user function.  */
+	call	*%r11
+
+	/* Deallocate stack arg area; local stack frame in redzone.  */
+	leaq	24(%rbp), %rsp
+
+	movq	0(%rbp), %rcx		/* Reload flags.  */
+	movq	8(%rbp), %rdi		/* Reload raddr.  */
+	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
+.LUW2:
+
+	/* The first byte of the flags contains the FFI_TYPE.  */
+	/* Table entries are 32-bit offsets relative to .Lstore_table;
+	   %rsp now points at the relocated return address, so each
+	   handler finishes with a plain ret.  */
+	movzbl	%cl, %r10d
+	leaq	.Lstore_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+	.section .rodata
+.Lstore_table:
+	.long	.Lst_void-.Lstore_table		/* FFI_TYPE_VOID */
+	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_INT */
+	.long	.Lst_float-.Lstore_table	/* FFI_TYPE_FLOAT */
+	.long	.Lst_double-.Lstore_table	/* FFI_TYPE_DOUBLE */
+	.long	.Lst_ldouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lst_uint8-.Lstore_table	/* FFI_TYPE_UINT8 */
+	.long	.Lst_sint8-.Lstore_table	/* FFI_TYPE_SINT8 */
+	.long	.Lst_uint16-.Lstore_table	/* FFI_TYPE_UINT16 */
+	.long	.Lst_sint16-.Lstore_table	/* FFI_TYPE_SINT16 */
+	.long	.Lst_uint32-.Lstore_table	/* FFI_TYPE_UINT32 */
+	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_SINT32 */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_UINT64 */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_SINT64 */
+	.long	.Lst_struct-.Lstore_table	/* FFI_TYPE_STRUCT */
+	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_POINTER */
+
+	.text
+	.align 2
+/* Scalar return-value store handlers, reached through .Lstore_table.
+   On entry %rdi = raddr and the callee's result is still in %rax,
+   %xmm0 or %st(0).  Integer results narrower than 64 bits are widened
+   to a full 64-bit slot according to their signedness.  Every handler
+   must end in its own ret: falling through into the next handler
+   would overwrite the stored value with the wrong extension.  */
+.Lst_void:
+	ret
+	.align 2
+
+.Lst_uint8:
+	movzbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_sint8:
+	movsbq	%al, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_uint16:
+	movzwq	%ax, %rax
+	movq	%rax, (%rdi)
+	ret				/* Do not fall into .Lst_sint16.  */
+	.align 2
+.Lst_sint16:
+	movswq	%ax, %rax
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_uint32:
+	movl	%eax, %eax		/* 32-bit move zero-extends into %rax.  */
+	movq	%rax, (%rdi)
+	ret				/* Do not fall into .Lst_sint32.  */
+	.align 2
+.Lst_sint32:
+	cltq
+	movq	%rax, (%rdi)
+	ret
+	.align 2
+.Lst_int64:
+	movq	%rax, (%rdi)
+	ret
+
+	.align 2
+.Lst_float:
+	movss	%xmm0, (%rdi)
+	ret
+	.align 2
+.Lst_double:
+	movsd	%xmm0, (%rdi)
+	ret
+	.align 2
+.Lst_ldouble:
+	fstpt	(%rdi)
+	ret
+
+	.align 2
+/* Struct return handler.  On entry %rcx = flags, %rdi = raddr, and the
+   two eightbytes of the result are in some combination of %rax, %rdx,
+   %xmm0 and %xmm1.  */
+.Lst_struct:
+	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */
+
+	/* We have to locate the values now, and since we don't want to
+	   write too much data into the user's return value, we spill the
+	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
+	   control where the values are located.  Only one of the three
+	   bits will be set; see ffi_prep_cif_machdep for the pattern.  */
+	movd	%xmm0, %r10
+	movd	%xmm1, %r11
+	testl	$0x100, %ecx		/* Bit 8: first word in %xmm0, second in %rax.  */
+	cmovnz	%rax, %rdx
+	cmovnz	%r10, %rax
+	testl	$0x200, %ecx		/* Bit 9: first word in %rax, second in %xmm0.  */
+	cmovnz	%r10, %rdx
+	testl	$0x400, %ecx		/* Bit 10: %xmm0 then %xmm1.  */
+	cmovnz	%r10, %rax
+	cmovnz	%r11, %rdx
+	movq	%rax, (%rsi)
+	movq	%rdx, 8(%rsi)
+
+	/* Bits 12-31 contain the true size of the structure.  Copy from
+	   the scratch area to the true destination.  */
+	shrl	$12, %ecx		/* %ecx = byte count for rep movsb.  */
+	rep movsb			/* Copy %rcx bytes from (%rsi) to (%rdi).  */
+	ret
+
+	/* Many times we can avoid loading any SSE registers at all.
+	   It's not worth an indirect jump to load the exact set of
+	   SSE registers needed; zero or all is a good compromise.  */
+	/* Out-of-line continuation of ffi_call_unix64: %r10 is the base
+	   of the register-argument area.  movdqa requires each slot to
+	   be 16-byte aligned, so the area itself must be too.  */
+	.align 2
+.LUW3:
+.Lload_sse:
+	movdqa	48(%r10), %xmm0
+	movdqa	64(%r10), %xmm1
+	movdqa	80(%r10), %xmm2
+	movdqa	96(%r10), %xmm3
+	movdqa	112(%r10), %xmm4
+	movdqa	128(%r10), %xmm5
+	movdqa	144(%r10), %xmm6
+	movdqa	160(%r10), %xmm7
+	jmp	.Lret_from_load_sse
+
+.LUW4:
+	.size    ffi_call_unix64,.-ffi_call_unix64
+
+	.align	2
+	.globl ffi_closure_unix64
+	.type	ffi_closure_unix64,@function
+
+/* ffi_closure_unix64 -- closure entry point.  Spills the incoming
+   argument registers into a 200-byte frame, calls
+   ffi_closure_unix64_inner and then loads the result into the return
+   registers selected by the low byte of the value it returns.
+
+   Frame layout relative to %rsp after the allocation:
+       0..40    the six integer argument registers
+      48..160   %xmm0-%xmm7 (only written when the carry flag is set)
+     176..199   return-value buffer passed to the inner function
+     208        first stack-passed argument of the original caller
+
+   %r10 on entry is handed to the inner function as its first argument
+   (presumably the closure pointer set up by the trampoline).  */
+ffi_closure_unix64:
+.LUW5:
+	/* The carry flag is set by the trampoline iff SSE registers
+	   are used.  Don't clobber it before the branch instruction.  */
+	leaq    -200(%rsp), %rsp
+.LUW6:
+	movq	%rdi, (%rsp)
+	movq    %rsi, 8(%rsp)
+	movq    %rdx, 16(%rsp)
+	movq    %rcx, 24(%rsp)
+	movq    %r8, 32(%rsp)
+	movq    %r9, 40(%rsp)
+	jc      .Lsave_sse
+.Lret_from_save_sse:
+
+	movq	%r10, %rdi		/* arg 1: value passed in %r10 */
+	leaq	176(%rsp), %rsi		/* arg 2: return-value buffer */
+	movq	%rsp, %rdx		/* arg 3: saved register area */
+	leaq	208(%rsp), %rcx		/* arg 4: stack-passed arguments */
+	call	ffi_closure_unix64_inner@PLT
+
+	/* Deallocate stack frame early; return value is now in redzone.  */
+	addq	$200, %rsp
+.LUW7:
+
+	/* The first byte of the return value contains the FFI_TYPE.  */
+	/* Table entries are 32-bit offsets relative to .Lload_table.  */
+	movzbl	%al, %r10d
+	leaq	.Lload_table(%rip), %r11
+	movslq	(%r11, %r10, 4), %r10
+	addq	%r11, %r10
+	jmp	*%r10
+
+	.section .rodata
+.Lload_table:
+	.long	.Lld_void-.Lload_table		/* FFI_TYPE_VOID */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_INT */
+	.long	.Lld_float-.Lload_table		/* FFI_TYPE_FLOAT */
+	.long	.Lld_double-.Lload_table	/* FFI_TYPE_DOUBLE */
+	.long	.Lld_ldouble-.Lload_table	/* FFI_TYPE_LONGDOUBLE */
+	.long	.Lld_int8-.Lload_table		/* FFI_TYPE_UINT8 */
+	.long	.Lld_int8-.Lload_table		/* FFI_TYPE_SINT8 */
+	.long	.Lld_int16-.Lload_table		/* FFI_TYPE_UINT16 */
+	.long	.Lld_int16-.Lload_table		/* FFI_TYPE_SINT16 */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_UINT32 */
+	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_SINT32 */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_UINT64 */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_SINT64 */
+	.long	.Lld_struct-.Lload_table	/* FFI_TYPE_STRUCT */
+	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_POINTER */
+
+	.text
+	.align 2
+/* Return-value load handlers for ffi_closure_unix64, reached through
+   .Lload_table.  The 200-byte frame has already been released, so the
+   24-byte return buffer that was at 176(%rsp) is now at -24(%rsp).
+   %eax still holds the flags returned by the inner function.  */
+.Lld_void:
+	ret
+
+	.align 2
+.Lld_int8:
+	movzbl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int16:
+	movzwl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int32:
+	movl	-24(%rsp), %eax
+	ret
+	.align 2
+.Lld_int64:
+	movq	-24(%rsp), %rax
+	ret
+
+	.align 2
+.Lld_float:
+	movss	-24(%rsp), %xmm0
+	ret
+	.align 2
+.Lld_double:
+	movsd	-24(%rsp), %xmm0
+	ret
+	.align 2
+.Lld_ldouble:
+	fldt	-24(%rsp)
+	ret
+
+	.align 2
+.Lld_struct:
+	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
+	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
+	   both rdx and xmm1 with the second word.  For the remaining,
+	   bit 8 set means xmm0 gets the second word, and bit 9 means
+	   that rax gets the second word.  */
+	movq	-24(%rsp), %rcx
+	movq	-16(%rsp), %rdx
+	movq	-16(%rsp), %xmm1
+	testl	$0x100, %eax
+	cmovnz	%rdx, %rcx
+	movd	%rcx, %xmm0
+	/* The flags must be tested before %rax is overwritten; the movq
+	   below leaves the condition codes intact for the cmovnz.  */
+	testl	$0x200, %eax
+	movq	-24(%rsp), %rax
+	cmovnz	%rdx, %rax
+	ret
+
+	/* See the comment above .Lload_sse; the same logic applies here.  */
+	/* Out-of-line continuation of ffi_closure_unix64, entered while
+	   the 200-byte frame is still allocated.  */
+	.align 2
+.LUW8:
+.Lsave_sse:
+	movdqa	%xmm0, 48(%rsp)
+	movdqa	%xmm1, 64(%rsp)
+	movdqa	%xmm2, 80(%rsp)
+	movdqa	%xmm3, 96(%rsp)
+	movdqa	%xmm4, 112(%rsp)
+	movdqa	%xmm5, 128(%rsp)
+	movdqa	%xmm6, 144(%rsp)
+	movdqa	%xmm7, 160(%rsp)
+	jmp	.Lret_from_save_sse
+
+.LUW9:
+	.size	ffi_closure_unix64,.-ffi_closure_unix64
+
+/* Hand-written DWARF call-frame information: one CIE and two FDEs
+   (FDE1 covers .LUW0 .. .LUW4 = ffi_call_unix64, FDE3 covers
+   .LUW5 .. .LUW9 = ffi_closure_unix64).  The advance_loc operands are
+   differences between the .LUW* labels in the code above, so those
+   labels must stay attached to the instructions they describe.  */
+	.section	.eh_frame,"a",@progbits
+.Lframe1:
+	.long	.LECIE1-.LSCIE1		/* CIE Length */
+.LSCIE1:
+	.long	0			/* CIE Identifier Tag */
+	.byte	1			/* CIE Version */
+	.ascii "zR\0"			/* CIE Augmentation */
+	.uleb128 1			/* CIE Code Alignment Factor */
+	.sleb128 -8			/* CIE Data Alignment Factor */
+	.byte	0x10			/* CIE RA Column */
+	.uleb128 1			/* Augmentation size */
+	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.uleb128 7
+	.uleb128 8
+	.byte	0x80+16			/* DW_CFA_offset, %rip offset 1*-8 */
+	.uleb128 1
+	.align 8
+.LECIE1:
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1	/* FDE Length */
+.LASFDE1:
+	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
+	.long	.LUW0-.			/* FDE initial location */
+	.long	.LUW4-.LUW0		/* FDE address range */
+	.uleb128 0x0			/* Augmentation size */
+
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW1-.LUW0
+
+	/* New stack frame based off rbp.  This is a itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-8, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
+	.uleb128 6
+	.uleb128 32
+	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
+	.uleb128 2
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	/* From .LUW2 on, %rbp has been reloaded and %rsp points at the
+	   relocated return address again.  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW2-.LUW1
+	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
+	.uleb128 7
+	.uleb128 8
+	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
+
+	/* .LUW3 (.Lload_sse) runs with the %rbp-based frame active, so
+	   return to the state remembered above.  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW3-.LUW2
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align 8
+.LEFDE1:
+.LSFDE3:
+	.long	.LEFDE3-.LASFDE3	/* FDE Length */
+.LASFDE3:
+	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
+	.long	.LUW5-.			/* FDE initial location */
+	.long	.LUW9-.LUW5		/* FDE address range */
+	.uleb128 0x0			/* Augmentation size */
+
+	/* After the 200-byte frame is allocated: CFA = %rsp + 208.  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW6-.LUW5
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.uleb128 208
+	.byte	0xa			/* DW_CFA_remember_state */
+
+	/* After the frame is released: CFA = %rsp + 8.  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW7-.LUW6
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.uleb128 8
+
+	/* .LUW8 (.Lsave_sse) runs with the frame still allocated.  */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	.LUW8-.LUW7
+	.byte	0xb			/* DW_CFA_restore_state */
+
+	.align 8
+.LEFDE3:
+
+#endif /* __x86_64__ */
diff --git a/libffi-3.0.4/src/x86/win32.S b/libffi-3.0.4/src/x86/win32.S
new file mode 100644
index 0000000..eba11c1
--- /dev/null
+++ b/libffi-3.0.4/src/x86/win32.S
@@ -0,0 +1,391 @@
+/* -----------------------------------------------------------------------
+   win32.S - Copyright (c) 1996, 1998, 2001, 2002  Red Hat, Inc.
+	     Copyright (c) 2001  John Beniton
+	     Copyright (c) 2002  Ranjit Mathew
+			
+ 
+   X86 Foreign Function Interface
+ 
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+ 
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+ 
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
+	ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+ 
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+ 
+.text
+ 
+.globl ffi_prep_args
+ 
+        # This assumes we are using gas.
+        .balign 16
+.globl _ffi_call_SYSV
+ 
+/* _ffi_call_SYSV -- build an outgoing cdecl argument block, call the
+   target function and store its return value.
+
+   Stack arguments, addressed off %ebp after the prologue:
+      8(%ebp)  callback invoked as cb(stack_area, 12(%ebp)) to lay out
+               the outgoing arguments
+     12(%ebp)  value passed through to that callback
+     16(%ebp)  number of bytes to reserve for the outgoing arguments
+     20(%ebp)  return type code (FFI_TYPE_*)
+     24(%ebp)  where to store the return value, or NULL to discard it
+     28(%ebp)  address of the function to call
+
+   The return type is dispatched by a compare chain; %ecx holds the
+   type code until a handler matches, after which the handler reuses
+   %ecx as the destination pointer and must jump to the epilogue.  */
+_ffi_call_SYSV:
+        pushl %ebp
+        movl  %esp,%ebp
+ 
+        # Make room for all of the new args.
+        movl  16(%ebp),%ecx
+        subl  %ecx,%esp
+ 
+        movl  %esp,%eax
+ 
+        # Place all of the ffi_prep_args in position
+        pushl 12(%ebp)
+        pushl %eax
+        call  *8(%ebp)
+ 
+        # Return stack to previous state and call the function
+        addl  $8,%esp
+ 
+        # FIXME: Align the stack to a 128-bit boundary to avoid
+        # potential performance hits.
+
+	call  *28(%ebp)
+ 
+        # Remove the space we pushed for the args
+        movl  16(%ebp),%ecx
+        addl  %ecx,%esp
+ 
+        # Load %ecx with the return type code
+        movl  20(%ebp),%ecx
+ 
+        # If the return value pointer is NULL, assume no return value.
+        cmpl  $0,24(%ebp)
+        jne   retint
+ 
+        # Even if there is no space for the return value, we are
+        # obliged to handle floating-point values.
+        /* float, double and long double are all returned in %st(0);
+           if the value is not popped, every such call leaks one x87
+           register-stack slot.  */
+        cmpl  $FFI_TYPE_FLOAT,%ecx
+        je    1f
+        cmpl  $FFI_TYPE_DOUBLE,%ecx
+        je    1f
+        cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
+        jne   noretval
+1:
+        fstp  %st(0)
+ 
+        jmp   epilogue
+ 
+retint:
+        cmpl  $FFI_TYPE_INT,%ecx
+        jne   retfloat
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        jmp   epilogue
+ 
+retfloat:
+        cmpl  $FFI_TYPE_FLOAT,%ecx
+        jne   retdouble
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        fstps (%ecx)
+        jmp   epilogue
+ 
+retdouble:
+        cmpl  $FFI_TYPE_DOUBLE,%ecx
+        jne   retlongdouble
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        fstpl (%ecx)
+        jmp   epilogue
+ 
+retlongdouble:
+        cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
+        jne   retint64
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        fstpt (%ecx)
+        jmp   epilogue
+ 
+retint64:
+        cmpl  $FFI_TYPE_SINT64,%ecx
+        jne   retstruct1b
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        movl  %edx,4(%ecx)
+        /* %ecx now holds the result pointer, not the type code, so do
+           not fall into the compare chain below.  */
+        jmp   epilogue
+ 
+retstruct1b:
+        cmpl  $FFI_TYPE_SINT8,%ecx
+        jne   retstruct2b
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        movb  %al,0(%ecx)
+        jmp   epilogue
+ 
+retstruct2b:
+        cmpl  $FFI_TYPE_SINT16,%ecx
+        jne   retstruct
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        movw  %ax,0(%ecx)
+        jmp   epilogue
+ 
+retstruct:
+        # Nothing to do!
+ 
+noretval:
+epilogue:
+        movl %ebp,%esp
+        popl %ebp
+        ret
+ 
+.ffi_call_SYSV_end:
+
+        # This assumes we are using gas.
+        .balign 16
+.globl _ffi_call_STDCALL
+
+/* _ffi_call_STDCALL -- same contract as the cdecl call helper in this
+   file, except that the callee removes its own arguments, so no
+   argument space is popped after the call.
+
+   Stack arguments, addressed off %ebp after the prologue:
+      8(%ebp)  callback invoked as cb(stack_area, 12(%ebp)) to lay out
+               the outgoing arguments
+     12(%ebp)  value passed through to that callback
+     16(%ebp)  number of bytes to reserve for the outgoing arguments
+     20(%ebp)  return type code (FFI_TYPE_*)
+     24(%ebp)  where to store the return value, or NULL to discard it
+     28(%ebp)  address of the function to call
+
+   The return type is dispatched by a compare chain; %ecx holds the
+   type code until a handler matches, after which the handler reuses
+   %ecx as the destination pointer and must jump to the epilogue.  */
+_ffi_call_STDCALL:
+        pushl %ebp
+        movl  %esp,%ebp
+
+        # Make room for all of the new args.
+        movl  16(%ebp),%ecx
+        subl  %ecx,%esp
+
+        movl  %esp,%eax
+
+        # Place all of the ffi_prep_args in position
+        pushl 12(%ebp)
+        pushl %eax
+        call  *8(%ebp)
+
+        # Return stack to previous state and call the function
+        addl  $8,%esp
+
+        # FIXME: Align the stack to a 128-bit boundary to avoid
+        # potential performance hits.
+
+        call  *28(%ebp)
+
+        # stdcall functions pop arguments off the stack themselves
+
+        # Load %ecx with the return type code
+        movl  20(%ebp),%ecx
+
+        # If the return value pointer is NULL, assume no return value.
+        cmpl  $0,24(%ebp)
+        jne   sc_retint
+
+        # Even if there is no space for the return value, we are
+        # obliged to handle floating-point values.
+        /* float, double and long double are all returned in %st(0);
+           if the value is not popped, every such call leaks one x87
+           register-stack slot.  */
+        cmpl  $FFI_TYPE_FLOAT,%ecx
+        je    1f
+        cmpl  $FFI_TYPE_DOUBLE,%ecx
+        je    1f
+        cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
+        jne   sc_noretval
+1:
+        fstp  %st(0)
+
+        jmp   sc_epilogue
+
+sc_retint:
+        cmpl  $FFI_TYPE_INT,%ecx
+        jne   sc_retfloat
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        jmp   sc_epilogue
+
+sc_retfloat:
+        cmpl  $FFI_TYPE_FLOAT,%ecx
+        jne   sc_retdouble
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        fstps (%ecx)
+        jmp   sc_epilogue
+
+sc_retdouble:
+        cmpl  $FFI_TYPE_DOUBLE,%ecx
+        jne   sc_retlongdouble
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        fstpl (%ecx)
+        jmp   sc_epilogue
+
+sc_retlongdouble:
+        cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
+        jne   sc_retint64
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        fstpt (%ecx)
+        jmp   sc_epilogue
+
+sc_retint64:
+        cmpl  $FFI_TYPE_SINT64,%ecx
+        jne   sc_retstruct1b
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        movl  %eax,0(%ecx)
+        movl  %edx,4(%ecx)
+        /* %ecx now holds the result pointer, not the type code, so do
+           not fall into the compare chain below.  */
+        jmp   sc_epilogue
+
+sc_retstruct1b:
+        cmpl  $FFI_TYPE_SINT8,%ecx
+        jne   sc_retstruct2b
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        movb  %al,0(%ecx)
+        jmp   sc_epilogue
+
+sc_retstruct2b:
+        cmpl  $FFI_TYPE_SINT16,%ecx
+        jne   sc_retstruct
+        # Load %ecx with the pointer to storage for the return value
+        movl  24(%ebp),%ecx
+        movw  %ax,0(%ecx)
+        jmp   sc_epilogue
+
+sc_retstruct:
+        # Nothing to do!
+
+sc_noretval:
+sc_epilogue:
+        movl %ebp,%esp
+        popl %ebp
+        ret
+
+.ffi_call_STDCALL_end:
+
+/* _ffi_closure_STDCALL -- closure entry used for stdcall targets.
+   Builds the same 40-byte frame as _ffi_closure_SYSV, but the argument
+   pointer starts 4 bytes higher because an extra return address (the
+   stub's) sits between this frame and the arguments.  After calling
+   _ffi_closure_SYSV_inner (&resp, args) it joins the shared
+   result-loading code at .Lcls_return_result with %eax = type code and
+   %ecx = resp.  */
+	.globl _ffi_closure_STDCALL
+_ffi_closure_STDCALL:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	12(%ebp), %edx  /* account for stub return address on stack */
+	movl	%edx, 4(%esp)	/* args */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+	call	_ffi_closure_SYSV_inner
+	movl	-12(%ebp), %ecx
+	jmp     .Lcls_return_result
+.ffi_closure_STDCALL_end:
+
+/* _ffi_closure_SYSV -- closure entry point.  Sets up a 40-byte frame,
+   calls _ffi_closure_SYSV_inner (&resp, args) and then loads the
+   result into the return register selected by the type code that the
+   inner function returns in %eax.
+
+   Frame layout relative to %ebp:
+     -24(%ebp)  default result buffer (resp initially points here)
+     -12(%ebp)  resp, the result pointer; re-read after the call
+       8(%ebp)  first byte above the return address, passed as args
+
+   .Lcls_return_result is also the join point for _ffi_closure_STDCALL,
+   which arrives with the same %eax / %ecx / %ebp state.  */
+	.globl _ffi_closure_SYSV
+_ffi_closure_SYSV:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$40, %esp
+	leal	-24(%ebp), %edx
+	movl	%edx, -12(%ebp)	/* resp */
+	leal	8(%ebp), %edx
+	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
+	leal	-12(%ebp), %edx
+	movl	%edx, (%esp)	/* &resp */
+	call	_ffi_closure_SYSV_inner
+	movl	-12(%ebp), %ecx
+.Lcls_return_result:
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lcls_retint
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lcls_retllong
+	cmpl	$FFI_TYPE_SINT8, %eax	/* 1-byte struct */
+	je	.Lcls_retstruct1
+	cmpl	$FFI_TYPE_SINT16, %eax	/* 2-bytes struct */
+	je	.Lcls_retstruct2
+	/* Any other type code: return without loading a value.  */
+.Lcls_epilogue:
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+.Lcls_retint:
+	movl	(%ecx), %eax
+	jmp	.Lcls_epilogue
+.Lcls_retfloat:
+	flds	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retdouble:
+	fldl	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retldouble:
+	fldt	(%ecx)
+	jmp	.Lcls_epilogue
+.Lcls_retllong:
+	movl	(%ecx), %eax
+	movl	4(%ecx), %edx
+	jmp	.Lcls_epilogue
+.Lcls_retstruct1:
+	movsbl	(%ecx), %eax	/* sign-extend the single byte into %eax */
+	jmp	.Lcls_epilogue
+.Lcls_retstruct2:
+	movswl	(%ecx), %eax	/* sign-extend the 16-bit value into %eax */
+	jmp	.Lcls_epilogue
+.ffi_closure_SYSV_end:
+
+#if !FFI_NO_RAW_API
+
+/* Byte offsets into the raw closure object addressed by %eax on entry.
+   The cif pointer follows the trampoline, whose size is rounded up to
+   a multiple of 4; fun and user_data follow at 4-byte steps.  */
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+/* NOTE(review): presumably offsetof (ffi_cif, flags); the hard-coded
+   value must be kept in sync with the C structure -- verify.  */
+#define CIF_FLAGS_OFFSET 20
+
+/* _ffi_closure_raw_SYSV -- raw closure entry point.  Expects the
+   closure pointer in %eax, calls
+   closure->fun (cif, &res, raw_args, user_data) and loads the result
+   from the buffer at -24(%ebp) into the return register selected by
+   the type code read from the cif.  %esi (callee-saved) keeps the cif
+   pointer alive across the call.  Type codes other than INT, FLOAT,
+   DOUBLE, LONGDOUBLE and SINT64 return without loading a value.  */
+	.balign	16
+	.globl _ffi_closure_raw_SYSV
+_ffi_closure_raw_SYSV:
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%esi
+	subl	$36, %esp
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+	movl	%edx, 12(%esp)	/* user_data */
+	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
+	movl	%edx, 8(%esp)	/* raw_args */
+	leal	-24(%ebp), %edx
+	movl	%edx, 4(%esp)	/* &res */
+	movl	%esi, (%esp)	/* cif */
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	.Lrcls_retint
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	.Lrcls_retfloat
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	.Lrcls_retdouble
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	je	.Lrcls_retldouble
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	.Lrcls_retllong
+.Lrcls_epilogue:
+	addl	$36, %esp
+	popl	%esi
+	popl	%ebp
+	ret
+.Lrcls_retint:
+	movl	-24(%ebp), %eax
+	jmp	.Lrcls_epilogue
+.Lrcls_retfloat:
+	flds	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retdouble:
+	fldl	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retldouble:
+	fldt	-24(%ebp)
+	jmp	.Lrcls_epilogue
+.Lrcls_retllong:
+	movl	-24(%ebp), %eax
+	movl	-20(%ebp), %edx
+	jmp	.Lrcls_epilogue
+.ffi_closure_raw_SYSV_end:
+
+#endif