#ifdef __ELF__
// Mark the object as not requiring an executable stack.
.section .note.GNU-stack,"",%progbits
#endif

#include "sbcl.h"
#include "lispregs.h"
#include "globals.h"
#include "validate.h"

#include "genesis/simple-fun.h"
#include "genesis/fdefn.h"
#include "genesis/closure.h"
#include "genesis/code.h"
#include "genesis/funcallable-instance.h"
#include "genesis/static-symbols.h"
#include "genesis/thread.h"

#define CSYMBOL(x) x

#define FUNCDEF(x)	.text ; \
			.align 3 ; \
			.type x,@function ; \
x:

#define GFUNCDEF(x)	.globl x ; \
	FUNCDEF(x)

#define SET_SIZE(x) .size x,.-x

/* Load a register from a global, using the register as an intermediary */
/* The register will be a fixnum for one instruction, so this is gc-safe */

#define load(reg,global) \
	lis reg,global@ha; lwz reg,global@l(reg)
#define store(reg,temp,global) \
	lis temp,global@ha; stw reg,global@l(temp)

#define	FIRST_SAVE_FPR	14	/* lowest-numbered non-volatile FPR */
#define	FIRST_SAVE_GPR	14	/* lowest-numbered non-volatile GPR */
#define NGPR_SAVE_BYTES(n) ((32-(~1&((n)+1)))*4)
#define FRAME_ARG_BYTES(n)  (((((n)+2)*4)+15)&~15)
#define	NFPR_SAVE_BYTES(n) ((32-(n))*8)

#define FRAME_SIZE(first_g,first_f,out_arg_words,savecr) \
(NFPR_SAVE_BYTES(first_f)+ NGPR_SAVE_BYTES(first_g)+ FRAME_ARG_BYTES(out_arg_words+savecr))
#define SAVE_FPR(n) stfd n,-8*(32-(n))(11)
#define SAVE_GPR(n) stw n,-4*(32-(n))(11)
#define FULL_FRAME_SIZE FRAME_SIZE(FIRST_SAVE_GPR,FIRST_SAVE_FPR,0,1)

#define RESTORE_FPR(n) lfd n,-8*(32-(n))(11)
#define RESTORE_GPR(n) lwz n,-4*(32-(n))(11)

#define C_FULL_PROLOG \
	mflr 0 ; \
	stw 0,4(1) ; \
	mr 11,1 ; \
	stwu 1,-FULL_FRAME_SIZE(1) ; \
	SAVE_FPR(14) ; \
	SAVE_FPR(15) ; \
	SAVE_FPR(16) ; \
	SAVE_FPR(17) ; \
	SAVE_FPR(18) ; \
	SAVE_FPR(19) ; \
	SAVE_FPR(20) ; \
	SAVE_FPR(21) ; \
	SAVE_FPR(22) ; \
	SAVE_FPR(23) ; \
	SAVE_FPR(24) ; \
	SAVE_FPR(25) ; \
	SAVE_FPR(26) ; \
	SAVE_FPR(27) ; \
	SAVE_FPR(28) ; \
	SAVE_FPR(29) ; \
	SAVE_FPR(30) ; \
	SAVE_FPR(31) ; \
	la 11,-NFPR_SAVE_BYTES(FIRST_SAVE_FPR)(11) ; \
	SAVE_GPR(14) ; \
	SAVE_GPR(15) ; \
	SAVE_GPR(16) ; \
	SAVE_GPR(17) ; \
	SAVE_GPR(18) ; \
	SAVE_GPR(19) ; \
	SAVE_GPR(20) ; \
	SAVE_GPR(21) ; \
	SAVE_GPR(22) ; \
	SAVE_GPR(23) ; \
	SAVE_GPR(24) ; \
	SAVE_GPR(25) ; \
	SAVE_GPR(26) ; \
	SAVE_GPR(27) ; \
	SAVE_GPR(28) ; \
	SAVE_GPR(29) ; \
	SAVE_GPR(30) ; \
	SAVE_GPR(31) ; \
	mfcr 0	; \
	stw 0,8(1)

#define C_FULL_EPILOG \
	lwz 5,8(1) ; \
	mtcrf 255,5 ; \
	la 11,FULL_FRAME_SIZE-NFPR_SAVE_BYTES(FIRST_SAVE_FPR)(1) ; \
	RESTORE_GPR(14) ; \
	RESTORE_GPR(15) ; \
	RESTORE_GPR(16) ; \
	RESTORE_GPR(17) ; \
	RESTORE_GPR(18) ; \
	RESTORE_GPR(19) ; \
	RESTORE_GPR(20) ; \
	RESTORE_GPR(21) ; \
	RESTORE_GPR(22) ; \
	RESTORE_GPR(23) ; \
	RESTORE_GPR(24) ; \
	RESTORE_GPR(25) ; \
	RESTORE_GPR(26) ; \
	RESTORE_GPR(27) ; \
	RESTORE_GPR(28) ; \
	RESTORE_GPR(29) ; \
	RESTORE_GPR(30) ; \
	RESTORE_GPR(31) ; \
	la 11,NFPR_SAVE_BYTES(FIRST_SAVE_FPR)(11) ; \
	RESTORE_FPR(14) ; \
	RESTORE_FPR(15) ; \
	RESTORE_FPR(16) ; \
	RESTORE_FPR(17) ; \
	RESTORE_FPR(18) ; \
	RESTORE_FPR(19) ; \
	RESTORE_FPR(20) ; \
	RESTORE_FPR(21) ; \
	RESTORE_FPR(22) ; \
	RESTORE_FPR(23) ; \
	RESTORE_FPR(24) ; \
	RESTORE_FPR(25) ; \
	RESTORE_FPR(26) ; \
	RESTORE_FPR(27) ; \
	RESTORE_FPR(28) ; \
	RESTORE_FPR(29) ; \
	RESTORE_FPR(30) ; \
	RESTORE_FPR(31) ; \
	lwz 1,0(1) ; \
	lwz 0,4(1) ; \
	mtlr 0 ; \

#define BEGIN_PSEUDO_ATOMIC stb reg_NULL,THREAD_PSEUDO_ATOMIC_BITS_OFFSET(reg_THREAD)
#define END_PSEUDO_ATOMIC \
	stb reg_THREAD, THREAD_PSEUDO_ATOMIC_BITS_OFFSET(reg_THREAD) ; \
	lhz reg_NL3, (2+THREAD_PSEUDO_ATOMIC_BITS_OFFSET)(reg_THREAD) ; \
	twnei reg_NL3, 0

	.text

#ifdef LISP_FEATURE_OS_THREAD_STACK
	GFUNCDEF(funcall1_switching_stack)
        /* The arguments are switched, funcall1_switching_stack(arg, function)
           to avoid shuffling registers
        */
	/* save LR on old stack */
	mflr	REG(0)
	stw	REG(0),4(REG(1))

	/* store old SP and switch to new stack */
        lwz     REG(10),THREAD_ALIEN_STACK_START_OFFSET(REG(3))
        addis   REG(10),REG(10),ALIEN_STACK_SIZE@ha
#if ALIEN_STACK_SIZE % 65536 != 0
        addi    REG(10),REG(10),ALIEN_STACK_SIZE@l
#endif
	stwu	REG(1),-16(REG(10))
	mr	REG(1),REG(10)

	/* call function */
	mtctr	REG(4)
	bctrl

	/* restore old SP and LR then return */
	lwz	REG(1),0(REG(1))
	lwz	REG(0),4(REG(1))
	mtlr	REG(0)
	blr
	SET_SIZE(funcall1_switching_stack)
#endif
/*
 * Function to transfer control into lisp.  The lisp object to invoke is
 * passed as the first argument, which puts it in NL0
 */

	GFUNCDEF(call_into_lisp)
	C_FULL_PROLOG
	/* NL0 - function, NL1 - frame pointer, NL2 - nargs. */
#if defined(LISP_FEATURE_SB_THREAD)
	/* We need to obtain a pointer to our TLS block before we do
	 * anything else.  For this, we call pthread_getspecific().
	 * We've preserved all of the callee-saves registers, so we
	 * can use them to stash our arguments temporarily while we
	 * make the call. */
	mr reg_A0, reg_NL0
	mr reg_A1, reg_NL1
	mr reg_A2, reg_NL2

	/* Call out to obtain our TLS block. */
	load(reg_NL0,CSYMBOL(current_thread))
	lis reg_CFUNC,CSYMBOL(pthread_getspecific)@h
	ori reg_CFUNC,reg_CFUNC,CSYMBOL(pthread_getspecific)@l
	mtctr reg_CFUNC
	bctrl
	mr reg_THREAD, reg_NL0

	/* Restore our original parameters. */
	mr reg_NL2, reg_A2
	mr reg_NL1, reg_A1
	mr reg_NL0, reg_A0
#else
	load(reg_THREAD,CSYMBOL(all_threads))
#endif
	/* store(reg_POLL,11,saver2) */
	/* Initialize tagged registers */
	li reg_ZERO,0
	li reg_CODE,0
	li reg_CNAME,0
	li reg_LEXENV,0
	li reg_FDEFN,0
	li reg_OCFP,0
	li reg_LRA,0
	li reg_A0,0
	li reg_A1,0
	li reg_A2,0
	li reg_A3,0
	li reg_L0,0
	li reg_L1,0
	li reg_L2,0
	li reg_LIP,0
	lis reg_NULL,NIL@h
	ori reg_NULL,reg_NULL,NIL@l
	/* Turn on pseudo-atomic */

	BEGIN_PSEUDO_ATOMIC
#if defined(LISP_FEATURE_SB_THREAD)
	stw reg_ZERO,THREAD_FOREIGN_FUNCTION_CALL_ACTIVE_OFFSET(reg_THREAD)
	lwz reg_BSP,THREAD_BINDING_STACK_POINTER_OFFSET(reg_THREAD)
	lwz reg_CSP,THREAD_CONTROL_STACK_POINTER_OFFSET(reg_THREAD)
	lwz reg_OCFP,THREAD_CONTROL_FRAME_POINTER_OFFSET(reg_THREAD)
#else
	store(reg_ZERO,reg_NL4,CSYMBOL(foreign_function_call_active))
	load(reg_BSP,CSYMBOL(current_binding_stack_pointer))
	load(reg_CSP,CSYMBOL(current_control_stack_pointer))
	load(reg_OCFP,CSYMBOL(current_control_frame_pointer))
#endif

	/* No longer atomic, and check for interrupt */
	END_PSEUDO_ATOMIC

	/* Pass in the arguments */

	mr reg_CFP,reg_NL1
	mr reg_LEXENV,reg_NL0
	lwz reg_A0,0(reg_CFP)
	lwz reg_A1,4(reg_CFP)
	lwz reg_A2,8(reg_CFP)
	lwz reg_A3,12(reg_CFP)

	/* Calculate LRA */
	lis reg_LRA,lra@h
	ori reg_LRA,reg_LRA,lra@l
	addi reg_LRA,reg_LRA,OTHER_POINTER_LOWTAG

	/* Function is an indirect closure */
	lwz reg_CODE,SIMPLE_FUN_SELF_OFFSET(reg_LEXENV)
	addi reg_LIP,reg_CODE,SIMPLE_FUN_INSTS_OFFSET
	mtctr reg_LIP
	slwi reg_NARGS,reg_NL2,2
	bctr

	.align 3
lra:
	.long RETURN_PC_WIDETAG

	/* Blow off any extra values. */
	mr reg_CSP,reg_OCFP
	nop

	/* Return the one value. */

	mr REG(3),reg_A0

	/* Turn on  pseudo-atomic */
	BEGIN_PSEUDO_ATOMIC

#if defined(LISP_FEATURE_SB_THREAD)
	/* Store lisp state */
	stw reg_BSP,THREAD_BINDING_STACK_POINTER_OFFSET(reg_THREAD)
	stw reg_CSP,THREAD_CONTROL_STACK_POINTER_OFFSET(reg_THREAD)
	stw reg_CFP,THREAD_CONTROL_FRAME_POINTER_OFFSET(reg_THREAD)

	/* No longer in Lisp. */
	stw reg_THREAD,THREAD_FOREIGN_FUNCTION_CALL_ACTIVE_OFFSET(reg_THREAD)
#else
	/* Store lisp state */
	store(reg_BSP,reg_NL2,CSYMBOL(current_binding_stack_pointer))
	store(reg_CSP,reg_NL2,CSYMBOL(current_control_stack_pointer))
	store(reg_CFP,reg_NL2,CSYMBOL(current_control_frame_pointer))

	/* No longer in Lisp. */
	store(reg_NL1,reg_NL2,CSYMBOL(foreign_function_call_active))
#endif

	/* Check for interrupt */
	END_PSEUDO_ATOMIC

	/* Back to C */
	C_FULL_EPILOG
	blr
	SET_SIZE(call_into_lisp)


	GFUNCDEF(call_into_c)
	/* We're kind of low on unboxed, non-dedicated registers here:
	most of the unboxed registers may have outgoing C args in them.
	CFUNC is going to have to go in the CTR in a moment, anyway
	so we'll free it up soon.  reg_NFP is preserved by lisp if it
	has a meaningful value in it, so we can use it.  reg_NARGS is
	free when it's not holding a copy of the "real" reg_NL3, which
	gets tied up by the pseudo-atomic mechanism */
	mtctr reg_CFUNC
	mflr reg_LIP
	/* Build a lisp stack frame */
	mr reg_OCFP,reg_CFP
	mr reg_CFP,reg_CSP
	la reg_CSP,32(reg_CSP)
	stw reg_OCFP,0(reg_CFP)
	stw reg_CODE,8(reg_CFP)
	/* The pseudo-atomic mechanism wants to use reg_NL3, but that
	may be an outgoing C argument.  Copy reg_NL3 to something that's
	unboxed and -not- one of the C argument registers */
	mr reg_NARGS,reg_NL3

	/* Turn on pseudo-atomic */
	BEGIN_PSEUDO_ATOMIC

	/* Convert the return address to an offset and save it on the stack. */
	sub reg_NFP,reg_LIP,reg_CODE
	la reg_NFP,OTHER_POINTER_LOWTAG(reg_NFP)
	stw reg_NFP,4(reg_CFP)

#ifdef LISP_FEATURE_SB_THREAD
	/* Store Lisp state */
	stw reg_BSP,THREAD_BINDING_STACK_POINTER_OFFSET(reg_THREAD)
	stw reg_CSP,THREAD_CONTROL_STACK_POINTER_OFFSET(reg_THREAD)
	stw reg_CFP,THREAD_CONTROL_FRAME_POINTER_OFFSET(reg_THREAD)

	/* No longer in Lisp. */
	stw reg_CSP,THREAD_FOREIGN_FUNCTION_CALL_ACTIVE_OFFSET(reg_THREAD)
#else
	/* Store Lisp state */
	/* load(reg_CFUNC,current_thread) */

	store(reg_BSP,reg_CFUNC,CSYMBOL(current_binding_stack_pointer))
	store(reg_CSP,reg_CFUNC,CSYMBOL(current_control_stack_pointer))
	store(reg_CFP,reg_CFUNC,CSYMBOL(current_control_frame_pointer))

	/* No longer in Lisp */
	store(reg_CSP,reg_CFUNC,CSYMBOL(foreign_function_call_active))
#endif
	/* Disable pseudo-atomic; check pending interrupt */
	END_PSEUDO_ATOMIC

	mr reg_NL3,reg_NARGS

        /* Into C we go. */
	bctrl

	/* Re-establish NIL */
	lis reg_NULL,NIL@h
	ori reg_NULL,reg_NULL,NIL@l
	/* And reg_ZERO */
	li reg_ZERO,0

	/* If we GC'ed during the FF code (as the result of a callback ?)
	the tagged lisp registers may now contain garbage (since the
	registers were saved by C and not seen by the GC.)  Put something
	harmless in all such registers before allowing an interrupt */
        li reg_FDEFN,0
	li reg_CODE,0
	li reg_CNAME,0
	li reg_LEXENV,0
	/* reg_OCFP was pointing to a control stack frame & was preserved by C */
	li reg_LRA,0
	li reg_A0,0
	li reg_A1,0
	li reg_A2,0
	li reg_A3,0
	li reg_L0,0
	li reg_L1,0
	li reg_L2,0
	li reg_LIP,0

	/* Atomic ... */
	BEGIN_PSEUDO_ATOMIC

#if defined(LISP_FEATURE_SB_THREAD)
	/* No longer in foreign function call. */
	stw reg_ZERO,THREAD_FOREIGN_FUNCTION_CALL_ACTIVE_OFFSET(reg_THREAD)

	/* The binding stack pointer isn't preserved by C. */
	lwz reg_BSP,THREAD_BINDING_STACK_POINTER_OFFSET(reg_THREAD)
#else
	/* No long in foreign function call. */
	store(reg_ZERO,reg_NL2,CSYMBOL(foreign_function_call_active))

	/* The free pointer may have moved */
	/* (moved below) */

	/* The BSP wasn't preserved by C, so load it */
	load(reg_BSP,CSYMBOL(current_binding_stack_pointer))
#endif

	/* Other lisp stack/frame pointers were preserved by C.
	I can't imagine why they'd have moved */

	/* Get the return address back. */
	lwz reg_LIP,4(reg_CFP)
	lwz reg_CODE,8(reg_CFP)
	add reg_LIP,reg_CODE,reg_LIP
	la reg_LIP,-OTHER_POINTER_LOWTAG(reg_LIP)

        /* Debugger expects LR to be valid when we come out of PA */
	mtlr reg_LIP

	/* No longer atomic */
	END_PSEUDO_ATOMIC

	/* Reset the lisp stack. */
	mr reg_CSP,reg_CFP
	mr reg_CFP,reg_OCFP

	/* And back into Lisp. */
	blr

	SET_SIZE(call_into_c)

	/* The fun_end_breakpoint support here is considered by the
	authors of the other $ARCH-assem.S files to be magic, and it
	is.  It is a small fragment of code that is copied into a heap
	code-object when needed, and contains an LRA object, code to
	convert a single-value return to unknown-values format, and a
	trap_FunEndBreakpoint. */
	GFUNCDEF(fun_end_breakpoint_guts)
	.globl CSYMBOL(fun_end_breakpoint_trap)
	.globl CSYMBOL(fun_end_breakpoint_end)

	/* We are receiving unknown multiple values, thus must deal
	with the single-value and multiple-value cases separately. */
	b fun_end_breakpoint_multiple_values
	nop

	/* Compute the correct value for reg_CODE based on the LRA.
	This is a "simple" matter of subtracting a constant from
	reg_LRA (where the LRA is stored by the return sequence) to
	obtain a tagged pointer to the enclosing code component.  Both
	values are tagged OTHER_POINTER_LOWTAG, so we just have to
	account for N words (see calculation for RETURN_PC_WIDETAG, above)
	between the two addresses. */
        /* KLUDGE: I have no idea why CODE_SIZE+4 is correct here,
	let alone the general formula for keeping this maintenance-free */
	addi reg_CODE, reg_LRA, (CODE_SIZE+4)*-N_WORD_BYTES

	/* Multiple values are stored relative to reg_OCFP, which we
	set to be the current top-of-stack. */
	mr reg_OCFP, reg_CSP

	/* Reserve a save location for the one value we have. */
	addi reg_CSP, reg_CSP, 4

	/* Record the number of values we have as a FIXNUM. */
	li reg_NARGS, 4

	/* Blank the remaining arg-passing registers. */
	mr reg_A1, reg_NULL
	mr reg_A2, reg_NULL
	mr reg_A3, reg_NULL

	/* And branch to our trap. */
	b CSYMBOL(fun_end_breakpoint_trap)

fun_end_breakpoint_multiple_values:
	/* Compute the correct value for reg_CODE.  See the
	explanation for the single-value case, above. */
        /* XXX: I have literally no idea why this constant is -24 */
	addi reg_CODE, reg_LRA, -24

	/* The actual magic trap. */
CSYMBOL(fun_end_breakpoint_trap):
	twllei	reg_ZERO, trap_FunEndBreakpoint

	/* Finally, the debugger needs to know where the end of the
	fun_end_breakpoint_guts are, so that it may calculate its size
	in order to populate out a suitably-sized code object. */
CSYMBOL(fun_end_breakpoint_end):
	SET_SIZE(fun_end_breakpoint_guts)


	GFUNCDEF(ppc_flush_cache_line)
	dcbf 0,REG(3)
	sync
	icbi 0,REG(3)
	sync
	isync
	blr
	SET_SIZE(ppc_flush_cache_line)

        GFUNCDEF(do_pending_interrupt)
	trap
	blr
/* King Nato's branch has a nop here. Do we need this? */
	SET_SIZE(do_pending_interrupt)
