mirror of
				https://github.com/espressif/esp-idf.git
				synced 2025-11-04 09:01:40 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			940 lines
		
	
	
		
			27 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			940 lines
		
	
	
		
			27 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * xtensa/coreasm.h -- assembler-specific definitions that depend on CORE configuration
 | 
						|
 *
 | 
						|
 *  Source for configuration-independent binaries (which link in a
 | 
						|
 *  configuration-specific HAL library) must NEVER include this file.
 | 
						|
 *  It is perfectly normal, however, for the HAL itself to include this file.
 | 
						|
 *
 | 
						|
 *  This file must NOT include xtensa/config/system.h.  Any assembler
 | 
						|
 *  header file that depends on system information should likely go
 | 
						|
 *  in a new systemasm.h (or sysasm.h) header file.
 | 
						|
 *
 | 
						|
 *  NOTE: macro beqi32 is NOT configuration-dependent, and is placed
 | 
						|
 *        here until we have a proper configuration-independent header file.
 | 
						|
 */
 | 
						|
 | 
						|
/* $Id: //depot/rel/Eaglenest/Xtensa/OS/include/xtensa/coreasm.h#3 $ */
 | 
						|
 | 
						|
/*
 | 
						|
 * Copyright (c) 2000-2014 Tensilica Inc.
 | 
						|
 *
 | 
						|
 * Permission is hereby granted, free of charge, to any person obtaining
 | 
						|
 * a copy of this software and associated documentation files (the
 | 
						|
 * "Software"), to deal in the Software without restriction, including
 | 
						|
 * without limitation the rights to use, copy, modify, merge, publish,
 | 
						|
 * distribute, sublicense, and/or sell copies of the Software, and to
 | 
						|
 * permit persons to whom the Software is furnished to do so, subject to
 | 
						|
 * the following conditions:
 | 
						|
 *
 | 
						|
 * The above copyright notice and this permission notice shall be included
 | 
						|
 * in all copies or substantial portions of the Software.
 | 
						|
 *
 | 
						|
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 | 
						|
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 | 
						|
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 | 
						|
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 | 
						|
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 | 
						|
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 | 
						|
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 | 
						|
 */
 | 
						|
 | 
						|
#ifndef XTENSA_COREASM_H
 | 
						|
#define XTENSA_COREASM_H
 | 
						|
 | 
						|
/*
 | 
						|
 *  Tell header files this is assembly source, so they can avoid non-assembler
 | 
						|
 *  definitions (eg. C types etc):
 | 
						|
 */
 | 
						|
#ifndef _ASMLANGUAGE	/* conditionalize to avoid cpp warnings (3rd parties might use same macro) */
 | 
						|
#define _ASMLANGUAGE
 | 
						|
#endif
 | 
						|
 | 
						|
#include <xtensa/config/core.h>
 | 
						|
#include <xtensa/config/specreg.h>
 | 
						|
#include <xtensa/config/system.h>
 | 
						|
 | 
						|
/*
 | 
						|
 *  Assembly-language specific definitions (assembly macros, etc.).
 | 
						|
 */
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  find_ms_setbit
 | 
						|
 *
 | 
						|
 *  This macro finds the most significant bit that is set in <as>
 | 
						|
 *  and return its index + <base> in <ad>, or <base> - 1 if <as> is zero.
 | 
						|
 *  The index counts starting at zero for the lsbit, so the return
 | 
						|
 *  value ranges from <base>-1 (no bit set) to <base>+31 (msbit set).
 | 
						|
 *
 | 
						|
 *  Parameters:
 | 
						|
 *	<ad>	destination address register (any register)
 | 
						|
 *	<as>	source address register
 | 
						|
 *	<at>	temporary address register (must be different than <as>)
 | 
						|
 *	<base>	constant value added to result (usually 0 or 1)
 | 
						|
 *  On entry:
 | 
						|
 *	<ad> = undefined if different than <as>
 | 
						|
 *	<as> = value whose most significant set bit is to be found
 | 
						|
 *	<at> = undefined
 | 
						|
 *	no other registers are used by this macro.
 | 
						|
 *  On exit:
 | 
						|
 *	<ad> = <base> + index of msbit set in original <as>,
 | 
						|
 *	     = <base> - 1 if original <as> was zero.
 | 
						|
 *	<as> clobbered (if not <ad>)
 | 
						|
 *	<at> clobbered (if not <ad>)
 | 
						|
 *  Example:
 | 
						|
 *	find_ms_setbit a0, a4, a0, 0		-- return in a0 index of msbit set in a4
 | 
						|
 */
 | 
						|
 | 
						|
	.macro	find_ms_setbit ad, as, at, base
 | 
						|
#if XCHAL_HAVE_NSA
 | 
						|
	movi	\at, 31+\base
 | 
						|
	nsau	\as, \as	// get index of \as, numbered from msbit (32 if absent)
 | 
						|
	sub	\ad, \at, \as	// get numbering from lsbit (0..31, -1 if absent)
 | 
						|
#else /* XCHAL_HAVE_NSA */
 | 
						|
	movi	\at, \base	// start with result of 0 (point to lsbit of 32)
 | 
						|
 | 
						|
	beqz	\as, 2f		// special case for zero argument: return -1
 | 
						|
	bltui	\as, 0x10000, 1f	// is it one of the 16 lsbits? (if so, check lower 16 bits)
 | 
						|
	addi	\at, \at, 16	// no, increment result to upper 16 bits (of 32)
 | 
						|
	//srli	\as, \as, 16	// check upper half (shift right 16 bits)
 | 
						|
	extui	\as, \as, 16, 16	// check upper half (shift right 16 bits)
 | 
						|
1:	bltui	\as, 0x100, 1f	// is it one of the 8 lsbits? (if so, check lower 8 bits)
 | 
						|
	addi	\at, \at, 8	// no, increment result to upper 8 bits (of 16)
 | 
						|
	srli	\as, \as, 8	// shift right to check upper 8 bits
 | 
						|
1:	bltui	\as, 0x10, 1f	// is it one of the 4 lsbits? (if so, check lower 4 bits)
 | 
						|
	addi	\at, \at, 4	// no, increment result to upper 4 bits (of 8)
 | 
						|
	srli	\as, \as, 4	// shift right 4 bits to check upper half
 | 
						|
1:	bltui	\as, 0x4, 1f	// is it one of the 2 lsbits? (if so, check lower 2 bits)
 | 
						|
	addi	\at, \at, 2	// no, increment result to upper 2 bits (of 4)
 | 
						|
	srli	\as, \as, 2	// shift right 2 bits to check upper half
 | 
						|
1:	bltui	\as, 0x2, 1f	// is it the lsbit?
 | 
						|
	addi	\at, \at, 2	// no, increment result to upper bit (of 2)
 | 
						|
2:	addi	\at, \at, -1	// (from just above: add 1;  from beqz: return -1)
 | 
						|
	//srli	\as, \as, 1
 | 
						|
1:				// done! \at contains index of msbit set (or -1 if none set)
 | 
						|
	.if	0x\ad - 0x\at	// destination different than \at ? (works because regs are a0-a15)
 | 
						|
	mov	\ad, \at	// then move result to \ad
 | 
						|
	.endif
 | 
						|
#endif /* XCHAL_HAVE_NSA */
 | 
						|
	.endm	// find_ms_setbit
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  find_ls_setbit
 | 
						|
 *
 | 
						|
 *  This macro finds the least significant bit that is set in <as>,
 | 
						|
 *  and return its index in <ad>.
 | 
						|
 *  Usage is the same as for the find_ms_setbit macro.
 | 
						|
 *  Example:
 | 
						|
 *	find_ls_setbit a0, a4, a0, 0	-- return in a0 index of lsbit set in a4
 | 
						|
 */
 | 
						|
 | 
						|
	.macro	find_ls_setbit ad, as, at, base
 | 
						|
	neg	\at, \as	// keep only the least-significant bit that is set...
 | 
						|
	and	\as, \at, \as	// ... in \as
 | 
						|
	find_ms_setbit	\ad, \as, \at, \base
 | 
						|
	.endm	// find_ls_setbit
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  find_ls_one
 | 
						|
 *
 | 
						|
 *  Same as find_ls_setbit with base zero.
 | 
						|
 *  Source (as) and destination (ad) registers must be different.
 | 
						|
 *  Provided for backward compatibility.
 | 
						|
 */
 | 
						|
 | 
						|
	.macro	find_ls_one ad, as
 | 
						|
	find_ls_setbit	\ad, \as, \ad, 0
 | 
						|
	.endm	// find_ls_one
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  floop, floopnez, floopgtz, floopend
 | 
						|
 *
 | 
						|
 *  These macros are used for fast inner loops that
 | 
						|
 *  work whether or not the Loops options is configured.
 | 
						|
 *  If the Loops option is configured, they simply use
 | 
						|
 *  the zero-overhead LOOP instructions; otherwise
 | 
						|
 *  they use explicit decrement and branch instructions.
 | 
						|
 *
 | 
						|
 *  They are used in pairs, with floop, floopnez or floopgtz
 | 
						|
 *  at the beginning of the loop, and floopend at the end.
 | 
						|
 *
 | 
						|
 *  Each pair of loop macro calls must be given the loop count
 | 
						|
 *  address register and a unique label for that loop.
 | 
						|
 *
 | 
						|
 *  Example:
 | 
						|
 *
 | 
						|
 *	movi	 a3, 16     // loop 16 times
 | 
						|
 *	floop    a3, myloop1
 | 
						|
 *	:
 | 
						|
 *	bnez     a7, end1	// exit loop if a7 != 0
 | 
						|
 *	:
 | 
						|
 *	floopend a3, myloop1
 | 
						|
 *  end1:
 | 
						|
 *
 | 
						|
 *  Like the LOOP instructions, these macros cannot be
 | 
						|
 *  nested, must include at least one instruction,
 | 
						|
 *  cannot call functions inside the loop, etc.
 | 
						|
 *  The loop can be exited by jumping to the instruction
 | 
						|
 *  following floopend (or elsewhere outside the loop),
 | 
						|
 *  or continued by jumping to a NOP instruction placed
 | 
						|
 *  immediately before floopend.
 | 
						|
 *
 | 
						|
 *  Unlike LOOP instructions, the register passed to floop*
 | 
						|
 *  cannot be used inside the loop, because it is used as
 | 
						|
 *  the loop counter if the Loops option is not configured.
 | 
						|
 *  And its value is undefined after exiting the loop.
 | 
						|
 *  And because the loop counter register is active inside
 | 
						|
 *  the loop, you can't easily use this construct to loop
 | 
						|
 *  across a register file using ROTW as you might with LOOP
 | 
						|
 *  instructions, unless you copy the loop register along.
 | 
						|
 */
 | 
						|
 | 
						|
	/*  Named label version of the macros:  */
 | 
						|
 | 
						|
	.macro	floop		ar, endlabel
 | 
						|
	floop_		\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
 | 
						|
	.endm
 | 
						|
 | 
						|
	.macro	floopnez	ar, endlabel
 | 
						|
	floopnez_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
 | 
						|
	.endm
 | 
						|
 | 
						|
	.macro	floopgtz	ar, endlabel
 | 
						|
	floopgtz_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
 | 
						|
	.endm
 | 
						|
 | 
						|
	.macro	floopend	ar, endlabel
 | 
						|
	floopend_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
 | 
						|
	.endm
 | 
						|
 | 
						|
	/*  Numbered local label version of the macros:  */
 | 
						|
#if 0 /*UNTESTED*/
 | 
						|
	.macro	floop89		ar
 | 
						|
	floop_		\ar, 8, 9f
 | 
						|
	.endm
 | 
						|
 | 
						|
	.macro	floopnez89	ar
 | 
						|
	floopnez_	\ar, 8, 9f
 | 
						|
	.endm
 | 
						|
 | 
						|
	.macro	floopgtz89	ar
 | 
						|
	floopgtz_	\ar, 8, 9f
 | 
						|
	.endm
 | 
						|
 | 
						|
	.macro	floopend89	ar
 | 
						|
	floopend_	\ar, 8b, 9
 | 
						|
	.endm
 | 
						|
#endif /*0*/
 | 
						|
 | 
						|
	/*  Underlying version of the macros:  */
 | 
						|
 | 
						|
	.macro	floop_	ar, startlabel, endlabelref
 | 
						|
	.ifdef	_infloop_
 | 
						|
	.if	_infloop_
 | 
						|
	.err	// Error: floop cannot be nested
 | 
						|
	.endif
 | 
						|
	.endif
 | 
						|
	.set	_infloop_, 1
 | 
						|
#if XCHAL_HAVE_LOOPS
 | 
						|
	loop	\ar, \endlabelref
 | 
						|
#else /* XCHAL_HAVE_LOOPS */
 | 
						|
\startlabel:
 | 
						|
	addi	\ar, \ar, -1
 | 
						|
#endif /* XCHAL_HAVE_LOOPS */
 | 
						|
	.endm	// floop_
 | 
						|
 | 
						|
	.macro	floopnez_	ar, startlabel, endlabelref
 | 
						|
	.ifdef	_infloop_
 | 
						|
	.if	_infloop_
 | 
						|
	.err	// Error: floopnez cannot be nested
 | 
						|
	.endif
 | 
						|
	.endif
 | 
						|
	.set	_infloop_, 1
 | 
						|
#if XCHAL_HAVE_LOOPS
 | 
						|
	loopnez	\ar, \endlabelref
 | 
						|
#else /* XCHAL_HAVE_LOOPS */
 | 
						|
	beqz	\ar, \endlabelref
 | 
						|
\startlabel:
 | 
						|
	addi	\ar, \ar, -1
 | 
						|
#endif /* XCHAL_HAVE_LOOPS */
 | 
						|
	.endm	// floopnez_
 | 
						|
 | 
						|
	.macro	floopgtz_	ar, startlabel, endlabelref
 | 
						|
	.ifdef	_infloop_
 | 
						|
	.if	_infloop_
 | 
						|
	.err	// Error: floopgtz cannot be nested
 | 
						|
	.endif
 | 
						|
	.endif
 | 
						|
	.set	_infloop_, 1
 | 
						|
#if XCHAL_HAVE_LOOPS
 | 
						|
	loopgtz	\ar, \endlabelref
 | 
						|
#else /* XCHAL_HAVE_LOOPS */
 | 
						|
	bltz	\ar, \endlabelref
 | 
						|
	beqz	\ar, \endlabelref
 | 
						|
\startlabel:
 | 
						|
	addi	\ar, \ar, -1
 | 
						|
#endif /* XCHAL_HAVE_LOOPS */
 | 
						|
	.endm	// floopgtz_
 | 
						|
 | 
						|
 | 
						|
	.macro	floopend_	ar, startlabelref, endlabel
 | 
						|
	.ifndef	_infloop_
 | 
						|
	.err	// Error: floopend without matching floopXXX
 | 
						|
	.endif
 | 
						|
	.ifeq	_infloop_
 | 
						|
	.err	// Error: floopend without matching floopXXX
 | 
						|
	.endif
 | 
						|
	.set	_infloop_, 0
 | 
						|
#if ! XCHAL_HAVE_LOOPS
 | 
						|
	bnez	\ar, \startlabelref
 | 
						|
#endif /* XCHAL_HAVE_LOOPS */
 | 
						|
\endlabel:
 | 
						|
	.endm	// floopend_
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  crsil  --  conditional RSIL (read/set interrupt level)
 | 
						|
 *
 | 
						|
 *  Executes the RSIL instruction if it exists, else just reads PS.
 | 
						|
 *  The RSIL instruction does not exist in the new exception architecture
 | 
						|
 *  if the interrupt option is not selected.
 | 
						|
 */
 | 
						|
 | 
						|
	.macro	crsil	ar, newlevel
 | 
						|
#if XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS
 | 
						|
	rsil	\ar, \newlevel
 | 
						|
#else
 | 
						|
	rsr	\ar, PS
 | 
						|
#endif
 | 
						|
	.endm	// crsil
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  safe_movi_a0  --  move constant into a0 when L32R is not safe
 | 
						|
 *
 | 
						|
 *  This macro is typically used by interrupt/exception handlers.
 | 
						|
 *  Loads a 32-bit constant in a0, without using any other register,
 | 
						|
 *  and without corrupting the LITBASE register, even when the
 | 
						|
 *  value of the LITBASE register is unknown (eg. when application
 | 
						|
 *  code and interrupt/exception handling code are built independently,
 | 
						|
 *  and thus with independent values of the LITBASE register;
 | 
						|
 *  debug monitors are one example of this).
 | 
						|
 *
 | 
						|
 *  Worst-case size of resulting code:  17 bytes.
 | 
						|
 */
 | 
						|
 | 
						|
	.macro	safe_movi_a0	constant
 | 
						|
#if XCHAL_HAVE_ABSOLUTE_LITERALS
 | 
						|
	/*  Contort a PC-relative literal load even though we may be in litbase-relative mode: */
 | 
						|
	j	1f
 | 
						|
	.begin	no-transform			// ensure what follows is assembled exactly as-is
 | 
						|
	.align	4				// ensure constant and call0 target ...
 | 
						|
	.byte	0				// ... are 4-byte aligned (call0 instruction is 3 bytes long)
 | 
						|
1:	call0	2f				// read PC (that follows call0) in a0
 | 
						|
	.long	\constant			// 32-bit constant to load into a0
 | 
						|
2:
 | 
						|
	.end	no-transform
 | 
						|
	l32i	a0, a0, 0			// load constant
 | 
						|
#else
 | 
						|
	movi	a0, \constant			// no LITBASE, can assume PC-relative L32R
 | 
						|
#endif
 | 
						|
	.endm
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  window_spill{4,8,12}
 | 
						|
 *
 | 
						|
 *  These macros spill callers' register windows to the stack.
 | 
						|
 *  They work for both privileged and non-privileged tasks.
 | 
						|
 *  Must be called from a windowed ABI context, eg. within
 | 
						|
 *  a windowed ABI function (ie. valid stack frame, window
 | 
						|
 *  exceptions enabled, not in exception mode, etc).
 | 
						|
 *
 | 
						|
 *  This macro requires a single invocation of the window_spill_common
 | 
						|
 *  macro in the same assembly unit and section.
 | 
						|
 *
 | 
						|
 *  Note that using window_spill{4,8,12} macros is more efficient
 | 
						|
 *  than calling a function implemented using window_spill_function,
 | 
						|
 *  because the latter needs extra code to figure out the size of
 | 
						|
 *  the call to the spilling function.
 | 
						|
 *
 | 
						|
 *  Example usage:
 | 
						|
 *	
 | 
						|
 *		.text
 | 
						|
 *		.align	4
 | 
						|
 *		.global	some_function
 | 
						|
 *		.type	some_function,@function
 | 
						|
 *	some_function:
 | 
						|
 *		entry	a1, 16
 | 
						|
 *		:
 | 
						|
 *		:
 | 
						|
 *
 | 
						|
 *		window_spill4	// Spill windows of some_function's callers; preserves a0..a3 only;
 | 
						|
 *				// to use window_spill{8,12} in this example function we'd have
 | 
						|
 *				// to increase space allocated by the entry instruction, because
 | 
						|
 *				// 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed
 | 
						|
 *				// for call8/window_spill8 or call12/window_spill12 respectively.
 | 
						|
 *
 | 
						|
 *		:
 | 
						|
 *
 | 
						|
 *		retw
 | 
						|
 *
 | 
						|
 *		window_spill_common	// instantiates code used by window_spill4
 | 
						|
 *
 | 
						|
 *
 | 
						|
 *  On entry:
 | 
						|
 *	none (if window_spill4)
 | 
						|
 *	stack frame has enough space allocated for call8 (if window_spill8)
 | 
						|
 *	stack frame has enough space allocated for call12 (if window_spill12)
 | 
						|
 *  On exit:
 | 
						|
 *	 a4..a15 clobbered (if window_spill4)
 | 
						|
 *	 a8..a15 clobbered (if window_spill8)
 | 
						|
 *	a12..a15 clobbered (if window_spill12)
 | 
						|
 *	no caller windows are in live registers
 | 
						|
 */
 | 
						|
 | 
						|
	.macro	window_spill4
 | 
						|
#if XCHAL_HAVE_WINDOWED
 | 
						|
# if XCHAL_NUM_AREGS == 16
 | 
						|
	movi	a15, 0			// for 16-register files, no need to call to reach the end
 | 
						|
# elif XCHAL_NUM_AREGS == 32
 | 
						|
	call4	.L__wdwspill_assist28	// call deep enough to clear out any live callers
 | 
						|
# elif XCHAL_NUM_AREGS == 64
 | 
						|
	call4	.L__wdwspill_assist60	// call deep enough to clear out any live callers
 | 
						|
# endif
 | 
						|
#endif
 | 
						|
	.endm	// window_spill4
 | 
						|
 | 
						|
	.macro	window_spill8
 | 
						|
#if XCHAL_HAVE_WINDOWED
 | 
						|
# if XCHAL_NUM_AREGS == 16
 | 
						|
	movi	a15, 0			// for 16-register files, no need to call to reach the end
 | 
						|
# elif XCHAL_NUM_AREGS == 32
 | 
						|
	call8	.L__wdwspill_assist24	// call deep enough to clear out any live callers
 | 
						|
# elif XCHAL_NUM_AREGS == 64
 | 
						|
	call8	.L__wdwspill_assist56	// call deep enough to clear out any live callers
 | 
						|
# endif
 | 
						|
#endif
 | 
						|
	.endm	// window_spill8
 | 
						|
 | 
						|
	.macro	window_spill12
 | 
						|
#if XCHAL_HAVE_WINDOWED
 | 
						|
# if XCHAL_NUM_AREGS == 16
 | 
						|
	movi	a15, 0			// for 16-register files, no need to call to reach the end
 | 
						|
# elif XCHAL_NUM_AREGS == 32
 | 
						|
	call12	.L__wdwspill_assist20	// call deep enough to clear out any live callers
 | 
						|
# elif XCHAL_NUM_AREGS == 64
 | 
						|
	call12	.L__wdwspill_assist52	// call deep enough to clear out any live callers
 | 
						|
# endif
 | 
						|
#endif
 | 
						|
	.endm	// window_spill12
 | 
						|
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  window_spill_function
 | 
						|
 *
 | 
						|
 *  This macro outputs a function that will spill its caller's callers'
 | 
						|
 *  register windows to the stack.  Eg. it could be used to implement
 | 
						|
 *  a version of xthal_window_spill() that works in non-privileged tasks.
 | 
						|
 *  This works for both privileged and non-privileged tasks.
 | 
						|
 *
 | 
						|
 *  Typical usage:
 | 
						|
 *
 | 
						|
 *		.text
 | 
						|
 *		.align	4
 | 
						|
 *		.global	my_spill_function
 | 
						|
 *		.type	my_spill_function,@function
 | 
						|
 *	my_spill_function:
 | 
						|
 *		window_spill_function
 | 
						|
 *
 | 
						|
 *  On entry to resulting function:
 | 
						|
 *	none
 | 
						|
 *  On exit from resulting function:
 | 
						|
 *	none (no caller windows are in live registers)
 | 
						|
 */
 | 
						|
 | 
						|
	.macro	window_spill_function
 | 
						|
#if XCHAL_HAVE_WINDOWED
 | 
						|
#  if XCHAL_NUM_AREGS == 32
 | 
						|
	entry	sp, 48
 | 
						|
	bbci.l	a0, 31, 1f		// branch if called with call4
 | 
						|
	bbsi.l	a0, 30, 2f		// branch if called with call12
 | 
						|
	call8	.L__wdwspill_assist16	// called with call8, only need another 8
 | 
						|
	retw
 | 
						|
1:	call12	.L__wdwspill_assist16	// called with call4, only need another 12
 | 
						|
	retw
 | 
						|
2:	call4	.L__wdwspill_assist16	// called with call12, only need another 4
 | 
						|
	retw
 | 
						|
#  elif XCHAL_NUM_AREGS == 64
 | 
						|
	entry	sp, 48
 | 
						|
	bbci.l	a0, 31, 1f		// branch if called with call4
 | 
						|
	bbsi.l	a0, 30, 2f		// branch if called with call12
 | 
						|
	call4	.L__wdwspill_assist52	// called with call8, only need a call4
 | 
						|
	retw
 | 
						|
1:	call8	.L__wdwspill_assist52	// called with call4, only need a call8
 | 
						|
	retw
 | 
						|
2:	call12	.L__wdwspill_assist40	// called with call12, can skip a call12
 | 
						|
	retw
 | 
						|
#  elif XCHAL_NUM_AREGS == 16
 | 
						|
	entry	sp, 16
 | 
						|
	bbci.l	a0, 31, 1f	// branch if called with call4
 | 
						|
	bbsi.l	a0, 30, 2f	// branch if called with call12
 | 
						|
	movi	a7, 0		// called with call8
 | 
						|
	retw
 | 
						|
1:	movi	a11, 0		// called with call4
 | 
						|
2:	retw			// if called with call12, everything already spilled
 | 
						|
 | 
						|
//	movi	a15, 0		// trick to spill all but the direct caller
 | 
						|
//	j	1f
 | 
						|
//	//  The entry instruction is magical in the assembler (gets auto-aligned)
 | 
						|
//	//  so we have to jump to it to avoid falling through the padding.
 | 
						|
//	//  We need entry/retw to know where to return.
 | 
						|
//1:	entry	sp, 16
 | 
						|
//	retw
 | 
						|
#  else
 | 
						|
#   error "unrecognized address register file size"
 | 
						|
#  endif
 | 
						|
 | 
						|
#endif /* XCHAL_HAVE_WINDOWED */
 | 
						|
	window_spill_common
 | 
						|
	.endm	// window_spill_function
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  window_spill_common
 | 
						|
 *
 | 
						|
 *  Common code used by any number of invocations of the window_spill##
 | 
						|
 *  and window_spill_function macros.
 | 
						|
 *
 | 
						|
 *  Must be instantiated exactly once within a given assembly unit,
 | 
						|
 *  within call/j range of and same section as window_spill##
 | 
						|
 *  macro invocations for that assembly unit.
 | 
						|
 *  (Is automatically instantiated by the window_spill_function macro.)
 | 
						|
 */
 | 
						|
 | 
						|
	.macro	window_spill_common
 | 
						|
#if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64)
 | 
						|
	.ifndef	.L__wdwspill_defined
 | 
						|
#  if XCHAL_NUM_AREGS >= 64
 | 
						|
.L__wdwspill_assist60:
 | 
						|
	entry	sp, 32
 | 
						|
	call8	.L__wdwspill_assist52
 | 
						|
	retw
 | 
						|
.L__wdwspill_assist56:
 | 
						|
	entry	sp, 16
 | 
						|
	call4	.L__wdwspill_assist52
 | 
						|
	retw
 | 
						|
.L__wdwspill_assist52:
 | 
						|
	entry	sp, 48
 | 
						|
	call12	.L__wdwspill_assist40
 | 
						|
	retw
 | 
						|
.L__wdwspill_assist40:
 | 
						|
	entry	sp, 48
 | 
						|
	call12	.L__wdwspill_assist28
 | 
						|
	retw
 | 
						|
#  endif
 | 
						|
.L__wdwspill_assist28:
 | 
						|
	entry	sp, 48
 | 
						|
	call12	.L__wdwspill_assist16
 | 
						|
	retw
 | 
						|
.L__wdwspill_assist24:
 | 
						|
	entry	sp, 32
 | 
						|
	call8	.L__wdwspill_assist16
 | 
						|
	retw
 | 
						|
.L__wdwspill_assist20:
 | 
						|
	entry	sp, 16
 | 
						|
	call4	.L__wdwspill_assist16
 | 
						|
	retw
 | 
						|
.L__wdwspill_assist16:
 | 
						|
	entry	sp, 16
 | 
						|
	movi	a15, 0
 | 
						|
	retw
 | 
						|
	.set	.L__wdwspill_defined, 1
 | 
						|
	.endif
 | 
						|
#endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */
 | 
						|
	.endm	// window_spill_common
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  beqi32
 | 
						|
 *
 | 
						|
 *  macro implements version of beqi for arbitrary 32-bit immediate value
 | 
						|
 *
 | 
						|
 *     beqi32 ax, ay, imm32, label
 | 
						|
 * 
 | 
						|
 *  Compares value in register ax with imm32 value and jumps to label if
 | 
						|
 *  equal. Clobbers register ay if needed
 | 
						|
 *
 | 
						|
 */
 | 
						|
   .macro beqi32	ax, ay, imm, label
 | 
						|
    .ifeq ((\imm-1) & ~7)	// 1..8 ?
 | 
						|
		beqi	\ax, \imm, \label
 | 
						|
    .else
 | 
						|
      .ifeq (\imm+1)		// -1 ?
 | 
						|
		beqi	\ax, \imm, \label
 | 
						|
      .else
 | 
						|
        .ifeq (\imm)		// 0 ?
 | 
						|
		beqz	\ax, \label
 | 
						|
        .else
 | 
						|
		//  We could also handle immediates 10,12,16,32,64,128,256 
 | 
						|
		//  but it would be a long macro...
 | 
						|
		movi	\ay, \imm
 | 
						|
		beq	\ax, \ay, \label
 | 
						|
        .endif
 | 
						|
      .endif
 | 
						|
    .endif
 | 
						|
   .endm // beqi32
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  isync_retw_nop
 | 
						|
 *
 | 
						|
 *  This macro must be invoked immediately after ISYNC if ISYNC
 | 
						|
 *  would otherwise be immediately followed by RETW (or other instruction
 | 
						|
 *  modifying WindowBase or WindowStart), in a context where
 | 
						|
 *  kernel vector mode may be selected, and level-one interrupts
 | 
						|
 *  and window overflows may be enabled, on an XEA1 configuration.
 | 
						|
 *
 | 
						|
 *  On hardware with erratum "XEA1KWIN" (see <xtensa/core.h> for details),
 | 
						|
 *  XEA1 code must have at least one instruction between ISYNC and RETW if
 | 
						|
 *  run in kernel vector mode with interrupts and window overflows enabled.
 | 
						|
 */
 | 
						|
	.macro	isync_retw_nop
 | 
						|
#if XCHAL_MAYHAVE_ERRATUM_XEA1KWIN
 | 
						|
	nop
 | 
						|
#endif
 | 
						|
	.endm
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  isync_erratum453
 | 
						|
 *
 | 
						|
 *  This macro must be invoked at certain points in the code,
 | 
						|
 *  such as in exception and interrupt vectors in particular,
 | 
						|
 *  to work around erratum 453.
 | 
						|
 */
 | 
						|
	.macro	isync_erratum453
 | 
						|
#if XCHAL_ERRATUM_453
 | 
						|
	isync
 | 
						|
#endif
 | 
						|
	.endm
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  abs
 | 
						|
 *
 | 
						|
 *  implements abs on machines that do not have it configured
 | 
						|
 */
 | 
						|
	
 | 
						|
#if !XCHAL_HAVE_ABS
 | 
						|
	.macro abs arr, ars
 | 
						|
	.ifc \arr, \ars
 | 
						|
	//src equal dest is less efficient
 | 
						|
	bgez \arr, 1f
 | 
						|
	neg \arr, \arr
 | 
						|
1:	
 | 
						|
	.else
 | 
						|
	neg \arr, \ars
 | 
						|
	movgez \arr, \ars, \ars
 | 
						|
	.endif
 | 
						|
	.endm
 | 
						|
#endif /* !XCHAL_HAVE_ABS */
 | 
						|
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  addx2
 | 
						|
 *  
 | 
						|
 *  implements addx2 on machines that do not have it configured
 | 
						|
 *     
 | 
						|
 */
 | 
						|
 | 
						|
#if !XCHAL_HAVE_ADDX
 | 
						|
	.macro addx2 arr, ars, art
 | 
						|
	.ifc \arr, \art
 | 
						|
	.ifc \arr, \ars
 | 
						|
	// addx2 a, a, a     (not common)
 | 
						|
	.err
 | 
						|
	.else
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	.endif
 | 
						|
	.else
 | 
						|
	//addx2 a, b, c
 | 
						|
	//addx2 a, a, b
 | 
						|
	//addx2 a, b, b
 | 
						|
	slli \arr, \ars, 1
 | 
						|
	add  \arr, \arr, \art
 | 
						|
	.endif
 | 
						|
	.endm
 | 
						|
#endif /* !XCHAL_HAVE_ADDX */
 | 
						|
	
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  addx4
 | 
						|
 * 
 | 
						|
 *  implements addx4 on machines that do not have it configured
 | 
						|
 *
 | 
						|
 */
 | 
						|
	
 | 
						|
#if !XCHAL_HAVE_ADDX
 | 
						|
	.macro addx4 arr, ars, art
 | 
						|
	.ifc \arr, \art
 | 
						|
	.ifc \arr, \ars
 | 
						|
	// addx4 a, a, a     (not common)
 | 
						|
	 .err
 | 
						|
	 .else
 | 
						|
	//# addx4 a, b, a
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	.endif
 | 
						|
	.else
 | 
						|
	//addx4 a, b, c
 | 
						|
	//addx4 a, a, b
 | 
						|
	//addx4 a, b, b
 | 
						|
	slli \arr, \ars, 2
 | 
						|
	add  \arr, \arr, \art
 | 
						|
	.endif
 | 
						|
	.endm
 | 
						|
#endif /* !XCHAL_HAVE_ADDX */
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  addx8
 | 
						|
 * 
 | 
						|
 *  implements addx8 on machines that do not have it configured
 | 
						|
 * 
 | 
						|
 */
 | 
						|
 | 
						|
#if !XCHAL_HAVE_ADDX
 | 
						|
	.macro addx8 arr, ars, art
 | 
						|
	.ifc \arr, \art
 | 
						|
	.ifc \arr, \ars
 | 
						|
	//addx8 a, a, a     (not common)
 | 
						|
	.err
 | 
						|
	.else
 | 
						|
	//addx8 a, b, a
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	add \arr, \ars, \art
 | 
						|
	.endif
 | 
						|
	.else
 | 
						|
	//addx8 a, b, c
 | 
						|
	//addx8 a, a, b
 | 
						|
	//addx8 a, b, b
 | 
						|
	slli \arr, \ars, 3
 | 
						|
	add  \arr, \arr, \art
 | 
						|
	.endif
 | 
						|
	.endm
 | 
						|
#endif /* !XCHAL_HAVE_ADDX */
 | 
						|
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  rfe_rfue
 | 
						|
 * 
 | 
						|
 *  Maps to RFUE on XEA1, and RFE on XEA2.  No mapping on XEAX.
 | 
						|
 */
 | 
						|
 | 
						|
#if XCHAL_HAVE_XEA1
 | 
						|
	.macro	rfe_rfue
 | 
						|
	rfue
 | 
						|
	.endm
 | 
						|
#elif XCHAL_HAVE_XEA2
 | 
						|
	.macro	rfe_rfue
 | 
						|
	rfe
 | 
						|
	.endm
 | 
						|
#endif
 | 
						|
 
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  abi_entry
 | 
						|
 * 
 | 
						|
 *  Generate proper function entry sequence for the current ABI
 | 
						|
 *  (windowed or call0).  Takes care of allocating stack space (up to 1kB)
 | 
						|
 *  and saving the return PC, if necessary.  The corresponding abi_return
 | 
						|
 *  macro does the corresponding stack deallocation and restoring return PC.
 | 
						|
 *
 | 
						|
 *  Parameters are:
 | 
						|
 *
 | 
						|
 *	locsize		Number of bytes to allocate on the stack
 | 
						|
 *			for local variables (and for args to pass to
 | 
						|
 *			callees, if any calls are made).  Defaults to zero.
 | 
						|
 *			The macro rounds this up to a multiple of 16.
 | 
						|
 *			NOTE:  large values are allowed (e.g. up to 1 GB).
 | 
						|
 *
 | 
						|
 *	callsize	Maximum call size made by this function.
 | 
						|
 *			Leave zero (default) for leaf functions, i.e. if
 | 
						|
 *			this function makes no calls to other functions.
 | 
						|
 *			Otherwise must be set to 4, 8, or 12 according
 | 
						|
 *			to whether the "largest" call made is a call[x]4,
 | 
						|
 *			call[x]8, or call[x]12 (for call0 ABI, it makes
 | 
						|
 *			no difference whether this is set to 4, 8 or 12,
 | 
						|
 *			but it must be set to one of these values).
 | 
						|
 *
 | 
						|
 *  NOTE:  It is up to the caller to align the entry point, declare the
 | 
						|
 *  function symbol, make it global, etc.
 | 
						|
 *
 | 
						|
 *  NOTE:  This macro relies on assembler relaxation for large values
 | 
						|
 *  of locsize.  It might not work with the no-transform directive.
 | 
						|
 *  NOTE:  For the call0 ABI, this macro ensures SP is allocated or
 | 
						|
 *  de-allocated cleanly, i.e. without temporarily allocating too much
 | 
						|
 *  (or allocating negatively!) due to addi relaxation.
 | 
						|
 *
 | 
						|
 *  NOTE:  Generating the proper sequence and register allocation for
 | 
						|
 *  making calls in an ABI independent manner is a separate topic not
 | 
						|
 *  covered by this macro.
 | 
						|
 *
 | 
						|
 *  NOTE:  To access arguments, you can't use a fixed offset from SP.
 | 
						|
 *  The offset depends on the ABI, whether the function is leaf, etc.
 | 
						|
 *  The simplest method is probably to use the .locsz symbol, which
 | 
						|
 *  is set by this macro to the actual number of bytes allocated on
 | 
						|
 *  the stack, in other words, to the offset from SP to the arguments.
 | 
						|
 *  E.g. for a function whose arguments are all 32-bit integers, you
 | 
						|
 *  can get the 7th and 8th arguments (1st and 2nd args stored on stack)
 | 
						|
 *  using:
 | 
						|
 *	l32i	a2, sp, .locsz
 | 
						|
 *	l32i	a3, sp, .locsz+4
 | 
						|
 *  (this example works as long as locsize is under L32I's offset limit
 | 
						|
 *   of 1020 minus up to 48 bytes of ABI-specific stack usage;
 | 
						|
 *   otherwise you might first need to do "addi a?, sp, .locsz"
 | 
						|
 *   or similar sequence).
 | 
						|
 *
 | 
						|
 *  NOTE:  For call0 ABI, this macro (and abi_return) may clobber a9
 | 
						|
 *  (a caller-saved register).
 | 
						|
 *
 | 
						|
 *  Examples:
 | 
						|
 *		abi_entry
 | 
						|
 *		abi_entry  5
 | 
						|
 *		abi_entry  22, 8
 | 
						|
 *		abi_entry  0, 4
 | 
						|
 */
 | 
						|
 | 
						|
	/*
 | 
						|
	 *  Compute .locsz and .callsz without emitting any instructions.
 | 
						|
	 *  Used by both abi_entry and abi_return.
 | 
						|
	 *  Assumes locsize >= 0.
 | 
						|
	 */
 | 
						|
	.macro	abi_entry_size locsize=0, callsize=0
 | 
						|
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
 | 
						|
	.ifeq	\callsize
 | 
						|
	 .set	.callsz, 16
 | 
						|
	.else
 | 
						|
	 .ifeq	\callsize-4
 | 
						|
	  .set	.callsz, 16
 | 
						|
	 .else
 | 
						|
	  .ifeq	\callsize-8
 | 
						|
	   .set	.callsz, 32
 | 
						|
	  .else
 | 
						|
	   .ifeq \callsize-12
 | 
						|
	    .set .callsz, 48
 | 
						|
	   .else
 | 
						|
	    .error	"abi_entry: invalid call size \callsize"
 | 
						|
	   .endif
 | 
						|
	  .endif
 | 
						|
	 .endif
 | 
						|
	.endif
 | 
						|
	.set	.locsz, .callsz + ((\locsize + 15) & -16)
 | 
						|
#else
 | 
						|
	.set	.callsz, \callsize
 | 
						|
	.if	.callsz		/* if calls, need space for return PC */
 | 
						|
	 .set	.locsz, (\locsize + 4 + 15) & -16
 | 
						|
	.else
 | 
						|
	 .set	.locsz, (\locsize + 15) & -16
 | 
						|
	.endif
 | 
						|
#endif
 | 
						|
	.endm
 | 
						|
 | 
						|
	.macro abi_entry locsize=0, callsize=0
 | 
						|
	.iflt	\locsize
 | 
						|
	 .error	"abi_entry: invalid negative size of locals (\locsize)"
 | 
						|
	.endif
 | 
						|
	abi_entry_size	\locsize, \callsize
 | 
						|
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
 | 
						|
	.ifgt	.locsz - 32760	/* .locsz > 32760 (ENTRY's max range)? */
 | 
						|
	/*  Funky computation to try to have assembler use addmi efficiently if possible:  */
 | 
						|
	entry	sp, 0x7F00 + (.locsz & 0xF0)
 | 
						|
	addi	a12, sp, - ((.locsz & -0x100) - 0x7F00)
 | 
						|
	movsp	sp, a12
 | 
						|
	.else
 | 
						|
	entry	sp, .locsz
 | 
						|
	.endif
 | 
						|
#else
 | 
						|
	.if	.locsz
 | 
						|
	 .ifle	.locsz - 128	/* if locsz <= 128 */
 | 
						|
	addi	sp, sp, -.locsz
 | 
						|
	  .if	.callsz
 | 
						|
	s32i	a0, sp, .locsz - 4
 | 
						|
	  .endif
 | 
						|
	 .elseif  .callsz	/* locsz > 128, with calls: */
 | 
						|
	movi	a9, .locsz - 16		/* note: a9 is caller-saved */
 | 
						|
	addi	sp, sp, -16
 | 
						|
	s32i	a0, sp, 12
 | 
						|
	sub	sp, sp, a9
 | 
						|
	 .else			/* locsz > 128, no calls: */
 | 
						|
	movi	a9, .locsz
 | 
						|
	sub	sp, sp, a9
 | 
						|
	 .endif			/* end */
 | 
						|
	.endif
 | 
						|
#endif
 | 
						|
	.endm
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/*----------------------------------------------------------------------
 | 
						|
 *  abi_return
 | 
						|
 * 
 | 
						|
 *  Generate proper function exit sequence for the current ABI
 | 
						|
 *  (windowed or call0).  Takes care of freeing stack space and
 | 
						|
 *  restoring the return PC, if necessary.
 | 
						|
 *  NOTE:  This macro MUST be invoked following a corresponding
 | 
						|
 *  abi_entry macro invocation.  For call0 ABI in particular,
 | 
						|
 *  all stack and PC restoration are done according to the last
 | 
						|
 *  abi_entry macro invoked before this macro in the assembly file.
 | 
						|
 *
 | 
						|
 *  Normally this macro takes no arguments.  However to allow
 | 
						|
 *  for placing abi_return *before* abi_entry (as must be done
 | 
						|
 *  for some highly optimized assembly), it optionally takes
 | 
						|
 *  exactly the same arguments as abi_entry.
 | 
						|
 */
 | 
						|
 | 
						|
	.macro abi_return	locsize=-1, callsize=0
 | 
						|
	.ifge	\locsize
 | 
						|
	abi_entry_size	\locsize, \callsize
 | 
						|
	.endif
 | 
						|
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
 | 
						|
	retw
 | 
						|
#else
 | 
						|
	.if	.locsz
 | 
						|
	 .iflt	.locsz - 128	/* if locsz < 128 */
 | 
						|
	  .if	.callsz
 | 
						|
	l32i	a0, sp, .locsz - 4
 | 
						|
	  .endif
 | 
						|
	addi	sp, sp, .locsz
 | 
						|
	 .elseif  .callsz	/* locsz >= 128, with calls: */
 | 
						|
	addi	a9, sp, .locsz - 16
 | 
						|
	l32i	a0, a9, 12
 | 
						|
	addi	sp, a9, 16
 | 
						|
	 .else			/* locsz >= 128, no calls: */
 | 
						|
	movi	a9, .locsz
 | 
						|
	add	sp, sp, a9
 | 
						|
	 .endif			/* end */
 | 
						|
	.endif
 | 
						|
	ret
 | 
						|
#endif
 | 
						|
	.endm
 | 
						|
 | 
						|
 | 
						|
/*
 | 
						|
 * HW erratum fixes.
 | 
						|
 */
 | 
						|
 | 
						|
	.macro hw_erratum_487_fix
 | 
						|
#if defined XSHAL_ERRATUM_487_FIX
 | 
						|
	isync
 | 
						|
#endif
 | 
						|
	.endm
 | 
						|
 | 
						|
 | 
						|
#endif /*XTENSA_COREASM_H*/
 | 
						|
 |