Added the MC68040 Floating Point Support Package. This was ported

to RTEMS by Eric Norum. It is freely distributable and was acquired from the Motorola WWW site. More info is in the FPSP README.
2025-12-20 03:05:47 +08:00 · 1997-04-16 17:33:04 +00:00
parent 83e39b2631
commit f9b93da8b4
47 changed files with 16820 additions and 0 deletions
--- a/c/src/lib/libcpu/m68k/Makefile.in
+++ b/c/src/lib/libcpu/m68k/Makefile.in
@@ -0,0 +1,13 @@
 #
 #  $Id$
 #
 #  Directory-level Makefile for the m68k libcpu tree.  It builds nothing
 #  itself; it only selects which subdirectory to descend into.
 #
@SET_MAKE@
 srcdir = @srcdir@
 top_srcdir = @top_srcdir@
 VPATH=@srcdir@
 include $(RTEMS_CUSTOM)
 include $(PROJECT_ROOT)/make/directory.cfg
 # $(wildcard ...) expands to nothing when no entry named
 # $(RTEMS_CPU_MODEL) exists, so CPU models without a matching
 # subdirectory contribute no SUB_DIRS entry.
 # NOTE(review): wildcard matches relative to the directory make runs in,
 # not $(srcdir) -- confirm this is what is wanted for out-of-tree builds.
 SUB_DIRS=$(wildcard $(RTEMS_CPU_MODEL))
--- a/c/src/lib/libcpu/m68k/m68040/Makefile.in
+++ b/c/src/lib/libcpu/m68k/m68040/Makefile.in
@@ -0,0 +1,13 @@
 #
 #  $Id$
 #
 #  Directory-level Makefile for m68040-specific libcpu code.  The only
 #  subdirectory listed is fpsp (the floating point support package).
 #
@SET_MAKE@
 srcdir = @srcdir@
 top_srcdir = @top_srcdir@
 VPATH=@srcdir@
 include $(RTEMS_CUSTOM)
 include $(PROJECT_ROOT)/make/directory.cfg
 SUB_DIRS=fpsp
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/Makefile.in
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/Makefile.in
@@ -0,0 +1,63 @@
 #
 #  $Id$
 #
 #  Leaf Makefile for the MC68040 floating point support package.
 #  All C and assembly objects listed below are combined into the single
 #  relocatable object ${ARCH}/fpsp.rel.
 #
@SET_MAKE@
 srcdir = @srcdir@
 top_srcdir = @top_srcdir@
 VPATH=@srcdir@
 PGM=${ARCH}/fpsp.rel
 # C source names, if any, go here -- minus the .c
 C_PIECES=rtems_fpsp
 C_FILES=$(C_PIECES:%=%.c)
 C_O_FILES=$(C_PIECES:%=${ARCH}/%.o)
 H_FILES=
 # Assembly source names, if any, go here -- minus the .s
 S_PIECES= bindec binstr bugfix decbin do_func gen_except get_op kernel_ex \
    res_func round rtems_skel sacos sasin satan satanh scale scosh setox \
    sgetem sint slog2 slogn smovecr srem_mod ssin ssinh stan stanh sto_res \
    stwotox tbldo util x_bsun x_fline x_operr x_ovfl x_snan x_store x_unfl \
    x_unimp x_unsupp
 S_FILES=$(S_PIECES:%=%.s)
 S_O_FILES=$(S_FILES:%.s=${ARCH}/%.o)
 # CC_FILES and CC_O_FILES are not set in this file; they expand to
 # nothing here unless one of the included configuration files sets them.
 SRCS=$(C_FILES) $(CC_FILES) $(H_FILES) $(S_FILES)
 OBJS=$(C_O_FILES) $(CC_O_FILES) $(S_O_FILES)
 include $(RTEMS_CUSTOM)
 include $(PROJECT_ROOT)/make/leaf.cfg
 #
 # (OPTIONAL) Add local stuff here using +=
 #
 DEFINES  +=
 CPPFLAGS += 
 CFLAGS   += $(CFLAGS_OS_V)
 LD_PATHS  += 
 LD_LIBS   += 
 LDFLAGS   += 
 #
 # Add your list of files to delete here.  The config files
 #  already know how to delete some stuff, so you may want
 #  to just run 'make clean' first to see what gets missed.
 #  'make clobber' already includes 'make clean'
 #
 CLEAN_ADDITIONS +=
 CLOBBER_ADDITIONS +=
 # Link every object into the relocatable using the make-rel recipe,
 # which is not defined in this file (presumably supplied by the
 # included configuration files).
 ${PGM}: ${SRCS} ${OBJS}
 	$(make-rel)
 all:	${ARCH} $(SRCS) $(PGM)
 # The .rel file built here is meant to be put into libbsp.a by a BSP
 # wrapup Makefile.
 # NOTE(review): the original comment named
 #    libbsp/hppa/BSP/wrapup/Makefile
 # which looks copied from another port; for this package the m68k BSP
 # wrapup Makefile is presumably meant -- confirm.
 # install has no recipe of its own; it only forces the build.
 install:  all
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/README
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/README
@@ -0,0 +1,40 @@
 M68040FPSP -- Motorola 68040 floating point support package
 -----------------------------------------------------------
 Modified for RTEMS by Eric Norum (eric@skatter.usask.ca)
 To include these routines in your application call
 	M68KFPSPInstallExceptionHandlers ();
 before performing any floating point operations.
 Acknowledgement
 ---------------
 This code can be obtained from the Motorola Engineer's Toolbox WWW page
 at http://www.mot.com/SPS/HPESD/tools/freeware/040fpsp.html.  Here is
 the description from that page:
  The MC68040 contains a subset of the floating-point hardware that is
  implemented in the MC68881/882 devices and as such provides reduced yet
  high performance on-chip floating-point support. Those applications that
  require full compatibility with earlier members of the M68000 family
  will need to provide emulation support for the un-implemented MC68040
  floating-point instructions. The M68040FPSP provides complete emulation
  of the floating-point functionality available in the MC68881/882. 
  The M68040FPSP is offered in source code form to allow integration into
  existing systems to support either a kernel or library version of
  floating-point support. The M68040FPSP operates in conjunction with the
  on-chip MC68040 features to provide fast and full emulation. The kernel
  version allows full emulation via a trap mechanism to allow full binary
  compatibility and is fully reentrant. The library version is used to
  eliminate the trap overhead in situation where re-compilation is
  possible or desired. 
 From this page one may download the original source code.  In line with
 the first sentence of the second paragraph, we have integrated it with
 RTEMS.
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/bindec.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/bindec.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/binstr.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/binstr.s
@@ -0,0 +1,140 @@
 //
 //	binstr.sa 3.3 12/19/90
 //
 //
 //	Description: Converts a 64-bit binary integer to bcd.
 //
 //	Input: 64-bit binary integer in d2:d3, desired length (LEN) in
 //          d0, and a  pointer to start in memory for bcd characters
 //          in a0. (This pointer must point to byte 4 of the first
 //          lword of the packed decimal memory string.)
 //
 //	Output:	LEN bcd digits representing the 64-bit integer.
 //
 //	Algorithm:
 //		The 64-bit binary is assumed to have a decimal point before
 //		bit 63.  The fraction is multiplied by 10 using a mul by 2
 //		shift and a mul by 8 shift.  The bits shifted out of the
 //		msb form a decimal digit.  This process is iterated until
 //		LEN digits are formed.
 //
 //	A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the
 //		digit formed will be assumed the least significant.  This is
 //		to force the first byte formed to have a 0 in the upper 4 bits.
 //
 //	A2. Beginning of the loop:
 //		Copy the fraction in d2:d3 to d4:d5.
 //
 //	A3. Multiply the fraction in d2:d3 by 8 using bit-field
 //		extracts and shifts.  The three msbs from d2 will go into
 //		d1.
 //
 //	A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb
 //		will be collected by the carry.
 //
 //	A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5
 //		into d2:d3.  D1 will contain the bcd digit formed.
 //
 //	A6. Test d7.  If zero, the digit formed is the ms digit.  If non-
 //		zero, it is the ls digit.  Put the digit in its place in the
 //		upper word of d7.  If it is the ls digit, write the byte
 //		from d7 to memory.
 //
 //	A7. Decrement d0 (LEN counter) and repeat the loop until LEN
 //		digits have been formed.
 //
 //	Implementation Notes:
 //
 //	The registers are used as follows:
 //
 //		d0: LEN counter
 //		d1: temp used to form the digit
 //		d2: upper 32-bits of fraction for mul by 8
 //		d3: lower 32-bits of fraction for mul by 8
 //		d4: upper 32-bits of fraction for mul by 2
 //		d5: lower 32-bits of fraction for mul by 2
 //		d6: temp for bit-field extracts
 //		d7: byte digit formation word;digit count {0,1}
 //		a0: pointer into memory for packed bcd string formation
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //BINSTR    idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	.global	binstr
 //
 // binstr: emit LEN packed bcd digits from the fraction in d2:d3.
 //
 //	In:	d2:d3 = 64-bit binary fraction
 //		d0    = number of digits to produce (LEN)
 //		a0    = address of the first byte to store
 //	Out:	digits stored two per byte through (a0)+.
 //		d0-d7 are saved and restored here; a0 is not restored and
 //		is left pointing past the last byte stored.
 //
 //	d7 low word is the digit flag: zero means the next digit starts a
 //	new byte, non-zero means the next digit completes the byte.  The
 //	high word of d7 holds the first digit of a byte while it waits
 //	for the second one.
 //
 //	NOTE(review): LEN = 0 is not guarded.  After the subql below the
 //	dbf counter word would be $FFFF, so the loop would make 65536
 //	passes; callers presumably never pass 0 -- confirm.
 //
 binstr:
 	moveml	%d0-%d7,-(%a7)
 //
 // A1: Init d7 and the loop counter.
 //
 	moveql	#1,%d7			//flag set: treat the first digit as a low digit
 	subql	#1,%d0			//dbf makes count+1 passes, so use LEN-1
 //
 // A2. Copy d2:d3 to d4:d5.  Start loop.
 //
 loop:
 	movel	%d2,%d4			//d4:d5 = copy of the fraction
 	movel	%d3,%d5			//(this copy becomes the times-2 term)
 //
 // A3. Multiply d2:d3 by 8; the 3 bits shifted out of d2 start the digit.
 //
 	bfextu	%d2{#0:#3},%d1		//d1 = top 3 bits of d2
 	asll	#3,%d2			//high half times 8
 	bfextu	%d3{#0:#3},%d6		//d6 = top 3 bits of d3
 	asll	#3,%d3			//low half times 8
 	orl	%d6,%d2			//carry those 3 bits into the high half
 //
 // A4. Multiply d4:d5 by 2; add the bit shifted out to d1.
 //
 	asll	#1,%d5			//low half times 2, msb goes to X
 	roxll	#1,%d4			//high half times 2, X in, msb out to X
 	swap	%d6			//low word of d6 is now 0 (high word was 0)
 	addxw	%d6,%d1			//d1 = d1 + 0 + X
 //
 // A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
 //
 	addl	%d5,%d3			//add lower 32 bits
 	nop				//ERRATA ; FIX #13 (Rev. 1.2 6/6/90)
 	addxl	%d4,%d2			//add with extend upper 32 bits
 	nop				//ERRATA ; FIX #13 (Rev. 1.2 6/6/90)
 	addxw	%d6,%d1			//d1 = d1 + 0 + X from the 64-bit add
 	swap	%d6			//undo the swap done in A4
 //
 // A6. Test the digit flag in d7 and branch.
 //
 	tstw	%d7			//flag zero: this digit starts a byte
 	beqs	first_d			//flag non-zero: fall into sec_d
 //
 // Second digit of a byte: combine with the parked first digit and store.
 //
 sec_d:
 	swap	%d7			//bring the parked first digit to the low word
 	aslw	#4,%d7			//move it to the upper 4 bits of the byte
 	addw	%d1,%d7			//add the new digit as the lower 4 bits
 	moveb	%d7,(%a0)+		//store the completed byte
 	swap	%d7			//bring the flag word back to the low word
 	clrw	%d7			//flag = 0: next digit starts a new byte
 	dbf	%d0,loop		//more digits to do
 	bras	end_bstr		//LEN digits done on a byte boundary
 //
 // First digit of a byte: park it in the high word of d7.
 //
 first_d:
 	swap	%d7			//bring the digit word to the low word
 	movew	%d1,%d7			//park the new digit there
 	swap	%d7			//bring the flag word back to the low word
 	addqw	#1,%d7			//flag = 1: next digit completes the byte
 	dbf	%d0,loop		//more digits to do
 //
 // LEN ran out with one digit parked: store it alone in the upper 4 bits.
 //
 	swap	%d7			//bring the parked digit to the low word
 	lslw	#4,%d7			//move it to the upper 4 bits
 	moveb	%d7,(%a0)+		//store it; the lower 4 bits are zero
 //
 // Restore d0-d7 and return.  The digits are in memory; nothing is
 // returned in a register.
 //
 end_bstr:
 	moveml	(%a7)+,%d0-%d7
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/bugfix.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/bugfix.s
@@ -0,0 +1,496 @@
 //
 //	bugfix.sa 3.2 1/31/91
 //
 //
 //	This file contains workarounds for bugs in the 040
 //	relating to the Floating-Point Software Package (FPSP)
 //
 //	Fixes for bugs: 1238
 //
 //	Bug: 1238 
 //
 //
 //    /* The following dirty_bit clear should be left in
 //     * the handler permanently to improve throughput.
 //     * The dirty_bits are located at bits [23:16] in
 //     * longword $08 in the busy frame $4x60.  Bit 16
 //     * corresponds to FP0, bit 17 corresponds to FP1,
 //     * and so on.
 //     */
 //    if  (E3_exception_just_serviced)   {
 //         dirty_bit[cmdreg3b[9:7]] = 0;
 //         }
 //
 //    if  (fsave_format_version != $40)  {goto NOFIX}
 //
 //    if !(E3_exception_just_serviced)   {goto NOFIX}
 //    if  (cupc == 0000000)              {goto NOFIX}
 //    if  ((cmdreg1b[15:13] != 000) &&
 //         (cmdreg1b[15:10] != 010001))  {goto NOFIX}
 //    if (((cmdreg1b[15:13] != 000) || ((cmdreg1b[12:10] != cmdreg2b[9:7]) &&
 //				      (cmdreg1b[12:10] != cmdreg3b[9:7]))  ) &&
 //	 ((cmdreg1b[ 9: 7] != cmdreg2b[9:7]) &&
 //	  (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) )  {goto NOFIX}
 //
 //    /* Note: for 6d43b or 8d43b, you may want to add the following code
 //     * to get better coverage.  (If you do not insert this code, the part
 //     * won't lock up; it will simply get the wrong answer.)
 //     * Do NOT insert this code for 10d43b or later parts.
 //     *
 //     *  if (fpiarcu == integer stack return address) {
 //     *       cupc = 0000000;
 //     *       goto NOFIX;
 //     *       }
 //     */
 //
 //    if (cmdreg1b[15:13] != 000)   {goto FIX_OPCLASS2}
 //    FIX_OPCLASS0:
 //    if (((cmdreg1b[12:10] == cmdreg2b[9:7]) ||
 //	 (cmdreg1b[ 9: 7] == cmdreg2b[9:7])) &&
 //	(cmdreg1b[12:10] != cmdreg3b[9:7]) &&
 //	(cmdreg1b[ 9: 7] != cmdreg3b[9:7]))  {  /* xu conflict only */
 //	/* We execute the following code if there is an
 //	   xu conflict and NOT an nu conflict */
 //
 //	/* first save some values on the fsave frame */
 //	stag_temp     = STAG[fsave_frame];
 //	cmdreg1b_temp = CMDREG1B[fsave_frame];
 //	dtag_temp     = DTAG[fsave_frame];
 //	ete15_temp    = ETE15[fsave_frame];
 //
 //	CUPC[fsave_frame] = 0000000;
 //	FRESTORE
 //	FSAVE
 //
 //	/* If the xu instruction is exceptional, we punt.
 //	 * Otherwise, we would have to include OVFL/UNFL handler
 //	 * code here to get the correct answer.
 //	 */
 //	if (fsave_frame_format == $4060) {goto KILL_PROCESS}
 //
 //	fsave_frame = /* build a long frame of all zeros */
 //	fsave_frame_format = $4060;  /* label it as long frame */
 //
 //	/* load it with the temps we saved */
 //	STAG[fsave_frame]     =  stag_temp;
 //	CMDREG1B[fsave_frame] =  cmdreg1b_temp;
 //	DTAG[fsave_frame]     =  dtag_temp;
 //	ETE15[fsave_frame]    =  ete15_temp;
 //
 //	/* Make sure that the cmdreg3b dest reg is not going to
 //	 * be destroyed by a FMOVEM at the end of all this code.
 //	 * If it is, you should move the current value of the reg
 //	 * onto the stack so that the reg will loaded with that value.
 //	 */
 //
 //	/* All done.  Proceed with the code below */
 //    }
 //
 //    etemp  = FP_reg_[cmdreg1b[12:10]];
 //    ete15  = ~ete14;
 //    cmdreg1b[15:10] = 010010;
 //    clear(bug_flag_procIDxxxx);
 //    FRESTORE and return;
 //
 //
 //    FIX_OPCLASS2:
 //    if ((cmdreg1b[9:7] == cmdreg2b[9:7]) &&
 //	(cmdreg1b[9:7] != cmdreg3b[9:7]))  {  /* xu conflict only */
 //	/* We execute the following code if there is an
 //	   xu conflict and NOT an nu conflict */
 //
 //	/* first save some values on the fsave frame */
 //	stag_temp     = STAG[fsave_frame];
 //	cmdreg1b_temp = CMDREG1B[fsave_frame];
 //	dtag_temp     = DTAG[fsave_frame];
 //	ete15_temp    = ETE15[fsave_frame];
 //	etemp_temp    = ETEMP[fsave_frame];
 //
 //	CUPC[fsave_frame] = 0000000;
 //	FRESTORE
 //	FSAVE
 //
 //
 //	/* If the xu instruction is exceptional, we punt.
 //	 * Otherwise, we would have to include OVFL/UNFL handler
 //	 * code here to get the correct answer.
 //	 */
 //	if (fsave_frame_format == $4060) {goto KILL_PROCESS}
 //
 //	fsave_frame = /* build a long frame of all zeros */
 //	fsave_frame_format = $4060;  /* label it as long frame */
 //
 //	/* load it with the temps we saved */
 //	STAG[fsave_frame]     =  stag_temp;
 //	CMDREG1B[fsave_frame] =  cmdreg1b_temp;
 //	DTAG[fsave_frame]     =  dtag_temp;
 //	ETE15[fsave_frame]    =  ete15_temp;
 //	ETEMP[fsave_frame]    =  etemp_temp;
 //
 //	/* Make sure that the cmdreg3b dest reg is not going to
 //	 * be destroyed by a FMOVEM at the end of all this code.
 //	 * If it is, you should move the current value of the reg
 //	 * onto the stack so that the reg will loaded with that value.
 //	 */
 //
 //	/* All done.  Proceed with the code below */
 //    }
 //
 //    if (etemp_exponent == min_sgl)   etemp_exponent = min_dbl;
 //    if (etemp_exponent == max_sgl)   etemp_exponent = max_dbl;
 //    cmdreg1b[15:10] = 010101;
 //    clear(bug_flag_procIDxxxx);
 //    FRESTORE and return;
 //
 //
 //    NOFIX:
 //    clear(bug_flag_procIDxxxx);
 //    FRESTORE and return;
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //BUGFIX    idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	fpsp_fmt_error
 	.global	b1238_fix
 //
 // b1238_fix -- workaround for 040 bug #1238.
 //
 // On entry (a7) holds the return address and the fsave frame starts
 // at 4(a7); a6 is the frame pointer that the fpsp.defs offsets are
 // relative to.  d0 and d1 are used as scratch and are not preserved.
 //
 b1238_fix:
 //
 // This code is entered only on completion of the handling of an
 // nu-generated ovfl, unfl, or inex exception.  If the version
 // number of the fsave is not $40, this handler is not necessary.
 // Simply branch to fix_done and exit normally.
 //
 	cmpib	#VER_40,4(%a7)		//first byte of the fsave frame
 	bne	fix_done
 //
 // Test for cu_savepc equal to zero (bit 0 is ignored).  If it is
 // zero, this is not a bug #1238 case.
 //
 	moveb	CU_SAVEPC(%a6),%d0
 	andib	#0xFE,%d0
 	beq 	fix_done	//if zero, this is not bug #1238
 //
 // Test the register conflict aspect.  For opclass 0, check for the
 // cu src or cu dest equal to the nu dest (go to op0) or equal to the
 // xu dest (go to op0_xu).  For anything else go to op2sgl, which
 // checks for opclass 2 with single size and a cu dest conflict.
 // If no check matches, exit; it is not the bug case.
 //
 // Check for opclass 0.  If not, go and check for opclass 2 and sgl.
 //
 	movew	CMDREG1B(%a6),%d0
 	andiw	#0xE000,%d0		//strip all but opclass
 	bne	op2sgl			//not opclass 0, check op2
 //
 // Check for cu and nu register conflict.  If one exists, this takes
 // priority over a cu and xu conflict.
 //
 	bfextu	CMDREG1B(%a6){#3:#3},%d0	//get 1st src (bits 12:10)
 	bfextu	CMDREG3B(%a6){#6:#3},%d1	//get 3rd dest (bits 9:7)
 	cmpb	%d0,%d1
 	beqs	op0			//if equal, continue bugfix
 //
 // Check for cu dest equal to nu dest.  If so, go and fix the
 // bug condition.  Otherwise, go on to the xu checks.
 //
 	bfextu	CMDREG1B(%a6){#6:#3},%d0	//get 1st dest (bits 9:7)
 	cmpb	%d0,%d1			//cmp 1st dest with 3rd dest
 	beqs	op0			//if equal, continue bugfix
 //
 // Check for cu and xu register conflict.
 //
 	bfextu	CMDREG2B(%a6){#6:#3},%d1	//get 2nd dest
 	cmpb	%d0,%d1			//cmp 1st dest with 2nd dest
 	beqs	op0_xu			//if equal, continue bugfix
 	bfextu	CMDREG1B(%a6){#3:#3},%d0	//get 1st src
 	cmpb	%d0,%d1			//cmp 1st src with 2nd dest
 	beq	op0_xu
 	bne	fix_done		//always taken here: no reg check matched
 //
 // We have the opclass 0 situation with a cu and nu conflict.
 // Copy the cu source register into ETEMP and rewrite the command
 // as opclass 2, extended.
 //
 op0:
 	bfextu	CMDREG1B(%a6){#3:#3},%d0	//get source register no
 	movel	#7,%d1
 	subl	%d0,%d1			//d1 = 7 - register number
 	clrl	%d0
 	bsetl	%d1,%d0			//d0 = register mask with that one bit set
 	fmovemx %d0,ETEMP(%a6)		//store that register to ETEMP
 	moveb	#0x12,%d0
 	bfins	%d0,CMDREG1B(%a6){#0:#6}	//top 6 bits = 010010: opclass 2, extended
 //
 //	Set ETEMP exponent bit 15 as the opposite of ete14
 //
 	btst	#6,ETEMP_EX(%a6)		//check etemp exponent bit 14
 	beq	setete15		//bit 14 clear: set bit 15
 	bclr	#etemp15_bit,STAG(%a6)	//bit 14 set: clear bit 15
 	bra	finish
 setete15:
 	bset	#etemp15_bit,STAG(%a6)
 	bra	finish
 //
 // We have the case in which a conflict exists between the cu src or
 // dest and the dest of the xu.  We must clear the instruction in
 // the cu and restore the state, allowing the instruction in the
 // xu to complete.  Remember, the instruction in the nu
 // was exceptional, and was completed by the appropriate handler.
 // If the result of the xu instruction is not exceptional, we can
 // restore the instruction from the cu to the frame and continue
 // processing the original exception.  If the result is also
 // exceptional, we choose to kill the process.
 //
 //	Items saved from the stack:
 //
 //		$3c stag     - L_SCR1
 //		$40 cmdreg1b - L_SCR2
 //		$44 dtag     - L_SCR3
 //
 // The cu savepc is set to zero, and the frame is restored to the
 // fpu.
 //
 op0_xu:
 	movel	STAG(%a6),L_SCR1(%a6)	
 	movel	CMDREG1B(%a6),L_SCR2(%a6)	
 	movel	DTAG(%a6),L_SCR3(%a6)
 	andil	#0xe0000000,L_SCR3(%a6)	//keep only the top 3 bits of the saved lword
 	moveb	#0,CU_SAVEPC(%a6)
 	movel	(%a7)+,%d1		//pop the bsr return address; d1 holds it until op0_xi
 	frestore (%a7)+
 	fsave	-(%a7)
 //
 // Check if the instruction which just completed was exceptional.
 // A first word of $4060 means a busy frame came back.
 //
 	cmpw	#0x4060,(%a7)
 	beq	op0_xb
 //
 // It is necessary to isolate the result of the instruction in the
 // xu if it is to fp0 - fp3 and write that value to the USER_FPn
 // locations on the stack.  The correct destination register is in
 // cmdreg2b.  Register numbers above 3 need no copy here.
 //
 	bfextu	CMDREG2B(%a6){#6:#3},%d0	//get dest register no
 	cmpil	#3,%d0
 	bgts	op0_xi			//fp4-fp7: nothing to copy
 	beqs	op0_fp3
 	cmpil	#1,%d0
 	blts	op0_fp0
 	beqs	op0_fp1
 op0_fp2:
 	fmovemx %fp2-%fp2,USER_FP2(%a6)
 	bras	op0_xi
 op0_fp1:
 	fmovemx %fp1-%fp1,USER_FP1(%a6)
 	bras	op0_xi
 op0_fp0:
 	fmovemx %fp0-%fp0,USER_FP0(%a6)
 	bras	op0_xi
 op0_fp3:
 	fmovemx %fp3-%fp3,USER_FP3(%a6)
 //
 // The frame returned is idle.  We must build a busy frame to hold
 // the cu state information and setup etemp.  The lword at (a7) is
 // cleared in place, 23 more zero lwords are pushed, and then the
 // $4060 header lword is pushed on top.
 //
 op0_xi:
 	movel	#22,%d0		//dbf count for 23 passes
 	clrl	(%a7)
 op0_loop:
 	clrl	-(%a7)
 	dbf	%d0,op0_loop
 	movel	#0x40600000,-(%a7)	//label it as a $4060 frame
 	movel	L_SCR1(%a6),STAG(%a6)
 	movel	L_SCR2(%a6),CMDREG1B(%a6)
 	movel	L_SCR3(%a6),DTAG(%a6)
 	moveb	#0x6,CU_SAVEPC(%a6)
 	movel	%d1,-(%a7)		//push the bsr return address back
 //
 // From here on the steps are the same as at op0: copy the cu source
 // register to ETEMP and rewrite the command as opclass 2, extended.
 //
 	bfextu	CMDREG1B(%a6){#3:#3},%d0	//get source register no
 	movel	#7,%d1
 	subl	%d0,%d1			//d1 = 7 - register number
 	clrl	%d0
 	bsetl	%d1,%d0			//d0 = register mask with that one bit set
 	fmovemx %d0,ETEMP(%a6)		//store that register to ETEMP
 	moveb	#0x12,%d0
 	bfins	%d0,CMDREG1B(%a6){#0:#6}	//top 6 bits = 010010: opclass 2, extended
 //
 //	Set ETEMP exponent bit 15 as the opposite of ete14
 //
 	btst	#6,ETEMP_EX(%a6)		//check etemp exponent bit 14
 	beq	op0_sete15		//bit 14 clear: set bit 15
 	bclr	#etemp15_bit,STAG(%a6)	//bit 14 set: clear bit 15
 	bra	finish
 op0_sete15:
 	bset	#etemp15_bit,STAG(%a6)
 	bra	finish
 //
 // The frame returned is busy.  It is not possible to reconstruct
 // the code sequence to allow completion.  We will jump to
 // fpsp_fmt_error and allow the kernel to kill the process.
 // Note that the bsr return address was already popped into d1 and
 // is not pushed back on this path.
 //
 op0_xb:
 	jmp	fpsp_fmt_error
 //
 // Check for opclass 2 and single size.  If not both, exit.
 //
 op2sgl:
 	movew	CMDREG1B(%a6),%d0
 	andiw	#0xFC00,%d0		//strip all but opclass and size
 	cmpiw	#0x4400,%d0		//top 6 bits = 010001: opclass 2, size=sgl
 	bne	fix_done		//if not, it is not bug 1238
 //
 // Check for cu dest equal to nu dest or equal to xu dest, with
 // a cu and nu conflict taking priority over a cu and xu conflict.
 // On an nu conflict go straight to op2_com; on an xu conflict fall
 // through into op2_xu.  Otherwise, exit.
 //
 	bfextu	CMDREG1B(%a6){#6:#3},%d0	//get 1st dest
 	bfextu	CMDREG3B(%a6){#6:#3},%d1	//get 3rd dest
 	cmpb	%d0,%d1			//cmp 1st dest with 3rd dest
 	beq	op2_com			//if equal, continue bugfix
 	bfextu	CMDREG2B(%a6){#6:#3},%d1	//get 2nd dest
 	cmpb	%d0,%d1			//cmp 1st dest with 2nd dest
 	bne	fix_done		//if the reg checks fail, exit
 //
 // We have the case in which a conflict exists between the cu
 // dest and the dest of the xu.  We must clear the instruction in
 // the cu and restore the state, allowing the instruction in the
 // xu to complete.  Remember, the instruction in the nu
 // was exceptional, and was completed by the appropriate handler.
 // If the result of the xu instruction is not exceptional, we can
 // restore the instruction from the cu to the frame and continue
 // processing the original exception.  If the result is also
 // exceptional, we choose to kill the process.
 //
 //	Items saved from the stack:
 //
 //		$3c stag     - L_SCR1
 //		$40 cmdreg1b - L_SCR2
 //		$44 dtag     - L_SCR3
 //		etemp        - FP_SCR2
 //
 // The cu savepc is set to zero, and the frame is restored to the
 // fpu.
 //
 op2_xu:
 	movel	STAG(%a6),L_SCR1(%a6)	
 	movel	CMDREG1B(%a6),L_SCR2(%a6)	
 	movel	DTAG(%a6),L_SCR3(%a6)	
 	andil	#0xe0000000,L_SCR3(%a6)	//keep only the top 3 bits of the saved lword
 	moveb	#0,CU_SAVEPC(%a6)
 	movel	ETEMP(%a6),FP_SCR2(%a6)	//save all 12 bytes of etemp
 	movel	ETEMP_HI(%a6),FP_SCR2+4(%a6)
 	movel	ETEMP_LO(%a6),FP_SCR2+8(%a6)
 	movel	(%a7)+,%d1		//pop the bsr return address; d1 holds it until op2_xi
 	frestore (%a7)+
 	fsave	-(%a7)
 //
 // Check if the instruction which just completed was exceptional.
 // A first word of $4060 means a busy frame came back.
 //
 	cmpw	#0x4060,(%a7)
 	beq	op2_xb
 //
 // It is necessary to isolate the result of the instruction in the
 // xu if it is to fp0 - fp3 and write that value to the USER_FPn
 // locations on the stack.  The correct destination register is in
 // cmdreg2b.  Register numbers above 3 need no copy here.
 //
 	bfextu	CMDREG2B(%a6){#6:#3},%d0	//get dest register no
 	cmpil	#3,%d0
 	bgts	op2_xi			//fp4-fp7: nothing to copy
 	beqs	op2_fp3
 	cmpil	#1,%d0
 	blts	op2_fp0
 	beqs	op2_fp1
 op2_fp2:
 	fmovemx %fp2-%fp2,USER_FP2(%a6)
 	bras	op2_xi
 op2_fp1:
 	fmovemx %fp1-%fp1,USER_FP1(%a6)
 	bras	op2_xi
 op2_fp0:
 	fmovemx %fp0-%fp0,USER_FP0(%a6)
 	bras	op2_xi
 op2_fp3:
 	fmovemx %fp3-%fp3,USER_FP3(%a6)
 //
 // The frame returned is idle.  We must build a busy frame to hold
 // the cu state information and fix up etemp.  The lword at (a7) is
 // cleared in place, 23 more zero lwords are pushed, and then the
 // $4060 header lword is pushed on top.
 //
 op2_xi:
 	movel	#22,%d0		//dbf count for 23 passes
 	clrl	(%a7)
 op2_loop:
 	clrl	-(%a7)
 	dbf	%d0,op2_loop
 	movel	#0x40600000,-(%a7)	//label it as a $4060 frame
 	movel	L_SCR1(%a6),STAG(%a6)
 	movel	L_SCR2(%a6),CMDREG1B(%a6)
 	movel	L_SCR3(%a6),DTAG(%a6)
 	moveb	#0x6,CU_SAVEPC(%a6)
 	movel	FP_SCR2(%a6),ETEMP(%a6)	//put the saved etemp back
 	movel	FP_SCR2+4(%a6),ETEMP_HI(%a6)
 	movel	FP_SCR2+8(%a6),ETEMP_LO(%a6)
 	movel	%d1,-(%a7)		//push the bsr return address back
 	bra	op2_com			//op2_com is the next label, so this branch is redundant
 //
 // We have the opclass 2 single source situation.  Rewrite the
 // command as opclass 2, double.  Then, if the etemp sign/exponent
 // word is one of the four single limit values below, replace it
 // with the matching double limit value; any other value is left
 // unchanged.
 //
 op2_com:
 	moveb	#0x15,%d0
 	bfins	%d0,CMDREG1B(%a6){#0:#6}	//top 6 bits = 010101: opclass 2, double
 	cmpw	#0x407F,ETEMP_EX(%a6)	//single +max
 	bnes	case2
 	movew	#0x43FF,ETEMP_EX(%a6)	//to double +max
 	bra	finish
 case2:	
 	cmpw	#0xC07F,ETEMP_EX(%a6)	//single -max
 	bnes	case3
 	movew	#0xC3FF,ETEMP_EX(%a6)	//to double -max
 	bra	finish
 case3:	
 	cmpw	#0x3F80,ETEMP_EX(%a6)	//single +min
 	bnes	case4
 	movew	#0x3C00,ETEMP_EX(%a6)	//to double +min
 	bra	finish
 case4:
 	cmpw	#0xBF80,ETEMP_EX(%a6)	//single -min
 	bne	fix_done		//none of the four: leave the exponent alone
 	movew	#0xBC00,ETEMP_EX(%a6)	//to double -min
 	bra	finish
 //
 // The frame returned is busy.  It is not possible to reconstruct
 // the code sequence to allow completion.  fpsp_fmt_error causes
 // an fline illegal instruction to be executed.
 //
 // You should replace the jump to fpsp_fmt_error with a jump
 // to the entry point used to kill a process.
 //
 op2_xb:
 	jmp	fpsp_fmt_error
 //
 // Enter here if the case is not one of the situations affected by
 // bug #1238, or if the fix is completed, and exit.  Both labels
 // name the same rts.
 //
 finish:
 fix_done:
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/decbin.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/decbin.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/do_func.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/do_func.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/fpsp.defs
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/fpsp.defs
@@ -0,0 +1,348 @@
 |
 |	fpsp.h 3.3 3.3
 |
 |		Copyright (C) Motorola, Inc. 1990
 |			All Rights Reserved
 |
 |	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 |	The copyright notice above does not evidence any  
 |	actual or intended publication of such source code.
 |	fpsp.h --- stack frame offsets during FPSP exception handling
 |
 |	These equates are used to access the exception frame, the fsave
 |	frame and any local variables needed by the FPSP package.
 |	
 |	All FPSP handlers begin by executing:
 |
 |		link	a6,#-LOCAL_SIZE
 |		fsave	-(a7)
 |		movem.l	d0-d1/a0-a1,USER_DA(a6)
 |		fmovem.x fp0-fp3,USER_FP0(a6)
 |		fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6)
 |
 |	After initialization, the stack looks like this:
 |
 |	A7 --->	+-------------------------------+
 |		|				|
 |		|	FPU fsave area		|
 |		|				|
 |		+-------------------------------+
 |		|				|
 |		|	FPSP Local Variables	|
 |		|	     including		|
 |		|	  saved registers	|
 |		|				|
 |		+-------------------------------+
 |	A6 --->	|	Saved A6		|
 |		+-------------------------------+
 |		|				|
 |		|	Exception Frame		|
 |		|				|
 |		|				|
 |
 |	Positive offsets from A6 refer to the exception frame.  Negative
 |	offsets refer to the Local Variable area and the fsave area.
 |	The fsave frame is also accessible 'from the top' via A7.
 |
 |	On exit, the handlers execute:
 |
 |		movem.l	USER_DA(a6),d0-d1/a0-a1
 |		fmovem.x USER_FP0(a6),fp0-fp3
 |		fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
 |		frestore (a7)+
 |		unlk	a6
 |
 |	and then either 'bra fpsp_done' if the exception was completely
 |	handled	by the package, or 'bra real_xxxx' which is an external
 |	label to a routine that will process a real exception of the
 |	type that was generated.  Some handlers may omit the 'frestore'
 |	if the FPU state after the exception is idle.
 |
 |	Sometimes the exception handler will transform the fsave area
 |	because it needs to report an exception back to the user.  This
 |	can happen if the package is entered for an unimplemented float
 |	instruction that generates (say) an underflow.  Alternatively,
 |	a second fsave frame can be pushed onto the stack and the
 |	handler	exit code will reload the new frame and discard the old.
 |
 |	The registers d0, d1, a0, a1 and fp0-fp3 are always saved and
 |	restored from the 'local variable' area and can be used as
 |	temporaries.  If a routine needs to change any
 |	of these registers, it should modify the saved copy and let
 |	the handler exit code restore the value.
 |
 |----------------------------------------------------------------------
 |
 |	Local Variables on the stack
 |
 |	All offsets below are relative to A6.  LV is the lowest address
 |	of the local area, so LV+0 .. LV+LOCAL_SIZE-1 are the locals.
 |
 	.set		LOCAL_SIZE,192	| bytes needed for local variables
 	.set		LV,-LOCAL_SIZE	| convenient base value
 |
 	.set		USER_DA,LV+0	| save space for D0-D1,A0-A1
 	.set		USER_D0,LV+0	| saved user D0
 	.set		USER_D1,LV+4	| saved user D1
 	.set		USER_A0,LV+8	| saved user A0
 	.set		USER_A1,LV+12	| saved user A1
 |	Saved FP registers are spaced 12 bytes apart.
 	.set		USER_FP0,LV+16	| saved user FP0
 	.set		USER_FP1,LV+28	| saved user FP1
 	.set		USER_FP2,LV+40	| saved user FP2
 	.set		USER_FP3,LV+52	| saved user FP3
 |	FPCR, FPSR and FPIAR occupy three consecutive longwords.
 	.set		USER_FPCR,LV+64	| saved user FPCR
 	.set		FPCR_ENABLE,USER_FPCR+2	| FPCR exception enable 
 	.set		FPCR_MODE,USER_FPCR+3	| FPCR rounding mode control
 	.set		USER_FPSR,LV+68	| saved user FPSR
 	.set		FPSR_CC,USER_FPSR+0	| FPSR condition code
 	.set		FPSR_QBYTE,USER_FPSR+1	| FPSR quotient
 	.set		FPSR_EXCEPT,USER_FPSR+2	| FPSR exception
 	.set		FPSR_AEXCEPT,USER_FPSR+3	| FPSR accrued exception
 	.set		USER_FPIAR,LV+72	| saved user FPIAR
 |	The FP_SCRn temporaries are spaced 16 bytes apart.
 	.set		FP_SCR1,LV+76	| room for a temporary float value
 	.set		FP_SCR2,LV+92	| room for a temporary float value
 	.set		L_SCR1,LV+108	| room for a temporary long value
 	.set		L_SCR2,LV+112	| room for a temporary long value
 |	One-byte flags at LV+116 .. LV+123.
 	.set		STORE_FLG,LV+116
 	.set		BINDEC_FLG,LV+117	| used in bindec
 	.set		DNRM_FLG,LV+118	| used in res_func
 	.set		RES_FLG,LV+119	| used in res_func
 	.set		DY_MO_FLG,LV+120	| dyadic/monadic flag
 	.set		UFLG_TMP,LV+121	| temporary for uflag errata
 	.set		CU_ONLY,LV+122	| cu-only flag
 	.set		VER_TMP,LV+123	| temp holding for version number
 	.set		L_SCR3,LV+124	| room for a temporary long value
 	.set		FP_SCR3,LV+128	| room for a temporary float value
 	.set		FP_SCR4,LV+144	| room for a temporary float value
 	.set		FP_SCR5,LV+160	| room for a temporary float value
 	.set		FP_SCR6,LV+176	| last 16 bytes of the local area
 |
 |NEXT		equ	LV+192		;need to increase LOCAL_SIZE
 |
 |--------------------------------------------------------------------------
 |
 |	fsave offsets and bit definitions
 |
 |	Offsets are defined from the end of an fsave because the last 10
 |	words of a busy frame are the same as the unimplemented frame.
 |
 |	These offsets are below LV, i.e. inside the fsave area that
 |	sits under the local variables.
 |
 	.set		CU_SAVEPC,LV-92	| micro-pc for CU (1 byte)
 	.set		FPR_DIRTY_BITS,LV-91	| fpr dirty bits
 |
 	.set		WBTEMP,LV-76	| write back temp (12 bytes)
 	.set		WBTEMP_EX,WBTEMP	| wbtemp sign and exponent (2 bytes)
 	.set		WBTEMP_HI,WBTEMP+4	| wbtemp mantissa [63:32] (4 bytes)
 	.set		WBTEMP_LO,WBTEMP+8	| wbtemp mantissa [31:00] (4 bytes)
 |
 	.set		WBTEMP_SGN,WBTEMP+2	| used to store sign
 |
 	.set		FPSR_SHADOW,LV-64	| fpsr shadow reg
 |
 	.set		FPIARCU,LV-60	| Instr. addr. reg. for CU (4 bytes)
 |
 	.set		CMDREG2B,LV-52	| cmd reg for machine 2
 	.set		CMDREG3B,LV-48	| cmd reg for E3 exceptions (2 bytes)
 |
 	.set		NMNEXC,LV-44	| NMNEXC (unsup,snan bits only)
 	.set		nmn_unsup_bit,1
 	.set		nmn_snan_bit,0
 |
 	.set		NMCEXC,LV-43	| NMNEXC & NMCEXC
 	.set		nmn_operr_bit,7
 	.set		nmn_ovfl_bit,6
 	.set		nmn_unfl_bit,5
 	.set		nmc_unsup_bit,4
 	.set		nmc_snan_bit,3
 	.set		nmc_operr_bit,2
 	.set		nmc_ovfl_bit,1
 	.set		nmc_unfl_bit,0
 |
 |	STAG and WBTEMP_GRS name the same byte (LV-40).
 	.set		STAG,LV-40	| source tag (1 byte)
 	.set		WBTEMP_GRS,LV-40	| alias wbtemp guard, round, sticky
 	.set		guard_bit,1	| guard bit is bit number 1
 	.set		round_bit,0	| round bit is bit number 0
 	.set		stag_mask,0xE0	| upper 3 bits are source tag type
 	.set		denorm_bit,7	| bit determines if denorm or unnorm
 	.set		etemp15_bit,4	| etemp exponent bit #15
 	.set		wbtemp66_bit,2	| wbtemp mantissa bit #66
 	.set		wbtemp1_bit,1	| wbtemp mantissa bit #1
 	.set		wbtemp0_bit,0	| wbtemp mantissa bit #0
 |
 	.set		STICKY,LV-39	| holds sticky bit
 	.set		sticky_bit,7
 |
 	.set		CMDREG1B,LV-36	| cmd reg for E1 exceptions (2 bytes)
 	.set		kfact_bit,12	| distinguishes static/dynamic k-factor
 |					;on packed move outs.  NOTE: this
 |					;equate only works when CMDREG1B is in
 |					;a register.
 |
 	.set		CMDWORD,LV-35	| command word in cmd1b
 	.set		direction_bit,5	| bit 0 in opclass
 	.set		size_bit2,12	| bit 2 in size field
 |
 	.set		DTAG,LV-32	| dest tag (1 byte)
 	.set		dtag_mask,0xE0	| upper 3 bits are dest type tag
 	.set		fptemp15_bit,4	| fptemp exponent bit #15
 |
 	.set		WB_BYTE,LV-31	| holds WBTE15 bit (1 byte)
 	.set		wbtemp15_bit,4	| wbtemp exponent bit #15
 |
 	.set		E_BYTE,LV-28	| holds E1 and E3 bits (1 byte)
 	.set		E1,2		| which bit is E1 flag
 	.set		E3,1		| which bit is E3 flag
 	.set		SFLAG,0		| which bit is S flag
 |
 	.set		T_BYTE,LV-27	| holds T and U bits (1 byte)
 	.set		XFLAG,7		| which bit is X flag
 	.set		UFLAG,5		| which bit is U flag
 	.set		TFLAG,4		| which bit is T flag
 |
 	.set		FPTEMP,LV-24	| fptemp (12 bytes)
 	.set		FPTEMP_EX,FPTEMP	| fptemp sign and exponent (2 bytes)
 	.set		FPTEMP_HI,FPTEMP+4	| fptemp mantissa [63:32] (4 bytes)
 	.set		FPTEMP_LO,FPTEMP+8	| fptemp mantissa [31:00] (4 bytes)
 |
 	.set		FPTEMP_SGN,FPTEMP+2	| used to store sign
 |
 |	ETEMP is the last 12 bytes of the fsave area (LV-12 .. LV-1).
 	.set		ETEMP,LV-12	| etemp (12 bytes)
 	.set		ETEMP_EX,ETEMP	| etemp sign and exponent (2 bytes)
 	.set		ETEMP_HI,ETEMP+4	| etemp mantissa [63:32] (4 bytes)
 	.set		ETEMP_LO,ETEMP+8	| etemp mantissa [31:00] (4 bytes)
 |
 	.set		ETEMP_SGN,ETEMP+2	| used to store sign
 |
 |	Positive offsets from A6: the exception frame.
 	.set		EXC_SR,4	| exception frame status register
 	.set		EXC_PC,6	| exception frame program counter
 	.set		EXC_VEC,10	| exception frame vector (format+vector#)
 	.set		EXC_EA,12	| exception frame effective address
 |
 |--------------------------------------------------------------------------
 |
 |	FPSR/FPCR bits
 |
 	.set		neg_bit,3	| negative result
 	.set		z_bit,2		| zero result
 	.set		inf_bit,1	| infinity result
 	.set		nan_bit,0	| not-a-number result
 |
 	.set		q_sn_bit,7	| sign bit of quotient byte
 |
 	.set		bsun_bit,7	| branch on unordered
 	.set		snan_bit,6	| signalling nan
 	.set		operr_bit,5	| operand error
 	.set		ovfl_bit,4	| overflow
 	.set		unfl_bit,3	| underflow
 	.set		dz_bit,2	| divide by zero
 	.set		inex2_bit,1	| inexact result 2
 	.set		inex1_bit,0	| inexact result 1
 |
 	.set		aiop_bit,7	| accrued illegal operation
 	.set		aovfl_bit,6	| accrued overflow
 	.set		aunfl_bit,5	| accrued underflow
 	.set		adz_bit,4	| accrued divide by zero
 	.set		ainex_bit,3	| accrued inexact
 |
 |	FPSR individual bit masks
 |
 	.set		neg_mask,0x08000000
 	.set		z_mask,0x04000000
 	.set		inf_mask,0x02000000
 	.set		nan_mask,0x01000000
 |
 	.set		bsun_mask,0x00008000
 	.set		snan_mask,0x00004000
 	.set		operr_mask,0x00002000
 	.set		ovfl_mask,0x00001000
 	.set		unfl_mask,0x00000800
 	.set		dz_mask,0x00000400
 	.set		inex2_mask,0x00000200
 	.set		inex1_mask,0x00000100
 |
 	.set		aiop_mask,0x00000080	| accrued illegal operation
 	.set		aovfl_mask,0x00000040	| accrued overflow
 	.set		aunfl_mask,0x00000020	| accrued underflow
 	.set		adz_mask,0x00000010	| accrued divide by zero
 	.set		ainex_mask,0x00000008	| accrued inexact
 |
 |	FPSR combinations used in the FPSP
 |
 	.set		dzinf_mask,inf_mask+dz_mask+adz_mask
 	.set		opnan_mask,nan_mask+operr_mask+aiop_mask
 	.set		nzi_mask,0x01ffffff	| clears N, Z, and I
 	.set		unfinx_mask,unfl_mask+inex2_mask+aunfl_mask+ainex_mask
 	.set		unf2inx_mask,unfl_mask+inex2_mask+ainex_mask
 	.set		ovfinx_mask,ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
 	.set		inx1a_mask,inex1_mask+ainex_mask
 	.set		inx2a_mask,inex2_mask+ainex_mask
 	.set		snaniop_mask,nan_mask+snan_mask+aiop_mask
 	.set		naniop_mask,nan_mask+aiop_mask
 	.set		neginf_mask,neg_mask+inf_mask
 	.set		infaiop_mask,inf_mask+aiop_mask
 	.set		negz_mask,neg_mask+z_mask
 	.set		opaop_mask,operr_mask+aiop_mask
 	.set		unfl_inx_mask,unfl_mask+aunfl_mask+ainex_mask
 	.set		ovfl_inx_mask,ovfl_mask+aovfl_mask+ainex_mask
 |
 |--------------------------------------------------------------------------
 |
 |	FPCR rounding modes
 |
 	.set		x_mode,0x00	| round to extended
 	.set		s_mode,0x40	| round to single
 	.set		d_mode,0x80	| round to double
 |
 	.set		rn_mode,0x00	| round nearest
 	.set		rz_mode,0x10	| round to zero
 	.set		rm_mode,0x20	| round to minus infinity
 	.set		rp_mode,0x30	| round to plus infinity
 |
 |--------------------------------------------------------------------------
 |
 |	Miscellaneous equates
 |
 	.set		signan_bit,6	| signalling nan bit in mantissa
 	.set		sign_bit,7
 |
 	.set		rnd_stky_bit,29	| round/sticky bit of mantissa
 |				this can only be used if in a data register
 	.set		sx_mask,0x01800000	| set s and x bits in word $48
 |
 	.set		LOCAL_EX,0
 	.set		LOCAL_SGN,2
 	.set		LOCAL_HI,4
 	.set		LOCAL_LO,8
 	.set		LOCAL_GRS,12	| valid ONLY for FP_SCR1, FP_SCR2
 |
 |
 	.set		norm_tag,0x00	| tag bits in {7:5} position
 	.set		zero_tag,0x20
 	.set		inf_tag,0x40
 	.set		nan_tag,0x60
 	.set		dnrm_tag,0x80
 |
 |	fsave sizes and formats
 |
 	.set		VER_4,0x40	| fpsp compatible version numbers
 |					are in the $40s {$40-$4f}
 	.set		VER_40,0x40	| original version number
 	.set		VER_41,0x41	| revision version number
 |
 	.set		BUSY_SIZE,100	| size of busy frame
 	.set		BUSY_FRAME,LV-BUSY_SIZE	| start of busy frame
 |
 	.set		UNIMP_40_SIZE,44	| size of orig unimp frame
 	.set		UNIMP_41_SIZE,52	| size of rev unimp frame
 |
 	.set		IDLE_SIZE,4	| size of idle frame
 	.set		IDLE_FRAME,LV-IDLE_SIZE	| start of idle frame
 |
 |	exception vectors
 |
 	.set		TRACE_VEC,0x2024	| trace trap
 	.set		FLINE_VEC,0x002C	| 'real' F-line
 	.set		UNIMP_VEC,0x202C	| unimplemented
 	.set		INEX_VEC,0x00C4
 |
 	.set		dbl_thresh,0x3C01
 	.set		sgl_thresh,0x3F81
 |
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/gen_except.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/gen_except.s
@@ -0,0 +1,468 @@
 //
 //	gen_except.sa 3.7 1/16/92
 //
 //	gen_except --- FPSP routine to detect reportable exceptions
 //	
 //	This routine compares the exception enable byte of the
 //	user_fpcr on the stack with the exception status byte
 //	of the user_fpsr. 
 //
 //	Any routine which may report an exception must load
 //	the stack frame in memory with the exceptional operand(s).
 //
 //	Priority for exceptions is:
 //
 //	Highest:	bsun
 //			snan
 //			operr
 //			ovfl
 //			unfl
 //			dz
 //			inex2
 //	Lowest:		inex1
 //
 //	Note: The IEEE standard specifies that inex2 is to be
 //	reported if ovfl occurs and the ovfl enable bit is not
 //	set but the inex2 enable bit is.  
 //
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 GEN_EXCEPT:    //idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section 8
 	.include "fpsp.defs"
 	|xref	real_trace
 	|xref	fpsp_done
 	|xref	fpsp_fmt_error
 //
 // exc_tbl --- jump table used by frame_com.
 //
 // frame_com ANDs the FPCR enable byte with the FPSR exception byte,
 // locates the first set bit with bfffo and subtracts 24, giving an
 // index of 0-8.  Index 0 corresponds to bit 7 (bsun) and index 7 to
 // bit 0 (inex1); index 8 is used when no enabled exception is set.
 //
 exc_tbl:
 	.long	bsun_exc	//0: bsun
 	.long	commonE1	//1: snan
 	.long	commonE1	//2: operr
 	.long	ovfl_unfl	//3: ovfl
 	.long	ovfl_unfl	//4: unfl
 	.long	commonE1	//5: dz
 	.long	commonE3	//6: inex2
 	.long	commonE3	//7: inex1
 	.long	no_match	//8: no enabled exception bit set
 	.global	gen_except
 //
 // gen_except --- entry point.
 //
 // The size byte at 1(a7) of the fsave frame on top of the stack
 // selects the path taken:
 //	idle frame		-> do_check
 //	orig/rev unimp frame	-> unimp_x
 //	busy frame		-> fall through; fix up the frame via a1
 //	any other size		-> fpsp_fmt_error
 //
 gen_except:
 	cmpib	#IDLE_SIZE-4,1(%a7)	//test for idle frame
 	beq	do_check		//go handle idle frame
 	cmpib	#UNIMP_40_SIZE-4,1(%a7)	//test for orig unimp frame
 	beqs	unimp_x			//go handle unimp frame
 	cmpib	#UNIMP_41_SIZE-4,1(%a7)	//test for rev unimp frame
 	beqs	unimp_x			//go handle unimp frame
 	cmpib	#BUSY_SIZE-4,1(%a7)	//if size <> $60, fmt error
 	bnel	fpsp_fmt_error
 	leal	BUSY_SIZE+LOCAL_SIZE(%a7),%a1 //init a1 so fpsp.h
 //					;equates will work
 // Fix up the new busy frame with entries from the unimp frame
 //
 	movel	ETEMP_EX(%a6),ETEMP_EX(%a1) //copy etemp from unimp
 	movel	ETEMP_HI(%a6),ETEMP_HI(%a1) //frame to busy frame
 	movel	ETEMP_LO(%a6),ETEMP_LO(%a1) 
 	movel	CMDREG1B(%a6),CMDREG1B(%a1) //set inst in frame to unimp
 //
 // Build the cmd3b value in d0 from cmd1b: keep the bits selected by
 // the mask, then or in the two extracted bit fields after shifting
 // them and swapping them into the upper word.
 //
 	movel	CMDREG1B(%a6),%d0		//fix cmd1b to make it
 	andl	#0x03c30000,%d0		//work for cmd3b
 	bfextu	CMDREG1B(%a6){#13:#1},%d1	//extract bit 2
 	lsll	#5,%d1			
 	swap	%d1
 	orl	%d1,%d0			//put it in the right place
 	bfextu	CMDREG1B(%a6){#10:#3},%d1	//extract bit 3,4,5
 	lsll	#2,%d1
 	swap	%d1
 	orl	%d1,%d0			//put them in the right place
 	movel	%d0,CMDREG3B(%a1)		//in the busy frame
 //
 // Or in the FPSR from the emulation with the USER_FPSR on the stack.
 //
 	fmovel	%FPSR,%d0		
 	orl	%d0,USER_FPSR(%a6)
 	movel	USER_FPSR(%a6),FPSR_SHADOW(%a1) //set exc bits
 	orl	#sx_mask,E_BYTE(%a1)	//set the s and x bits (sx_mask)
 	bra	do_clean
 //
 // Frame is an unimp frame possibly resulting from an fmove <ea>,fp0
 // that caused an exception
 //
 // a1 is modified to point into the new frame allowing fpsp equates
 // to be valid.  The frame size added to a7 depends on whether the
 // frame is the original ($28) or the revised ($30) unimp format;
 // any other size byte goes to fpsp_fmt_error.
 //
 unimp_x:
 	cmpib	#UNIMP_40_SIZE-4,1(%a7)	//test for orig unimp frame
 	bnes	test_rev
 	leal	UNIMP_40_SIZE+LOCAL_SIZE(%a7),%a1
 	bras	unimp_con
 test_rev:
 	cmpib	#UNIMP_41_SIZE-4,1(%a7)	//test for rev unimp frame
 	bnel	fpsp_fmt_error		//if not $28 or $30
 	leal	UNIMP_41_SIZE+LOCAL_SIZE(%a7),%a1
 unimp_con:
 //
 // Fix up the new unimp frame with entries from the old unimp frame
 //
 	movel	CMDREG1B(%a6),CMDREG1B(%a1) //set inst in frame to unimp
 //
 // Or in the FPSR from the emulation with the USER_FPSR on the stack.
 //
 	fmovel	%FPSR,%d0		
 	orl	%d0,USER_FPSR(%a6)
 	bra	do_clean
 //
 // Frame is idle, so check for exceptions reported through
 // USER_FPSR and set the unimp frame accordingly.  
 // A7 must first be incremented past the 4-byte idle fsave frame
 // so that it points at the unimp frame again.
 //
 do_check:
 	addl	#4,%a7			//point A7 back to unimp frame
 //
 // Or in the FPSR from the emulation with the USER_FPSR on the stack.
 //
 	fmovel	%FPSR,%d0		
 	orl	%d0,USER_FPSR(%a6)
 //
 // On a busy frame, we must clear the nmnexc bits.
 //
 	cmpib	#BUSY_SIZE-4,1(%a7)	//check frame type
 	bnes	check_fr		//if busy, clr nmnexc
 	clrw	NMNEXC(%a6)		//clr nmnexc & nmcexc
 	btstb	#5,CMDREG1B(%a6)		//test for fmove out
 	bnes	frame_com
 	movel	USER_FPSR(%a6),FPSR_SHADOW(%a6) //set exc bits
 	orl	#sx_mask,E_BYTE(%a6)
 	bras	frame_com
 //
 // Not a busy frame: an orig unimp frame is left untouched; for any
 // other frame size the nmnexc word is cleared.
 //
 check_fr:
 	cmpb	#UNIMP_40_SIZE-4,1(%a7)
 	beqs	frame_com
 	clrw	NMNEXC(%a6)
 //
 // Dispatch through exc_tbl on the highest-priority exception that is
 // both enabled in the FPCR and set in the FPSR.  If no such bit is
 // set the index is 8, which selects no_match.
 //
 frame_com:
 	moveb	FPCR_ENABLE(%a6),%d0	//get fpcr enable byte
 	andb	FPSR_EXCEPT(%a6),%d0	//and in the fpsr exc byte
 	bfffo	%d0{#24:#8},%d1		//test for first set bit
 	leal	exc_tbl,%a0		//load jmp table address
 	subib	#24,%d1			//normalize bit offset to 0-8
 	movel	(%a0,%d1.w*4),%a0		//load routine address
 //					;based on first enabled exc
 	jmp	(%a0)			//jump to routine
 //
 // Bsun is not possible in unimp or unsupp, so the bsun table entry
 // does no frame fix-up and goes straight to do_clean.
 //
 bsun_exc:
 	bra	do_clean
 //
 // The typical work to be done to the unimp frame to report an 
 // exception is to set the E1/E3 byte and clr the U flag.
 // commonE1 does this for E1 exceptions, which are snan, 
 // operr, and dz.  commonE3 does this for E3 exceptions, which 
 // are inex2 and inex1, and also clears the E1 exception bit
 // left over from the unimp exception.
 //
 // commonE3 tests UFLG_TMP to choose between the unimp path (uniE3)
 // and the unsupp path (unsE3).  On the unsupp path E3 is set only
 // when RES_FLG is zero, and CMDREG3B is rebuilt from CMDREG1B.
 // All paths end in commonE, which clears the U flag and goes to
 // do_clean.
 //
 commonE1:
 	bsetb	#E1,E_BYTE(%a6)		//set E1 flag
 	bra	commonE			//go clean and exit
 commonE3:
 	tstb	UFLG_TMP(%a6)		//test flag for unsup/unimp state
 	bnes	unsE3
 uniE3:
 	bsetb	#E3,E_BYTE(%a6)		//set E3 flag
 	bclrb	#E1,E_BYTE(%a6)		//clr E1 from unimp
 	bra	commonE
 unsE3:
 	tstb	RES_FLG(%a6)
 	bnes	unsE3_0			//RES_FLG set: do not set E3
 unsE3_1:
 	bsetb	#E3,E_BYTE(%a6)		//set E3 flag
 unsE3_0:
 	bclrb	#E1,E_BYTE(%a6)		//clr E1 flag
 	movel	CMDREG1B(%a6),%d0
 	andl	#0x03c30000,%d0		//work for cmd3b
 	bfextu	CMDREG1B(%a6){#13:#1},%d1	//extract bit 2
 	lsll	#5,%d1			
 	swap	%d1
 	orl	%d1,%d0			//put it in the right place
 	bfextu	CMDREG1B(%a6){#10:#3},%d1	//extract bit 3,4,5
 	lsll	#2,%d1
 	swap	%d1
 	orl	%d1,%d0			//put them in the right place
 	movel	%d0,CMDREG3B(%a6)		//in the busy frame
 commonE:
 	bclrb	#UFLAG,T_BYTE(%a6)	//clr U flag from unimp
 	bra	do_clean		//go clean and exit
 //
 // No bits in the enable byte match existing exceptions.  Check for
 // the case of the ovfl exc without the ovfl enabled, but with
 // inex2 enabled.
 //
 no_match:
 	btstb	#inex2_bit,FPCR_ENABLE(%a6) //check for ovfl/inex2 case
 	beqs	no_exc			//if clear, exit
 	btstb	#ovfl_bit,FPSR_EXCEPT(%a6) //now check ovfl
 	beqs	no_exc			//if clear, exit
 	bras	ovfl_unfl		//go to ovfl_unfl to determine if
 //					;it is an unsupp or unimp exc
 // No exceptions are to be reported.  If the instruction was 
 // unimplemented, no FPU restore is necessary.  If it was
 // unsupported, we must perform the restore.
 //
 // UFLG_TMP zero selects the unimp path.  On the unsupp path a set
 // RES_FLG sends control to do_clean (which does the frestore); a
 // clear RES_FLG falls through to the no-frestore exit.
 //
 no_exc:
 	tstb	UFLG_TMP(%a6)	//test flag for unsupp/unimp state
 	beqs	uni_no_exc
 uns_no_exc:
 	tstb	RES_FLG(%a6)	//check if frestore is needed
 	bne	do_clean 	//if set, go frestore; if clear, fall through
 uni_no_exc:
 	moveml	USER_DA(%a6),%d0-%d1/%a0-%a1	//restore user d0-d1/a0-a1
 	fmovemx USER_FP0(%a6),%fp0-%fp3		//restore user fp0-fp3
 	fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar //restore user fp control regs
 	unlk	%a6
 	bra	finish_up
 //
 // Unsupported Data Type Handler:
 // Ovfl:
 //   An fmoveout that results in an overflow is reported this way.
 // Unfl:
 //   An fmoveout that results in an underflow is reported this way.
 //
 // Unimplemented Instruction Handler:
 // Ovfl:
 //   Only scosh, setox, ssinh, stwotox, and scale can set overflow in 
 //   this manner.
 // Unfl:
 //   Stwotox, setox, and scale can set underflow in this manner.
 //   Any of the other Library Routines such that f(x)=x in which
 //   x is an extended denorm can report an underflow exception. 
 //   It is the responsibility of the exception-causing exception 
 //   to make sure that WBTEMP is correct.
 //
 //   The exceptional operand is in FP_SCR1.
 //
 ovfl_unfl:
 	tstb	UFLG_TMP(%a6)	//test flag for unsupp/unimp state
 	beqs	ofuf_con
 //
 // The caller was from an unsupported data type trap.  Test if the
 // caller set CU_ONLY.  If so, the exceptional operand is expected in
 // FPTEMP, rather than WBTEMP.
 //
 	tstb	CU_ONLY(%a6)		//test if inst is cu-only
 	beq	unsE3
 //	move.w	#$fe,CU_SAVEPC(%a6)
 	clrb	CU_SAVEPC(%a6)
 	bsetb	#E1,E_BYTE(%a6)		//set E1 exception flag
 	movew	ETEMP_EX(%a6),FPTEMP_EX(%a6)	//copy etemp to fptemp
 	movel	ETEMP_HI(%a6),FPTEMP_HI(%a6)
 	movel	ETEMP_LO(%a6),FPTEMP_LO(%a6)
 	bsetb	#fptemp15_bit,DTAG(%a6)	//set fpte15
 	bclrb	#UFLAG,T_BYTE(%a6)	//clr U flag from unimp
 	bra	do_clean		//go clean and exit
 //
 // Unimp case.  If the frame at a7 is not already busy, grow it to a
 // busy frame: the version byte is saved in VER_TMP, the old format
 // longword and d0+1 further longwords below it are zeroed, and the
 // saved version byte and the busy size byte are written at the new a7.
 //
 ofuf_con:
 	moveb	(%a7),VER_TMP(%a6)	//save version number
 	cmpib	#BUSY_SIZE-4,1(%a7)	//check for busy frame
 	beqs	busy_fr			//if busy, skip; if unimp, grow to busy
 	cmpib	#VER_40,(%a7)		//test for orig unimp frame
 	bnes	try_41			//if not, test for rev frame
 	moveql	#13,%d0			//need to zero 14 lwords
 	bras	ofuf_fin
 try_41:
 	cmpib	#VER_41,(%a7)		//test for rev unimp frame
 	bnel	fpsp_fmt_error		//if neither, exit with error
 	moveql	#11,%d0			//need to zero 12 lwords
 ofuf_fin:
 	clrl	(%a7)
 loop1:
 	clrl	-(%a7)			//clear and dec a7
 	dbra	%d0,loop1
 	moveb	VER_TMP(%a6),(%a7)
 	moveb	#BUSY_SIZE-4,1(%a7)		//write busy fmt word.
 busy_fr:
 	movel	FP_SCR1(%a6),WBTEMP_EX(%a6)	//write
 	movel	FP_SCR1+4(%a6),WBTEMP_HI(%a6)	//exceptional op to
 	movel	FP_SCR1+8(%a6),WBTEMP_LO(%a6)	//wbtemp
 	bsetb	#E3,E_BYTE(%a6)			//set E3 flag
 	bclrb	#E1,E_BYTE(%a6)			//make sure E1 is clear
 	bclrb	#UFLAG,T_BYTE(%a6)		//clr U flag
 	movel	USER_FPSR(%a6),FPSR_SHADOW(%a6)
 	orl	#sx_mask,E_BYTE(%a6)
 	movel	CMDREG1B(%a6),%d0		//fix cmd1b to make it
 	andl	#0x03c30000,%d0		//work for cmd3b
 	bfextu	CMDREG1B(%a6){#13:#1},%d1	//extract bit 2
 	lsll	#5,%d1			
 	swap	%d1
 	orl	%d1,%d0			//put it in the right place
 	bfextu	CMDREG1B(%a6){#10:#3},%d1	//extract bit 3,4,5
 	lsll	#2,%d1
 	swap	%d1
 	orl	%d1,%d0			//put them in the right place
 	movel	%d0,CMDREG3B(%a6)		//in the busy frame
 //
 // Check if the frame to be restored is busy or unimp.
 //** NOTE *** Bug fix for errata (0d43b #3)
 // If the frame is unimp, we must create a busy frame to 
 // fix the bug with the nmnexc bits in cases in which they
 // are set by a previous instruction and not cleared by
 // the save. The frame will be unimp only if the final 
 // instruction in an emulation routine caused the exception
 // by doing an fmove <ea>,fp0.  The exception operand, in
 // internal format, is in fptemp.
 //
 // Any frame whose size byte is neither unimp value is taken to be
 // busy and goes straight to do_restore.
 //
 do_clean:
 	cmpib	#UNIMP_40_SIZE-4,1(%a7)
 	bnes	do_con
 	moveql	#13,%d0			//in orig, need to zero 14 lwords
 	bras	do_build
 do_con:
 	cmpib	#UNIMP_41_SIZE-4,1(%a7)
 	bnes	do_restore		//frame must be busy
 	moveql	#11,%d0			//in rev, need to zero 12 lwords
 do_build:
 	moveb	(%a7),VER_TMP(%a6)	//save version byte of old frame
 	clrl	(%a7)
 loop2:
 	clrl	-(%a7)			//clear and dec a7
 	dbra	%d0,loop2
 //
 // Use a1 as pointer into new frame.  a6 is not correct if an unimp or
 // busy frame was created as the result of an exception on the final
 // instruction of an emulation routine.
 //
 // We need to set the nmcexc bits if the exception is E1. Otherwise,
 // the exc taken will be inex2.
 //
 	leal	BUSY_SIZE+LOCAL_SIZE(%a7),%a1	//init a1 for new frame
 	moveb	VER_TMP(%a6),(%a7)	//write busy fmt word
 	moveb	#BUSY_SIZE-4,1(%a7)
 	movel	FP_SCR1(%a6),WBTEMP_EX(%a1) 	//write
 	movel	FP_SCR1+4(%a6),WBTEMP_HI(%a1)	//exceptional op to
 	movel	FP_SCR1+8(%a6),WBTEMP_LO(%a1)	//wbtemp
 //	btst.b	#E1,E_BYTE(%a1)
 //	beq.b	do_restore
 	bfextu	USER_FPSR(%a6){#17:#4},%d0	//get snan/operr/ovfl/unfl bits
 	bfins	%d0,NMCEXC(%a1){#4:#4}	//and insert them in nmcexc
 	movel	USER_FPSR(%a6),FPSR_SHADOW(%a1) //set exc bits
 	orl	#sx_mask,E_BYTE(%a1)
 //
 // Restore the user's integer and fp registers, frestore the frame at
 // a7, and run the bug1384 work-around when RES_FLG is set.  After the
 // unlk, control falls through into finish_up.
 //
 do_restore:
 	moveml	USER_DA(%a6),%d0-%d1/%a0-%a1
 	fmovemx USER_FP0(%a6),%fp0-%fp3
 	fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
 	frestore (%a7)+
 	tstb	RES_FLG(%a6)	//RES_FLG indicates a "continuation" frame
 	beq	cont
 	bsr	bug1384
 cont:
 	unlk	%a6
 //
 // If trace mode enabled, then go to trace handler.  This handler 
 // cannot have any fp instructions.  If there are fp inst's and an 
 // exception has been restored into the machine then the exception 
 // will occur upon execution of the fp inst.  This is not desirable 
 // in the kernel (supervisor mode).  See MC68040 manual Section 9.3.8.
 //
 // With neither trace bit set in the byte at (a7), exit through
 // fpsp_done.
 //
 finish_up:
 	btstb	#7,(%a7)		//test T1 in SR
 	bnes	g_trace
 	btstb	#6,(%a7)		//test T0 in SR
 	bnes	g_trace
 	bral	fpsp_done
 //
 // Change integer stack to look like trace stack
 // The address of the instruction that caused the
 // exception is already in the integer stack (is
 // the same as the saved fpiar)
 //
 // If the current frame is already a 6-word stack then all
 // that needs to be done is to change the vector# to TRACE.
 // If the frame is only a 4-word stack (meaning we got here
 // on an Unsupported data type exception), then we need to grow
 // the stack an extra 2 words and get the FPIAR from the FPU.
 //
 g_trace:
 	bftst	EXC_VEC-4(%sp){#0:#4}	//test the format field of the vector word
 	bne	g_easy			//non-zero: frame needs no growing
 	subw	#4,%sp		// make room
 	movel	4(%sp),(%sp)	// move the existing 4 words down
 	movel	8(%sp),4(%sp)
 	subw	#BUSY_SIZE,%sp	// scratch space for an fsave frame
 	fsave	(%sp)
 	fmovel	%fpiar,BUSY_SIZE+EXC_EA-4(%sp)	// fill new EA slot from FPIAR
 	frestore (%sp)
 	addw	#BUSY_SIZE,%sp
 g_easy:
 	movew	#TRACE_VEC,EXC_VEC-4(%a7)	// change vector word to trace
 	bral	real_trace
 //
 //  This is a work-around for hardware bug 1384.
 //
 //  The FPU state is saved with fsave and examined:
 //    - version byte above $41: the frame is restored unchanged.
 //    - version $41, or anything below $41 (handled as $40): if the
 //      size byte is zero (idle) the frame is replaced by a zeroed
 //      unimp frame of the matching revision ($4028 / $4130 format
 //      word); otherwise one bit is cleared in the saved frame.
 //  The frame is then frestored.  a5 is used as a local frame pointer
 //  and d1 is preserved through USER_D1(a6), so a6 must still be the
 //  FPSP frame pointer on entry.
 //
 bug1384:
 	link	%a5,#0
 	fsave	-(%sp)
 	cmpib	#0x41,(%sp)	// check for correct frame
 	beq	frame_41
 	bgt	nofix		// if more advanced mask, do nada
 frame_40:
 	tstb	1(%sp)		// check to see if idle
 	bne	notidle
 idle40:
 	clrl	(%sp)		// get rid of old fsave frame
        movel  %d1,USER_D1(%a6)  // save d1
 	movew	#8,%d1		// place unimp frame instead
 loop40:	clrl	-(%sp)
 	dbra	%d1,loop40
        movel  USER_D1(%a6),%d1  // restore d1
 	movel	#0x40280000,-(%sp)
 	frestore (%sp)+
 	unlk  	%a5	
 	rts
 frame_41:
 	tstb	1(%sp)		// check to see if idle
 	bne	notidle	
 idle41:
 	clrl	(%sp)		// get rid of old fsave frame
        movel  %d1,USER_D1(%a6)  // save d1
 	movew	#10,%d1		// place unimp frame instead
 loop41:	clrl	-(%sp)
 	dbra	%d1,loop41
        movel  USER_D1(%a6),%d1  // restore d1
 	movel	#0x41300000,-(%sp)
 	frestore (%sp)+
 	unlk	%a5	
 	rts
 notidle:
 // NOTE(review): -40(a5) presumably addresses the STAG byte (LV-40)
 // of the frame just saved below a5 -- confirm against fpsp.defs.
 	bclrb	#etemp15_bit,-40(%a5) 
 	frestore (%sp)+
 	unlk	%a5	
 	rts
 nofix:
 	frestore (%sp)+
 	unlk	%a5	
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/get_op.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/get_op.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/kernel_ex.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/kernel_ex.s
@@ -0,0 +1,494 @@
 //
 //	kernel_ex.sa 3.3 12/19/90 
 //
 // This file contains routines to force exception status in the 
 // fpu for exceptional cases detected or reported within the
 // transcendental functions.  Typically, the t_xx routine will
 // set the appropriate bits in the USER_FPSR word on the stack.
 // The bits are tested in gen_except.sa to determine if an exceptional
 // situation needs to be created on return from the FPSP. 
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 KERNEL_EX:    //idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section    8
 	.include "fpsp.defs"
 // Constants, three longwords each, loaded into fp0 with fmovemx by the
 // t_dz and t_operr handlers in this file (mns_inf, pls_inf, nan).
 mns_inf:  .long 0xffff0000,0x00000000,0x00000000
 pls_inf:  .long 0x7fff0000,0x00000000,0x00000000
 nan:      .long 0x7fff0000,0xffffffff,0xffffffff
 huge:     .long 0x7ffe0000,0xffffffff,0xffffffff
 	|xref	  ovf_r_k
 	|xref	  unf_sub
 	|xref	  nrm_set
 	.global   	  t_dz
 	.global      t_dz2
 	.global      t_operr
 	.global      t_unfl
 	.global      t_ovfl
 	.global      t_ovfl2
 	.global      t_inx2
 	.global	  t_frcinx
 	.global	  t_extdnrm
 	.global	  t_resdnrm
 	.global	  dst_nan
 	.global	  src_nan
 //
 //	DZ exception
 //
 //
 //	if dz trap disabled
 //		store properly signed inf (use sign of etemp) into fp0
 //		set FPSR exception status dz bit, condition code 
 //		inf bit, and accrued dz bit
 //		return
 //		frestore the frame into the machine (done by unimp_hd)
 //
 //	else dz trap enabled
 //		set exception status bit & accrued bits in FPSR
 //		set flag to disable sto_res from corrupting fp register
 //		return
 //		frestore the frame into the machine (done by unimp_hd)
 //
 // t_dz2 is used by monadic functions such as flogn (from do_func).
 // t_dz is used by monadic functions such as satanh (from the 
 // transcendental function).
 //
 // t_dz2 always produces a negative result: it sets the N bit first
 // and, when the trap is disabled, loads -inf regardless of the sign
 // of etemp.
 //
 t_dz2:
 	bsetb	#neg_bit,FPSR_CC(%a6)	//set neg bit in FPSR
 	fmovel	#0,%FPSR			//clr status bits (Z set)
 	btstb	#dz_bit,FPCR_ENABLE(%a6)	//test FPCR for dz exc enabled
 	bnes	dz_ena_end
 	bras	m_inf			//flogx always returns -inf
 t_dz:
 	fmovel	#0,%FPSR			//clr status bits (Z set)
 	btstb	#dz_bit,FPCR_ENABLE(%a6)	//test FPCR for dz exc enabled
 	bnes	dz_ena
 //
 //	dz disabled
 //
 	btstb	#sign_bit,ETEMP_EX(%a6)	//check sign for neg or pos
 	beqs	p_inf			//branch if pos sign
 m_inf:
 	fmovemx mns_inf,%fp0-%fp0		//load -inf
 	bsetb	#neg_bit,FPSR_CC(%a6)	//set neg bit in FPSR
 	bras	set_fpsr
 p_inf:
 	fmovemx pls_inf,%fp0-%fp0		//load +inf
 set_fpsr:
 	orl	#dzinf_mask,USER_FPSR(%a6) //set I,DZ,ADZ
 	rts
 //
 //	dz enabled
 //
 dz_ena:
 	btstb	#sign_bit,ETEMP_EX(%a6)	//check sign for neg or pos
 	beqs	dz_ena_end
 	bsetb	#neg_bit,FPSR_CC(%a6)	//set neg bit in FPSR
 dz_ena_end:
 	orl	#dzinf_mask,USER_FPSR(%a6) //set I,DZ,ADZ
 	st	STORE_FLG(%a6)		//do not store a result
 	rts
 //
 //	OPERR exception
 //
 //	if (operr trap disabled)
 //		set FPSR exception status operr bit, condition code 
 //		nan bit; Store default NAN into fp0
 //		frestore the frame into the machine (done by unimp_hd)
 //	
 //	else (operr trap enabled)
 //		set FPSR exception status operr bit, accrued operr bit
 //		set flag to disable sto_res from corrupting fp register
 //		frestore the frame into the machine (done by unimp_hd)
 //
 // In both cases the same mask (NaN, OPERR, AIOP) is or-ed into
 // USER_FPSR before the enable bit is tested.
 //
 t_operr:
 	orl	#opnan_mask,USER_FPSR(%a6) //set NaN, OPERR, AIOP
 	btstb	#operr_bit,FPCR_ENABLE(%a6) //test FPCR for operr enabled
 	bnes	op_ena
 	fmovemx nan,%fp0-%fp0		//load default nan
 	rts
 op_ena:
 	st	STORE_FLG(%a6)		//do not corrupt destination
 	rts
 //
 //	t_unfl --- UNFL exception
 //
 // This entry point is used by all routines requiring unfl, inex2,
 // aunfl, and ainex to be set on exit.
 //
 // On entry, a0 points to the exceptional operand.  The final exceptional
 // operand is built in FP_SCR1 and only the sign from the original operand
 // is used.
 //
 // On exit fp0 holds the result returned by unf_sub and a0 points to
 // it (FP_SCR1).
 //
 t_unfl:
 	clrl	FP_SCR1(%a6)		//set exceptional operand to zero
 	clrl	FP_SCR1+4(%a6)
 	clrl	FP_SCR1+8(%a6)
 	tstb	(%a0)			//extract sign from caller's exop
 	bpls	unfl_signok
 	bset	#sign_bit,FP_SCR1(%a6)
 unfl_signok:
 	leal	FP_SCR1(%a6),%a0
 	orl	#unfinx_mask,USER_FPSR(%a6)
 //					;set UNFL, INEX2, AUNFL, AINEX
 unfl_con:
 	btstb	#unfl_bit,FPCR_ENABLE(%a6)
 	beqs	unfl_dis
 //
 // Underflow trap enabled: additionally prepare the frame fields.
 //
 unfl_ena:
 	bfclr	STAG(%a6){#5:#3}		//clear wbtm66,wbtm1,wbtm0
 	bsetb	#wbtemp15_bit,WB_BYTE(%a6) //set wbtemp15
 	bsetb	#sticky_bit,STICKY(%a6)	//set sticky bit
 	bclrb	#E1,E_BYTE(%a6)
 unfl_dis:
 	bfextu	FPCR_MODE(%a6){#0:#2},%d0	//get round precision
 	bclrb	#sign_bit,LOCAL_EX(%a0)
 	sne	LOCAL_SGN(%a0)		//convert to internal ext format
 	bsr	unf_sub			//returns IEEE result at a0
 //					;and sets FPSR_CC accordingly
 	bfclr	LOCAL_SGN(%a0){#0:#8}	//convert back to IEEE ext format
 	beqs	unfl_fin
 	bsetb	#sign_bit,LOCAL_EX(%a0)
 	bsetb	#sign_bit,FP_SCR1(%a6)	//set sign bit of exc operand
 unfl_fin:
 	fmovemx (%a0),%fp0-%fp0		//store result in fp0
 	rts
 //
 //	t_ovfl2 --- OVFL exception (without inex2 returned)
 //
 // This entry is used by scale to force catastrophic overflow.  The
 // ovfl, aovfl, and ainex bits are set, but not the inex2 bit.
 // ETEMP is copied to FP_SCR1 as the exceptional operand, and control
 // continues at t_work.
 //
 t_ovfl2:
 	orl	#ovfl_inx_mask,USER_FPSR(%a6)
 	movel	ETEMP(%a6),FP_SCR1(%a6)
 	movel	ETEMP_HI(%a6),FP_SCR1+4(%a6)
 	movel	ETEMP_LO(%a6),FP_SCR1+8(%a6)
 //
 // Check for single or double round precision.  If single, check if
 // the lower 40 bits of ETEMP are zero; if not, set inex2.  If double,
 // check if the lower 11 bits are zero; if not, set inex2.
 //
 	moveb	FPCR_MODE(%a6),%d0
 	andib	#0xc0,%d0
 	beq	t_work		//if extended, finish ovfl processing
 	cmpib	#0x40,%d0		//test for single
 	bnes	t_dbl
 t_sgl:
 //
 // The lower 40 mantissa bits are all 32 bits of ETEMP_LO plus the low
 // 8 bits of ETEMP_HI.  The whole low longword must be tested; a byte
 // test would only look at its most significant 8 bits.
 //
 	tstl	ETEMP_LO(%a6)
 	bnes	t_setinx2
 	movel	ETEMP_HI(%a6),%d0
 	andil	#0xff,%d0		//look at only lower 8 bits
 	bnes	t_setinx2
 	bra	t_work
 t_dbl:
 	movel	ETEMP_LO(%a6),%d0
 	andil	#0x7ff,%d0	//look at only lower 11 bits
 	beq	t_work
 t_setinx2:
 	orl	#inex2_mask,USER_FPSR(%a6)
 	bras	t_work
 //
 //	t_ovfl --- OVFL exception
 //
 //** Note: the exc operand is returned in ETEMP.
 //
 // t_ovfl sets OVFL, INEX2, AOVFL and AINEX (ovfinx_mask).  t_work is
 // the common tail shared with t_ovfl2, which sets its own FPSR bits
 // before branching here.
 //
 t_ovfl:
 	orl	#ovfinx_mask,USER_FPSR(%a6)
 t_work:
 	btstb	#ovfl_bit,FPCR_ENABLE(%a6) //test FPCR for ovfl enabled
 	beqs	ovf_dis
 ovf_ena:
 	clrl	FP_SCR1(%a6)		//set exceptional operand
 	clrl	FP_SCR1+4(%a6)
 	clrl	FP_SCR1+8(%a6)
 	bfclr	STAG(%a6){#5:#3}		//clear wbtm66,wbtm1,wbtm0
 	bclrb	#wbtemp15_bit,WB_BYTE(%a6) //clear wbtemp15
 	bsetb	#sticky_bit,STICKY(%a6)	//set sticky bit
 	bclrb	#E1,E_BYTE(%a6)
 //					;fall through to disabled case
 // For disabled overflow call 'ovf_r_k'.  This routine loads the
 // correct result based on the rounding precision, destination
 // format, rounding mode and sign.
 //
 ovf_dis:
 	bsr	ovf_r_k			//returns unsigned ETEMP_EX
 //					;and sets FPSR_CC accordingly.
 	bfclr	ETEMP_SGN(%a6){#0:#8}	//fix sign
 	beqs	ovf_pos
 	bsetb	#sign_bit,ETEMP_EX(%a6)
 	bsetb	#sign_bit,FP_SCR1(%a6)	//set exceptional operand sign
 ovf_pos:
 	fmovemx ETEMP(%a6),%fp0-%fp0		//move the result to fp0
 	rts
 //
 //	INEX2 exception
 //
 // The inex2 and ainex bits are set in USER_FPSR; nothing else is
 // touched.
 //
 t_inx2:
 	orl	#inx2a_mask,USER_FPSR(%a6) //set INEX2, AINEX
 	rts
 //
 //	Force Inex2
 //
 // This routine is called by the transcendental routines to force
 // the inex2 exception bits set in the FPSR.  If the underflow bit
 // is set, but the underflow trap was not taken, the aunfl bit in
 // the FPSR must be set.
 //
 // The code tests only the unfl bit of FPSR_EXCEPT before setting
 // aunfl; it does not itself check whether the trap was taken.
 //
 t_frcinx:
 	orl	#inx2a_mask,USER_FPSR(%a6) //set INEX2, AINEX
 	btstb	#unfl_bit,FPSR_EXCEPT(%a6) //test for unfl bit set
 	beqs	no_uacc1		//if clear, do not set aunfl
 	bsetb	#aunfl_bit,FPSR_AEXCEPT(%a6)
 no_uacc1:
 	rts
 //
 //	DST_NAN
 //
 // Determine if the destination nan is signalling or non-signalling,
 // and set the FPSR bits accordingly.  See the MC68040 User's Manual 
 // section 3.2.2.5 NOT-A-NUMBERS.
 //
 // The nan is signalling when bit signan_bit of FPTEMP_HI is clear.
 // d1 is written to the fpcr before the nan is returned in fp0
 // (presumably the caller holds the user's fpcr in d1 -- confirm).
 //
 dst_nan:
 	btstb	#sign_bit,FPTEMP_EX(%a6) //test sign of nan
 	beqs	dst_pos			//if clr, it was positive
 	bsetb	#neg_bit,FPSR_CC(%a6)	//set N bit
 dst_pos:
 	btstb	#signan_bit,FPTEMP_HI(%a6) //check if signalling 
 	beqs	dst_snan		//branch if signalling
 	fmovel	%d1,%fpcr			//restore user's rmode/prec
 	fmovex FPTEMP(%a6),%fp0		//return the non-signalling nan
 //
 // Check the source nan.  If it is signalling, snan will be reported.
 //
 	moveb	STAG(%a6),%d0
 	andib	#0xe0,%d0		//isolate the source tag bits
 	cmpib	#0x60,%d0		//is the source tagged as a nan?
 	bnes	no_snan
 	btstb	#signan_bit,ETEMP_HI(%a6) //check if signalling 
 	bnes	no_snan
 	orl	#snaniop_mask,USER_FPSR(%a6) //set NAN, SNAN, AIOP
 no_snan:
 	rts	
 dst_snan:
 	btstb	#snan_bit,FPCR_ENABLE(%a6) //check if trap enabled 
 	beqs	dst_dis			//branch if disabled
 	orb	#nan_tag,DTAG(%a6)	//set up dtag for nan
 	st	STORE_FLG(%a6)		//do not store a result
 	orl	#snaniop_mask,USER_FPSR(%a6) //set NAN, SNAN, AIOP
 	rts
 dst_dis:
 	bsetb	#signan_bit,FPTEMP_HI(%a6) //set SNAN bit in dest operand
 	fmovel	%d1,%fpcr			//restore user's rmode/prec
 	fmovex FPTEMP(%a6),%fp0		//load non-sign. nan 
 	orl	#snaniop_mask,USER_FPSR(%a6) //set NAN, SNAN, AIOP
 	rts
 //
 //	SRC_NAN
 //
 // Determine if the source nan is signalling or non-signalling,
 // and set the FPSR bits accordingly.  See the MC68040 User's Manual 
 // section 3.2.2.5 NOT-A-NUMBERS.
 //
 // The nan is signalling when bit signan_bit of ETEMP_HI is clear.
 // With the snan trap enabled no result is stored (STORE_FLG is set);
 // with it disabled the nan, with that bit now set, is returned in fp0.
 //
 src_nan:
 	btstb	#sign_bit,ETEMP_EX(%a6) //test sign of nan
 	beqs	src_pos			//if clr, it was positive
 	bsetb	#neg_bit,FPSR_CC(%a6)	//set N bit
 src_pos:
 	btstb	#signan_bit,ETEMP_HI(%a6) //check if signalling 
 	beqs	src_snan		//branch if signalling
 	fmovel	%d1,%fpcr			//restore user's rmode/prec
 	fmovex ETEMP(%a6),%fp0		//return the non-signalling nan
 	rts	
 src_snan:
 	btstb	#snan_bit,FPCR_ENABLE(%a6) //check if trap enabled 
 	beqs	src_dis			//branch if disabled
 	bsetb	#signan_bit,ETEMP_HI(%a6) //set SNAN bit in sop 
 	orb	#norm_tag,DTAG(%a6)	//set up dtag for norm
 	orb	#nan_tag,STAG(%a6)	//set up stag for nan
 	st	STORE_FLG(%a6)		//do not store a result
 	orl	#snaniop_mask,USER_FPSR(%a6) //set NAN, SNAN, AIOP
 	rts
 src_dis:
 	bsetb	#signan_bit,ETEMP_HI(%a6) //set SNAN bit in sop 
 	fmovel	%d1,%fpcr			//restore user's rmode/prec
 	fmovex ETEMP(%a6),%fp0		//load non-sign. nan 
 	orl	#snaniop_mask,USER_FPSR(%a6) //set NAN, SNAN, AIOP
 	rts
 //
 // For all functions that have a denormalized input and that f(x)=x,
 // this is the entry point
 //
 // On entry a0 points to the operand.  On exit fp0 holds the result.
 //
 t_extdnrm:
 	orl	#unfinx_mask,USER_FPSR(%a6)
 //					;set UNFL, INEX2, AUNFL, AINEX
 	bras	xdnrm_con
 //
 // Entry point for scale with extended denorm.  The function does
 // not set inex2, aunfl, or ainex.  
 //
 t_resdnrm:
 	orl	#unfl_mask,USER_FPSR(%a6)
 xdnrm_con:
 	btstb	#unfl_bit,FPCR_ENABLE(%a6)
 	beqs	xdnrm_dis
 //
 // If exceptions are enabled, the additional task of setting up WBTEMP
 // is needed so that when the underflow exception handler is entered,
 // the user perceives no difference between what the 040 provides vs.
 // what the FPSP provides.
 //
 // The caller's a0 is saved on the stack while a copy of the operand
 // in FP_SCR1 is worked on, and restored before xdnrm_dis.
 //
 xdnrm_ena:
 	movel	%a0,-(%a7)
 	movel	LOCAL_EX(%a0),FP_SCR1(%a6)
 	movel	LOCAL_HI(%a0),FP_SCR1+4(%a6)
 	movel	LOCAL_LO(%a0),FP_SCR1+8(%a6)
 	lea	FP_SCR1(%a6),%a0
 	bclrb	#sign_bit,LOCAL_EX(%a0)
 	sne	LOCAL_SGN(%a0)		//convert to internal ext format
 	tstw	LOCAL_EX(%a0)		//check if input is denorm
 	beqs	xdnrm_dn		//if so, skip nrm_set
 	bsr	nrm_set			//normalize the result (exponent
 //					;will be negative)
 xdnrm_dn:
 	bclrb	#sign_bit,LOCAL_EX(%a0)	//take off false sign
 	bfclr	LOCAL_SGN(%a0){#0:#8}	//change back to IEEE ext format
 	beqs	xdep
 	bsetb	#sign_bit,LOCAL_EX(%a0)
 xdep:	
 	bfclr	STAG(%a6){#5:#3}		//clear wbtm66,wbtm1,wbtm0
 	bsetb	#wbtemp15_bit,WB_BYTE(%a6) //set wbtemp15
 	bclrb	#sticky_bit,STICKY(%a6)	//clear sticky bit
 	bclrb	#E1,E_BYTE(%a6)
 	movel	(%a7)+,%a0
 xdnrm_dis:
 	bfextu	FPCR_MODE(%a6){#0:#2},%d0	//get round precision
 	bnes	not_ext			//if not round extended, store
 //					;IEEE defaults
 is_ext:
 	btstb	#sign_bit,LOCAL_EX(%a0)
 	beqs	xdnrm_store
 	bsetb	#neg_bit,FPSR_CC(%a6)	//set N bit in FPSR_CC
 	bras	xdnrm_store
 not_ext:
 	bclrb	#sign_bit,LOCAL_EX(%a0)
 	sne	LOCAL_SGN(%a0)		//convert to internal ext format
 	bsr	unf_sub			//returns IEEE result pointed by
 //					;a0; sets FPSR_CC accordingly
 	bfclr	LOCAL_SGN(%a0){#0:#8}	//convert back to IEEE ext format
 	beqs	xdnrm_store
 	bsetb	#sign_bit,LOCAL_EX(%a0)
 xdnrm_store:
 	fmovemx (%a0),%fp0-%fp0		//store result in fp0
 	rts
 //
 // This subroutine is used for dyadic operations that use an extended
 // denorm within the kernel. The approach used is to capture the frame,
 // fix/restore.
 //
 // The FPU state is saved with fsave.  An idle frame is simply popped.
 // Otherwise, if E1 is set, a denorm destination (FPTEMP) and/or
 // source (ETEMP) is normalized with nrm_set, its tag bit 7 is
 // cleared, and the frame is marked before being frestored.
 //
 	.global	t_avoid_unsupp
 t_avoid_unsupp:
 	link	%a2,#-LOCAL_SIZE		//so that a2 fpsp.h negative 
 //					;offsets may be used
 	fsave	-(%a7)
 	tstb	1(%a7)			//check if idle, exit if so
 	beq	idle_end
 	btstb	#E1,E_BYTE(%a2)		//check for an E1 exception if
 //					;enabled, there is an unsupp
 	beq	end_avun		//else, exit
 	btstb	#7,DTAG(%a2)		//check for denorm destination
 	beqs	src_den			//else, must be a source denorm
 //
 // handle destination denorm
 //
 	lea	FPTEMP(%a2),%a0
 	btstb	#sign_bit,LOCAL_EX(%a0)
 	sne	LOCAL_SGN(%a0)		//convert to internal ext format
 	bclrb	#7,DTAG(%a2)		//set DTAG to norm
 	bsr	nrm_set			//normalize result, exponent
 //					;will become negative
 	bclrb	#sign_bit,LOCAL_EX(%a0)	//get rid of fake sign
 	bfclr	LOCAL_SGN(%a0){#0:#8}	//convert back to IEEE ext format
 	beqs	ck_src_den		//check if source is also denorm
 	bsetb	#sign_bit,LOCAL_EX(%a0)
 ck_src_den:
 	btstb	#7,STAG(%a2)
 	beqs	end_avun
 //
 // handle source denorm
 //
 src_den:
 	lea	ETEMP(%a2),%a0
 	btstb	#sign_bit,LOCAL_EX(%a0)
 	sne	LOCAL_SGN(%a0)		//convert to internal ext format
 	bclrb	#7,STAG(%a2)		//set STAG to norm
 	bsr	nrm_set			//normalize result, exponent
 //					;will become negative
 	bclrb	#sign_bit,LOCAL_EX(%a0)	//get rid of fake sign
 	bfclr	LOCAL_SGN(%a0){#0:#8}	//convert back to IEEE ext format
 	beqs	den_com
 	bsetb	#sign_bit,LOCAL_EX(%a0)
 den_com:
 	moveb	#0xfe,CU_SAVEPC(%a2)	//set continue frame
 	clrw	NMNEXC(%a2)		//clear NMNEXC
 	bclrb	#E1,E_BYTE(%a2)
 //	fmove.l	%FPSR,FPSR_SHADOW(%a2)
 //	bset.b	#SFLAG,E_BYTE(%a2)
 //	bset.b	#XFLAG,T_BYTE(%a2)
 end_avun:
 	frestore (%a7)+
 	unlk	%a2
 	rts
 idle_end:
 	addl	#4,%a7			//discard the 4-byte idle frame
 	unlk	%a2
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/res_func.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/res_func.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/round.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/round.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/rtems_fpsp.c
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/rtems_fpsp.c
@@ -0,0 +1,77 @@
 #include <rtems/system.h>
 /*
 #include <rtems/score/isr.h>
 */
 /*
 * User exception handlers
 */
 proc_ptr M68040FPSPUserExceptionHandlers[9];
 /*
 * Intercept requests to install an exception handler.
 * FPSP exceptions get special treatment.
 */
 /*
 * Record a user handler for one of the vectors owned by the FPSP.
 *
 *  vector      - exception vector number being installed
 *  new_handler - handler to store in M68040FPSPUserExceptionHandlers
 *  old_handler - receives the handler that was stored before
 *
 * Returns 1 when the vector is an FPSP vector (table updated),
 * 0 for any other vector (nothing is modified).
 */
 static int
 FPSP_install_raw_handler (unsigned32 vector, proc_ptr new_handler, proc_ptr *old_handler)
 {
  proc_ptr *slot;

  if (vector == 11)                        /* F-line -> slot 0 */
    slot = &M68040FPSPUserExceptionHandlers[0];
  else if (vector >= 48 && vector <= 55)   /* BSUN (48) .. UNIMPLEMENTED DATA TYPE (55) -> slots 1..8 */
    slot = &M68040FPSPUserExceptionHandlers[vector - 48 + 1];
  else
    return 0;                              /* Non-FPSP vector */

  *old_handler = *slot;
  *slot = new_handler;
  return 1;
 }
 /*
 * Attach floating point exception vectors to M68040FPSP entry points
 *
 *  NOTE: Uses M68K rather than M68040 in the name so all CPUs having
 *        an FPSP can share the same code in RTEMS proper.
 */
 void
 M68KFPSPInstallExceptionHandlers (void)
 {
  /* FPSP entry points, implemented in rtems_skel.s */
  extern void _fpspEntry_fline(void);
  extern void _fpspEntry_bsun(void);
  extern void _fpspEntry_inex(void);
  extern void _fpspEntry_dz(void);
  extern void _fpspEntry_unfl(void);
  extern void _fpspEntry_operr(void);
  extern void _fpspEntry_ovfl(void);
  extern void _fpspEntry_snan(void);
  extern void _fpspEntry_unsupp(void);
  /*
  * One row per FPSP vector.  Row order matters: row N is paired with
  * M68040FPSPUserExceptionHandlers[N], the same slot numbering used by
  * FPSP_install_raw_handler and by the jumps in rtems_skel.s.
  */
  static struct {
    int  vector;
    void  (*entry)(void);
  } entryTable[] = {
    { 11,  _fpspEntry_fline },    /* slot 0 */
    { 48,  _fpspEntry_bsun },     /* slot 1 */
    { 49,  _fpspEntry_inex },     /* slot 2 */
    { 50,  _fpspEntry_dz },       /* slot 3 */
    { 51,  _fpspEntry_unfl },     /* slot 4 */
    { 52,  _fpspEntry_operr },    /* slot 5 */
    { 53,  _fpspEntry_ovfl },     /* slot 6 */
    { 54,  _fpspEntry_snan },     /* slot 7 */
    { 55,  _fpspEntry_unsupp },   /* slot 8 */
  };
  const unsigned int rows = sizeof entryTable / sizeof entryTable[0];
  unsigned int row;

  /* Hook each vector and keep the handler it replaced as the user handler. */
  for (row = 0 ; row < rows ; row++) {
    proc_ptr previous;

    _CPU_ISR_install_raw_handler(entryTable[row].vector, entryTable[row].entry, &previous);
    M68040FPSPUserExceptionHandlers[row] = previous;
  }
  /* From here on, installs for FPSP vectors go through the intercept. */
  _FPSP_install_raw_handler = FPSP_install_raw_handler;
 }
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/rtems_skel.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/rtems_skel.s
@@ -0,0 +1,394 @@
 //
 //	skeleton.sa 3.2 4/26/91
 //
 //	This file contains code that is system dependent and will
 //	need to be modified to install the FPSP.
 //
 //	Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'.
 //	Put any target system specific handling that must be done immediately
 //	before the jump instruction.  If there is no handling necessary, then
 //	the 'fpsp_xxxx' handler entry point should be placed in the exception
 //	table so that the 'jmp' can be eliminated. If the FPSP determines that the
 //	exception is one that must be reported then there will be a
 //	return from the package by a 'jmp real_xxxx'.  At that point
 //	the machine state will be identical to the state before
 //	the FPSP was entered.  In particular, whatever condition
 //	that caused the exception will still be pending when the FPSP
 //	package returns.  Thus, there will be system specific code
 //	to handle the exception.
 //
 //	If the exception was completely handled by the package, then
 //	the return will be via a 'jmp fpsp_done'.  Unless there is 
 //	OS specific work to be done (such as handling a context switch or
 //	interrupt) the user program can be resumed via 'rte'.
 //
 //	In the following skeleton code, some typical 'real_xxxx' handling
 //	code is shown.  This code may need to be moved to an appropriate
 //	place in the target system, or rewritten.
 //	
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //
 //	Modified for Linux-1.3.x by Jes Sorensen (jds@kom.auc.dk)
 //
 #include <asm.h>
 //SKELETON	idnt    2,1 | Motorola 040 Floating Point Software Package
 	.include "fpsp.defs"
 //
 //	Divide by Zero exception
 //
 //	All dz exceptions are 'real', hence no fpsp_dz entry point.
 //
 //
 // Entry installed for vector 50.  Clears E1 in the saved FPU frame,
 // restores the frame and jumps through slot 3 of
 // M68040FPSPUserExceptionHandlers.  The jmp is memory indirect with
 // the base register suppressed (za0): the target is the long stored
 // at the given table address.
 //
 	.global	SYM(_fpspEntry_dz)
 SYM(_fpspEntry_dz):
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%sp)
 	bclrb		#E1,E_BYTE(%a6)
 	frestore	(%sp)+
 	unlk		%a6
 	jmp		([SYM(M68040FPSPUserExceptionHandlers)+3*4],za0)
 //
 //	Inexact exception
 //
 //	All inexact exceptions are real, but the 'real' handler
 //	will probably want to clear the pending exception.
 //	The provided code will clear the E3 exception (if pending), 
 //	otherwise clear the E1 exception.  The frestore is not really
 //	necessary for E1 exceptions.
 //
 // Code following the 'inex' label is to handle bug #1232.  In this
 // bug, if an E1 snan, ovfl, or unfl occurred, and the process was
 // swapped out before taking the exception, the exception taken on
 // return was inex, rather than the correct exception.  The snan, ovfl,
 // and unfl exception to be taken must not have been enabled.  The
 // fix is to check for E1, and the existence of one of snan, ovfl,
 // or unfl bits set in the fpsr.  If any of these are set, branch
 // to the appropriate  handler for the exception in the fpsr.  Note
 // that this fix is only for d43b parts, and is skipped if the
 // version number is not $40.
 // 
 //
 //
 // Entry installed for vector 49.  Flow:
 //   - frame version != VER_40: skip the bug #1232 test and join
 //     real_inex at not_fmt40 (the link/fsave done here stay live);
 //   - E1 set and snan/ovfl/unfl flagged in the FPSR: undo the frame and
 //     branch to that exception's entry instead;
 //   - otherwise undo the frame and fall into real_inex.
 // real_inex clears E3 (running b1238_fix) or else E1, restores the
 // frame and jumps through slot 2 of M68040FPSPUserExceptionHandlers.
 //
 	.global	SYM(_fpspEntry_inex)
 	.global	real_inex
 SYM(_fpspEntry_inex):
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%sp)
 	cmpib		#VER_40,(%sp)		//test version number
 	bnes		not_fmt40
 	fmovel		%fpsr,-(%sp)		//FPSR copy, tested at 2(%sp)
 	btstb		#E1,E_BYTE(%a6)		//test for E1 set
 	beqs		not_b1232
 	btstb		#snan_bit,2(%sp) //test for snan
 	beq		inex_ckofl
 	addl		#4,%sp			//drop FPSR copy
 	frestore	(%sp)+
 	unlk		%a6
 	bra		snan
 inex_ckofl:
 	btstb		#ovfl_bit,2(%sp) //test for ovfl
 	beq		inex_ckufl 
 	addl		#4,%sp			//drop FPSR copy
 	frestore	(%sp)+
 	unlk		%a6
 	bra		SYM(_fpspEntry_ovfl)
 inex_ckufl:
 	btstb		#unfl_bit,2(%sp) //test for unfl
 	beq		not_b1232
 	addl		#4,%sp			//drop FPSR copy
 	frestore	(%sp)+
 	unlk		%a6
 	bra		SYM(_fpspEntry_unfl)
 //
 // We do not have the bug 1232 case.  Clean up the stack and call
 // real_inex.
 //
 not_b1232:
 	addl		#4,%sp			//drop FPSR copy
 	frestore	(%sp)+
 	unlk		%a6
 real_inex:
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%sp)
 not_fmt40:
 	bclrb		#E3,E_BYTE(%a6)		//clear and test E3 flag
 	beqs		inex_cke1		//E3 was clear: handle as E1
 //
 // Clear dirty bit on dest register in the frame before branching
 // to b1238_fix.
 //
 	moveml		%d0/%d1,USER_DA(%a6)	//save d0/d1 around the fix
 	bfextu		CMDREG1B(%a6){#6:#3},%d0		//get dest reg no
 	bclrb		%d0,FPR_DIRTY_BITS(%a6)	//clr dest dirty bit
 	bsrl		b1238_fix		//test for bug1238 case
 	moveml		USER_DA(%a6),%d0/%d1
 	bras		inex_done
 inex_cke1:
 	bclrb		#E1,E_BYTE(%a6)
 inex_done:
 	frestore	(%sp)+
 	unlk		%a6
 	jmp	([SYM(M68040FPSPUserExceptionHandlers)+2*4],za0)
 //
 //	Overflow exception
 //
 //
 // Entry installed for vector 53: hand the exception to fpsp_ovfl.
 // real_ovfl clears the pending exception bit in the saved frame (E3 if
 // it was set, otherwise E1), restores the frame and jumps through
 // slot 6 of M68040FPSPUserExceptionHandlers.
 //
 	.global	SYM(_fpspEntry_ovfl)
 	.global	real_ovfl
 SYM(_fpspEntry_ovfl):
 	jmp	fpsp_ovfl
 real_ovfl:
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%sp)
 	bclrb		#E3,E_BYTE(%a6)		//clear and test E3 flag
 	bnes		ovfl_done		//E3 was set: leave E1 alone
 	bclrb		#E1,E_BYTE(%a6)
 ovfl_done:
 	frestore	(%sp)+
 	unlk		%a6
 	jmp	([SYM(M68040FPSPUserExceptionHandlers)+6*4],za0)
 //
 //	Underflow exception
 //
 //
 // Entry installed for vector 51: hand the exception to fpsp_unfl.
 // real_unfl clears the pending exception bit in the saved frame (E3 if
 // it was set, otherwise E1), restores the frame and jumps through
 // slot 4 of M68040FPSPUserExceptionHandlers.
 //
 	.global	SYM(_fpspEntry_unfl)
 	.global	real_unfl
 SYM(_fpspEntry_unfl):
 	jmp	fpsp_unfl
 real_unfl:
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%sp)
 	bclrb		#E3,E_BYTE(%a6)		//clear and test E3 flag
 	bnes		unfl_done		//E3 was set: leave E1 alone
 	bclrb		#E1,E_BYTE(%a6)
 unfl_done:
 	frestore	(%sp)+
 	unlk		%a6
 	jmp	([SYM(M68040FPSPUserExceptionHandlers)+4*4],za0)
 //
 //	Signalling NAN exception
 //
 //
 // Entry installed for vector 54: hand the exception to fpsp_snan.
 // The local label snan is the target used by the bug #1232 path of
 // the inexact entry.  real_snan clears E1 in the saved frame, restores
 // it and jumps through slot 7 of M68040FPSPUserExceptionHandlers.
 //
 	.global	SYM(_fpspEntry_snan)
 	.global	real_snan
 SYM(_fpspEntry_snan):
 snan:
 	jmp	fpsp_snan
 real_snan:
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%sp)
 	bclrb		#E1,E_BYTE(%a6)	//snan is always an E1 exception
 	frestore	(%sp)+
 	unlk		%a6
 	jmp	([SYM(M68040FPSPUserExceptionHandlers)+7*4],za0)
 //
 //	Operand Error exception
 //
 //
 // Entry installed for vector 52: hand the exception to fpsp_operr.
 // real_operr clears E1 in the saved frame, restores it and jumps
 // through slot 5 of M68040FPSPUserExceptionHandlers.
 //
 	.global	SYM(_fpspEntry_operr)
 	.global	real_operr
 SYM(_fpspEntry_operr):
 	jmp	fpsp_operr
 real_operr:
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%sp)
 	bclrb		#E1,E_BYTE(%a6)	//operr is always an E1 exception
 	frestore	(%sp)+
 	unlk		%a6
 	jmp	([SYM(M68040FPSPUserExceptionHandlers)+5*4],za0)
 //
 //	BSUN exception
 //
 //	This sample handler simply clears the nan bit in the FPSR.
 //
 //
 // Entry installed for vector 48: hand the exception to fpsp_bsun.
 // real_bsun clears E1 in the saved frame, clears nan_bit in the FPSR
 // (edited through a copy pushed on the stack), restores the frame and
 // jumps through slot 1 of M68040FPSPUserExceptionHandlers.
 //
 	.global	SYM(_fpspEntry_bsun)
 	.global	real_bsun
 SYM(_fpspEntry_bsun):
 	jmp	fpsp_bsun
 real_bsun:
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%sp)
 	bclrb		#E1,E_BYTE(%a6)	//bsun is always an E1 exception
 	fmovel		%FPSR,-(%sp)	//FPSR copy on the stack
 	bclrb		#nan_bit,(%sp)	//clear nan bit in its first byte
 	fmovel		(%sp)+,%FPSR	//write the edited copy back
 	frestore	(%sp)+
 	unlk		%a6
 	jmp	([SYM(M68040FPSPUserExceptionHandlers)+1*4],za0)
 //
 //	F-line exception
 //
 //	A 'real' F-line exception is one that the FPSP is not supposed to 
 //	handle. E.g. an instruction with a co-processor ID that is not 1.
 //
 //
 // Entry installed for vector 11: hand the exception to fpsp_fline.
 // real_fline touches no FPU state; it jumps straight through slot 0
 // of M68040FPSPUserExceptionHandlers.
 //
 	.global	SYM(_fpspEntry_fline)
 	.global	real_fline
 SYM(_fpspEntry_fline):
 	jmp	fpsp_fline
 real_fline:
 	jmp	([SYM(M68040FPSPUserExceptionHandlers)+0*4],za0)
 //
 //	Unsupported data type exception
 //
 //
 // Entry installed for vector 55: hand the exception to fpsp_unsupp.
 // real_unsupp clears E1 in the saved frame, restores it and jumps
 // through slot 8 of M68040FPSPUserExceptionHandlers.
 //
 	.global	SYM(_fpspEntry_unsupp)
 	.global	real_unsupp
 SYM(_fpspEntry_unsupp):
 	jmp	fpsp_unsupp
 real_unsupp:
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%sp)
 	bclrb		#E1,E_BYTE(%a6)	//unsupp is always an E1 exception
 	frestore	(%sp)+
 	unlk		%a6
 	jmp	([SYM(M68040FPSPUserExceptionHandlers)+8*4],za0)
 //
 //	Trace exception
 //
 //
 // Exit point named for trace exceptions; this port just raises trap #10.
 // NOTE(review): no instruction follows the trap, so if the trap handler
 // returns, execution runs on into fpsp_fmt_error below - confirm that
 // the trap #10 handler never returns.
 //
 	.global	real_trace
 real_trace:
 	trap	#10
 //
 //	fpsp_fmt_error --- exit point for frame format error
 //
 //	The fpu stack frame does not match the frames existing
 //	or planned at the time of this writing.  The fpsp is
 //	unable to handle frame sizes not in the following
 //	version:size pairs:
 //
 //	{4060, 4160} - busy frame
 //	{4028, 4130} - unimp frame
 //	{4000, 4100} - idle frame
 //
 //
 // This port reports an unrecognized frame format by raising trap #11.
 // NOTE(review): no instruction follows the trap, so if the trap handler
 // returns, execution runs on into the rte at fpsp_done - confirm that
 // the trap #11 handler never returns.
 //
 	.global	fpsp_fmt_error
 fpsp_fmt_error:
 	trap	#11
 //
 //	fpsp_done --- FPSP exit point
 //
 //	The exception has been handled by the package and we are ready
 //	to return to user mode, but there may be OS specific code
 //	to execute before we do.  If there is, do it now.
 //
 // For now, the RTEMS does not bother looking at the
 // possibility that it is time to reschedule....
 //
 //
 // Common exit once the package has handled an exception: return from
 // the exception with a bare rte, no OS work is done here.
 //
 	.global	fpsp_done
 fpsp_done:
 	rte
 //
 //	mem_write --- write to user or supervisor address space
 //
 // Writes to memory while in supervisor mode.
 //
 //	a0 - supervisor source address
 //	a1 - user/supervisor destination address
 //	d0 - number of bytes to write (maximum count is 12)
 //
 //
 // Copies d0 bytes from (a0) to (a1).  Bit 5 of the saved status
 // register (EXC_SR) picks the path: set = plain byte loop, clear =
 // copyout.  The byte loop advances a0/a1 and counts d0 down to zero;
 // the copyout path preserves d1 but copyout itself rewrites d0/a0/a1.
 // NOTE(review): a zero count is not guarded - the loop tests d0 only
 // after decrementing it, so callers must pass at least 1.
 //
 	.global	mem_write
 mem_write:
 	btstb	#5,EXC_SR(%a6)	//check for supervisor state
 	beqs	user_write
 super_write:
 	moveb	(%a0)+,(%a1)+
 	subql	#1,%d0
 	bnes	super_write
 	rts
 user_write:
 	movel	%d1,-(%sp)	//preserve d1 just in case
 	movel	%d0,-(%sp)	//copyout arg: count
 	movel	%a1,-(%sp)	//copyout arg: destination
 	movel	%a0,-(%sp)	//copyout arg: source
 	jsr		copyout
 	addw	#12,%sp		//drop the three arguments
 	movel	(%sp)+,%d1
 	rts
 //
 //	mem_read --- read from user or supervisor address space
 //
 // Reads from memory while in supervisor mode.
 //
 // The FPSP calls mem_read to read the original F-line instruction in order
 // to extract the data register number when the 'Dn' addressing mode is
 // used.
 //
 //Input:
 //	a0 - user/supervisor source address
 //	a1 - supervisor destination address
 //	d0 - number of bytes to read (maximum count is 12)
 //
 // Like mem_write, mem_read always reads with a supervisor 
 // destination address on the supervisor stack.  Also like mem_write,
 // the EXC_SR is checked and a simple memory copy is done if reading
 // from supervisor space is indicated.
 //
 //
 // Copies d0 bytes from (a0) to (a1).  Bit 5 of the saved status
 // register (EXC_SR) picks the path: set = plain byte loop, clear =
 // copyin.  The byte loop advances a0/a1 and counts d0 down to zero;
 // the copyin path preserves d1 but copyin itself rewrites d0/a0/a1.
 // NOTE(review): a zero count is not guarded - the loop tests d0 only
 // after decrementing it, so callers must pass at least 1.
 //
 	.global	mem_read
 mem_read:
 	btstb	#5,EXC_SR(%a6)	//check for supervisor state
 	beqs	user_read
 super_read:
 	moveb	(%a0)+,(%a1)+
 	subql	#1,%d0
 	bnes	super_read
 	rts
 user_read:
 	movel	%d1,-(%sp)	//preserve d1 just in case
 	movel	%d0,-(%sp)	//copyin arg: count
 	movel	%a1,-(%sp)	//copyin arg: destination
 	movel	%a0,-(%sp)	//copyin arg: source
 	jsr		copyin
 	addw	#12,%sp		//drop the three arguments
 	movel	(%sp)+,%d1
 	rts
 //
 // Use these routines if your kernel does not have copyout/copyin equivalents.
 // Assumes that D0/D1/A0/A1 are scratch registers. copyout overwrites DFC,
 // and copyin overwrites SFC.
 //
 //
 // copyout --- byte copy whose stores go through moves (DFC space)
 //
 // Stack arguments: 4(sp) = source, 8(sp) = destination, 12(sp) = count.
 // Overwrites d0, d1, a0, a1 and DFC.  The loop is counted with dbf,
 // which works on the low word of d0 only; the count must be at least 1.
 //
 copyout:
 	movel	4(%sp),%a0	// source
 	movel	8(%sp),%a1	// destination
 	movel	12(%sp),%d0	// count
 	subl	#1,%d0		// dec count by 1 for dbra
 	movel	#1,%d1
 	movec	%d1,%DFC		// set dfc for user data space
 moreout:
 	moveb	(%a0)+,%d1	// fetch supervisor byte
 	movesb	%d1,(%a1)+	// write user byte
 	dbf	%d0,moreout
 	rts
 //
 // copyin --- byte copy whose loads go through moves (SFC space)
 //
 // Stack arguments: 4(sp) = source, 8(sp) = destination, 12(sp) = count.
 // Overwrites d0, d1, a0, a1 and SFC.  The loop is counted with dbf,
 // which works on the low word of d0 only; the count must be at least 1.
 //
 copyin:
 	movel	4(%sp),%a0	// source
 	movel	8(%sp),%a1	// destination
 	movel	12(%sp),%d0	// count
 	subl	#1,%d0		// dec count by 1 for dbra
 	movel	#1,%d1
 	movec	%d1,%SFC		// set sfc for user space
 morein:
 	movesb	(%a0)+,%d1	// fetch user byte
 	moveb	%d1,(%a1)+	// write supervisor byte
 	dbf	%d0,morein
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/sacos.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/sacos.s
@@ -0,0 +1,115 @@
 //
 //	sacos.sa 3.3 12/19/90
 //
 //	Description: The entry point sAcos computes the inverse cosine of
 //		an input argument; sAcosd does the same except for denormalized
 //		input.
 //
 //	Input: Double-extended number X in location pointed to
 //		by address register a0.
 //
 //	Output: The value arccos(X) returned in floating-point register Fp0.
 //
 //	Accuracy and Monotonicity: The returned result is within 3 ulps in
 //		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
 //		result is subsequently rounded to double precision. The 
 //		result is provably monotonic in double precision.
 //
 //	Speed: The program sACOS takes approximately 310 cycles.
 //
 //	Algorithm:
 //
 //	ACOS
 //	1. If |X| >= 1, go to 3.
 //
 //	2. (|X| < 1) Calculate acos(X) by
 //		z := (1-X) / (1+X)
 //		acos(X) = 2 * atan( sqrt(z) ).
 //		Exit.
 //
 //	3. If |X| > 1, go to 5.
 //
 //	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.
 //
 //	5. (|X| > 1) Generate an invalid operation by 0 * infinity.
 //		Exit.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //SACOS	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 // Constants read with fmovex below; each is stored as four longs.
 PI:	.long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
 PIBY2:	.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
 	|xref	t_operr
 	|xref	t_frcinx
 	|xref	satan
 //
 // sacosd --- acos for a denormalized argument
 //
 // Loads FPCR from d1, loads PIBY2 into fp0 and leaves via t_frcinx.
 // The argument itself is not read.
 //
 	.global	sacosd
 sacosd:
 //--ACOS(X) = PI/2 FOR DENORMALIZED X
 	fmovel		%d1,%fpcr		// ...load user's rounding mode/precision
 	fmovex		PIBY2,%fp0
 	bra		t_frcinx
 //
 // sacos --- acos(X)
 //
 // In:   a0 -> X in extended format (overwritten in the |X| < 1 case)
 //       d1  = value loaded into FPCR before the final operation
 // Out:  fp0 = result
 // Uses: d0, fp1; d1 is zeroed on the |X| < 1 path.
 // Leaves via t_frcinx, via t_operr when |X| > 1, or by rts for X = +1.
 //
 	.global	sacos
 sacos:
 	fmovex		(%a0),%fp0	// ...LOAD INPUT
 	movel		(%a0),%d0		// ...pack exponent with upper 16 fraction
 	movew		4(%a0),%d0
 	andil		#0x7FFFFFFF,%d0	// ...drop the sign: d0 is now |X| in compact form
 	cmpil		#0x3FFF8000,%d0	// ...compact form of 1.0
 	bges		ACOSBIG
 //--THIS IS THE USUAL CASE, |X| < 1
 //--ACOS(X) = 2 * ATAN(	SQRT( (1-X)/(1+X) )	)
 	fmoves		#0x3F800000,%fp1
 	faddx		%fp0,%fp1	 	// ...1+X
 	fnegx		%fp0	 	// ... -X
 	fadds		#0x3F800000,%fp0	// ...1-X
 	fdivx		%fp1,%fp0	 	// ...(1-X)/(1+X)
 	fsqrtx		%fp0		// ...SQRT((1-X)/(1+X))
 	fmovemx	%fp0-%fp0,(%a0)	// ...overwrite input
 	movel		%d1,-(%sp)	//save original users fpcr
 	clrl		%d1		//satan loads FPCR from d1: give it 0
 	bsr		satan		// ...ATAN(SQRT([1-X]/[1+X]))
 	fmovel		(%sp)+,%fpcr	//restore users exceptions
 	faddx		%fp0,%fp0	 	// ...2 * ATAN( STUFF )
 	bra		t_frcinx
 ACOSBIG:
 	fabsx		%fp0
 	fcmps		#0x3F800000,%fp0
 	fbgt		t_operr		//cause an operr exception
 //--|X| = 1, ACOS(X) = 0 OR PI
 	movel		(%a0),%d0		// ...pack exponent with upper 16 fraction
 	movew		4(%a0),%d0
 	cmpl		#0,%d0		//D0 has original exponent+fraction
 	bgts		ACOSP1		//sign bit clear: X = +1
 //--X = -1
 //Returns PI and inexact exception
 	fmovex		PI,%fp0
 	fmovel		%d1,%FPCR
 	fadds		#0x00800000,%fp0	//cause an inexact exception to be put
 //					;into the 040 - will not trap until next
 //					;fp inst.
 	bra		t_frcinx
 ACOSP1:
 	fmovel		%d1,%FPCR
 	fmoves		#0x00000000,%fp0
 	rts				//Facos ; of +1 is exact	
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/sasin.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/sasin.s
@@ -0,0 +1,104 @@
 //
 //	sasin.sa 3.3 12/19/90
 //
 //	Description: The entry point sAsin computes the inverse sine of
 //		an input argument; sAsind does the same except for denormalized
 //		input.
 //
 //	Input: Double-extended number X in location pointed to
 //		by address register a0.
 //
 //	Output: The value arcsin(X) returned in floating-point register Fp0.
 //
 //	Accuracy and Monotonicity: The returned result is within 3 ulps in
 //		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
 //		result is subsequently rounded to double precision. The 
 //		result is provably monotonic in double precision.
 //
 //	Speed: The program sASIN takes approximately 310 cycles.
 //
 //	Algorithm:
 //
 //	ASIN
 //	1. If |X| >= 1, go to 3.
 //
 //	2. (|X| < 1) Calculate asin(X) by
 //		z := sqrt( [1-X][1+X] )
 //		asin(X) = atan( x / z ).
 //		Exit.
 //
 //	3. If |X| > 1, go to 5.
 //
 //	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.
 //
 //	5. (|X| > 1) Generate an invalid operation by 0 * infinity.
 //		Exit.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //SASIN	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 // Constant read with fmovex in the |X| = 1 case; stored as four longs.
 PIBY2:	.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
 	|xref	t_operr
 	|xref	t_frcinx
 	|xref	t_extdnrm
 	|xref	satan
 //
 // sasind --- asin for a denormalized argument: handled entirely by
 // t_extdnrm.
 //
 	.global	sasind
 sasind:
 //--ASIN(X) = X FOR DENORMALIZED X
 	bra		t_extdnrm
 //
 // sasin --- asin(X)
 //
 // In:   a0 -> X in extended format (overwritten in the |X| < 1 case)
 //       d1  = value loaded into FPCR before the final operation
 // Out:  fp0 = result
 // Uses: d0, fp1; fp2 is used as a temporary but saved and restored
 //       on the stack.
 // Leaves via t_frcinx, or via t_operr when |X| > 1.
 //
 	.global	sasin
 sasin:
 	fmovex		(%a0),%fp0	// ...LOAD INPUT
 	movel		(%a0),%d0
 	movew		4(%a0),%d0
 	andil		#0x7FFFFFFF,%d0	// ...drop the sign: d0 is now |X| in compact form
 	cmpil		#0x3FFF8000,%d0	// ...compact form of 1.0
 	bges		asinbig
 //--THIS IS THE USUAL CASE, |X| < 1
 //--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
 	fmoves		#0x3F800000,%fp1
 	fsubx		%fp0,%fp1		// ...1-X
 	fmovemx	%fp2-%fp2,-(%a7)	// ...save fp2
 	fmoves		#0x3F800000,%fp2
 	faddx		%fp0,%fp2		// ...1+X
 	fmulx		%fp2,%fp1		// ...(1+X)(1-X)
 	fmovemx	(%a7)+,%fp2-%fp2	// ...restore fp2
 	fsqrtx		%fp1		// ...SQRT([1-X][1+X])
 	fdivx		%fp1,%fp0	 	// ...X/SQRT([1-X][1+X])
 	fmovemx	%fp0-%fp0,(%a0)	// ...overwrite input with the satan argument
 	bsr		satan
 	bra		t_frcinx
 asinbig:
 	fabsx		%fp0	 // ...|X|
 	fcmps		#0x3F800000,%fp0
 	fbgt		t_operr		//cause an operr exception
 //--|X| = 1, ASIN(X) = +- PI/2.
 	fmovex		PIBY2,%fp0
 	movel		(%a0),%d0
 	andil		#0x80000000,%d0	// ...SIGN BIT OF X
 	oril		#0x3F800000,%d0	// ...+-1 IN SGL FORMAT
 	movel		%d0,-(%sp)	// ...push SIGN(X) IN SGL-FMT
 	fmovel		%d1,%FPCR		
 	fmuls		(%sp)+,%fp0	// ...PI/2 * (+-1), pops the temporary
 	bra		t_frcinx
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/satan.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/satan.s
@@ -0,0 +1,478 @@
 //
 //	satan.sa 3.3 12/19/90
 //
 //	The entry point satan computes the arctangent of an
 //	input value. satand does the same except the input value is a
 //	denormalized number.
 //
 //	Input: Double-extended value in memory location pointed to by address
 //		register a0.
 //
 //	Output:	Arctan(X) returned in floating-point register Fp0.
 //
 //	Accuracy and Monotonicity: The returned result is within 2 ulps in
 //		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
 //		result is subsequently rounded to double precision. The
 //		result is provably monotonic in double precision.
 //
 //	Speed: The program satan takes approximately 160 cycles for input
 //		argument X such that 1/16 < |X| < 16. For the other arguments,
 //		the program will run no worse than 10% slower.
 //
 //	Algorithm:
 //	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
 //
 //	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
 //		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
 //		of X with a bit-1 attached at the 6-th bit position. Define u
 //		to be u = (X-F) / (1 + X*F).
 //
 //	Step 3. Approximate arctan(u) by a polynomial poly.
 //
 //	Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
 //		calculated beforehand. Exit.
 //
 //	Step 5. If |X| >= 16, go to Step 7.
 //
 //	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
 //
 //	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
 //		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //satan	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 // BOUNDS1 holds the same two values that satan compares against as
 // immediates (0x3FFB8000 and 0x4002FFFF); nothing in this file reads
 // the label itself.  ONE is likewise not read in this file.
 BOUNDS1:	.long 0x3FFB8000,0x4002FFFF
 ONE:	.long 0x3F800000
 	.long 0x00000000
 // Polynomial coefficients, two longs each, read with fmoved/faddd/fmuld:
 //   ATANA1-3  main case (1/16 <= |X| < 16)
 //   ATANB1-6  small argument case (ATANSM)
 //   ATANC1-5  large argument case (ATANBIG)
 ATANA3:	.long 0xBFF6687E,0x314987D8
 ATANA2:	.long 0x4002AC69,0x34A26DB3
 ATANA1:	.long 0xBFC2476F,0x4E1DA28E
 ATANB6:	.long 0x3FB34444,0x7F876989
 ATANB5:	.long 0xBFB744EE,0x7FAF45DB
 ATANB4:	.long 0x3FBC71C6,0x46940220
 ATANB3:	.long 0xBFC24924,0x921872F9
 ATANB2:	.long 0x3FC99999,0x99998FA9
 ATANB1:	.long 0xBFD55555,0x55555555
 ATANC5:	.long 0xBFB70BF3,0x98539E6A
 ATANC4:	.long 0x3FBC7187,0x962D1D7D
 ATANC3:	.long 0xBFC24924,0x827107B8
 ATANC2:	.long 0x3FC99999,0x9996263E
 ATANC1:	.long 0xBFD55555,0x55555536
 // Four-long constants read with fmovex/faddx/fsubx: +-PI/2 and the
 // +-TINY values subtracted from them in the ATANHUGE case.
 PPIBY2:	.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
 NPIBY2:	.long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
 PTINY:	.long 0x00010000,0x80000000,0x00000000,0x00000000
 NTINY:	.long 0x80010000,0x80000000,0x00000000,0x00000000
 // Table of ATAN(|F|): 128 entries of four longs (16 bytes) each.
 // satan computes the byte offset as 16 * ((K+4)*16 + f), where K+4 is
 // the exponent of F minus 0x3FFB and f is the 4 varying fraction bits
 // of F, then copies the first three longs of the entry into ATANF.
 ATANTBL:
 	.long	0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
 	.long	0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
 	.long	0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
 	.long	0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
 	.long	0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
 	.long	0x3FFB0000,0xAB98E943,0x62765619,0x00000000
 	.long	0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
 	.long	0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
 	.long	0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
 	.long	0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
 	.long	0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
 	.long	0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
 	.long	0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
 	.long	0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
 	.long	0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
 	.long	0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
 	.long	0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
 	.long	0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
 	.long	0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
 	.long	0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
 	.long	0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
 	.long	0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
 	.long	0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
 	.long	0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
 	.long	0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
 	.long	0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
 	.long	0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
 	.long	0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
 	.long	0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
 	.long	0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
 	.long	0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
 	.long	0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
 	.long	0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
 	.long	0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
 	.long	0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
 	.long	0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
 	.long	0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
 	.long	0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
 	.long	0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
 	.long	0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
 	.long	0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
 	.long	0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
 	.long	0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
 	.long	0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
 	.long	0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
 	.long	0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
 	.long	0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
 	.long	0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
 	.long	0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
 	.long	0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
 	.long	0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
 	.long	0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
 	.long	0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
 	.long	0x3FFE0000,0x97731420,0x365E538C,0x00000000
 	.long	0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
 	.long	0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
 	.long	0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
 	.long	0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
 	.long	0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
 	.long	0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
 	.long	0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
 	.long	0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
 	.long	0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
 	.long	0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
 	.long	0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
 	.long	0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
 	.long	0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
 	.long	0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
 	.long	0x3FFE0000,0xE8771129,0xC4353259,0x00000000
 	.long	0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
 	.long	0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
 	.long	0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
 	.long	0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
 	.long	0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
 	.long	0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
 	.long	0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
 	.long	0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
 	.long	0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
 	.long	0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
 	.long	0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
 	.long	0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
 	.long	0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
 	.long	0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
 	.long	0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
 	.long	0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
 	.long	0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
 	.long	0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
 	.long	0x3FFF0000,0x9F100575,0x006CC571,0x00000000
 	.long	0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
 	.long	0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
 	.long	0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
 	.long	0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
 	.long	0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
 	.long	0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
 	.long	0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
 	.long	0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
 	.long	0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
 	.long	0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
 	.long	0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
 	.long	0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
 	.long	0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
 	.long	0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
 	.long	0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
 	.long	0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
 	.long	0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
 	.long	0x3FFF0000,0xB525529D,0x562246BD,0x00000000
 	.long	0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
 	.long	0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
 	.long	0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
 	.long	0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
 	.long	0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
 	.long	0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
 	.long	0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
 	.long	0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
 	.long	0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
 	.long	0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
 	.long	0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
 	.long	0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
 	.long	0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
 	.long	0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
 	.long	0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
 	.long	0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
 	.long	0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
 	.long	0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
 	.long	0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
 	.long	0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
 	.long	0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
 	.long	0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
 // Scratch areas used by satan, addressed off a6.
 // X holds the argument (later F or X'); ATANF holds SIGN(F)*ATAN(|F|).
 	.set	X,FP_SCR1
 	.set	XDCARE,X+2		// word cleared by satan before X is reused
 	.set	XFRAC,X+4		// upper fraction long
 	.set	XFRACLO,X+8		// lower fraction long
 	.set	ATANF,FP_SCR2
 	.set	ATANFHI,ATANF+4
 	.set	ATANFLO,ATANF+8
 	| xref	t_frcinx
 	|xref	t_extdnrm
 //
 // satand --- atan for a denormalized argument: handled entirely by
 // t_extdnrm.
 //
 	.global	satand
 satand:
 //--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
 	bra		t_extdnrm
 //
 // satan --- atan(X)
 //
 // In:   a0 -> X in extended format (not modified here)
 //       d1  = value loaded into FPCR before the final operation
 //       a6  = frame holding the X and ATANF scratch areas
 // Out:  fp0 = result
 // Uses: d0, a1, fp1, fp2, fp3 (none of the fp registers is saved here);
 //       d2 is used but saved and restored on the stack.
 // Every path leaves via t_frcinx.
 //
 	.global	satan
 satan:
 //--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
 	fmovex		(%a0),%fp0	// ...LOAD INPUT
 	movel		(%a0),%d0
 	movew		4(%a0),%d0	// ...d0 = sign/exponent : upper 16 fraction bits
 	fmovex		%fp0,X(%a6)
 	andil		#0x7FFFFFFF,%d0	// ...drop the sign: |X| in compact form
 	cmpil		#0x3FFB8000,%d0		// ...|X| >= 1/16?
 	bges		ATANOK1
 	bra		ATANSM
 ATANOK1:
 	cmpil		#0x4002FFFF,%d0		// ...|X| < 16 ?
 	bles		ATANMAIN
 	bra		ATANBIG
 //--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
 //--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
 //--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
 //--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
 //--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
 //--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
 //--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
 //--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
 //--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
 //--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
 //--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
 //--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
 //--WILL INVOLVE A VERY LONG POLYNOMIAL.
 //--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
 //--WE CHOSE F TO BE +-2^K * 1.BBBB1
 //--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
 //--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
 //--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
 //-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
 ATANMAIN:
 	movew		#0x0000,XDCARE(%a6)	// ...CLEAN UP X JUST IN CASE
 	andil		#0xF8000000,XFRAC(%a6)	// ...FIRST 5 BITS
 	oril		#0x04000000,XFRAC(%a6)	// ...SET 6-TH BIT TO 1
 	movel		#0x00000000,XFRACLO(%a6)	// ...LOCATION OF X IS NOW F
 	fmovex		%fp0,%fp1			// ...FP1 IS X
 	fmulx		X(%a6),%fp1		// ...FP1 IS X*F, NOTE THAT X*F > 0
 	fsubx		X(%a6),%fp0		// ...FP0 IS X-F
 	fadds		#0x3F800000,%fp1		// ...FP1 IS 1 + X*F
 	fdivx		%fp1,%fp0			// ...FP0 IS U = (X-F)/(1+X*F)
 //--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
 //--CREATE ATAN(F) AND STORE IT IN ATANF, AND
 //--SAVE REGISTERS FP2.
 //--(ONLY d2 IS ACTUALLY SAVED BELOW; NO FP REGISTER IS STORED HERE.)
 	movel		%d2,-(%a7)	// ...SAVE d2 TEMPORARILY
 	movel		%d0,%d2		// ...THE EXPO AND 16 BITS OF X
 	andil		#0x00007800,%d0	// ...4 VARYING BITS OF F'S FRACTION
 	andil		#0x7FFF0000,%d2	// ...EXPONENT OF F
 	subil		#0x3FFB0000,%d2	// ...K+4
 	asrl		#1,%d2
 	addl		%d2,%d0		// ...THE 7 BITS IDENTIFYING F
 	asrl		#7,%d0		// ...INDEX INTO TBL OF ATAN(|F|)
 //					...(BYTE OFFSET, 16 PER ENTRY)
 	lea		ATANTBL,%a1
 	addal		%d0,%a1		// ...ADDRESS OF ATAN(|F|)
 	movel		(%a1)+,ATANF(%a6)
 	movel		(%a1)+,ATANFHI(%a6)
 	movel		(%a1)+,ATANFLO(%a6)	// ...ATANF IS NOW ATAN(|F|)
 	movel		X(%a6),%d0		// ...LOAD SIGN AND EXPO. AGAIN
 	andil		#0x80000000,%d0	// ...SIGN(F)
 	orl		%d0,ATANF(%a6)	// ...ATANF IS NOW SIGN(F)*ATAN(|F|)
 	movel		(%a7)+,%d2	// ...RESTORE d2
 //--THAT'S ALL I HAVE TO DO FOR NOW,
 //--BUT ALAS, THE DIVIDE IS STILL CRANKING!
 //--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
 //--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
 //--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
 //--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
 //--WHAT WE HAVE HERE IS MERELY	A1 = A3, A2 = A1/A3, A3 = A2/A3.
 //--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
 //--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
 	fmovex		%fp0,%fp1
 	fmulx		%fp1,%fp1		// ...V = U*U
 	fmoved		ATANA3,%fp2
 	faddx		%fp1,%fp2		// ...A3+V
 	fmulx		%fp1,%fp2		// ...V*(A3+V)
 	fmulx		%fp0,%fp1		// ...U*V
 	faddd		ATANA2,%fp2	// ...A2+V*(A3+V)
 	fmuld		ATANA1,%fp1	// ...A1*U*V
 	fmulx		%fp2,%fp1		// ...A1*U*V*(A2+V*(A3+V))
 	faddx		%fp1,%fp0		// ...ATAN(U), FP1 RELEASED
 	fmovel		%d1,%FPCR		//restore users exceptions
 	faddx		ATANF(%a6),%fp0	// ...ATAN(X)
 	bra		t_frcinx
 // NOTE(review): nothing in this file branches to ATANBORS and the label
 // is not exported; the code above ends in an unconditional bra, so these
 // two instructions look unreachable - confirm before relying on them.
 ATANBORS:
 //--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
 //--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
 	cmpil		#0x3FFF8000,%d0
 	bgt		ATANBIG	// ...I.E. |X| >= 16
 ATANSM:
 //--|X| <= 1/16
 //--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
 //--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
 //--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
 //--WHERE Y = X*X, AND Z = Y*Y.
 	cmpil		#0x3FD78000,%d0
 	blt		ATANTINY
 //--COMPUTE POLYNOMIAL
 	fmulx		%fp0,%fp0	// ...FP0 IS Y = X*X
 	movew		#0x0000,XDCARE(%a6)
 	fmovex		%fp0,%fp1
 	fmulx		%fp1,%fp1		// ...FP1 IS Z = Y*Y
 	fmoved		ATANB6,%fp2
 	fmoved		ATANB5,%fp3
 	fmulx		%fp1,%fp2		// ...Z*B6
 	fmulx		%fp1,%fp3		// ...Z*B5
 	faddd		ATANB4,%fp2	// ...B4+Z*B6
 	faddd		ATANB3,%fp3	// ...B3+Z*B5
 	fmulx		%fp1,%fp2		// ...Z*(B4+Z*B6)
 	fmulx		%fp3,%fp1		// ...Z*(B3+Z*B5)
 	faddd		ATANB2,%fp2	// ...B2+Z*(B4+Z*B6)
 	faddd		ATANB1,%fp1	// ...B1+Z*(B3+Z*B5)
 	fmulx		%fp0,%fp2		// ...Y*(B2+Z*(B4+Z*B6))
 	fmulx		X(%a6),%fp0		// ...X*Y
 	faddx		%fp2,%fp1		// ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
 	fmulx		%fp1,%fp0	// ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
 	fmovel		%d1,%FPCR		//restore users exceptions
 	faddx		X(%a6),%fp0
 	bra		t_frcinx
 ATANTINY:
 //--|X| < 2^(-40), ATAN(X) = X
 	movew		#0x0000,XDCARE(%a6)
 	fmovel		%d1,%FPCR		//restore users exceptions
 	fmovex		X(%a6),%fp0	//last inst - possible exception set
 	bra		t_frcinx
 ATANBIG:
 //--IF |X| > 2^(100), RETURN	SIGN(X)*(PI/2 - TINY). OTHERWISE,
 //--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
 	cmpil		#0x40638000,%d0
 	bgt		ATANHUGE
 //--APPROXIMATE ATAN(-1/X) BY
 //--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
 //--THIS CAN BE RE-WRITTEN AS
 //--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
 	fmoves		#0xBF800000,%fp1	// ...LOAD -1
 	fdivx		%fp0,%fp1		// ...FP1 IS -1/X
 //--DIVIDE IS STILL CRANKING
 	fmovex		%fp1,%fp0		// ...FP0 IS X'
 	fmulx		%fp0,%fp0		// ...FP0 IS Y = X'*X'
 	fmovex		%fp1,X(%a6)		// ...X IS REALLY X'
 	fmovex		%fp0,%fp1
 	fmulx		%fp1,%fp1		// ...FP1 IS Z = Y*Y
 	fmoved		ATANC5,%fp3
 	fmoved		ATANC4,%fp2
 	fmulx		%fp1,%fp3		// ...Z*C5
 	fmulx		%fp1,%fp2		// ...Z*C4
 	faddd		ATANC3,%fp3	// ...C3+Z*C5
 	faddd		ATANC2,%fp2	// ...C2+Z*C4
 	fmulx		%fp3,%fp1		// ...Z*(C3+Z*C5), FP3 RELEASED
 	fmulx		%fp0,%fp2		// ...Y*(C2+Z*C4)
 	faddd		ATANC1,%fp1	// ...C1+Z*(C3+Z*C5)
 	fmulx		X(%a6),%fp0		// ...X'*Y
 	faddx		%fp2,%fp1		// ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
 	fmulx		%fp1,%fp0		// ...X'*Y*([C1+Z*(C3+Z*C5)]
 //					...	+[Y*(C2+Z*C4)])
 	faddx		X(%a6),%fp0
 	fmovel		%d1,%FPCR		//restore users exceptions
 	btstb		#7,(%a0)		// ...sign bit of the original X
 	beqs		pos_big
 neg_big:
 	faddx		NPIBY2,%fp0
 	bra		t_frcinx
 pos_big:
 	faddx		PPIBY2,%fp0
 	bra		t_frcinx
 ATANHUGE:
 //--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
 	btstb		#7,(%a0)		// ...sign bit of the original X
 	beqs		pos_huge
 neg_huge:
 	fmovex		NPIBY2,%fp0
 	fmovel		%d1,%fpcr
 	fsubx		NTINY,%fp0
 	bra		t_frcinx
 pos_huge:
 	fmovex		PPIBY2,%fp0
 	fmovel		%d1,%fpcr
 	fsubx		PTINY,%fp0
 	bra		t_frcinx
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/satanh.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/satanh.s
@@ -0,0 +1,104 @@
 //
 //	satanh.sa 3.3 12/19/90
 //
 //	The entry point satanh computes the inverse
 //	hyperbolic tangent of
 //	an input argument; satanhd does the same except for denormalized
 //	input.
 //
 //	Input: Double-extended number X in location pointed to
 //		by address register a0.
 //
 //	Output: The value arctanh(X) returned in floating-point register Fp0.
 //
 //	Accuracy and Monotonicity: The returned result is within 3 ulps in
 //		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
 //		result is subsequently rounded to double precision. The 
 //		result is provably monotonic in double precision.
 //
 //	Speed: The program satanh takes approximately 270 cycles.
 //
 //	Algorithm:
 //
 //	ATANH
 //	1. If |X| >= 1, go to 3.
 //
 //	2. (|X| < 1) Calculate atanh(X) by
 //		sgn := sign(X)
 //		y := |X|
 //		z := 2y/(1-y)
 //		atanh(X) := sgn * (1/2) * logp1(z)
 //		Exit.
 //
 //	3. If |X| > 1, go to 5.
 //
 //	4. (|X| = 1) Generate infinity with an appropriate sign and
 //		divide-by-zero by	
 //		sgn := sign(X)
 //		atanh(X) := sgn / (+0).
 //		Exit.
 //
 //	5. (|X| > 1) Generate an invalid operation by 0 * infinity.
 //		Exit.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //satanh	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	|xref	t_dz
 	|xref	t_operr
 	|xref	t_frcinx
 	|xref	t_extdnrm
 	|xref	slognp1
 	.global	satanhd
 //
 //	satanhd -- arctanh entry point for denormalized input.
 //	No computation is done here; control passes straight to
 //	t_extdnrm (defined outside this file).
 //
 satanhd:
 //--ATANH(X) = X FOR DENORMALIZED X
 	bra		t_extdnrm
 	.global	satanh
 //
 //	satanh -- arctanh(X) for the extended operand at (a0).
 //	The input operand at (a0) is overwritten with Z = 2|X|/(1-|X|)
 //	before slognp1 is called.  d1 is pushed before the call and
 //	popped into the FPCR afterwards, so the closing multiply runs
 //	under that setting.  |X| = 1 exits through t_dz and |X| > 1
 //	through t_operr.
 //
 satanh:
 	movel		(%a0),%d0	// ...d0 high word = sign/exponent
 	movew		4(%a0),%d0	// ...d0 low word = top 16 mantissa bits
 	andil		#0x7FFFFFFF,%d0	// ...drop the sign
 	cmpil		#0x3FFF8000,%d0	// ...compare with 1.0 (exp $3FFF, j-bit)
 	bges		ATANHBIG
 //--THIS IS THE USUAL CASE, |X| < 1
 //--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
 	fabsx		(%a0),%fp0	// ...Y = |X|
 	fmovex		%fp0,%fp1
 	fnegx		%fp1		// ...-Y
 	faddx		%fp0,%fp0		// ...2Y
 	fadds		#0x3F800000,%fp1	// ...1-Y
 	fdivx		%fp1,%fp0		// ...2Y/(1-Y)
 	movel		(%a0),%d0
 	andil		#0x80000000,%d0	// ...keep only SIGN(X)
 	oril		#0x3F000000,%d0	// ...SIGN(X)*HALF
 	movel		%d0,-(%sp)	// ...park the single-precision factor
 	fmovemx	%fp0-%fp0,(%a0)	// ...overwrite input
 	movel		%d1,-(%sp)	// ...save d1 across the call
 	clrl		%d1		// ...slognp1 is entered with d1 = 0
 	bsr		slognp1		// ...LOG1P(Z)
 	fmovel		(%sp)+,%fpcr	// ...saved d1 becomes the FPCR
 	fmuls		(%sp)+,%fp0	// ...times SIGN(X)*HALF, pops the factor
 	bra		t_frcinx
 ATANHBIG:
 //--|X| >= 1: exactly 1 is a divide-by-zero, anything larger is an
 //--operand error.
 	fabsx		(%a0),%fp0	// ...|X|
 	fcmps		#0x3F800000,%fp0	// ...against 1.0
 	fbgt		t_operr
 	bra		t_dz
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/scale.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/scale.s
@@ -0,0 +1,371 @@
 //
 //	scale.sa 3.3 7/30/91
 //
 //	The entry point sSCALE computes the destination operand
 //	scaled by the source operand.  If the absolute value of
 //	the source operand is (>= 2^14) an overflow or underflow
 //	is returned.
 //
 //	The entry point sscale is called from do_func to emulate
 //	the fscale unimplemented instruction.
 //
 //	Input: Double-extended destination operand in FPTEMP, 
 //		double-extended source operand in ETEMP.
 //
 //	Output: The function returns scale(X,Y) to fp0.
 //
 //	Modifies: fp0.
 //
 //	Algorithm:
 //		
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //SCALE    idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	t_ovfl2
 	|xref	t_unfl
 	|xref	round
 	|xref	t_resdnrm
 // Inclusive low/high bounds used by the cmp2w range check on the
 // sign-stripped source exponent in sscale.
 SRC_BNDS: .short	0x3fff,0x400c
 //
 // This entry point is used by the unimplemented instruction exception
 // handler.
 //
 //
 //
 //	FSCALE
 //
 //	In:   FPTEMP(a6) = destination operand, ETEMP(a6) = source operand.
 //	Sets: d1 = destination exponent word with the sign stripped,
 //	      L_SCR1(a6) = $ff if the destination sign bit is set, else 0.
 //	Continues at src_in when the source exponent lies inside
 //	SRC_BNDS, at src_out when it is above the upper bound, and falls
 //	through to src_small otherwise.
 //
 	.global	sscale
 sscale:
 	fmovel		#0,%fpcr		//clr user enabled exc
 	clrl		%d1
 	movew		FPTEMP(%a6),%d1	//get dest exponent
 	smi		L_SCR1(%a6)	//use L_SCR1 to hold sign
 	andil		#0x7fff,%d1	//strip sign
 	movew		ETEMP(%a6),%d0	//check src bounds
 	andiw		#0x7fff,%d0	//clr sign bit
 	cmp2w		SRC_BNDS,%d0	//carry set = outside the bounds
 	bccs		src_in
 	cmpiw		#0x400c,%d0	//test for too large
 	bge		src_out
 //
 // The source input is below 1, so we check for denormalized numbers
 // and set unfl.
 //
 src_small:
 // Reached by fall-through when the source exponent is below the lower
 // bound in SRC_BNDS.  The top three bits of DTAG decide the path: when
 // they are all zero the destination is returned unchanged (no_denorm);
 // otherwise UNFL is flagged and the destination is handed to t_resdnrm.
 // NOTE(review): the tag encoding itself is defined outside this file.
 	moveb		DTAG(%a6),%d0
 	andib		#0xe0,%d0	//keep only the tag bits
 	tstb		%d0
 	beqs		no_denorm
 	st		STORE_FLG(%a6)	//dest already contains result
 	orl		#unfl_mask,USER_FPSR(%a6) //set UNFL
 den_done:
 	leal		FPTEMP(%a6),%a0	//a0 -> operand for t_resdnrm
 	bra		t_resdnrm
 no_denorm:
 	fmovel		USER_FPCR(%a6),%FPCR	//back to the user's control reg
 	fmovex		FPTEMP(%a6),%fp0	//simply return dest
 	rts
 //
 // Source is within 2^14 range.  To perform the int operation,
 // move it to d0.
 //
 src_in:
 // Convert the source to a long integer in d0, truncating toward zero,
 // then wipe the FPSR so the conversion leaves no status behind.
 // A negative source goes to src_neg; a non-negative one falls through
 // to src_pos.
 	fmovex		ETEMP(%a6),%fp0	//move in src for int
 	fmovel		#rz_mode,%fpcr	//force rz for src conversion
 	fmovel		%fp0,%d0		//int src to d0
 	fmovel		#0,%FPSR		//clr status from above
 	tstw		ETEMP(%a6)	//check src sign
 	blt		src_neg
 //
 // Source is positive.  Add the src to the dest exponent.
 // The result can be denormalized, if src = 0, or overflow,
 // if the result of the add sets a bit in the upper word.
 //
 src_pos:
 // d0 = integer source (>= 0), d1 = sign-stripped destination exponent.
 // A zero destination exponent is handled by dst_dnrm.  Otherwise the
 // sum is the new exponent: zero goes to denorm, $7fff or more goes to
 // ovfl, anything else is written back with the saved sign.
 	tstw		%d1		//check for denorm
 	beq		dst_dnrm
 	addl		%d0,%d1		//add src to dest exp
 	beqs		denorm		//if zero, result is denorm
 	cmpil		#0x7fff,%d1	//test for overflow
 	bges		ovfl
 	tstb		L_SCR1(%a6)	//was the dest negative?
 	beqs		spos_pos
 	orw		#0x8000,%d1	//yes, reinsert the sign bit
 spos_pos:
 	movew		%d1,FPTEMP(%a6)	//result in FPTEMP
 	fmovel		USER_FPCR(%a6),%FPCR
 	fmovex		FPTEMP(%a6),%fp0	//write result to fp0
 	rts
 ovfl:
 // NOTE(review): d1 receives the sign here but is not stored on this
 // path; the original FPTEMP image is what gets copied to ETEMP for
 // t_ovfl2.
 	tstb		L_SCR1(%a6)
 	beqs		sovl_pos
 	orw		#0x8000,%d1
 sovl_pos:
 	movew		FPTEMP(%a6),ETEMP(%a6)	//result in ETEMP
 	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
 	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
 	bra		t_ovfl2
 denorm:
 // The exponent sum came out as zero (d1 = 0 on entry).  The saved sign
 // is merged into d1.  If the j bit of the destination mantissa is
 // clear the value really is denormalized: it is copied to ETEMP, UNFL
 // is flagged and t_resdnrm finishes the job.  If the j bit is set the
 // operand is simply returned with the new exponent word (nden_exit).
 	tstb		L_SCR1(%a6)	//was the dest negative?
 	beqs		den_pos
 	orw		#0x8000,%d1	//yes, reinsert the sign bit
 den_pos:
 	tstl		FPTEMP_HI(%a6)	//check j bit
 	blts		nden_exit	//if set, not denorm
 	movew		%d1,ETEMP(%a6)	//input expected in ETEMP
 	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
 	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
 // Use the mask, not the bit number: every other underflow path in
 // this file ORs unfl_mask into USER_FPSR.
 	orl		#unfl_mask,USER_FPSR(%a6)	//set unfl
 	leal		ETEMP(%a6),%a0	//a0 -> operand for t_resdnrm
 	bra		t_resdnrm
 nden_exit:
 	movew		%d1,FPTEMP(%a6)	//result in FPTEMP
 	fmovel		USER_FPCR(%a6),%FPCR
 	fmovex		FPTEMP(%a6),%fp0	//write result to fp0
 	rts
 //
 // Source is negative.  Add the src to the dest exponent.
 // (The result exponent will be reduced).  The result can be
 // denormalized.
 //
 src_neg:
 // d0 = integer source (negative), d1 = sign-stripped destination
 // exponent.  A zero sum goes to denorm, a negative sum to fix_dnrm;
 // otherwise the sum is stored as the new exponent with the saved sign.
 	addl		%d0,%d1		//add src to dest
 	beqs		denorm		//if zero, result is denorm
 	blts		fix_dnrm	//if negative, result is 
 //					;needing denormalization
 	tstb		L_SCR1(%a6)	//was the dest negative?
 	beqs		sneg_pos
 	orw		#0x8000,%d1	//yes, reinsert the sign bit
 sneg_pos:
 	movew		%d1,FPTEMP(%a6)	//result in FPTEMP
 	fmovel		USER_FPCR(%a6),%FPCR
 	fmovex		FPTEMP(%a6),%fp0	//write result to fp0
 	rts
 //
 // The result exponent is below denorm value.  Test for catastrophic
 // underflow and force zero if true.  If not, try to shift the 
 // mantissa right until a zero exponent exists.
 //
 fix_dnrm:
 // d1 = negative exponent sum.  Below $ffc0 (-64) the result is treated
 // as a total underflow (fix_unfl).  Otherwise the 64-bit mantissa is
 // shifted right one place per loop pass while the exponent copy in d0
 // counts up to zero; any 1 bit shifted out is remembered in L_SCR2.
 // d2 is saved on the stack here and restored in tst_con, which this
 // code falls into via tst_zero.
 	cmpiw		#0xffc0,%d1	//lower bound for normalization
 	blt		fix_unfl	//if lower, catastrophic unfl
 	movew		%d1,%d0		//use d0 for exp
 	movel		%d2,-(%a7)	//free d2 for norm
 	movel		FPTEMP_HI(%a6),%d1
 	movel		FPTEMP_LO(%a6),%d2
 	clrl		L_SCR2(%a6)	//no bits lost yet
 fix_loop:
 	addw		#1,%d0		//drive d0 to 0
 	lsrl		#1,%d1		//while shifting the
 	roxrl		#1,%d2		//mantissa to the right
 	bccs		no_carry
 	st		L_SCR2(%a6)	//use L_SCR2 to capture inex
 no_carry:
 	tstw		%d0		//it is finished when
 	blts		fix_loop	//d0 is zero or the mantissa
 	tstb		L_SCR2(%a6)	//any bit shifted out?
 	beqs		tst_zero
 	orl		#unfl_inx_mask,USER_FPSR(%a6)
 //					;set unfl, aunfl, ainex
 //
 // Test for zero. If zero, simply use fmove to return +/- zero
 // to the fpu.
 //
 tst_zero:
 // Entered from fix_dnrm with d1:d2 = shifted mantissa and the caller's
 // d2 on the stack.  Builds the result in FPTEMP (exponent 0 plus the
 // saved sign), restores d2, and branches to not_zero unless the whole
 // mantissa is zero.
 	clrw		FPTEMP_EX(%a6)
 	tstb		L_SCR1(%a6)	//test for sign
 	beqs		tst_con
 	orw		#0x8000,FPTEMP_EX(%a6) //set sign bit
 tst_con:
 	movel		%d1,FPTEMP_HI(%a6)
 	movel		%d2,FPTEMP_LO(%a6)
 	movel		(%a7)+,%d2	//restore d2 saved in fix_dnrm
 	tstl		%d1
 	bnes		not_zero
 	tstl		FPTEMP_LO(%a6)
 	bnes		not_zero
 //
 // Result is zero.  Check for rounding mode to set lsb.  If the
 // mode is rp, and the zero is positive, return smallest denorm.
 // If the mode is rm, and the zero is negative, return smallest
 // negative denorm.
 //
 	btstb		#5,FPCR_MODE(%a6) //test if rm or rp
 	beqs		no_dir
 	btstb		#4,FPCR_MODE(%a6) //check which one
 	beqs		zer_rm
 zer_rp:
 	tstb		L_SCR1(%a6)	//check sign
 	bnes		no_dir		//if set, neg op, no inc
 	movel		#1,FPTEMP_LO(%a6) //set lsb
 	bras		sm_dnrm
 zer_rm:
 	tstb		L_SCR1(%a6)	//check sign
 	beqs		no_dir		//if clr, pos op, no inc
 	movel		#1,FPTEMP_LO(%a6) //set lsb
 	orl		#neg_mask,USER_FPSR(%a6) //set N
 	bras		sm_dnrm
 no_dir:
 	fmovel		USER_FPCR(%a6),%FPCR
 	fmovex		FPTEMP(%a6),%fp0	//use fmove to set cc's
 	rts
 //
 // The rounding mode changed the zero to a smallest denorm. Call 
 // t_resdnrm with exceptional operand in ETEMP.
 //
 sm_dnrm:
 // Copy the three longwords of the FPTEMP image into ETEMP and pass a
 // pointer to ETEMP (in a0) to t_resdnrm.
 	movel		FPTEMP_EX(%a6),ETEMP_EX(%a6)
 	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
 	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
 	leal		ETEMP(%a6),%a0
 	bra		t_resdnrm
 //
 // Result is still denormalized.
 //
 not_zero:
 // The shifted mantissa is non-zero: flag UNFL, flag N as well when the
 // saved destination sign is negative, then finish through sm_dnrm.
 	orl		#unfl_mask,USER_FPSR(%a6) //set unfl
 	tstb		L_SCR1(%a6)	//check for sign
 	beqs		fix_exit
 	orl		#neg_mask,USER_FPSR(%a6) //set N
 fix_exit:
 	bras		sm_dnrm
 //
 // The result has underflowed to zero. Return zero and set
 // unfl, aunfl, and ainex.
 //
 fix_unfl:
 // Total underflow.  In rp mode with a positive operand, or rm mode
 // with a negative operand, the smallest denorm of that sign is built
 // in FPTEMP and returned through sm_dnrm.  In every other case a
 // signed zero is loaded into fp0 and FP_SCR1 is cleared.
 	orl		#unfl_inx_mask,USER_FPSR(%a6)
 	btstb		#5,FPCR_MODE(%a6) //test if rm or rp
 	beqs		no_dir2
 	btstb		#4,FPCR_MODE(%a6) //check which one
 	beqs		zer_rm2
 zer_rp2:
 	tstb		L_SCR1(%a6)	//check sign
 	bnes		no_dir2		//if set, neg op, no inc
 	clrl		FPTEMP_EX(%a6)	//positive sign, zero exponent
 	clrl		FPTEMP_HI(%a6)
 	movel		#1,FPTEMP_LO(%a6) //set lsb
 	bras		sm_dnrm		//return smallest denorm
 zer_rm2:
 	tstb		L_SCR1(%a6)	//check sign
 	beqs		no_dir2		//if clr, pos op, no inc
 	movew		#0x8000,FPTEMP_EX(%a6)	//negative sign, zero exponent
 	clrl		FPTEMP_HI(%a6)
 	movel		#1,FPTEMP_LO(%a6) //set lsb
 	orl		#neg_mask,USER_FPSR(%a6) //set N
 	bra		sm_dnrm		//return smallest denorm
 no_dir2:
 	tstb		L_SCR1(%a6)	//$ff (negative) or 0 (positive)
 	bges		pos_zero
 neg_zero:
 	clrl		FP_SCR1(%a6)	//clear the exceptional operand
 	clrl		FP_SCR1+4(%a6)	//for gen_except.
 	clrl		FP_SCR1+8(%a6)
 	fmoves		#0x80000000,%fp0	
 	rts
 pos_zero:
 	clrl		FP_SCR1(%a6)	//clear the exceptional operand
 	clrl		FP_SCR1+4(%a6)	//for gen_except.
 	clrl		FP_SCR1+8(%a6)
 	fmoves		#0x00000000,%fp0
 	rts
 //
 // The destination is a denormalized number.  It must be handled
 // by first shifting the bits in the mantissa until it is normalized,
 // then adding the remainder of the source to the exponent.
 //
 dst_dnrm:
 // d0 = integer source (shift budget).  d2/d3 are saved on the stack
 // and loaded with the destination mantissa; d1 is reloaded with the
 // full exponent word.  Each pass shifts the mantissa left one bit and
 // spends one unit of d0.  The loop leaves for dst_norm once the msb of
 // d2 is set, or for dst_fin once d0 runs out; both of those restore
 // d2/d3.
 	moveml		%d2/%d3,-(%a7)	
 	movew		FPTEMP_EX(%a6),%d1
 	movel		FPTEMP_HI(%a6),%d2
 	movel		FPTEMP_LO(%a6),%d3
 dst_loop:
 	tstl		%d2		//test for normalized result
 	blts		dst_norm	//exit loop if so
 	tstl		%d0		//otherwise, test shift count
 	beqs		dst_fin		//if zero, shifting is done
 	subil		#1,%d0		//dec src
 	lsll		#1,%d3
 	roxll		#1,%d2
 	bras		dst_loop
 //
 // Destination became normalized.  Simply add the remaining 
 // portion of the src to the exponent.
 //
 dst_norm:
 // d0 = what is left of the source, d1 = exponent word, d2:d3 =
 // normalized mantissa.  The result is assembled in FPTEMP, loaded into
 // fp0 under the user's FPCR, and d2/d3 are restored from the stack.
 	addw		%d0,%d1		//dst is normalized; add src
 	tstb		L_SCR1(%a6)	//was the dest negative?
 	beqs		dnrm_pos
 	orl		#0x8000,%d1	//yes, set the sign bit
 dnrm_pos:
 	movemw		%d1,FPTEMP_EX(%a6)
 	moveml		%d2,FPTEMP_HI(%a6)
 	moveml		%d3,FPTEMP_LO(%a6)
 	fmovel		USER_FPCR(%a6),%FPCR
 	fmovex		FPTEMP(%a6),%fp0
 	moveml		(%a7)+,%d2/%d3
 	rts
 //
 // Destination remained denormalized.  Call t_resdnrm with
 // exceptional operand in ETEMP.
 //
 dst_fin:
 // The shift budget ran out before the mantissa normalized.  The
 // partly shifted operand is written to ETEMP (N is flagged and the
 // sign bit set when the destination was negative), UNFL is flagged,
 // d2/d3 are restored and t_resdnrm gets a0 -> ETEMP.
 	tstb		L_SCR1(%a6)	//check for sign
 	beqs		dst_exit
 	orl		#neg_mask,USER_FPSR(%a6) //set N
 	orl		#0x8000,%d1
 dst_exit:
 	movemw		%d1,ETEMP_EX(%a6)
 	moveml		%d2,ETEMP_HI(%a6)
 	moveml		%d3,ETEMP_LO(%a6)
 	orl		#unfl_mask,USER_FPSR(%a6) //set unfl
 	moveml		(%a7)+,%d2/%d3
 	leal		ETEMP(%a6),%a0
 	bra		t_resdnrm
 //
 // Source is outside of 2^14 range.  Test the sign and branch
 // to the appropriate exception handler.
 //
 src_out:
 // d1 = sign-stripped destination exponent.  The destination sign is
 // merged back in and the destination mantissa copied to ETEMP.  The
 // sign of the source, tested in ETEMP before its exponent word is
 // overwritten, picks the exit: non-negative goes to t_ovfl2, negative
 // goes to t_unfl with a0 -> ETEMP.
 	tstb		L_SCR1(%a6)	//was the dest negative?
 	beqs		scro_pos
 	orl		#0x8000,%d1	//yes, set the sign bit
 scro_pos:
 	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
 	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
 	tstw		ETEMP(%a6)	//source sign
 	blts		res_neg
 res_pos:
 	movew		%d1,ETEMP(%a6)	//result in ETEMP
 	bra		t_ovfl2
 res_neg:
 	movew		%d1,ETEMP(%a6)	//result in ETEMP
 	leal		ETEMP(%a6),%a0
 	bra		t_unfl
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/scosh.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/scosh.s
@@ -0,0 +1,132 @@
 //
 //	scosh.sa 3.1 12/10/90
 //
 //	The entry point sCosh computes the hyperbolic cosine of
 //	an input argument; sCoshd does the same except for denormalized
 //	input.
 //
 //	Input: Double-extended number X in location pointed to
 //		by address register a0.
 //
 //	Output: The value cosh(X) returned in floating-point register Fp0.
 //
 //	Accuracy and Monotonicity: The returned result is within 3 ulps in
 //		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
 //		result is subsequently rounded to double precision. The
 //		result is provably monotonic in double precision.
 //
 //	Speed: The program sCOSH takes approximately 250 cycles.
 //
 //	Algorithm:
 //
 //	COSH
 //	1. If |X| > 16380 log2, go to 3.
 //
 //	2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
 //		y = |X|, z = exp(Y), and
 //		cosh(X) = (1/2)*( z + 1/z ).
 //		Exit.
 //
 //	3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
 //
 //	4. (16380 log2 < |X| <= 16480 log2)
 //		cosh(X) = exp(|X|)/2 (cosh is even, so no sign factor).
 //		However, invoking exp(|X|) may cause premature overflow.
 //		Thus, we calculate cosh(X) as follows:
 //		Y	:= |X|
 //		Fact	:=	2**(16380)
 //		Y'	:= Y - 16381 log2
 //		cosh(X) := Fact * exp(Y').
 //		Exit.
 //
 //	5. (|X| > 16480 log2) cosh(X) must overflow. Return
 //		Huge*Huge to generate overflow and a positive infinity
 //		(the sign of X is discarded). Huge is the largest finite
 //		number in extended format. Exit.
 //
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //SCOSH	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	|xref	t_ovfl
 	|xref	t_frcinx
 	|xref	setox
 // T1 and T2 are read as doubles (fsubd) and subtracted one after the
 // other in COSHBIG.  TWO16380 is an extended value with exponent word
 // $7FFB ($3FFF + 16380) and only the integer bit set, i.e. 2^16380.
 T1:	.long 0x40C62D38,0xD3D64634 // ... 16381 LOG2 LEAD
 T2:	.long 0x3D6F90AE,0xB1E75CC7 // ... 16381 LOG2 TRAIL
 TWO16380: .long 0x7FFB0000,0x80000000,0x00000000,0x00000000
 	.global	scoshd
 //
 //	scoshd -- cosh entry point for denormalized input.
 //	Loads 1.0, writes d1 to the FPCR, then adds the single-precision
 //	constant $00800000 so that the closing instruction runs under
 //	that FPCR before leaving through t_frcinx.
 //
 scoshd:
 //--COSH(X) = 1 FOR DENORMALIZED X
 	fmoves		#0x3F800000,%fp0	// ...1.0
 	fmovel		%d1,%FPCR
 	fadds		#0x00800000,%fp0	// ...1.0 + tiny
 	bra		t_frcinx
 	.global	scosh
 //
 //	scosh -- cosh(X) for the extended operand at (a0).
 //	The operand at (a0) is overwritten with the argument passed to
 //	setox.  d0 is built from the exponent word and the top 16
 //	mantissa bits with the sign removed, and is compared with the two
 //	thresholds of the algorithm (steps 1 and 3 of the file header).
 //
 scosh:
 	fmovex		(%a0),%fp0	// ...LOAD INPUT
 	movel		(%a0),%d0	// ...d0 high word = sign/exponent
 	movew		4(%a0),%d0	// ...d0 low word = top 16 mantissa bits
 	andil		#0x7FFFFFFF,%d0	// ...drop the sign
 	cmpil		#0x400CB167,%d0
 	bgts		COSHBIG
 //--THIS IS THE USUAL CASE, |X| < 16380 LOG2
 //--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
 	fabsx		%fp0		// ...|X|
 	movel		%d1,-(%sp)	// ...save d1 across the call
 	clrl		%d1		// ...setox is entered with d1 = 0
 	fmovemx	%fp0-%fp0,(%a0)	//pass parameter to setox
 	bsr		setox		// ...FP0 IS EXP(|X|)
 	fmuls		#0x3F000000,%fp0	// ...(1/2)EXP(|X|)
 	movel		(%sp)+,%d1	// ...recover d1
 	fmoves		#0x3E800000,%fp1	// ...(1/4)
 	fdivx		%fp0,%fp1	 	// ...1/(2 EXP(|X|))
 	fmovel		%d1,%FPCR	// ...final add runs under d1's setting
 	faddx		%fp1,%fp0
 	bra		t_frcinx
 COSHBIG:
 	cmpil		#0x400CB2B3,%d0
 	bgts		COSHHUGE
 	fabsx		%fp0
 	fsubd		T1(%pc),%fp0		// ...(|X|-16381LOG2_LEAD)
 	fsubd		T2(%pc),%fp0		// ...|X| - 16381 LOG2, ACCURATE
 	movel		%d1,-(%sp)	// ...save d1 across the call
 	clrl		%d1		// ...setox is entered with d1 = 0
 	fmovemx	%fp0-%fp0,(%a0)	//pass parameter to setox
 	bsr		setox
 	fmovel		(%sp)+,%fpcr	// ...saved d1 becomes the FPCR
 	fmulx		TWO16380(%pc),%fp0	// ...scale by 2^16380
 	bra		t_frcinx
 COSHHUGE:
 	fmovel		#0,%fpsr		//clr N bit if set by source
 	bclrb		#7,(%a0)		//always return positive value
 	fmovemx	(%a0),%fp0-%fp0
 	bra		t_ovfl
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/setox.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/setox.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/sgetem.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/sgetem.s
@@ -0,0 +1,141 @@
 //
 //	sgetem.sa 3.1 12/10/90
 //
 //	The entry point sGETEXP returns the exponent portion 
 //	of the input argument.  The exponent bias is removed
 //	and the exponent value is returned as an extended 
 //	precision number in fp0.  sGETEXPD handles denormalized
 //	numbers.
 //
 //	The entry point sGETMAN extracts the mantissa of the 
 //	input argument.  The mantissa is converted to an 
 //	extended precision number and returned in fp0.  The
 //	range of the result is [1.0 - 2.0).
 //
 //
 //	Input:  Double-extended number X in the ETEMP space in
 //		the floating-point save stack.
 //
 //	Output:	The functions return exp(X) or man(X) in fp0.
 //
 //	Modified: fp0.
 //
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //SGETEM	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section 8
 	.include "fpsp.defs"
 	|xref	nrm_set
 //
 // This entry point is used by the unimplemented instruction exception
 // handler.  It points a0 to the input operand.
 //
 //
 //
 //	SGETEXP
 //
 	.global	sgetexp
 //
 //	sgetexp -- unbiased exponent of the operand at (a0), in fp0.
 //	The low word of d0 is used as scratch.
 //
 sgetexp:
 	movew	LOCAL_EX(%a0),%d0	//sign and biased exponent word
 	andiw	#0x7fff,%d0	//keep the 15 exponent bits only
 	subiw	#0x3fff,%d0	//remove the bias
 	fmovew	%d0,%fp0		//word integer -> fp0
 	rts
 	.global	sgetexpd
 //
 //	sgetexpd -- exponent of a denormalized operand at (a0), in fp0.
 //	The sign bit of the operand is cleared in place and the operand is
 //	normalized by nrm_set before its exponent word is read.
 //
 sgetexpd:
 	bclrb	#sign_bit,LOCAL_EX(%a0)
 	bsr	nrm_set		//normalize (exp will go negative)
 	movew	LOCAL_EX(%a0),%d0	//load resulting exponent into d0
 	subw	#0x3fff,%d0	//subtract off the bias
 	fmovew	%d0,%fp0		//move the exp to fp0
 	rts
 //
 //
 // This entry point is used by the unimplemented instruction exception
 // handler.  It points a0 to the input operand.
 //
 //
 //
 //	SGETMAN
 //
 //
 // For normalized numbers, leave the mantissa alone, simply load
 // with an exponent of +/- $3fff.
 //
 	.global	sgetman
 //
 //	sgetman -- mantissa of the operand at (a0), returned in fp0.
 //	The exponent field of the operand is rewritten in place to $3fff
 //	with the original sign kept; the mantissa is not touched.  The
 //	FPCR is loaded with the user's value with its low byte cleared.
 //
 sgetman:
 	movel	USER_FPCR(%a6),%d0
 	andil	#0xffffff00,%d0	//clear rounding precision and mode
 	fmovel	%d0,%fpcr		//this fpcr setting is used by the 882
 	movew	LOCAL_EX(%a0),%d0	//get the exp (really just want sign bit)
 	orw	#0x7fff,%d0	//force every exponent bit on, sign untouched
 	bclrl	#14,%d0	 	//make it the new exp +-3fff
 	movew	%d0,LOCAL_EX(%a0)	//move the sign & exp back to fsave stack
 	fmovex	(%a0),%fp0	//put new value back in fp0
 	rts
 //
 // For denormalized numbers, shift the mantissa until the j-bit = 1,
 // then load the exponent with +/- $3fff.
 //
 	.global	sgetmand
 //
 //	sgetmand -- mantissa of a denormalized operand at (a0).
 //	The 64-bit mantissa is left-justified by shft and written back,
 //	then sgetman supplies the exponent and loads fp0.
 //	d0 and d1 are used as scratch.
 //
 sgetmand:
 	movel	LOCAL_HI(%a0),%d0	//load ms mant in d0
 	movel	LOCAL_LO(%a0),%d1	//load ls mant in d1
 	bsr	shft		//shift mantissa bits till msbit is set
 	movel	%d0,LOCAL_HI(%a0)	//put ms mant back on stack
 	movel	%d1,LOCAL_LO(%a0)	//put ls mant back on stack
 	bras	sgetman
 //
 //	SHFT
 //
 //	Shifts the mantissa bits until msbit is set.
 //	input:
 //		ms mantissa part in d0
 //		ls mantissa part in d1
 //	output:
 //		shifted bits in d0 and d1
 shft:
 // Left-justify the 64-bit mantissa held in d0 (ms) : d1 (ls).
 // An all-zero mantissa is returned untouched.  d3, d5 and d6 are
 // saved and restored around their use.
 	tstl	%d0		//anything in the ms half?
 	bnes	upper		//yes, full 64-bit shift needed
 	tstl	%d1		//ms half empty: anything in the ls half?
 	beqs	shft_end	//no, mantissa is zero, nothing to do
 cont:
 // ms half is zero: the ls half becomes the ms half, then is justified.
 	movel	%d3,-(%sp)	//keep caller's d3
 	exg	%d0,%d1		//d0 = old ls half, d1 = 0
 	bfffo	%d0{#0:#32},%d3	//d3 = offset of the first 1 bit in d0
 	lsll	%d3,%d0		//bring that bit up to bit 31
 	movel	(%sp)+,%d3	//give d3 back
 	rts
 upper:
 // ms half is non-zero: shift both halves left by the leading-zero
 // count and carry the bits that leave d1 over into d0.
 	moveml	%d3/%d5/%d6,-(%sp)	//keep caller's d3/d5/d6
 	bfffo	%d0{#0:#32},%d3	//d3 = offset of the first 1 bit in d0
 	lsll	%d3,%d0		//ms half now has its msb set
 	movel	%d1,%d6		//copy of the unshifted ls half
 	lsll	%d3,%d1		//ls half shifted by the same count
 	moveq	#32,%d5
 	subl	%d3,%d5		//d5 = 32 - count
 	lsrl	%d5,%d6		//d6 = the bits that fell out of d1
 	orl	%d6,%d0		//drop them into the bottom of d0
 	moveml	(%sp)+,%d3/%d5/%d6	//give the registers back
 shft_end:
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/sint.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/sint.s
@@ -0,0 +1,247 @@
 //
 //	sint.sa 3.1 12/10/90
 //
 //	The entry point sINT computes the rounded integer 
 //	equivalent of the input argument, sINTRZ computes 
 //	the integer rounded to zero of the input argument.
 //
 //	Entry points sint and sintrz are called from do_func
 //	to emulate the fint and fintrz unimplemented instructions,
 //	respectively.  Entry point sintdo is used by bindec.
 //
 //	Input: (Entry points sint and sintrz) Double-extended
 //		number X in the ETEMP space in the floating-point
 //		save stack.
 //	       (Entry point sintdo) Double-extended number X in
 //		location pointed to by the address register a0.
 //	       (Entry point sintd) Double-extended denormalized
 //		number X in the ETEMP space in the floating-point
 //		save stack.
 //
 //	Output: The function returns int(X) or intrz(X) in fp0.
 //
 //	Modifies: fp0.
 //
 //	Algorithm: (sint and sintrz)
 //
 //	1. If exp(X) >= 63, return X. 
 //	   If exp(X) < 0, return +/- 0 or +/- 1, according to
 //	   the rounding mode.
 //	
 //	2. (X is in range) set rsc = 63 - exp(X). Unnormalize the
 //	   result to the exponent $403e.
 //
 //	3. Round the result in the mode given in USER_FPCR. For
 //	   sintrz, force round-to-zero mode.
 //
 //	4. Normalize the rounded result; store in fp0.
 //
 //	For the denormalized cases, force the correct result
 //	for the given sign and rounding mode.
 //
 //		        Sign(X)
 //		RMODE   +    -
 //		-----  --------
 //		 RN    +0   -0
 //		 RZ    +0   -0
 //		 RM    +0   -1
 //		 RP    +1   -0
 //
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //SINT    idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	dnrm_lp
 	|xref	nrm_set
 	|xref	round
 	|xref	t_inx2
 	|xref	ld_pone
 	|xref	ld_mone
 	|xref	ld_pzero
 	|xref	ld_mzero
 	|xref	snzrinx
 //
 //	FINT
 //
 	.global	sint
 //
 //	sint -- FINT entry.  Extracts the two rounding-mode bits (bits 5:4
 //	of the FPCR_MODE byte) into d1, stores them as a longword in
 //	L_SCR1 and continues in sintexc.
 //
 sint:
 	bfextu	FPCR_MODE(%a6){#2:#2},%d1	//use user's mode for rounding
 //					;implicitly has extend precision
 //					;in upper word. 
 	movel	%d1,L_SCR1(%a6)		//save mode bits
 	bras	sintexc			
 //
 //	FINT with extended denorm inputs.
 //
 	.global	sintd
 //
 //	sintd -- FINT for a denormalized operand at (a0).
 //	Bit 5 of FPCR_MODE clear (rn or rz) leaves through snzrinx.
 //	Otherwise bit 4 selects rp or rm, and the operand's sign bit
 //	selects the constant that is loaded (+1, -1, +0 or -0) before
 //	leaving through t_inx2.
 //
 sintd:
 	btstb	#5,FPCR_MODE(%a6)
 	beq	snzrinx		//if round nearest or round zero, +/- 0
 	btstb	#4,FPCR_MODE(%a6)
 	beqs	rnd_mns
 rnd_pls:
 	btstb	#sign_bit,LOCAL_EX(%a0)
 	bnes	sintmz		//round plus inf and neg, answer is -0
 	bsr	ld_pone		//if round plus inf and pos, answer is +1
 	bra	t_inx2
 rnd_mns:
 	btstb	#sign_bit,LOCAL_EX(%a0)
 	beqs	sintpz		//round mns inf and pos, answer is +0
 	bsr	ld_mone		//if round mns inf and neg, answer is -1
 	bra	t_inx2
 sintpz:
 	bsr	ld_pzero
 	bra	t_inx2
 sintmz:
 	bsr	ld_mzero
 	bra	t_inx2
 //
 //	FINTRZ
 //
 	.global	sintrz
 //
 //	sintrz -- FINTRZ entry.  Stores the mode value 1 (rz) in L_SCR1,
 //	ignoring the user's rounding mode, and continues in sintexc.
 //
 sintrz:
 	movel	#1,L_SCR1(%a6)		//use rz mode for rounding
 //					;implicitly has extend precision
 //					;in upper word. 
 	bras	sintexc			
 //
 //	SINTDO
 //
 //	Input:	a0 points to an IEEE extended format operand
 // 	Output:	fp0 has the result 
 //
 // Exceptions:
 //
 // If the subroutine results in an inexact operation, the inx2 and
 // ainx bits in the USER_FPSR are set.
 //
 //
 	.global	sintdo
 //
 //	sintdo stores the user's rounding-mode bits in L_SCR1 exactly as
 //	sint does, then falls into sintexc.
 //
 //	sintexc: L_SCR1 holds the rounding mode, a0 points to the operand.
 //	The sign is moved out of the exponent word into LOCAL_SGN, then
 //	the exponent is classified: above $403e goes to out_rnge, above
 //	$3ffd goes to in_rnge, anything else falls through to un_rnge.
 //
 sintdo:
 	bfextu	FPCR_MODE(%a6){#2:#2},%d1	//use user's mode for rounding
 //					;implicitly has ext precision
 //					;in upper word. 
 	movel	%d1,L_SCR1(%a6)		//save mode bits
 //
 // Real work of sint is in sintexc
 //
 sintexc:
 	bclrb	#sign_bit,LOCAL_EX(%a0)	//convert to internal extended
 //					;format
 	sne	LOCAL_SGN(%a0)		//$ff if the sign bit was set
 	cmpw	#0x403e,LOCAL_EX(%a0)	//check if (unbiased) exp > 63
 	bgts	out_rnge			//branch if exp > 63
 	cmpw	#0x3ffd,LOCAL_EX(%a0)	//check if biased exp > $3ffd
 	bgt	in_rnge			//if so, do calc
 //
 // Biased exponent is $3ffd or less (|X| < 1/2).  Check for directed
 // rounding modes.  L_SCR1 contains the rmode in the lower byte.
 //
 un_rnge:
 // The exponent is too small for any integer bits to survive.  The low
 // byte of L_SCR1 holds the mode: bit 1 clear means rn or rz, 2 is rm,
 // 3 is rp.  LOCAL_SGN is non-zero for a negative operand.  Every path
 // loads a constant through an ld_* helper and leaves through t_inx2.
 	btstb	#1,L_SCR1+3(%a6)		//check for rn and rz
 	beqs	un_rnrz
 	tstb	LOCAL_SGN(%a0)		//check for sign
 	bnes	un_rmrp_neg
 //
 // Sign is +.  If rp, load +1.0, if rm, load +0.0
 //
 	cmpib	#3,L_SCR1+3(%a6)		//check for rp
 	beqs	un_ldpone		//if rp, load +1.0
 	bsr	ld_pzero		//if rm, load +0.0
 	bra	t_inx2
 un_ldpone:
 	bsr	ld_pone
 	bra	t_inx2
 //
 // Sign is -.  If rm, load -1.0, if rp, load -0.0
 //
 un_rmrp_neg:
 	cmpib	#2,L_SCR1+3(%a6)		//check for rm
 	beqs	un_ldmone		//if rm, load -1.0
 	bsr	ld_mzero		//if rp, load -0.0
 	bra	t_inx2
 un_ldmone:
 	bsr	ld_mone
 	bra	t_inx2
 //
 // Rmode is rn or rz; return signed zero
 //
 un_rnrz:
 	tstb	LOCAL_SGN(%a0)		//check for sign
 	bnes	un_rnrz_neg
 	bsr	ld_pzero
 	bra	t_inx2
 un_rnrz_neg:
 	bsr	ld_mzero
 	bra	t_inx2
 //
 // Input is greater than 2^63.  All bits are significant.  Return
 // the input.
 //
 out_rnge:
 // The operand is returned unchanged.  LOCAL_SGN is cleared and, when
 // it was non-zero, the sign bit is put back in the exponent word.
 // The load into fp0 is done with the FPCR temporarily set to zero;
 // the previous FPCR is restored from the stack afterwards.
 	bfclr	LOCAL_SGN(%a0){#0:#8}	//change back to IEEE ext format
 	beqs	intps			//sign byte was zero: positive
 	bsetb	#sign_bit,LOCAL_EX(%a0)
 intps:
 	fmovel	%fpcr,-(%sp)
 	fmovel	#0,%fpcr
 	fmovex LOCAL_EX(%a0),%fp0	//if exp > 63
 //					;then return X to the user
 //					;there are no fraction bits
 	fmovel	(%sp)+,%fpcr
 	rts
 in_rnge:
 // Three helper calls do the work on the operand at a0: dnrm_lp
 // (threshold $403e in d1, d0 cleared on entry), round (mode from
 // L_SCR1 in d1) and nrm_set.  The sign is then moved back from
 // LOCAL_SGN into the exponent word and the result is loaded into fp0
 // with the FPCR temporarily set to zero.
 // 					;shift off fraction bits
 	clrl	%d0			//clear d0 - initial g,r,s for
 //					;dnrm_lp
 	movel	#0x403e,%d1		//set threshold for dnrm_lp
 //					;assumes a0 points to operand
 	bsr	dnrm_lp
 //					;returns unnormalized number
 //					;pointed by a0
 //					;output d0 supplies g,r,s
 //					;used by round
 	movel	L_SCR1(%a6),%d1		//use selected rounding mode
 //
 //
 	bsr	round			//round the unnorm based on users
 //					;input	a0 ptr to ext X
 //					;	d0 g,r,s bits
 //					;	d1 PREC/MODE info
 //					;output a0 ptr to rounded result
 //					;inexact flag set in USER_FPSR
 //					;if initial grs set
 //
 // normalize the rounded result and store value in fp0
 //
 	bsr	nrm_set			//normalize the unnorm
 //					;Input: a0 points to operand to
 //					;be normalized
 //					;Output: a0 points to normalized
 //					;result
 	bfclr	LOCAL_SGN(%a0){#0:#8}
 	beqs	nrmrndp			//sign byte was zero: positive
 	bsetb	#sign_bit,LOCAL_EX(%a0)	//return to IEEE extended format
 nrmrndp:
 	fmovel	%fpcr,-(%sp)
 	fmovel	#0,%fpcr
 	fmovex LOCAL_EX(%a0),%fp0	//move result to fp0
 	fmovel	(%sp)+,%fpcr
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/slog2.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/slog2.s
@@ -0,0 +1,188 @@
 //
 //	slog2.sa 3.1 12/10/90
 //
 //       The entry point slog10 computes the base-10 
 //	logarithm of an input argument X.
 //	slog10d does the same except the input value is a 
 //	denormalized number.  
 //	sLog2 and sLog2d are the base-2 analogues.
 //
 //       INPUT:	Double-extended value in memory location pointed to 
 //		by address register a0.
 //
 //       OUTPUT: log_10(X) or log_2(X) returned in floating-point 
 //		register fp0.
 //
 //       ACCURACY and MONOTONICITY: The returned result is within 1.7 
 //		ulps in 64 significant bit, i.e. within 0.5003 ulp 
 //		to 53 bits if the result is subsequently rounded 
 //		to double precision. The result is provably monotonic 
 //		in double precision.
 //
 //       SPEED:	Two timings are measured, both in the copy-back mode. 
 //		The first one is measured when the function is invoked 
 //		the first time (so the instructions and data are not 
 //		in cache), and the second one is measured when the 
 //		function is reinvoked at the same input argument.
 //
 //       ALGORITHM and IMPLEMENTATION NOTES:
 //
 //       slog10d:
 //
 //       Step 0.   If X < 0, create a NaN and raise the invalid operation
 //                 flag. Otherwise, save FPCR in D1; set FpCR to default.
 //       Notes:    Default means round-to-nearest mode, no floating-point
 //                 traps, and precision control = double extended.
 //
 //       Step 1.   Call slognd to obtain Y = log(X), the natural log of X.
 //       Notes:    Even if X is denormalized, log(X) is always normalized.
 //
 //       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).
 //            2.1  Restore the user FPCR
 //            2.2  Return ans := Y * INV_L10.
 //
 //
 //       slog10: 
 //
 //       Step 0.   If X < 0, create a NaN and raise the invalid operation
 //                 flag. Otherwise, save FPCR in D1; set FpCR to default.
 //       Notes:    Default means round-to-nearest mode, no floating-point
 //                 traps, and precision control = double extended.
 //
 //       Step 1.   Call sLogN to obtain Y = log(X), the natural log of X.
 //
 //       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).
 //            2.1  Restore the user FPCR
 //            2.2  Return ans := Y * INV_L10.
 //
 //
 //       sLog2d:
 //
 //       Step 0.   If X < 0, create a NaN and raise the invalid operation
 //                 flag. Otherwise, save FPCR in D1; set FpCR to default.
 //       Notes:    Default means round-to-nearest mode, no floating-point
 //                 traps, and precision control = double extended.
 //
 //       Step 1.   Call slognd to obtain Y = log(X), the natural log of X.
 //       Notes:    Even if X is denormalized, log(X) is always normalized.
 //
 //       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).
 //            2.1  Restore the user FPCR
 //            2.2  Return ans := Y * INV_L2.
 //
 //
 //       sLog2:
 //
 //       Step 0.   If X < 0, create a NaN and raise the invalid operation
 //                 flag. Otherwise, save FPCR in D1; set FpCR to default.
 //       Notes:    Default means round-to-nearest mode, no floating-point
 //                 traps, and precision control = double extended.
 //
 //       Step 1.   If X is not an integer power of two, i.e., X != 2^k,
 //                 go to Step 3.
 //
 //       Step 2.   Return k.
 //            2.1  Get integer k, X = 2^k.
 //            2.2  Restore the user FPCR.
 //            2.3  Return ans := convert-to-double-extended(k).
 //
 //       Step 3.   Call sLogN to obtain Y = log(X), the natural log of X.
 //
 //       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).
 //            4.1  Restore the user FPCR
 //            4.2  Return ans := Y * INV_L2.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //SLOG2    idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	|xref	t_frcinx	
 	|xref	t_operr
 	|xref	slogn
 	|xref	slognd
 // Extended-precision multipliers applied to the natural log with
 // fmulx: INV_L10 for the base-10 entries, INV_L2 for the base-2 ones
 // (1/log(10) and 1/log(2) per the algorithm notes above).
 INV_L10:  .long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
 INV_L2:   .long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
 	.global	slog10d
 //
 //	slog10d -- log10 of a denormalized operand at (a0), result in fp0.
 //	A set sign bit leaves through invalid.  d1 is saved on the stack,
 //	zeroed for the call to slognd, and popped straight into the FPCR
 //	so that the closing multiply runs under that setting.
 //
 slog10d:
 	movel		(%a0),%d0	// ...sign/exponent longword
 	bmi		invalid		// ...negative input
 	movel		%d1,-(%sp)	// ...park d1
 	moveq		#0,%d1		// ...slognd is entered with d1 = 0
 	bsr		slognd		// ...fp0 = log(X)
 	fmovel		(%sp)+,%fpcr	// ...parked d1 becomes the FPCR
 	fmulx		INV_L10,%fp0	// ...log(X) * INV_L10
 	bra		t_frcinx
 	.global	slog10
 //
 //	slog10 -- log10 of a normalized operand at (a0), result in fp0.
 //	A set sign bit leaves through invalid.  d1 is saved on the stack,
 //	zeroed for the call to slogn, and popped straight into the FPCR
 //	so that the closing multiply runs under that setting.
 //
 slog10:
 	movel		(%a0),%d0	// ...sign/exponent longword
 	bmi		invalid		// ...negative input
 	movel		%d1,-(%sp)	// ...park d1
 	moveq		#0,%d1		// ...slogn is entered with d1 = 0
 	bsr		slogn		// ...fp0 = log(X)
 	fmovel		(%sp)+,%fpcr	// ...parked d1 becomes the FPCR
 	fmulx		INV_L10,%fp0	// ...log(X) * INV_L10
 	bra		t_frcinx
 	.global	slog2d
 //
 //	slog2d -- log2 of a denormalized operand at (a0), result in fp0.
 //	A set sign bit leaves through invalid.  d1 is saved on the stack,
 //	zeroed for the call to slognd, and popped straight into the FPCR
 //	so that the closing multiply runs under that setting.
 //
 slog2d:
 	movel		(%a0),%d0	// ...sign/exponent longword
 	bmi		invalid		// ...negative input
 	movel		%d1,-(%sp)	// ...park d1
 	moveq		#0,%d1		// ...slognd is entered with d1 = 0
 	bsr		slognd		// ...fp0 = log(X)
 	fmovel		(%sp)+,%fpcr	// ...parked d1 becomes the FPCR
 	fmulx		INV_L2,%fp0	// ...log(X) * INV_L2
 	bra		t_frcinx
 	.global	slog2
 //
 //	slog2 -- log2 of a normalized operand at (a0), result in fp0.
 //	A set sign bit leaves through invalid.  When every mantissa bit
 //	below the integer bit is zero, X = 2^k and k is returned directly
 //	as a converted integer.  Otherwise log(X) from slogn is multiplied
 //	by INV_L2.  In both cases d1 is what ends up in the FPCR before
 //	the closing instruction.
 //
 slog2:
 	movel		(%a0),%d0	// ...sign/exponent longword
 	bmi		invalid		// ...negative input
 	movel		8(%a0),%d0	// ...low mantissa longword
 	bnes		continue		// ...non-zero, X is not 2^k
 	movel		4(%a0),%d0	// ...high mantissa longword
 	bclrl		#31,%d0		// ...ignore the integer bit
 	tstl		%d0
 	bnes		continue		// ...fraction bits set, not 2^k
 //--X = 2^k.
 	movew		(%a0),%d0	// ...exponent word into cleared d0
 	andil		#0x00007FFF,%d0	// ...biased exponent
 	subil		#0x3FFF,%d0	// ...k
 	fmovel		%d1,%fpcr	// ...d1 becomes the FPCR
 	fmovel		%d0,%fp0		// ...convert k
 	bra		t_frcinx
 continue:
 	movel		%d1,-(%sp)	// ...park d1
 	moveq		#0,%d1		// ...slogn is entered with d1 = 0
 	bsr		slogn		// ...fp0 = log(X)
 	fmovel		(%sp)+,%fpcr	// ...parked d1 becomes the FPCR
 	fmulx		INV_L2,%fp0	// ...log(X) * INV_L2
 	bra		t_frcinx
 invalid:
 //--shared exit for negative input, also used by the entries above
 	bra		t_operr
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/slogn.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/slogn.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/smovecr.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/smovecr.s
@@ -0,0 +1,162 @@
 //
 //	smovecr.sa 3.1 12/10/90
 //
 //	The entry point sMOVECR returns the constant at the
 //	offset given in the instruction field.
 //
 //	Input: An offset in the instruction word.
 //
 //	Output:	The constant rounded to the user's rounding
 //		mode unchecked for overflow.
 //
 //	Modified: fp0.
 //
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //SMOVECR	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section 8
 	.include "fpsp.defs"
 	|xref	nrm_set
 	|xref	round
 	|xref	PIRN
 	|xref	PIRZRM
 	|xref	PIRP
 	|xref	SMALRN
 	|xref	SMALRZRM
 	|xref	SMALRP
 	|xref	BIGRN
 	|xref	BIGRZRM
 	|xref	BIGRP
 //	Zero constant; Z_VAL loads it into fp0 as a single-precision value.
 FZERO:	.long	00000000
 //
 //	FMOVECR 
 //
 	.global	smovcr
 smovcr:
 //	d0 := 7-bit constant offset taken from CMDREG1B(a6),
 //	d1 := 2-bit rounding mode taken from USER_FPCR(a6).
 //	Dispatches on d0 to PI_TBL, SM_TBL, BG_TBL or Z_VAL.
 	bfextu	CMDREG1B(%a6){#9:#7},%d0 //get offset
 	bfextu	USER_FPCR(%a6){#26:#2},%d1 //get rmode
 //
 // check range of offset
 //
 	tstb	%d0		//if zero, offset is to pi
 	beqs	PI_TBL		//it is pi
 	cmpib	#0x0a,%d0		//check range $01 - $0a
 	bles	Z_VAL		//if in this range, return zero
 	cmpib	#0x0e,%d0		//check range $0b - $0e
 	bles	SM_TBL		//valid constants in this range
 	cmpib	#0x2f,%d0		//check range $0f - $2f
 	bles	Z_VAL		//if in this range, return zero 
 	cmpib	#0x3f,%d0		//check range $30 - $3f
 	ble  	BG_TBL		//valid constants in this range
 //	offsets above $3f fall through and also return zero
 Z_VAL:
 	fmoves	FZERO,%fp0
 	rts
 PI_TBL:
 //	Offset 0: pick the pi table that matches the rounding mode in d1
 //	and continue at set_finx with d0 = 0 as the table index.
 	tstb	%d1		//offset is zero, check for rmode
 	beqs	PI_RN		//if zero, rn mode
 	cmpib	#0x3,%d1		//check for rp
 	beqs	PI_RP		//if 3, rp mode
 PI_RZRM:
 	leal	PIRZRM,%a0	//rmode is rz or rm, load PIRZRM in a0
 	bra	set_finx
 PI_RN:
 	leal	PIRN,%a0		//rmode is rn, load PIRN in a0
 	bra	set_finx
 PI_RP:
 	leal	PIRP,%a0		//rmode is rp, load PIRP in a0
 	bra	set_finx
 SM_TBL:
 //	Offsets $0b - $0e: rebase d0 to a table index, select the SMAL* table
 //	for the rounding mode in d1, then go to set_finx (index 0 - 2) or
 //	no_finx (index 3).
 	subil	#0xb,%d0		//make offset in 0 - 3 range
 	tstb	%d1		//check for rmode
 	beqs	SM_RN		//if zero, rn mode
 	cmpib	#0x3,%d1		//check for rp
 	beqs	SM_RP		//if 3, rp mode
 SM_RZRM:
 	leal	SMALRZRM,%a0	//rmode is rz or rm, load SMALRZRM in a0
 	cmpib	#0x2,%d0		//check if result is inex
 	ble	set_finx	//if 0 - 2, it is inexact
 	bra	no_finx		//if 3, it is exact
 SM_RN:
 	leal	SMALRN,%a0	//rmode is rn, load SMALRN in a0
 	cmpib	#0x2,%d0		//check if result is inex
 	ble	set_finx	//if 0 - 2, it is inexact
 	bra	no_finx		//if 3, it is exact
 SM_RP:
 	leal	SMALRP,%a0	//rmode is rp, load SMALRP in a0
 	cmpib	#0x2,%d0		//check if result is inex
 	ble	set_finx	//if 0 - 2, it is inexact
 	bra	no_finx		//if 3, it is exact
 BG_TBL:
 //	Offsets $30 - $3f: rebase d0 to a table index, select the BIG* table
 //	for the rounding mode in d1. Indices 0 - 1 and 8 - f go to set_finx,
 //	indices 2 - 7 go to no_finx.
 	subil	#0x30,%d0		//make offset in 0 - f range
 	tstb	%d1		//check for rmode
 	beqs	BG_RN		//if zero, rn mode
 	cmpib	#0x3,%d1		//check for rp
 	beqs	BG_RP		//if 3, rp mode
 BG_RZRM:
 	leal	BIGRZRM,%a0	//rmode is rz or rm, load BIGRZRM in a0
 	cmpib	#0x1,%d0		//check if result is inex
 	ble	set_finx	//if 0 - 1, it is inexact
 	cmpib	#0x7,%d0		//second check
 	ble	no_finx		//if 2 - 7, it is exact
 	bra	set_finx	//if 8 - f, it is inexact
 BG_RN:
 	leal	BIGRN,%a0	//rmode is rn, load BIGRN in a0
 	cmpib	#0x1,%d0		//check if result is inex
 	ble	set_finx	//if 0 - 1, it is inexact
 	cmpib	#0x7,%d0		//second check
 	ble	no_finx		//if 2 - 7, it is exact
 	bra	set_finx	//if 8 - f, it is inexact
 BG_RP:
 	leal	BIGRP,%a0	//rmode is rp, load BIGRP in a0
 	cmpib	#0x1,%d0		//check if result is inex
 	ble	set_finx	//if 0 - 1, it is inexact
 	cmpib	#0x7,%d0		//second check
 	ble	no_finx		//if 2 - 7, it is exact
 //	bra	set_finx	;if 8 - f, it is inexact
 //	(indices 8 - f fall through into set_finx, which follows directly)
 set_finx:
 //	Entered with a0 = table base, d0 = table index, d1 = rounding mode.
 //	set_finx additionally records inex2/ainex in the user's FPSR image.
 	orl	#inx2a_mask,USER_FPSR(%a6) //set inex2/ainex
 no_finx:
 	mulul	#12,%d0			//use offset to point into tables
 	movel	%d1,L_SCR1(%a6)		//load mode for round call
 	bfextu	USER_FPCR(%a6){#24:#2},%d1	//get precision
 	tstl	%d1			//precision field zero = extended
 //
 // Precision is extended: load the table entry as is
 //
 	bnes	not_ext			//nonzero field: single or double, must round
 	fmovemx (%a0,%d0),%fp0-%fp0		//return result in fp0
 	rts
 //
 // Precision is single or double
 //
 not_ext:
 	swap	%d1			//rnd prec in upper word of d1
 	addl	L_SCR1(%a6),%d1		//merge rmode in low word of d1
 	movel	(%a0,%d0),FP_SCR1(%a6)	//load first word to temp storage
 	movel	4(%a0,%d0),FP_SCR1+4(%a6)	//load second word
 	movel	8(%a0,%d0),FP_SCR1+8(%a6)	//load third word
 	clrl	%d0			//clear g,r,s
 	lea	FP_SCR1(%a6),%a0		//a0 now points at the working copy
 	btstb	#sign_bit,LOCAL_EX(%a0)
 	sne	LOCAL_SGN(%a0)		//convert to internal ext. format
 	bsr	round			//go round the mantissa
 	bfclr	LOCAL_SGN(%a0){#0:#8}	//convert back to IEEE ext format
 	beqs	fin_fcr			//sign byte was zero: leave sign bit clear
 	bsetb	#sign_bit,LOCAL_EX(%a0)
 fin_fcr:
 	fmovemx (%a0),%fp0-%fp0
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/srem_mod.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/srem_mod.s
@@ -0,0 +1,422 @@
 //
 //	srem_mod.sa 3.1 12/10/90
 //
 //      The entry point sMOD computes the floating point MOD of the
 //      input values X and Y. The entry point sREM computes the floating
 //      point (IEEE) REM of the input values X and Y.
 //
 //      INPUT
 //      -----
 //      Double-extended value Y is pointed to by address in register
 //      A0. Double-extended value X is located in -12(A0). The values
 //      of X and Y are both nonzero and finite; although either or both
 //      of them can be denormalized. The special cases of zeros, NaNs,
 //      and infinities are handled elsewhere.
 //
 //      OUTPUT
 //      ------
 //      FREM(X,Y) or FMOD(X,Y), depending on entry point.
 //
 //       ALGORITHM
 //       ---------
 //
 //       Step 1.  Save and strip signs of X and Y: signX := sign(X),
 //                signY := sign(Y), X := |X|, Y := |Y|, 
 //                signQ := signX EOR signY. Record whether MOD or REM
 //                is requested.
 //
 //       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.
 //                If (L < 0) then
 //                   R := X, go to Step 4.
 //                else
 //                   R := 2^(-L)X, j := L.
 //                endif
 //
 //       Step 3.  Perform MOD(X,Y)
 //            3.1 If R = Y, go to Step 9.
 //            3.2 If R > Y, then { R := R - Y, Q := Q + 1}
 //            3.3 If j = 0, go to Step 4.
 //            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to
 //                Step 3.1.
 //
 //       Step 4.  At this point, R = X - QY = MOD(X,Y). Set
 //                Last_Subtract := false (used in Step 7 below). If
 //                MOD is requested, go to Step 6. 
 //
 //       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.
 //            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to
 //                Step 6.
 //            5.2 If R > Y/2, then { set Last_Subtract := true,
 //                Q := Q + 1, Y := signY*Y }. Go to Step 6.
 //            5.3 This is the tricky case of R = Y/2. If Q is odd,
 //                then { Q := Q + 1, signX := -signX }.
 //
 //       Step 6.  R := signX*R.
 //
 //       Step 7.  If Last_Subtract = true, R := R - Y.
 //
 //       Step 8.  Return signQ, last 7 bits of Q, and R as required.
 //
 //       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,
 //                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),
 //                R := 0. Return signQ, last 7 bits of Q, and R.
 //
 //                
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 SREM_MOD:    //idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section    8
 	.include "fpsp.defs"
 //..Scratch locations, all addressed as offsets from a6:
 //..  Mod_Flag  0 when entered through smod, 1 when entered through srem
 //..  SignY/SignX  sign/exponent words of Y and X; SignQ  sign of quotient
 //..  Sc_Flag  nonzero when the result must be multiplied by Scale on exit
 //..  Y, R  extended-format images of |Y| and the remainder R
 	.set	Mod_Flag,L_SCR3
 	.set	SignY,FP_SCR3+4
 	.set	SignX,FP_SCR3+8
 	.set	SignQ,FP_SCR3+12
 	.set	Sc_Flag,FP_SCR4
 	.set	Y,FP_SCR1
 	.set	Y_Hi,Y+4
 	.set	Y_Lo,Y+8
 	.set	R,FP_SCR2
 	.set	R_Hi,R+4
 	.set	R_Lo,R+8
 //..Extended constant with exponent field 1 and mantissa $80000000_00000000;
 //..Restore multiplies fp0 by it when Sc_Flag is set.
 Scale:     .long	0x00010000,0x80000000,0x00000000,0x00000000
 	|xref	t_avoid_unsupp
        .global        smod
 smod:
 //..MOD entry point: record Mod_Flag = 0 and join the common code.
   movel               #0,Mod_Flag(%a6)
   bras                Mod_Rem
        .global        srem
 srem:
 //..REM entry point: record Mod_Flag = 1 and fall through into Mod_Rem.
   movel               #1,Mod_Flag(%a6)
 Mod_Rem:
 //..Common body of smod/srem. a0 points to Y; X is at -12(a0).
 //..This part saves d2-d7, stores the sign/exponent word of Y in SignY and
 //..loads |Y| into (D3,D4,D5), normalizing the mantissa when the exponent
 //..field of Y is zero.
 //..Save sign of X and Y
   moveml              %d2-%d7,-(%a7)     // ...save data registers
   movew               (%a0),%d3
   movew               %d3,SignY(%a6)
   andil               #0x00007FFF,%d3   // ...Y := |Y|
 //
   movel               4(%a0),%d4
   movel               8(%a0),%d5        // ...(D3,D4,D5) is |Y|
   tstl                %d3
   bnes                Y_Normal
   movel               #0x00003FFE,%d3	// ...$3FFD + 1
   tstl                %d4
   bnes                HiY_not0
 HiY_0:
 //..High mantissa word is zero: move the low word up (32-bit shift) and
 //..then shift out its leading zeros, adjusting the exponent to match.
   movel               %d5,%d4
   clrl                %d5
   subil               #32,%d3
   clrl                %d6
   bfffo                %d4{#0:#32},%d6
   lsll                %d6,%d4
   subl                %d6,%d3           // ...(D3,D4,D5) is normalized
 //                                       ...with bias $7FFD
   bras                Chk_X
 HiY_not0:
 //..Shift the 64-bit mantissa left by the number of leading zeros in D4;
 //..the bits shifted out of D5 are merged into D4 through D7.
   clrl                %d6
   bfffo                %d4{#0:#32},%d6
   subl                %d6,%d3
   lsll                %d6,%d4
   movel               %d5,%d7           // ...a copy of D5
   lsll                %d6,%d5
   negl                %d6
   addil               #32,%d6
   lsrl                %d6,%d7
   orl                 %d7,%d4           // ...(D3,D4,D5) normalized
 //                                       ...with bias $7FFD
   bras                Chk_X
 Y_Normal:
   addil               #0x00003FFE,%d3   // ...(D3,D4,D5) normalized
 //                                       ...with bias $7FFD
 Chk_X:
 //..Store the sign/exponent word of X in SignX, derive SignQ as the
 //..exclusive-or of the two sign bits, and load |X| into (D0,D1,D2),
 //..normalizing the mantissa when the exponent field of X is zero.
   movew               -12(%a0),%d0
   movew               %d0,SignX(%a6)
   movew               SignY(%a6),%d1
   eorl                %d0,%d1
   andil               #0x00008000,%d1
   movew               %d1,SignQ(%a6)	// ...sign(Q) obtained
   andil               #0x00007FFF,%d0
   movel               -8(%a0),%d1
   movel               -4(%a0),%d2       // ...(D0,D1,D2) is |X|
   tstl                %d0
   bnes                X_Normal
   movel               #0x00003FFE,%d0
   tstl                %d1
   bnes                HiX_not0
 HiX_0:
 //..High mantissa word is zero: move the low word up and normalize it.
   movel               %d2,%d1
   clrl                %d2
   subil               #32,%d0
   clrl                %d6
   bfffo                %d1{#0:#32},%d6
   lsll                %d6,%d1
   subl                %d6,%d0           // ...(D0,D1,D2) is normalized
 //                                       ...with bias $7FFD
   bras                Init
 HiX_not0:
 //..Shift the 64-bit mantissa left by the leading-zero count of D1.
   clrl                %d6
   bfffo                %d1{#0:#32},%d6
   subl                %d6,%d0
   lsll                %d6,%d1
   movel               %d2,%d7           // ...a copy of D2
   lsll                %d6,%d2
   negl                %d6
   addil               #32,%d6
   lsrl                %d6,%d7
   orl                 %d7,%d1           // ...(D0,D1,D2) normalized
 //                                       ...with bias $7FFD
   bras                Init
 X_Normal:
   addil               #0x00003FFE,%d0   // ...(D0,D1,D2) normalized
 //                                       ...with bias $7FFD
 Init:
 //..Set up the division loop: L_SCR1 := expo(Y), L_SCR2 := expo(X),
 //..D0 := L = expo(X)-expo(Y), D6 (carry) := 0, D3 (Q) := 0, A1 (k) := 0.
 //..A negative L skips the loop and goes straight to Get_Mod.
 //
   movel               %d3,L_SCR1(%a6)   // ...save biased expo(Y)
   movel		%d0,L_SCR2(%a6)	//save d0
   subl                %d3,%d0           // ...L := expo(X)-expo(Y)
 //   Move.L               D0,L            ...D0 is j
   clrl                %d6              // ...D6 := carry <- 0
   clrl                %d3              // ...D3 is Q
   moveal              #0,%a1           // ...A1 is k; j+k=L, Q=0
 //..(Carry,D1,D2) is R
   tstl                %d0
   bges                Mod_Loop
 //..expo(X) < expo(Y). Thus X = mod(X,Y)
 //
   movel		L_SCR2(%a6),%d0	//restore d0
   bra                Get_Mod
 //..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
 Mod_Loop:
 //..One iteration of the shift-and-subtract loop. Registers:
 //..  (D6,D1,D2) = (Carry,R), (D4,D5) = Y mantissa, D3 = Q, D0 = j, A1 = k.
 //..Exits to Rem_is_0 when R = Y and to PostLoop when j reaches 0.
   tstl                %d6              // ...test carry bit
   bgts                R_GT_Y
 //..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
   cmpl                %d4,%d1           // ...compare hi(R) and hi(Y)
   bnes                R_NE_Y
   cmpl                %d5,%d2           // ...compare lo(R) and lo(Y)
   bnes                R_NE_Y
 //..At this point, R = Y
   bra                Rem_is_0
 R_NE_Y:
 //..use the borrow of the previous compare
   bcss                R_LT_Y          // ...borrow is set iff R < Y
 R_GT_Y:
 //..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
 //..and Y < (D1,D2) < 2Y. Either way, perform R - Y
   subl                %d5,%d2           // ...lo(R) - lo(Y)
   subxl               %d4,%d1           // ...hi(R) - hi(Y)
   clrl                %d6              // ...clear carry
   addql               #1,%d3           // ...Q := Q + 1
 R_LT_Y:
 //..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
   tstl                %d0              // ...see if j = 0.
   beqs                PostLoop
   addl                %d3,%d3           // ...Q := 2Q
   addl                %d2,%d2           // ...lo(R) = 2lo(R)
   roxll               #1,%d1           // ...hi(R) = 2hi(R) + carry
   scs                  %d6              // ...set Carry if 2(R) overflows
   addql               #1,%a1           // ...k := k+1
   subql               #1,%d0           // ...j := j - 1
 //..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
   bras                Mod_Loop
 PostLoop:
 //..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
 //..normalize R.
 //..D0 is reloaded with expo(Y) from L_SCR1 and reduced by the number of
 //..bit positions the mantissa (D1,D2) is shifted left.
   movel               L_SCR1(%a6),%d0           // ...new biased expo of R
   tstl                %d1
   bnes                HiR_not0
 HiR_0:
 //..High mantissa word is zero: move the low word up and normalize it.
   movel               %d2,%d1
   clrl                %d2
   subil               #32,%d0
   clrl                %d6
   bfffo                %d1{#0:#32},%d6
   lsll                %d6,%d1
   subl                %d6,%d0           // ...(D0,D1,D2) is normalized
 //                                       ...with bias $7FFD
   bras                Get_Mod
 HiR_not0:
   clrl                %d6
   bfffo                %d1{#0:#32},%d6
   bmis                Get_Mod         // ...already normalized
   subl                %d6,%d0
   lsll                %d6,%d1
   movel               %d2,%d7           // ...a copy of D2
   lsll                %d6,%d2
   negl                %d6
   addil               #32,%d6
   lsrl                %d6,%d7
   orl                 %d7,%d1           // ...(D0,D1,D2) normalized
 //
 Get_Mod:
 //..Build the extended images R and Y in scratch memory and load R into fp0.
 //..If the working exponent of R is below $41FE, the exponents are stored as
 //..they are and Sc_Flag := 1 (Restore then multiplies by Scale); otherwise
 //..$3FFE is subtracted from both exponents first and Sc_Flag := 0.
   cmpil		#0x000041FE,%d0
   bges		No_Scale
 Do_Scale:
   movew		%d0,R(%a6)
   clrw		R+2(%a6)
   movel		%d1,R_Hi(%a6)
   movel		%d2,R_Lo(%a6)
   movel		L_SCR1(%a6),%d6
   movew		%d6,Y(%a6)
   clrw		Y+2(%a6)
   movel		%d4,Y_Hi(%a6)
   movel		%d5,Y_Lo(%a6)
   fmovex		R(%a6),%fp0		// ...no exception
   movel		#1,Sc_Flag(%a6)
   bras		ModOrRem
 No_Scale:
   movel		%d1,R_Hi(%a6)
   movel		%d2,R_Lo(%a6)
   subil		#0x3FFE,%d0		// ...remove the extra bias from expo(R)
   movew		%d0,R(%a6)
   clrw		R+2(%a6)
   movel		L_SCR1(%a6),%d6
   subil		#0x3FFE,%d6		// ...and from expo(Y)
   movel		%d6,L_SCR1(%a6)	// ...ModOrRem reads expo(Y) from L_SCR1
   fmovex		R(%a6),%fp0
   movew		%d6,Y(%a6)
   movel		%d4,Y_Hi(%a6)
   movel		%d5,Y_Lo(%a6)
   movel		#0,Sc_Flag(%a6)
 //
 ModOrRem:
 //..For MOD (Mod_Flag = 0) go straight to Fix_Sign. For REM compare R
 //..with Y/2: exponents first (D0 against expo(Y)-1), then mantissas.
 //..R < Y/2 -> Fix_Sign, R > Y/2 -> Last_Sub, R = Y/2 -> Tie_Case.
   movel               Mod_Flag(%a6),%d6
   beqs                Fix_Sign
   movel               L_SCR1(%a6),%d6           // ...new biased expo(Y)
   subql               #1,%d6           // ...biased expo(Y/2)
   cmpl                %d6,%d0
   blts                Fix_Sign
   bgts                Last_Sub
   cmpl                %d4,%d1
   bnes                Not_EQ
   cmpl                %d5,%d2
   bnes                Not_EQ
   bra                Tie_Case
 Not_EQ:
   bcss                Fix_Sign        // ...borrow from the compare: R < Y/2
 Last_Sub:
 //
   fsubx		Y(%a6),%fp0		// ...no exceptions
   addql               #1,%d3           // ...Q := Q + 1
 //
 Fix_Sign:
 //..Negate fp0 when the word stored in SignX is negative, then pack the
 //..quotient byte (sign of Q in bit 7, low 7 bits of Q) into bits 16-23
 //..of the fpsr.
 //..Get sign of X
   movew               SignX(%a6),%d6
   bges		Get_Q
   fnegx		%fp0
 //..Get Q
 //
 Get_Q:
   clrl		%d6		
   movew               SignQ(%a6),%d6        // ...D6 is sign(Q)
   movel               #8,%d7
   lsrl                %d7,%d6           // ...move the sign from bit 15 to bit 7
   andil               #0x0000007F,%d3   // ...7 bits of Q
   orl                 %d6,%d3           // ...sign and bits of Q
   swap                 %d3              // ...quotient byte now in bits 16-23
   fmovel              %fpsr,%d6
   andil               #0xFF00FFFF,%d6   // ...clear the old byte in bits 16-23
   orl                 %d3,%d6
   fmovel              %d6,%fpsr         // ...put Q in fpsr
 //
 Restore:
 //..Common exit: restore d2-d7 and reload the fpcr from USER_FPCR. With
 //..Sc_Flag set, fp0 is multiplied by Scale and control passes to
 //..t_avoid_unsupp; otherwise fp0 is moved onto itself and we return.
   moveml              (%a7)+,%d2-%d7
   fmovel              USER_FPCR(%a6),%fpcr
   movel               Sc_Flag(%a6),%d0
   beqs                Finish
   fmulx		Scale(%pc),%fp0	// ...may cause underflow
   bra			t_avoid_unsupp	//check for denorm as a
 //					;result of the scaling
 Finish:
 	fmovex		%fp0,%fp0		//capture exceptions & round
 	rts
 Rem_is_0:
 //..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
 //..D3 becomes (Q+1) shifted left by j, or 0 when j >= 8; fp0 is loaded
 //..with zero, Sc_Flag is cleared and the exit continues at Fix_Sign.
   addql               #1,%d3
   cmpil               #8,%d0           // ...D0 is j 
   bges                Q_Big
   lsll                %d0,%d3
   bras                Set_R_0
 Q_Big:
   clrl                %d3
 Set_R_0:
   fmoves		#0x00000000,%fp0
   movel		#0,Sc_Flag(%a6)
   bra                Fix_Sign
 Tie_Case:
 //..Reached from ModOrRem when R compares equal to Y/2. An even Q leaves
 //..everything unchanged; an odd Q is incremented and the sign bit of the
 //..word stored in SignX is flipped before continuing at Fix_Sign.
 //..Check parity of Q
   movel               %d3,%d6
   andil               #0x00000001,%d6   // ...isolate bit 0 of Q; the AND sets Z
   beq                Fix_Sign	// ...Q is even
 //..Q is odd, Q := Q + 1, signX := -signX
   addql               #1,%d3
   movew               SignX(%a6),%d6
   eoril               #0x00008000,%d6
   movew               %d6,SignX(%a6)
   bra                Fix_Sign
   //end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/ssin.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/ssin.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/ssinh.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/ssinh.s
@@ -0,0 +1,135 @@
 //
 //	ssinh.sa 3.1 12/10/90
 //
 //       The entry point sSinh computes the hyperbolic sine of
 //       an input argument; sSinhd does the same except for denormalized
 //       input.
 //
 //       Input: Double-extended number X in location pointed to 
 //		by address register a0.
 //
 //       Output: The value sinh(X) returned in floating-point register Fp0.
 //
 //       Accuracy and Monotonicity: The returned result is within 3 ulps in
 //               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
 //               result is subsequently rounded to double precision. The
 //               result is provably monotonic in double precision.
 //
 //       Speed: The program sSINH takes approximately 280 cycles.
 //
 //       Algorithm:
 //
 //       SINH
 //       1. If |X| > 16380 log2, go to 3.
 //
 //       2. (|X| <= 16380 log2) Sinh(X) is obtained by the formulae
 //               y = |X|, sgn = sign(X), and z = expm1(Y),
 //               sinh(X) = sgn*(1/2)*( z + z/(1+z) ).
 //          Exit.
 //
 //       3. If |X| > 16480 log2, go to 5.
 //
 //       4. (16380 log2 < |X| <= 16480 log2)
 //               sinh(X) = sign(X) * exp(|X|)/2.
 //          However, invoking exp(|X|) may cause premature overflow.
 //          Thus, we calculate sinh(X) as follows:
 //             Y       := |X|
 //             sgn     := sign(X)
 //             sgnFact := sgn * 2**(16380)
 //             Y'      := Y - 16381 log2
 //             sinh(X) := sgnFact * exp(Y').
 //          Exit.
 //
 //       5. (|X| > 16480 log2) sinh(X) must overflow. Return
 //          sign(X)*Huge*Huge to generate overflow and an infinity with
 //          the appropriate sign. Huge is the largest finite number in
 //          extended format. Exit.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //SSINH	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 //--Double-precision pair subtracted from |X| (fsubd) in SINHBIG,
 //--leading part first and trailing part second.
 T1:	.long 0x40C62D38,0xD3D64634 // ... 16381 LOG2 LEAD
 T2:	.long 0x3D6F90AE,0xB1E75CC7 // ... 16381 LOG2 TRAIL
 	|xref	t_frcinx
 	|xref	t_ovfl
 	|xref	t_extdnrm
 	|xref	setox
 	|xref	setoxm1
 	.global	ssinhd
 ssinhd:
 //--SINH(X) = X FOR DENORMALIZED X
 //--Denormalized-input entry point: simply branches to t_extdnrm.
 	bra	t_extdnrm
 	.global	ssinh
 ssinh:
 //--In:  a0 points to X (extended); d1 is the value loaded into the fpcr
 //--     before the final multiply.
 //--Out: fp0 = result, exit via t_frcinx. Large |X| is handled by SINHBIG.
 //--Note: the buffer at (a0) is overwritten with |X| before setoxm1 is called.
 	fmovex	(%a0),%fp0	// ...LOAD INPUT
 	movel	(%a0),%d0
 	movew	4(%a0),%d0	// d0 = sign/exponent word : top 16 mantissa bits
 	movel	%d0,%a1		// save a copy of original (compacted) operand
 	andl	#0x7FFFFFFF,%d0	// drop the sign for the magnitude compare
 	cmpl	#0x400CB167,%d0
 	bgts	SINHBIG
 //--THIS IS THE USUAL CASE, |X| < 16380 LOG2
 //--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
 	fabsx	%fp0		// ...Y = |X|
 	moveml	%a1/%d1,-(%sp)	// keep a1 and d1 across the call
 	fmovemx %fp0-%fp0,(%a0)	// store Y where a0 points, for setoxm1
 	clrl	%d1		// setoxm1 is entered with d1 = 0
 	bsr	setoxm1	 	// ...FP0 IS Z = EXPM1(Y)
 	fmovel	#0,%fpcr
 	moveml	(%sp)+,%a1/%d1
 	fmovex	%fp0,%fp1
 	fadds	#0x3F800000,%fp1	// ...1+Z
 	fmovex	%fp0,-(%sp)	// keep Z on the stack
 	fdivx	%fp1,%fp0		// ...Z/(1+Z)
 	movel	%a1,%d0
 	andl	#0x80000000,%d0	// sign bit of the original operand
 	orl	#0x3F000000,%d0	// single-precision 0.5 carrying that sign
 	faddx	(%sp)+,%fp0	// Z + Z/(1+Z)
 	movel	%d0,-(%sp)
 	fmovel	%d1,%fpcr
 	fmuls	(%sp)+,%fp0	//last fp inst - possible exceptions set
 	bra	t_frcinx
 SINHBIG:
 //--Entered from ssinh with d0 = compact |X| and a1 = compact X.
 //--Above the second bound the operand goes to t_ovfl. Otherwise an
 //--extended factor with exponent $7FFB and the sign of X is built on the
 //--stack, setox is called on |X| - 16381 LOG2, and the two are multiplied.
 	cmpl	#0x400CB2B3,%d0
 	bgt	t_ovfl
 	fabsx	%fp0
 	fsubd	T1(%pc),%fp0	// ...(|X|-16381LOG2_LEAD)
 	movel	#0,-(%sp)	// low mantissa longword of the factor
 	movel	#0x80000000,-(%sp)	// high mantissa longword of the factor
 	movel	%a1,%d0
 	andl	#0x80000000,%d0
 	orl	#0x7FFB0000,%d0
 	movel	%d0,-(%sp)	// ...EXTENDED FMT
 	fsubd	T2(%pc),%fp0	// ...|X| - 16381 LOG2, ACCURATE
 	movel	%d1,-(%sp)	// save d1 for the fpcr reload below
 	clrl	%d1		// setox is entered with d1 = 0
 	fmovemx %fp0-%fp0,(%a0)	// store the reduced argument where a0 points
 	bsr	setox
 	fmovel	(%sp)+,%fpcr	// fpcr := saved d1
 	fmulx	(%sp)+,%fp0	//possible exception
 	bra	t_frcinx
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/stan.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/stan.s
@@ -0,0 +1,455 @@
 //
 //	stan.sa 3.3 7/29/91
 //
 //	The entry point stan computes the tangent of
 //	an input argument;
 //	stand does the same except for denormalized input.
 //
 //	Input: Double-extended number X in location pointed to
 //		by address register a0.
 //
 //	Output: The value tan(X) returned in floating-point register Fp0.
 //
 //	Accuracy and Monotonicity: The returned result is within 3 ulp in
 //		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
 //		result is subsequently rounded to double precision. The
 //		result is provably monotonic in double precision.
 //
 //	Speed: The program sTAN takes approximately 170 cycles for
 //		input argument X such that |X| < 15Pi, which is the usual
 //		situation.
 //
 //	Algorithm:
 //
 //	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
 //
 //	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
 //		k = N mod 2, so in particular, k = 0 or 1.
 //
 //	3. If k is odd, go to 5.
 //
 //	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
 //		rational function U/V where
 //		U = r + r*s*(P1 + s*(P2 + s*P3)), and
 //		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.
 //		Exit.
 //
 //	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
 //		rational function U/V where
 //		U = r + r*s*(P1 + s*(P2 + s*P3)), and
 //		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
 //		-Cot(r) = -V/U. Exit.
 //
 //	6. If |X| > 1, go to 8.
 //
 //	7. (|X|<2**(-40)) Tan(X) = X. Exit.
 //
 //	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //STAN	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 //--Constants for stan. BOUNDS1 holds the same two values that stan
 //--compares against as immediates (2**(-40) and 15 PI in compact form).
 //--TWOBYPI, TANQ4, TANP3 and TANQ3 are read as double precision; TANP2,
 //--TANQ2, TANP1 and TANQ1 are read as extended precision.
 //--NOTE(review): INVTWOPI, TWOPI1 and TWOPI2 are not referenced by the
 //--code visible here -- confirm whether they are still needed.
 BOUNDS1:	.long 0x3FD78000,0x4004BC7E
 TWOBYPI:	.long 0x3FE45F30,0x6DC9C883
 TANQ4:	.long 0x3EA0B759,0xF50F8688
 TANP3:	.long 0xBEF2BAA5,0xA8924F04
 TANQ3:	.long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
 TANP2:	.long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
 TANQ2:	.long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
 TANP1:	.long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
 TANQ1:	.long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
 INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
 TWOPI1:	.long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
 TWOPI2:	.long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
 //--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
 //--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
 //--MOST 69 BITS LONG.
 	.global	PITBL
 //--65 entries of 16 bytes each: three longwords of extended-precision
 //--leading term followed by one longword of single-precision trailing term.
 //--The all-zero entry (N = 0) sits at byte offset 0x200, which is the
 //--address TANMAIN indexes from.
 PITBL:
  .long  0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
  .long  0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
  .long  0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
  .long  0xC0040000,0xB6365E22,0xEE46F000,0x21480000
  .long  0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
  .long  0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
  .long  0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
  .long  0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
  .long  0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
  .long  0xC0040000,0x90836524,0x88034B96,0x20B00000
  .long  0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
  .long  0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
  .long  0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
  .long  0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
  .long  0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
  .long  0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
  .long  0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
  .long  0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
  .long  0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
  .long  0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
  .long  0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
  .long  0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
  .long  0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
  .long  0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
  .long  0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
  .long  0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
  .long  0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
  .long  0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
  .long  0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
  .long  0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
  .long  0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
  .long  0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
  .long  0x00000000,0x00000000,0x00000000,0x00000000
  .long  0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
  .long  0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
  .long  0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
  .long  0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
  .long  0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
  .long  0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
  .long  0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
  .long  0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
  .long  0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
  .long  0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
  .long  0x40030000,0x8A3AE64F,0x76F80584,0x21080000
  .long  0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
  .long  0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
  .long  0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
  .long  0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
  .long  0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
  .long  0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
  .long  0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
  .long  0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
  .long  0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
  .long  0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
  .long  0x40040000,0x8A3AE64F,0x76F80584,0x21880000
  .long  0x40040000,0x90836524,0x88034B96,0xA0B00000
  .long  0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
  .long  0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
  .long  0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
  .long  0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
  .long  0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
  .long  0x40040000,0xB6365E22,0xEE46F000,0xA1480000
  .long  0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
  .long  0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
  .long  0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
 //--Scratch locations used by the REDUCEX loop, addressed off a6:
 //--  INARG    copy of the current argument (extended)
 //--  TWOTO63  single-precision SIGN(INARG)*2**63
 //--  ENDFLAG  1 on the last reduction pass, 0 otherwise
 //--  N        integer quotient stored by RESTORE
 	.set	INARG,FP_SCR4
 	.set	TWOTO63,L_SCR1
 	.set	ENDFLAG,L_SCR2
 	.set	N,L_SCR3
 	| xref	t_frcinx
 	|xref	t_extdnrm
 	.global	stand
 stand:
 //--TAN(X) = X FOR DENORMALIZED X
 //--Denormalized-input entry point: simply branches to t_extdnrm.
 	bra		t_extdnrm
 	.global	stan
 stan:
 //--In:  a0 points to X (extended); d1 is the value loaded into the fpcr
 //--     before the last floating-point instruction.
 //--Out: fp0 = result, exit via t_frcinx.
 //--Classifies |X| in compact form: below 2**(-40) -> TANSM,
 //--below 15 PI -> TANMAIN, otherwise -> REDUCEX.
 	fmovex		(%a0),%fp0	// ...LOAD INPUT
 	movel		(%a0),%d0
 	movew		4(%a0),%d0	// ...d0 = sign/exponent : top 16 mantissa bits
 	andil		#0x7FFFFFFF,%d0	// ...drop the sign
 	cmpil		#0x3FD78000,%d0		// ...|X| >= 2**(-40)?
 	bges		TANOK1
 	bra		TANSM
 TANOK1:
 	cmpil		#0x4004BC7E,%d0		// ...|X| < 15 PI?
 	blts		TANMAIN
 	bra		REDUCEX
 TANMAIN:
 //--THIS IS THE USUAL CASE, |X| < 15 PI.
 //--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
 //--On exit fp0 = R and bit 31 of d0 is set iff N is odd; falls through
 //--into TANCONT.
 	fmovex		%fp0,%fp1
 	fmuld		TWOBYPI,%fp1	// ...X*2/PI
 //--HIDE THE NEXT TWO INSTRUCTIONS
 	leal		PITBL+0x200,%a1 // ...TABLE OF N*PI/2, N = -32,...,32
 //--FP1 IS NOW READY
 	fmovel		%fp1,%d0		// ...CONVERT TO INTEGER
 	asll		#4,%d0		// ...16 bytes per table entry
 	addal		%d0,%a1		// ...ADDRESS N*PIBY2 IN Y1, Y2
 	fsubx		(%a1)+,%fp0	// ...X-Y1
 //--HIDE THE NEXT ONE
 	fsubs		(%a1),%fp0	// ...FP0 IS R = (X-Y1)-Y2
 	rorl		#5,%d0		// ...bit 0 of N moves to bit 31
 	andil		#0x80000000,%d0	// ...D0 WAS ODD IFF D0 < 0
 TANCONT:
 //--Entered with fp0 = R and d0 negative iff N is odd (odd N goes to NODD).
 //--Even N: evaluates U = R+RS(P1+S(P2+SP3)) and
 //--V = 1+S(Q1+S(Q2+S(Q3+SQ4))) with S = R*R, then returns U/V.
 //--Uses fp1, fp2 and fp3 as work registers.
 	cmpil		#0,%d0
 	blt		NODD
 	fmovex		%fp0,%fp1
 	fmulx		%fp1,%fp1	 	// ...S = R*R
 	fmoved		TANQ4,%fp3
 	fmoved		TANP3,%fp2
 	fmulx		%fp1,%fp3	 	// ...SQ4
 	fmulx		%fp1,%fp2	 	// ...SP3
 	faddd		TANQ3,%fp3	// ...Q3+SQ4
 	faddx		TANP2,%fp2	// ...P2+SP3
 	fmulx		%fp1,%fp3	 	// ...S(Q3+SQ4)
 	fmulx		%fp1,%fp2	 	// ...S(P2+SP3)
 	faddx		TANQ2,%fp3	// ...Q2+S(Q3+SQ4)
 	faddx		TANP1,%fp2	// ...P1+S(P2+SP3)
 	fmulx		%fp1,%fp3	 	// ...S(Q2+S(Q3+SQ4))
 	fmulx		%fp1,%fp2	 	// ...S(P1+S(P2+SP3))
 	faddx		TANQ1,%fp3	// ...Q1+S(Q2+S(Q3+SQ4))
 	fmulx		%fp0,%fp2	 	// ...RS(P1+S(P2+SP3))
 	fmulx		%fp3,%fp1	 	// ...S(Q1+S(Q2+S(Q3+SQ4)))
 	faddx		%fp2,%fp0	 	// ...R+RS(P1+S(P2+SP3))
 	fadds		#0x3F800000,%fp1	// ...1+S(Q1+...)
 	fmovel		%d1,%fpcr		//restore users exceptions
 	fdivx		%fp1,%fp0		//last inst - possible exception set
 	bra		t_frcinx
 NODD:
 //--Odd N: same polynomials as the even case, but with the roles of fp0
 //--and fp1 swapped (fp1 = R, fp0 = S). The numerator U is pushed, its
 //--sign bit is flipped in memory, and the result is V/(-U).
 	fmovex		%fp0,%fp1
 	fmulx		%fp0,%fp0	 	// ...S = R*R
 	fmoved		TANQ4,%fp3
 	fmoved		TANP3,%fp2
 	fmulx		%fp0,%fp3	 	// ...SQ4
 	fmulx		%fp0,%fp2	 	// ...SP3
 	faddd		TANQ3,%fp3	// ...Q3+SQ4
 	faddx		TANP2,%fp2	// ...P2+SP3
 	fmulx		%fp0,%fp3	 	// ...S(Q3+SQ4)
 	fmulx		%fp0,%fp2	 	// ...S(P2+SP3)
 	faddx		TANQ2,%fp3	// ...Q2+S(Q3+SQ4)
 	faddx		TANP1,%fp2	// ...P1+S(P2+SP3)
 	fmulx		%fp0,%fp3	 	// ...S(Q2+S(Q3+SQ4))
 	fmulx		%fp0,%fp2	 	// ...S(P1+S(P2+SP3))
 	faddx		TANQ1,%fp3	// ...Q1+S(Q2+S(Q3+SQ4))
 	fmulx		%fp1,%fp2	 	// ...RS(P1+S(P2+SP3))
 	fmulx		%fp3,%fp0	 	// ...S(Q1+S(Q2+S(Q3+SQ4)))
 	faddx		%fp2,%fp1	 	// ...R+RS(P1+S(P2+SP3))
 	fadds		#0x3F800000,%fp0	// ...1+S(Q1+...)
 	fmovex		%fp1,-(%sp)	// ...U goes to the stack
 	eoril		#0x80000000,(%sp)	// ...flip its sign bit: -U
 	fmovel		%d1,%fpcr	 	//restore users exceptions
 	fdivx		(%sp)+,%fp0	//last inst - possible exception set
 	bra		t_frcinx
 TANBORS:
 //--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
 //--IF |X| < 2**(-40), RETURN X OR 1.
 //--NOTE(review): no branch to TANBORS is visible in this file section;
 //--stan goes to TANSM and REDUCEX directly -- confirm this label is unused.
 	cmpil		#0x3FFF8000,%d0
 	bgts		REDUCEX
 TANSM:
 //--Tiny argument: the result is X itself. X is stored and reloaded after
 //--the fpcr has been loaded from d1, so that the reload is the last
 //--floating-point instruction.
 	fmovex		%fp0,-(%sp)
 	fmovel		%d1,%fpcr		 //restore users exceptions
 	fmovex		(%sp)+,%fp0	//last inst - possible exception set
 	bra		t_frcinx
 REDUCEX:
 //--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
 //--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
 //--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
 //--Saves fp2-fp5 and d2 on the stack (RESTORE pops them) and clears fp1,
 //--the low part of the running remainder.
 	fmovemx	%fp2-%fp5,-(%a7)	// ...save FP2 through FP5
 	movel		%d2,-(%a7)
        fmoves         #0x00000000,%fp1
 //--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
 //--there is a danger of unwanted overflow in first LOOP iteration.  In this
 //--case, reduce argument by one remainder step to make subsequent reduction
 //--safe.
 	cmpil	#0x7ffeffff,%d0		//is argument dangerously large?
 	bnes	LOOP
 	movel	#0x7ffe0000,FP_SCR2(%a6)	//yes
 //					;create 2**16383*PI/2
 	movel	#0xc90fdaa2,FP_SCR2+4(%a6)
 	clrl	FP_SCR2+8(%a6)
 	ftstx	%fp0			//test sign of argument
 	movel	#0x7fdc0000,FP_SCR3(%a6)	//create low half of 2**16383*
 //					;PI/2 at FP_SCR3
 	movel	#0x85a308d3,FP_SCR3+4(%a6)
 	clrl   FP_SCR3+8(%a6)
 	fblt	red_neg
 	orw	#0x8000,FP_SCR2(%a6)	//positive arg
 	orw	#0x8000,FP_SCR3(%a6)	//both constants negated, so the adds subtract
 red_neg:
 	faddx  FP_SCR2(%a6),%fp0		//high part of reduction is exact
 	fmovex  %fp0,%fp1		//save high result in fp1
 	faddx  FP_SCR3(%a6),%fp0		//low part of reduction
 	fsubx  %fp0,%fp1			//determine low component of result
 	faddx  FP_SCR3(%a6),%fp1		//fp0/fp1 are reduced argument.
 //--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
 //--integer quotient will be stored in N
 //--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
 LOOP:
 //--Top of one reduction pass. Extracts K, the unbiased exponent of fp0,
 //--and chooses L: K-27 with ENDFLAG = 0 while K > 28, otherwise L = 0 with
 //--ENDFLAG = 1 (last pass). a1 keeps the sign/exponent word for WORK.
 	fmovex		%fp0,INARG(%a6)	// ...+-2**K * F, 1 <= F < 2
 	movew		INARG(%a6),%d0
        movel          %d0,%a1		// ...save a copy of D0
 	andil		#0x00007FFF,%d0
 	subil		#0x00003FFF,%d0	// ...D0 IS K
 	cmpil		#28,%d0
 	bles		LASTLOOP
 CONTLOOP:
 	subil		#27,%d0	 // ...D0 IS L := K-27
 	movel		#0,ENDFLAG(%a6)
 	bras		WORK
 LASTLOOP:
 	clrl		%d0		// ...D0 IS L := 0
 	movel		#1,ENDFLAG(%a6)
 WORK:
 //--One reduction step. On entry d0 = L, a1 = sign/exponent word of the
 //--argument, (fp0,fp1) = (R,r). On exit (fp0,fp1) holds the new (R,r) and
 //--fp2 holds N; control returns to LOOP, or goes to RESTORE on the last
 //--pass. Scratch: d2, fp3-fp5, FP_SCR1-FP_SCR3, TWOTO63.
 //--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
 //--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
 //--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
 //--2**L * (PIby2_1), 2**L * (PIby2_2)
 	movel		#0x00003FFE,%d2	// ...BIASED EXPO OF 2/PI
 	subl		%d0,%d2		// ...BIASED EXPO OF 2**(-L)*(2/PI)
 	movel		#0xA2F9836E,FP_SCR1+4(%a6)
 	movel		#0x4E44152A,FP_SCR1+8(%a6)
 	movew		%d2,FP_SCR1(%a6)	// ...FP_SCR1 is 2**(-L)*(2/PI)
 	fmovex		%fp0,%fp2
 	fmulx		FP_SCR1(%a6),%fp2
 //--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
 //--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
 //--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
 //--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
 //--US THE DESIRED VALUE IN FLOATING POINT.
 //--HIDE SIX CYCLES OF INSTRUCTION
        movel		%a1,%d2
        swap		%d2
 	andil		#0x80000000,%d2
 	oril		#0x5F000000,%d2	// ...D2 IS SIGN(INARG)*2**63 IN SGL
 	movel		%d2,TWOTO63(%a6)
 	movel		%d0,%d2
 	addil		#0x00003FFF,%d2	// ...BIASED EXPO OF 2**L * (PI/2)
 //--FP2 IS READY
 	fadds		TWOTO63(%a6),%fp2	// ...THE FRACTIONAL PART OF FP2 IS ROUNDED
 //--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
        movew		%d2,FP_SCR2(%a6)
 	clrw           FP_SCR2+2(%a6)
 	movel		#0xC90FDAA2,FP_SCR2+4(%a6)
 	clrl		FP_SCR2+8(%a6)		// ...FP_SCR2 is  2**(L) * Piby2_1	
 //--FP2 IS READY
 	fsubs		TWOTO63(%a6),%fp2		// ...FP2 is N
 	addil		#0x00003FDD,%d0
        movew		%d0,FP_SCR3(%a6)
 	clrw           FP_SCR3+2(%a6)
 	movel		#0x85A308D3,FP_SCR3+4(%a6)
 	clrl		FP_SCR3+8(%a6)		// ...FP_SCR3 is 2**(L) * Piby2_2
 	movel		ENDFLAG(%a6),%d0
 //--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
 //--P2 = 2**(L) * Piby2_2
 	fmovex		%fp2,%fp4
 	fmulx		FP_SCR2(%a6),%fp4		// ...W = N*P1
 	fmovex		%fp2,%fp5
 	fmulx		FP_SCR3(%a6),%fp5		// ...w = N*P2
 	fmovex		%fp4,%fp3
 //--we want P+p = W+w  but  |p| <= half ulp of P
 //--Then, we need to compute  A := R-P   and  a := r-p
 	faddx		%fp5,%fp3			// ...FP3 is P
 	fsubx		%fp3,%fp4			// ...W-P
 	fsubx		%fp3,%fp0			// ...FP0 is A := R - P
        faddx		%fp5,%fp4			// ...FP4 is p = (W-P)+w
 	fmovex		%fp0,%fp3			// ...FP3 A
 	fsubx		%fp4,%fp1			// ...FP1 is a := r - p
 //--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
 //--|r| <= half ulp of R.
 	faddx		%fp1,%fp0			// ...FP0 is R := A+a
 //--No need to calculate r if this is the last loop
 	cmpil		#0,%d0
 	bgt		RESTORE
 //--Need to calculate r
 	fsubx		%fp0,%fp3			// ...A-R
 	faddx		%fp3,%fp1			// ...FP1 is r := (A-R)+a
 	bra		LOOP
 RESTORE:
 //--End of REDUCEX: store N as an integer, pop d2 and fp2-fp5 saved on
 //--entry, and rotate bit 0 of N into bit 31 of d0 so that TANCONT sees a
 //--negative d0 exactly when N is odd.
        fmovel		%fp2,N(%a6)
 	movel		(%a7)+,%d2
 	fmovemx	(%a7)+,%fp2-%fp5
 	movel		N(%a6),%d0
        rorl		#1,%d0
 	bra		TANCONT
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/stanh.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/stanh.s
@@ -0,0 +1,185 @@
 //
 //	stanh.sa 3.1 12/10/90
 //
 //	The entry point sTanh computes the hyperbolic tangent of
 //	an input argument; sTanhd does the same except for denormalized
 //	input.
 //
 //	Input: Double-extended number X in location pointed to
 //		by address register a0.
 //
 //	Output: The value tanh(X) returned in floating-point register Fp0.
 //
 //	Accuracy and Monotonicity: The returned result is within 3 ulps in
 //		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
 //		result is subsequently rounded to double precision. The
 //		result is provably monotonic in double precision.
 //
 //	Speed: The program stanh takes approximately 270 cycles.
 //
 //	Algorithm:
 //
 //	TANH
 //	1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
 //
 //	2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
 //		sgn := sign(X), y := 2|X|, z := expm1(y), and
 //		tanh(X) = sgn*( z/(2+z) ).
 //		Exit.
 //
 //	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
 //		go to 7.
 //
 //	4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
 //
 //	5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
 //		sgn := sign(X), y := 2|X|, z := exp(y),
 //		tanh(X) = sgn - [ sgn*2/(1+z) ].
 //		Exit.
 //
 //	6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
 //		calculate Tanh(X) by
 //		sgn := sign(X), Tiny := 2**(-126),
 //		tanh(X) := sgn - sgn*Tiny.
 //		Exit.
 //
 //	7. (|X| < 2**(-40)). Tanh(X) = X.	Exit.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //STANH	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	.set	X,FP_SCR5	// working copy of the argument; later holds 2|X|
 	.set	XDCARE,X+2	// word between the sign/exponent word and the mantissa
 	.set	XFRAC,X+4	// mantissa longwords of X (not referenced in this file)
 	.set	SGN,L_SCR3	// sign bit of the argument (0 or 0x80000000)
 	.set	V,FP_SCR6	// signed divisor SIGN(X)*(Z+2) for the usual case
 // Lower and upper bound for the cmp2 range test in stanh.  Each bound
 // packs a sign/exponent word (upper half) with the top 16 mantissa
 // bits (lower half), the same key stanh builds in d0.
 BOUNDS1:	.long 0x3FD78000,0x3FFFDDCE // ... 2^(-40), (5/2)LOG2
 	|xref	t_frcinx
 	|xref	t_extdnrm
 	|xref	setox
 	|xref	setoxm1
 	.global	stanhd
 //
 // stanhd --- tanh(X) entry point for a denormalized X.
 //	No arithmetic is done here; control is handed straight to
 //	t_extdnrm (defined outside this file).  Per the note below the
 //	result is X itself -- presumably produced by t_extdnrm; confirm
 //	against its definition.
 //
 stanhd:
 //--TANH(X) = X FOR DENORMALIZED X
 	bra		t_extdnrm
 	.global	stanh
 //
 // stanh --- tanh(X) for a normalized argument.
 //	In:	a0 points to the extended-precision argument X.
 //		d1 holds the value written to the FPCR just before the
 //		final arithmetic instruction of every path.
 //	Out:	fp0 = tanh(X); every path leaves through t_frcinx.
 //	Uses:	d0, fp1, the X/SGN/V scratch slots off a6; the operand at
 //		(a0) is overwritten with 2|X| before the setoxm1/setox call.
 //
 stanh:
 	fmovex		(%a0),%fp0	// ...LOAD INPUT
 	fmovex		%fp0,X(%a6)
 // Build a one-longword key for the range checks: sign/exponent word
 // in the upper half, top 16 mantissa bits in the lower half.  The key
 // is also stored over the first longword of X.
 	movel		(%a0),%d0
 	movew		4(%a0),%d0
 	movel		%d0,X(%a6)
 	andl		#0x7FFFFFFF,%d0	// drop the sign: key of |X|
 	cmp2l		BOUNDS1(%pc),%d0	// ...2**(-40) < |X| < (5/2)LOG2 ?
 	bcss		TANHBORS	// carry set: key lies outside BOUNDS1
 //--THIS IS THE USUAL CASE
 //--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
 	movel		X(%a6),%d0
 	movel		%d0,SGN(%a6)
 	andl		#0x7FFF0000,%d0	// exponent only: clears sign and the low word
 	addl		#0x00010000,%d0	// ...EXPONENT OF 2|X|
 	movel		%d0,X(%a6)
 	andl		#0x80000000,SGN(%a6)	// SGN keeps only the sign bit of X
 	fmovex		X(%a6),%fp0		// ...FP0 IS Y = 2|X|
 	movel		%d1,-(%a7)	// preserve d1 across the call
 	clrl		%d1		// setoxm1 is entered with d1 = 0
 	fmovemx	%fp0-%fp0,(%a0)	// store Y at (a0); presumably setoxm1 reads its argument there
 	bsr		setoxm1	 	// ...FP0 IS Z = EXPM1(Y)
 	movel		(%a7)+,%d1
 	fmovex		%fp0,%fp1
 	fadds		#0x40000000,%fp1	// ...Z+2
 	movel		SGN(%a6),%d0
 	fmovex		%fp1,V(%a6)
 	eorl		%d0,V(%a6)	// apply SIGN(X) to the stored Z+2 by flipping its sign bit
 	fmovel		%d1,%FPCR		//restore users exceptions
 	fdivx		V(%a6),%fp0	// fp0 = Z / (SIGN(X)*(Z+2))
 	bra		t_frcinx
 TANHBORS:
 // Reached when the key of |X| in d0 is outside BOUNDS1.  Split the
 // remaining range: below 1.0 means the tiny case, above the 50*LOG2
 // key means the saturated case, anything else is computed with setox.
 	cmpl		#0x3FFF8000,%d0	// key of 1.0
 	blt		TANHSM
 	cmpl		#0x40048AA1,%d0	// key used for 50 LOG2
 	bgt		TANHHUGE
 //-- (5/2) LOG2 < |X| < 50 LOG2,
 //--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
 //--TANH(X) = SGN -	SGN*2/[EXP(Y)+1].
 	movel		X(%a6),%d0
 	movel		%d0,SGN(%a6)
 	andl		#0x7FFF0000,%d0	// exponent only: clears sign and the low word
 	addl		#0x00010000,%d0	// ...EXPO OF 2|X|
 	movel		%d0,X(%a6)		// ...Y = 2|X|
 	andl		#0x80000000,SGN(%a6)	// SGN keeps only the sign bit of X
 	movel		SGN(%a6),%d0	// not relied on: d0 is reloaded from SGN after the call
 	fmovex		X(%a6),%fp0		// ...Y = 2|X|
 	movel		%d1,-(%a7)	// preserve d1 across the call
 	clrl		%d1		// setox is entered with d1 = 0
 	fmovemx	%fp0-%fp0,(%a0)	// store Y at (a0); presumably setox reads its argument there
 	bsr		setox		// ...FP0 IS EXP(Y)
 	movel		(%a7)+,%d1
 	movel		SGN(%a6),%d0
 	fadds		#0x3F800000,%fp0	// ...EXP(Y)+1
 	eorl		#0xC0000000,%d0	// ...-SIGN(X)*2
 	fmoves		%d0,%fp1		// ...-SIGN(X)*2 IN SGL FMT
 	fdivx		%fp0,%fp1	 	// ...-SIGN(X)2 / [EXP(Y)+1 ]
 	movel		SGN(%a6),%d0
 	orl		#0x3F800000,%d0	// ...SGN
 	fmoves		%d0,%fp0		// ...SGN IN SGL FMT
 	fmovel		%d1,%FPCR		//restore users exceptions
 	faddx		%fp1,%fp0	// fp0 = SGN - SGN*2/[EXP(Y)+1]
 	bra		t_frcinx
 TANHSM:
 // Tiny |X|: the result is X itself.  stanh stored its range-check key
 // over the first longword of X, so the word at XDCARE is cleared
 // before X is reloaded as an extended operand.
 	movew		#0x0000,XDCARE(%a6)
 	fmovel		%d1,%FPCR		//restore users exceptions
 	fmovex		X(%a6),%fp0		//last inst - possible exception set
 	bra		t_frcinx
 TANHHUGE:
 //---RETURN SGN(X) - SGN(X)EPS
 // Both operands are built as single-precision bit patterns in d0:
 // first +-1.0 carrying the sign of X, then a value with the opposite
 // sign and exponent field 0x00800000 (the EPS of the formula above).
 	movel		X(%a6),%d0
 	andl		#0x80000000,%d0	// sign bit of X
 	orl		#0x3F800000,%d0	// SGN(X)*1.0 in single format
 	fmoves		%d0,%fp0
 	andl		#0x80000000,%d0	// back to the bare sign bit
 	eorl		#0x80800000,%d0	// ...-SIGN(X)*EPS
 	fmovel		%d1,%FPCR		//restore users exceptions
 	fadds		%d0,%fp0
 	bra		t_frcinx
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/sto_res.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/sto_res.s
@@ -0,0 +1,98 @@
 //
 //	sto_res.sa 3.1 12/10/90
 //
 //	Takes the result and puts it in where the user expects it.
 //	Library functions return result in fp0.	If fp0 is not the
 //	user's destination register then fp0 is moved to the
 //	correct floating-point destination register.  fp0 and fp1
 //	are then restored to the original contents. 
 //
 //	Input:	result in fp0,fp1 
 //
 //		d2 & a0 should be kept unmodified
 //
 //	Output:	moves the result to the true destination reg or mem
 //
 //	Modifies: destination floating point register
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 STO_RES:	//idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	.global	sto_cos
 //
 // sto_cos --- move the value in fp1 to the cosine destination register.
 //	The 3-bit destination number is taken from CMDREG1B at bit
 //	offset 13.  Destinations fp0-fp3 are written to the USER_FPn
 //	slots off a6; destinations fp4-fp7 are loaded into the FPU
 //	register itself through a dynamic-mask fmovem.
 //	Modifies d0 and d1 on the fp4-fp7 path only.
 //
 sto_cos:
 	bfextu		CMDREG1B(%a6){#13:#3},%d0	//d0 = cosine destination (0-7)
 	cmpib		#3,%d0
 	bgts		c_fp4567		//fp4-fp7 are handled out of line
 c_fp0123:
 	cmpib		#2,%d0		//split fp0/fp1 from fp2/fp3
 	beqs		c_is_fp2
 	bgts		c_is_fp3
 	cmpib		#1,%d0
 	beqs		c_is_fp1
 c_is_fp0:
 	fmovemx	%fp1-%fp1,USER_FP0(%a6)
 	rts
 c_is_fp1:
 	fmovemx	%fp1-%fp1,USER_FP1(%a6)
 	rts
 c_is_fp2:
 	fmovemx	%fp1-%fp1,USER_FP2(%a6)
 	rts
 c_is_fp3:
 	fmovemx	%fp1-%fp1,USER_FP3(%a6)
 	rts
 c_fp4567:
 	fmovemx	%fp1-%fp1,-(%a7)	//pass the value through the stack
 	moveql		#7,%d1
 	subl		%d0,%d1		//d1 = 7 - (dest. reg. no.)
 	moveql		#0,%d0
 	bsetl		%d1,%d0		//d0 = one-bit dynamic register mask
 	fmovemx	(%a7)+,%d0	//pop into the register selected by d0
 	rts
 	.global	sto_res
 //
 // sto_res --- move the result in fp0 to the destination register.
 //	The 3-bit destination number is taken from CMDREG1B at bit
 //	offset 6.  Destinations fp0-fp3 are written to the USER_FPn
 //	slots off a6; destinations fp4-fp7 are loaded into the FPU
 //	register itself through a dynamic-mask fmovem.
 //	Modifies d0 and d1 on the fp4-fp7 path only.
 //
 sto_res:
 	bfextu		CMDREG1B(%a6){#6:#3},%d0	//d0 = destination register (0-7)
 	cmpib		#3,%d0
 	bgts		fp4567		//fp4-fp7 are handled out of line
 fp0123:
 	cmpib		#2,%d0		//split fp0/fp1 from fp2/fp3
 	beqs		is_fp2
 	bgts		is_fp3
 	cmpib		#1,%d0
 	beqs		is_fp1
 is_fp0:
 	fmovemx	%fp0-%fp0,USER_FP0(%a6)
 	rts
 is_fp1:
 	fmovemx	%fp0-%fp0,USER_FP1(%a6)
 	rts
 is_fp2:
 	fmovemx	%fp0-%fp0,USER_FP2(%a6)
 	rts
 is_fp3:
 	fmovemx	%fp0-%fp0,USER_FP3(%a6)
 	rts
 fp4567:
 	fmovemx	%fp0-%fp0,-(%a7)	//pass the value through the stack
 	moveql		#7,%d1
 	subl		%d0,%d1		//d1 = 7 - (dest. reg. no.)
 	moveql		#0,%d0
 	bsetl		%d1,%d0		//d0 = one-bit dynamic register mask
 	fmovemx	(%a7)+,%d0	//pop into the register selected by d0
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/stwotox.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/stwotox.s
@@ -0,0 +1,427 @@
 //
 //	stwotox.sa 3.1 12/10/90
 //
 //	stwotox  --- 2**X
 //	stwotoxd --- 2**X for denormalized X
 //	stentox  --- 10**X
 //	stentoxd --- 10**X for denormalized X
 //
 //	Input: Double-extended number X in location pointed to
 //		by address register a0.
 //
 //	Output: The function values are returned in Fp0.
 //
 //	Accuracy and Monotonicity: The returned result is within 2 ulps in
 //		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
 //		result is subsequently rounded to double precision. The
 //		result is provably monotonic in double precision.
 //
 //	Speed: The program stwotox takes approximately 190 cycles and the
 //		program stentox takes approximately 200 cycles.
 //
 //	Algorithm:
 //
 //	twotox
 //	1. If |X| > 16480, go to ExpBig.
 //
 //	2. If |X| < 2**(-70), go to ExpSm.
 //
 //	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
 //		decompose N as
 //		 N = 64(M + M') + j,  j = 0,1,2,...,63.
 //
 //	4. Overwrite r := r * log2. Then
 //		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
 //		Go to expr to compute that expression.
 //
 //	tentox
 //	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
 //
 //	2. If |X| < 2**(-70), go to ExpSm.
 //
 //	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
 //		N := round-to-int(y). Decompose N as
 //		 N = 64(M + M') + j,  j = 0,1,2,...,63.
 //
 //	4. Define r as
 //		r := ((X - N*L1)-N*L2) * L10
 //		where L1, L2 are the leading and trailing parts of log_10(2)/64
 //		and L10 is the natural log of 10. Then
 //		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
 //		Go to expr to compute that expression.
 //
 //	expr
 //	1. Fetch 2**(j/64) from table as Fact1 and Fact2.
 //
 //	2. Overwrite Fact1 and Fact2 by
 //		Fact1 := 2**(M) * Fact1
 //		Fact2 := 2**(M) * Fact2
 //		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
 //
 //	3. Calculate P where 1 + P approximates exp(r):
 //		P = r + r*r*(A1+r*(A2+...+r*A5)).
 //
 //	4. Let AdjFact := 2**(M'). Return
 //		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
 //		Exit.
 //
 //	ExpBig
 //	1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
 //		underflow by Tiny * Tiny.
 //
 //	ExpSm
 //	1. Return 1 + X.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 //STWOTOX	idnt	2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 BOUNDS1:	.long 0x3FB98000,0x400D80C0 // ... 2^(-70),16480
 BOUNDS2:	.long 0x3FB98000,0x400B9B07 // ... 2^(-70),16480 LOG2/LOG10
 // BOUNDS1/BOUNDS2 are not referenced by the code in this file; the
 // same four values appear as immediates in the cmpil range checks of
 // stwotox and stentox.
 //
 // L2TEN64 and L10TWO1 are read with fmuld (double precision);
 // L10TWO2, LOG10 and LOG2 are read with fmulx (extended precision,
 // three longwords followed by one zero longword).
 L2TEN64:	.long 0x406A934F,0x0979A371 // ... 64LOG10/LOG2
 L10TWO1:	.long 0x3F734413,0x509F8000 // ... LOG2/64LOG10
 L10TWO2:	.long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
 LOG10:	.long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
 LOG2:	.long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
 // Polynomial coefficients for expr.  All five are read as doubles
 // (fmoved/faddd); EXPA1 is the double 0.5 and its last two longwords
 // are never read as part of the operand.
 EXPA5:	.long 0x3F56C16D,0x6F7BD0B2
 EXPA4:	.long 0x3F811112,0x302C712C
 EXPA3:	.long 0x3FA55555,0x55554CC1
 EXPA2:	.long 0x3FC55555,0x55554A54
 EXPA1:	.long 0x3FE00000,0x00000000,0x00000000,0x00000000
 // HUGE and TINY are not referenced by the code in this file.
 HUGE:	.long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
 TINY:	.long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
 EXPTBL:
 // Table of 2^(J/64), J = 0..63, one 16-byte entry per J (the code
 // indexes it with J shifted left by 4).  As consumed by stwotox and
 // stentox:
 //   longwords 0-2  are copied unchanged into FACT1 (extended format);
 //   longword 3     supplies FACT2: its upper word becomes FACT2's
 //                  sign/exponent word and its lower word the top 16
 //                  bits of FACT2's mantissa; the rest of FACT2 is
 //                  cleared.
 	.long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
 	.long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
 	.long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
 	.long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
 	.long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
 	.long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
 	.long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
 	.long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
 	.long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
 	.long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
 	.long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
 	.long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
 	.long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
 	.long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
 	.long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
 	.long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
 	.long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
 	.long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
 	.long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
 	.long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
 	.long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
 	.long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
 	.long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
 	.long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
 	.long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
 	.long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
 	.long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
 	.long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
 	.long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
 	.long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
 	.long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
 	.long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
 	.long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
 	.long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
 	.long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
 	.long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
 	.long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
 	.long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
 	.long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
 	.long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
 	.long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
 	.long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
 	.long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
 	.long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
 	.long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
 	.long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
 	.long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
 	.long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
 	.long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
 	.long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
 	.long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
 	.long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
 	.long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
 	.long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
 	.long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
 	.long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
 	.long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
 	.long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
 	.long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
 	.long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
 	.long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
 	.long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
 	.long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
 	.long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
 	.set	N,L_SCR1	// integer N = round-to-int of the scaled argument
 	.set	X,FP_SCR1	// saved copy of the argument
 	.set	XDCARE,X+2	// not referenced in this file
 	.set	XFRAC,X+4	// not referenced in this file
 	.set	ADJFACT,FP_SCR2	// 2^(M'), assembled in extended format
 	.set	FACT1,FP_SCR3	// leading part of 2^M * 2^(J/64)
 	.set	FACT1HI,FACT1+4	// upper mantissa longword of FACT1
 	.set	FACT1LOW,FACT1+8	// lower mantissa longword of FACT1
 	.set	FACT2,FP_SCR4	// trailing part of 2^M * 2^(J/64)
 	.set	FACT2HI,FACT2+4	// upper mantissa longword of FACT2
 	.set	FACT2LOW,FACT2+8	// lower mantissa longword of FACT2
 	| xref	t_unfl
 	|xref	t_ovfl
 	|xref	t_frcinx
 	.global	stwotoxd
 //
 // stwotoxd --- 2**X for a denormalized X.
 //	In:	a0 points to X; d1 is the value loaded into the FPCR.
 //	Out:	fp0 = 1.0 plus a small single-precision addend; leaves
 //		through t_frcinx.
 //	The addend is the first longword of X OR'ed with 0x00800001, so
 //	it carries the sign bit of X.  Assuming the remaining bits of
 //	that longword are zero for a denormalized X (confirm), the
 //	addend is a small single-precision value with X's sign.
 //
 stwotoxd:
 //--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
 	fmovel		%d1,%fpcr		// ...set user's rounding mode/precision
 	fmoves		#0x3F800000,%fp0  // ...RETURN 1 + X
 	movel		(%a0),%d0
 	orl		#0x00800001,%d0
 	fadds		%d0,%fp0
 	bra		t_frcinx
 	.global	stwotox
 //
 // stwotox --- 2**X.
 //	In:	a0 points to the extended-precision argument X; d1 is the
 //		value loaded into the FPCR before the final operation
 //		(done in expr and EXPSM).
 //	Out:	continues in expr (usual case) or EXPBORS (|X| outside
 //		the range 2^(-70)..16480).
 //	Uses:	d0, a1, fp0, fp1 and the N/X/ADJFACT/FACT1/FACT2 scratch
 //		slots; d2 is pushed and popped around its use.
 //
 stwotox:
 //--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
 	fmovemx	(%a0),%fp0-%fp0	// ...LOAD INPUT, do not set cc's
 // d0 = range-check key: sign/exponent word in the upper half, top 16
 // mantissa bits in the lower half, sign cleared below.
 	movel		(%a0),%d0
 	movew		4(%a0),%d0
 	fmovex		%fp0,X(%a6)
 	andil		#0x7FFFFFFF,%d0
 	cmpil		#0x3FB98000,%d0		// ...|X| >= 2**(-70)?
 	bges		TWOOK1
 	bra		EXPBORS
 TWOOK1:
 	cmpil		#0x400D80C0,%d0		// ...|X| > 16480?
 	bles		TWOMAIN
 	bra		EXPBORS
 TWOMAIN:
 //--USUAL CASE, 2^(-70) <= |X| <= 16480
 	fmovex		%fp0,%fp1
 	fmuls		#0x42800000,%fp1  // ...64 * X
 	fmovel		%fp1,N(%a6)		// ...N = ROUND-TO-INT(64 X)
 	movel		%d2,-(%sp)	// d2 is borrowed for L and M'
 	lea		EXPTBL,%a1 	// ...LOAD ADDRESS OF TABLE OF 2^(J/64)
 	fmovel		N(%a6),%fp1		// ...N --> FLOATING FMT
 	movel		N(%a6),%d0
 	movel		%d0,%d2
 	andil		#0x3F,%d0		// ...D0 IS J
 	asll		#4,%d0		// ...DISPLACEMENT FOR 2^(J/64)
 	addal		%d0,%a1		// ...ADDRESS FOR 2^(J/64)
 	asrl		#6,%d2		// ...d2 IS L, N = 64L + J
 	movel		%d2,%d0
 	asrl		#1,%d0		// ...D0 IS M
 	subl		%d0,%d2		// ...d2 IS M', N = 64(M+M') + J
 	addil		#0x3FFF,%d2	// bias M' to form an extended exponent
 	movew		%d2,ADJFACT(%a6) 	// ...ADJFACT IS 2^(M')
 	movel		(%sp)+,%d2
 // Only the exponent word of ADJFACT is written above; its mantissa is
 // filled in by expr.
 //--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
 //--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
 //--ADJFACT = 2^(M').
 //--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
 // NOTE(review): the list above does not match this code -- the only
 // register this routine saves is d2, already restored; it looks
 // inherited from an earlier revision.
 	fmuls		#0x3C800000,%fp1  // ...(1/64)*N
 	movel		(%a1)+,FACT1(%a6)
 	movel		(%a1)+,FACT1HI(%a6)
 	movel		(%a1)+,FACT1LOW(%a6)
 	movew		(%a1)+,FACT2(%a6)	// FACT2 sign/exponent word from the table
 	clrw		FACT2+2(%a6)
 	fsubx		%fp1,%fp0	 	// ...X - (1/64)*INT(64 X)
 	movew		(%a1)+,FACT2HI(%a6)	// top 16 mantissa bits of FACT2
 	clrw		FACT2HI+2(%a6)
 	clrl		FACT2LOW(%a6)
 	addw		%d0,FACT1(%a6)	// scale FACT1 by 2^M through its exponent word
 	fmulx		LOG2,%fp0	// ...FP0 IS R
 	addw		%d0,FACT2(%a6)	// scale FACT2 by 2^M likewise
 	bra		expr
 EXPBORS:
 // Shared out-of-range exit for stwotox and stentox.  d0 still holds
 // the sign-stripped key of |X|; since the key already failed the
 // range test, a key above that of 1.0 means |X| is too large and
 // anything else means |X| is too small.
 //--FPCR, D0 SAVED
 	cmpil		#0x3FFF8000,%d0
 	bgts		EXPBIG
 EXPSM:
 //--|X| IS SMALL, RETURN 1 + X
 	fmovel		%d1,%FPCR		//restore users exceptions
 	fadds		#0x3F800000,%fp0  // ...RETURN 1 + X
 	bra		t_frcinx
 EXPBIG:
 //--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
 //--REGISTERS SAVE SO FAR ARE FPCR AND  D0
 	movel		X(%a6),%d0	// first longword of the saved X: carries the sign
 	cmpil		#0,%d0
 	blts		EXPNEG
 	bclrb		#7,(%a0)		//t_ovfl expects positive value
 	bra		t_ovfl
 EXPNEG:
 	bclrb		#7,(%a0)		//t_unfl expects positive value
 	bra		t_unfl
 	.global	stentoxd
 //
 // stentoxd --- 10**X for a denormalized X.
 //	Instruction-for-instruction the same as stwotoxd: fp0 is 1.0
 //	plus a single-precision addend formed from the first longword of
 //	X OR'ed with 0x00800001 (so it carries X's sign bit).
 //	In:	a0 points to X; d1 is the value loaded into the FPCR.
 //	Out:	fp0; leaves through t_frcinx.
 //
 stentoxd:
 //--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
 	fmovel		%d1,%fpcr		// ...set user's rounding mode/precision
 	fmoves		#0x3F800000,%fp0  // ...RETURN 1 + X
 	movel		(%a0),%d0
 	orl		#0x00800001,%d0
 	fadds		%d0,%fp0
 	bra		t_frcinx
 	.global	stentox
 //
 // stentox --- 10**X.
 //	In:	a0 points to the extended-precision argument X; d1 is the
 //		value loaded into the FPCR before the final operation
 //		(done in expr and EXPSM).
 //	Out:	falls through into expr (usual case) or branches to
 //		EXPBORS (|X| outside the range checked below).
 //	Uses:	d0, a1, fp0-fp2 and the N/X/ADJFACT/FACT1/FACT2 scratch
 //		slots; d2 is pushed and popped around its use.
 //
 stentox:
 //--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
 	fmovemx	(%a0),%fp0-%fp0	// ...LOAD INPUT, do not set cc's
 // d0 = range-check key: sign/exponent word in the upper half, top 16
 // mantissa bits in the lower half, sign cleared below.
 	movel		(%a0),%d0
 	movew		4(%a0),%d0
 	fmovex		%fp0,X(%a6)
 	andil		#0x7FFFFFFF,%d0
 	cmpil		#0x3FB98000,%d0		// ...|X| >= 2**(-70)?
 	bges		TENOK1
 	bra		EXPBORS
 TENOK1:
 	cmpil		#0x400B9B07,%d0		// ...|X| <= 16480*log2/log10 ?
 	bles		TENMAIN
 	bra		EXPBORS
 TENMAIN:
 //--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
 	fmovex		%fp0,%fp1
 	fmuld		L2TEN64,%fp1	// ...X*64*LOG10/LOG2
 	fmovel		%fp1,N(%a6)		// ...N=INT(X*64*LOG10/LOG2)
 	movel		%d2,-(%sp)	// d2 is borrowed for L and M'
 	lea		EXPTBL,%a1 	// ...LOAD ADDRESS OF TABLE OF 2^(J/64)
 	fmovel		N(%a6),%fp1		// ...N --> FLOATING FMT
 	movel		N(%a6),%d0
 	movel		%d0,%d2
 	andil		#0x3F,%d0		// ...D0 IS J
 	asll		#4,%d0		// ...DISPLACEMENT FOR 2^(J/64)
 	addal		%d0,%a1		// ...ADDRESS FOR 2^(J/64)
 	asrl		#6,%d2		// ...d2 IS L, N = 64L + J
 	movel		%d2,%d0
 	asrl		#1,%d0		// ...D0 IS M
 	subl		%d0,%d2		// ...d2 IS M', N = 64(M+M') + J
 	addil		#0x3FFF,%d2	// bias M' to form an extended exponent
 	movew		%d2,ADJFACT(%a6) 	// ...ADJFACT IS 2^(M')
 	movel		(%sp)+,%d2
 // Only the exponent word of ADJFACT is written above; its mantissa is
 // filled in by expr.
 //--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
 //--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
 //--ADJFACT = 2^(M').
 //--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
 // NOTE(review): the list above does not match this code -- the only
 // register this routine saves is d2, already restored; it looks
 // inherited from an earlier revision.
 	fmovex		%fp1,%fp2
 	fmuld		L10TWO1,%fp1	// ...N*(LOG2/64LOG10)_LEAD
 	movel		(%a1)+,FACT1(%a6)
 	fmulx		L10TWO2,%fp2	// ...N*(LOG2/64LOG10)_TRAIL
 	movel		(%a1)+,FACT1HI(%a6)
 	movel		(%a1)+,FACT1LOW(%a6)
 	fsubx		%fp1,%fp0		// ...X - N L_LEAD
 	movew		(%a1)+,FACT2(%a6)	// FACT2 sign/exponent word from the table
 	fsubx		%fp2,%fp0		// ...X - N L_TRAIL
 	clrw		FACT2+2(%a6)
 	movew		(%a1)+,FACT2HI(%a6)	// top 16 mantissa bits of FACT2
 	clrw		FACT2HI+2(%a6)
 	clrl		FACT2LOW(%a6)
 	fmulx		LOG10,%fp0	// ...FP0 IS R
 	addw		%d0,FACT1(%a6)	// scale FACT1 by 2^M through its exponent word
 	addw		%d0,FACT2(%a6)	// scale FACT2 by 2^M likewise
 // Execution continues directly into expr.
 expr:
 // Common tail of stwotox and stentox.
 //	In:	fp0 = R; FACT1/FACT2 hold the two parts of 2^M * 2^(J/64);
 //		the exponent word of ADJFACT holds the biased M';
 //		d1 = value to load into the FPCR.
 //	Out:	fp0 = ADJFACT * (FACT1 + (FACT1*P + FACT2)), where P is
 //		the polynomial evaluated below; leaves through t_frcinx.
 //	Uses:	fp1, fp2, fp3.
 //--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
 // NOTE(review): nothing in this file saves fp2/fp3; presumably the
 // caller's frame holds them -- confirm before relying on it.
 //--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
 //--FP0 IS R. THE FOLLOWING CODE COMPUTES
 //--	2**(M'+M) * 2**(J/64) * EXP(R)
 // The polynomial is evaluated as two interleaved chains in S = R*R:
 // the odd coefficients accumulate in fp2, the even ones in fp3.
 	fmovex		%fp0,%fp1
 	fmulx		%fp1,%fp1		// ...FP1 IS S = R*R
 	fmoved		EXPA5,%fp2	// ...FP2 IS A5
 	fmoved		EXPA4,%fp3	// ...FP3 IS A4
 	fmulx		%fp1,%fp2		// ...FP2 IS S*A5
 	fmulx		%fp1,%fp3		// ...FP3 IS S*A4
 	faddd		EXPA3,%fp2	// ...FP2 IS A3+S*A5
 	faddd		EXPA2,%fp3	// ...FP3 IS A2+S*A4
 	fmulx		%fp1,%fp2		// ...FP2 IS S*(A3+S*A5)
 	fmulx		%fp1,%fp3		// ...FP3 IS S*(A2+S*A4)
 	faddd		EXPA1,%fp2	// ...FP2 IS A1+S*(A3+S*A5)
 	fmulx		%fp0,%fp3		// ...FP3 IS R*S*(A2+S*A4)
 	fmulx		%fp1,%fp2		// ...FP2 IS S*(A1+S*(A3+S*A5))
 	faddx		%fp3,%fp0		// ...FP0 IS R+R*S*(A2+S*A4)
 	faddx		%fp2,%fp0		// ...FP0 IS EXP(R) - 1
 //--FINAL RECONSTRUCTION PROCESS
 //--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
 	fmulx		FACT1(%a6),%fp0
 	faddx		FACT2(%a6),%fp0
 	faddx		FACT1(%a6),%fp0
 	fmovel		%d1,%FPCR		//restore users exceptions
 // Complete ADJFACT as an extended number: zero the word after the
 // exponent, set only the leading mantissa bit, clear the low mantissa.
 	clrw		ADJFACT+2(%a6)
 	movel		#0x80000000,ADJFACT+4(%a6)
 	clrl		ADJFACT+8(%a6)
 	fmulx		ADJFACT(%a6),%fp0	// ...FINAL ADJUSTMENT
 	bra		t_frcinx
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/tbldo.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/tbldo.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/util.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/util.s
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/x_bsun.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_bsun.s
@@ -0,0 +1,47 @@
 //
 //	x_bsun.sa 3.3 7/1/91
 //
 //	fpsp_bsun --- FPSP handler for branch/set on unordered exception
 //
 //	Copy the PC to FPIAR to maintain 881/882 compatibility
 //
 //	The real_bsun handler will need to perform further corrective
 //	measures as outlined in the 040 User's Manual on pages
 //	9-41f, section 9.8.3.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 X_BSUN:	//idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	real_bsun
 	.global	fpsp_bsun
 //
 // fpsp_bsun --- branch/set on unordered exception entry.
 //	Builds the usual FPSP work frame, saves the FPU state and the
 //	user registers, overwrites the saved FPIAR with the PC from the
 //	exception frame, restores everything and jumps to real_bsun.
 //	Because FPIAR is reloaded from USER_FPIAR on the way out, the
 //	only lasting change is FPIAR = stacked PC.
 //
 fpsp_bsun:
 //
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%a7)
 	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)
 	fmovemx	%fp0-%fp3,USER_FP0(%a6)
 	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
 //
 	movel		EXC_PC(%a6),USER_FPIAR(%a6)	// saved FPIAR := stacked PC
 //
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	// loads the new FPIAR
 	frestore	(%a7)+
 	unlk		%a6
 	bral		real_bsun
 //
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/x_fline.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_fline.s
@@ -0,0 +1,104 @@
 //
 //	x_fline.sa 3.3 1/10/91
 //
 //	fpsp_fline --- FPSP handler for fline exception
 //
 //	First determine if the exception is one of the unimplemented
 //	floating point instructions.  If so, let fpsp_unimp handle it.
 //	Next, determine if the instruction is an fmovecr with a non-zero
 //	<ea> field.  If so, handle here and return.  Otherwise, it
 //	must be a real F-line exception.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 X_FLINE:	//idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	real_fline
 	|xref	fpsp_unimp
 	|xref	uni_2
 	|xref	mem_read
 	|xref	fpsp_fmt_error
 	.global	fpsp_fline
 //
 // fpsp_fline --- F-line exception entry.
 //	1. If the stacked vector word equals UNIMP_VEC, go to fpsp_unimp.
 //	2. Otherwise fetch the faulting instruction (4 bytes) through
 //	   mem_read and test whether it is an FMOVECR; if so, continue
 //	   into fix_stack, which rebuilds the frames and ends in uni_2.
 //	3. Anything else is passed to real_fline via not_mvcr.
 //	Uses d0, d1, a0, a1 (saved in USER_DA first) and L_SCR1.
 //
 fpsp_fline:
 //
 //	check for unimplemented vector first.  Use EXC_VEC-4 because
 //	the equate is valid only after a 'link a6' has pushed one more
 //	long onto the stack.
 //
 	cmpw	#UNIMP_VEC,EXC_VEC-4(%a7)
 	beql	fpsp_unimp
 //
 //	fmovecr with non-zero <ea> handling here
 //
 	subl	#4,%a7		//4 accounts for 2-word difference
 //				;between six word frame (unimp) and
 //				;four word frame
 	link	%a6,#-LOCAL_SIZE
 	fsave	-(%a7)
 	moveml	%d0-%d1/%a0-%a1,USER_DA(%a6)
 // Until fix_con moves them, the stacked SR/PC sit 4 bytes above their
 // usual offsets because of the subl above; hence EXC_PC+4.
 	moveal	EXC_PC+4(%a6),%a0	//get address of fline instruction
 	leal	L_SCR1(%a6),%a1	//use L_SCR1 as scratch
 	movel	#4,%d0		//number of bytes to read
 	addl	#4,%a6		//to offset the sub.l #4,a7 above so that
 //				;a6 can point correctly to the stack frame 
 //				;before branching to mem_read
 	bsrl	mem_read
 	subl	#4,%a6
 	movel	L_SCR1(%a6),%d0	//d0 contains the fline and command word
 	bfextu	%d0{#4:#3},%d1	//extract coprocessor id
 	cmpib	#1,%d1		//check if cpid=1
 	bne	not_mvcr	//exit if not
 	bfextu	%d0{#16:#6},%d1	//top six bits of the command word
 	cmpib	#0x17,%d1		//check if it is an FMOVECR encoding
 	bne	not_mvcr	
 //				;if an FMOVECR instruction, fix stack
 //				;and go to FPSP_UNIMP
 fix_stack:
 // Replace the header of the fsave frame at the top of the stack with
 // one that looks like an unimplemented-instruction frame.  The first
 // byte at (a7) selects the variant (VER_40 or VER_41); the stack is
 // then grown by (frame size - 4) bytes and a new 4-byte header is
 // written at the new top: version byte, size byte, zero word.
 // Presumably the frame fsave produced here is 4 bytes long, which is
 // why 4 is subtracted from the size -- confirm against fpsp.defs.
 // Any other version byte is rejected through fpsp_fmt_error.
 	cmpib	#VER_40,(%a7)	//test for orig unimp frame
 	bnes	ck_rev
 	subl	#UNIMP_40_SIZE-4,%a7 //emulate an orig fsave
 	moveb	#VER_40,(%a7)
 	moveb	#UNIMP_40_SIZE-4,1(%a7)
 	clrw	2(%a7)
 	bras	fix_con
 ck_rev:
 	cmpib	#VER_41,(%a7)	//test for rev unimp frame
 	bnel	fpsp_fmt_error	//if not $40 or $41, exit with error
 	subl	#UNIMP_41_SIZE-4,%a7 //emulate a rev fsave
 	moveb	#VER_41,(%a7)
 	moveb	#UNIMP_41_SIZE-4,1(%a7)
 	clrw	2(%a7)
 fix_con:
 // Convert the integer exception frame into the layout the
 // unimplemented-instruction path works on, then join it at uni_2.
 // SR and PC are moved down by 4 bytes into the space reserved by the
 // subl #4,%a7 in fpsp_fline; d0 still holds the instruction words
 // read there.
 	movew	EXC_SR+4(%a6),EXC_SR(%a6) //move stacked sr to new position
 	movel	EXC_PC+4(%a6),EXC_PC(%a6) //move stacked pc to new position
 	fmovel	EXC_PC(%a6),%FPIAR //point FPIAR to fline inst
 	movel	#4,%d1		//the fetched instruction is 4 bytes long
 	addl	%d1,EXC_PC(%a6)	//increment stacked pc value to next inst
 	movew	#0x202c,EXC_VEC(%a6) //reformat vector to unimp
 	clrl	EXC_EA(%a6)	//clear the EXC_EA field
 	movew	%d0,CMDREG1B(%a6) //move the lower word into CMDREG1B
 	clrl	E_BYTE(%a6)
 	bsetb	#UFLAG,T_BYTE(%a6)
 	moveml	USER_DA(%a6),%d0-%d1/%a0-%a1 //restore data registers
 	bral	uni_2
 not_mvcr:
 // Not an FMOVECR: undo everything fpsp_fline set up (saved registers,
 // fsave frame, link frame and the 4 bytes reserved by subl #4,%a7)
 // and hand the untouched exception to real_fline.
 	moveml	USER_DA(%a6),%d0-%d1/%a0-%a1 //restore data registers
 	frestore (%a7)+
 	unlk	%a6
 	addl	#4,%a7		//release the 4 bytes reserved on entry
 	bral	real_fline
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/x_operr.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_operr.s
@@ -0,0 +1,356 @@
 //
 //	x_operr.sa 3.5 7/1/91
 //
 //	fpsp_operr --- FPSP handler for operand error exception
 //
 //	See 68040 User's Manual pp. 9-44f
 //
 // Note 1: For trap disabled 040 does the following:
 // If the dest is a fp reg, then an extended precision non_signaling
 // NAN is stored in the dest reg.  If the dest format is b, w, or l and
 // the source op is a NAN, then garbage is stored as the result (actually
 // the upper 32 bits of the mantissa are sent to the integer unit). If
 // the dest format is integer (b, w, l) and the operr is caused by
 // integer overflow, or the source op is inf, then the result stored is
 // garbage.
 // There are three cases in which operr is incorrectly signaled on the 
 // 040.  This occurs for move_out of format b, w, or l for the largest 
 // negative integer (-2^7 for b, -2^15 for w, -2^31 for l).
 //
 //	  On opclass = 011 fmove.(b,w,l) that causes a conversion
 //	  overflow -> OPERR, the exponent in wbte (and fpte) is:
 //		byte    56 - (62 - exp)
 //		word    48 - (62 - exp)
 //		long    32 - (62 - exp)
 //
 //			where exp = (true exp) - 1
 //
 //  So, wbtemp and fptemp will contain the following on erroneously
 //	  signalled operr:
 //			fpts = 1
 //			fpte = $4000  (15 bit externally)
 //		byte	fptm = $ffffffff ffffff80
 //		word	fptm = $ffffffff ffff8000
 //		long	fptm = $ffffffff 80000000
 //
 // Note 2: For trap enabled 040 does the following:
 // If the inst is move_out, then same as Note 1.
 // If the inst is not move_out, the dest is not modified.
 // The exceptional operand is not defined for integer overflow 
 // during a move_out.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 X_OPERR:	//idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	mem_write
 	|xref	real_operr
 	|xref	real_inex
 	|xref	get_fline
 	|xref	fpsp_done
 	|xref	reg_dest
 	.global	fpsp_operr
 //
 // fpsp_operr --- operand error exception entry.
 //	Builds the FPSP work frame and saves the FPU state, d0-d1/a0-a1,
 //	fp0-fp3 and the FPU control registers.  If TFLAG is set and the
 //	3-bit field at bit offset 3 of CMDREG1B selects long, word or
 //	byte, the matching operr_long/operr_word/operr_byte routine
 //	takes over.  In every other case execution continues into
 //	operr_end.
 //
 fpsp_operr:
 //
 	link		%a6,#-LOCAL_SIZE
 	fsave		-(%a7)
 	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)
 	fmovemx	%fp0-%fp3,USER_FP0(%a6)
 	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
 //
 // Check if this is an opclass 3 instruction.
 //  If so, fall through, else branch to operr_end
 //
 	btstb	#TFLAG,T_BYTE(%a6)
 	beqs	operr_end
 //
 // If the destination size is B,W,or L, the operr must be 
 // handled here.
 //
 	movel	CMDREG1B(%a6),%d0
 	bfextu	%d0{#3:#3},%d0	//0=long, 4=word, 6=byte
 	cmpib	#0,%d0		//determine size; check long
 	beq	operr_long
 	cmpib	#4,%d0		//check word
 	beq	operr_word
 	cmpib	#6,%d0		//check byte
 	beq	operr_byte
 //
 // The size is not B,W,or L, so the operr is handled by the 
 // kernel handler.  Set the operr bits and clean up, leaving
 // only the integer exception frame on the stack, and the 
 // fpu in the original exceptional state.
 //
 operr_end:
 // Not a B/W/L move-out: record operr and aiop in the saved FPSR
 // image, restore every saved register and the FPU state, drop the
 // work frame and pass the exception on to real_operr.
 	bsetb		#operr_bit,FPSR_EXCEPT(%a6)
 	bsetb		#aiop_bit,FPSR_AEXCEPT(%a6)
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
 	frestore	(%a7)+
 	unlk		%a6
 	bral		real_operr
 operr_long:
 // Long-sized move-out.  d1 = 4 is the size handed to operr_store.
 //   - source tagged NAN                      -> operr_nan
 //   - FPTEMP is the special pattern          -> store 0x80000000 and
 //     (low lword 0x80000000, check_upper ok)    leave via not_enabled
 //   - otherwise chklerr decides between storing FPTEMP_LO unchanged
 //     (fixlong) and a true operr (store_max).
 	moveql	#4,%d1		//write size to d1
 	moveb	STAG(%a6),%d0	//test stag for nan
 	andib	#0xe0,%d0		//clr all but tag
 	cmpib	#0x60,%d0		//check for nan
 	beq	operr_nan	
 	cmpil	#0x80000000,FPTEMP_LO(%a6) //test if ls lword is special
 	bnes	chklerr		//if not equal, check for incorrect operr
 	bsr	check_upper	//check if exp and ms mant are special
 	tstl	%d0
 	bnes	chklerr		//if d0 is true, check for incorrect operr
 	movel	#0x80000000,%d0	//store special case result
 	bsr	operr_store
 	bra	not_enabled	//clean and exit
 //
 //	CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
 //
 chklerr:
 	movew	FPTEMP_EX(%a6),%d0
 	andw	#0x7FFF,%d0	//ignore sign bit
 	cmpw	#0x3FFE,%d0	//this is the only possible exponent value
 	bnes	chklerr2
 fixlong:
 // Store the low mantissa longword of FPTEMP as the integer result.
 	movel	FPTEMP_LO(%a6),%d0
 	bsr	operr_store
 	bra	not_enabled
 chklerr2:
 // Exponent is not 0x3FFE.  An exponent of 0x4000 or more (unsigned
 // compare) is a true operr.  Below that, FPTEMP_LO is accepted when
 // bits 30-16 are all ones, or when FPTEMP_HI is all ones for a
 // FPTEMP_LO with bit 31 set, or zero for one with bit 31 clear.
 	movew	FPTEMP_EX(%a6),%d0
 	andw	#0x7FFF,%d0	//ignore sign bit
 	cmpw	#0x4000,%d0
 	bcc	store_max	//exponent out of range
 	movel	FPTEMP_LO(%a6),%d0
 	andl	#0x7FFF0000,%d0	//look for all 1's on bits 30-16
 	cmpl	#0x7FFF0000,%d0
 	beqs	fixlong
 	tstl	FPTEMP_LO(%a6)
 	bpls	chklepos
 	cmpl	#0xFFFFFFFF,FPTEMP_HI(%a6)
 	beqs	fixlong
 	bra	store_max
 chklepos:
 	tstl	FPTEMP_HI(%a6)
 	beqs	fixlong
 	bra	store_max
 operr_word:
 // Word-sized move-out.  d1 = 2 is the size handed to operr_store.
 // Same structure as operr_long, with 0xffff8000 as the special low
 // longword.  Values are passed to operr_store with the word to be
 // written in the UPPER half of d0 (0x80000000 below, swap in chkwerr)
 // -- presumably because a 2-byte write takes the first two bytes of
 // L_SCR1; confirm against mem_write/reg_dest.
 	moveql	#2,%d1		//write size to d1
 	moveb	STAG(%a6),%d0	//test stag for nan
 	andib	#0xe0,%d0		//clr all but tag
 	cmpib	#0x60,%d0		//check for nan
 	beq	operr_nan	
 	cmpil	#0xffff8000,FPTEMP_LO(%a6) //test if ls lword is special
 	bnes	chkwerr		//if not equal, check for incorrect operr
 	bsr	check_upper	//check if exp and ms mant are special
 	tstl	%d0
 	bnes	chkwerr		//if d0 is true, check for incorrect operr
 	movel	#0x80000000,%d0	//store special case result
 	bsr	operr_store
 	bra	not_enabled	//clean and exit
 //
 //	CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
 //
 chkwerr:
 	movew	FPTEMP_EX(%a6),%d0
 	andw	#0x7FFF,%d0	//ignore sign bit
 	cmpw	#0x3FFE,%d0	//this is the only possible exponent value
 	bnes	store_max
 	movel	FPTEMP_LO(%a6),%d0
 	swap	%d0		//low word of FPTEMP_LO moves to the upper half
 	bsr	operr_store
 	bra	not_enabled
 operr_byte:
 // Byte-sized move-out.  d1 = 1 is the size handed to operr_store.
 // Same structure as operr_long, with 0xffffff80 as the special low
 // longword.  Values are passed to operr_store with the byte to be
 // written in the TOP byte of d0 (0x80000000 below, asll+swap in
 // chkberr) -- presumably because a 1-byte write takes the first byte
 // of L_SCR1; confirm against mem_write/reg_dest.
 	moveql	#1,%d1		//write size to d1
 	moveb	STAG(%a6),%d0	//test stag for nan
 	andib	#0xe0,%d0		//clr all but tag
 	cmpib	#0x60,%d0		//check for nan
 	beqs	operr_nan	
 	cmpil	#0xffffff80,FPTEMP_LO(%a6) //test if ls lword is special
 	bnes	chkberr		//if not equal, check for incorrect operr
 	bsr	check_upper	//check if exp and ms mant are special
 	tstl	%d0
 	bnes	chkberr		//if d0 is true, check for incorrect operr
 	movel	#0x80000000,%d0	//store special case result
 	bsr	operr_store
 	bra	not_enabled	//clean and exit
 //
 //	CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
 //
 chkberr:
 	movew	FPTEMP_EX(%a6),%d0
 	andw	#0x7FFF,%d0	//ignore sign bit
 	cmpw	#0x3FFE,%d0	//this is the only possible exponent value
 	bnes	store_max
 	movel	FPTEMP_LO(%a6),%d0
 	asll	#8,%d0		//low byte of FPTEMP_LO -> bits 15-8
 	swap	%d0		//... and from there into the top byte
 	bsr	operr_store
 	bra	not_enabled
 //
 // This operr condition is not of the special case.  Set operr
 // and aiop and write the portion of the nan to memory for the
 // given size.
 //
 operr_nan:
 // Source operand is a NAN.  Set operr and aiop in the saved FPSR,
 // write the upper mantissa longword of ETEMP (truncated to the size
 // in d1 by operr_store) to the destination, then let end_operr decide
 // whether the operr trap is taken.
 	orl	#opaop_mask,USER_FPSR(%a6) //set operr & aiop
 	movel	ETEMP_HI(%a6),%d0	//output will be from upper 32 bits
 	bsr	operr_store
 	bra	end_operr
 //
 // Store_max loads the max pos or negative for the size, sets
 // the operr and aiop bits, and clears inex and ainex, incorrectly
 // set by the 040.
 //
 store_max:
 // True operr on an integer move-out.  Flag operr/aiop, drop the
 // inex2/ainex bits in the saved FPSR, clear the live FPSR, and store
 // 0x7fffffff or 0x80000000 according to the sign of FPTEMP.  Only
 // the leading d1 bytes of that pattern reach the destination (see
 // operr_store).  Continues at end_operr.
 	orl	#opaop_mask,USER_FPSR(%a6) //set operr & aiop
 	bclrb	#inex2_bit,FPSR_EXCEPT(%a6)
 	bclrb	#ainex_bit,FPSR_AEXCEPT(%a6)
 	fmovel	#0,%FPSR
 	tstw	FPTEMP_EX(%a6)	//check sign
 	blts	load_neg
 	movel	#0x7fffffff,%d0	//largest positive pattern
 	bsr	operr_store
 	bra	end_operr
 load_neg:
 	movel	#0x80000000,%d0	//largest negative pattern
 	bsr	operr_store
 	bra	end_operr
 //
 // This routine stores the data in d0, for the given size in d1,
 // to memory or data register as required.  A read of the fline
 // is required to determine the destination.
 //
 operr_store:
 //	In:	d0 = data to write (placed in L_SCR1), d1 = size in bytes
 //		(1, 2 or 4).
 //	The F-line opword is fetched with get_fline; a zero mode field
 //	(3 bits at bit offset 26 of d0) selects a data register
 //	destination, anything else a memory destination.
 //	Modifies d0, d1, a0, a1 and L_SCR1.
 	movel	%d0,L_SCR1(%a6)	//move write data to L_SCR1
 	movel	%d1,-(%a7)	//save register size
 	bsrl	get_fline	//fline returned in d0
 	movel	(%a7)+,%d1
 	bftst	%d0{#26:#3}		//if mode is zero, dest is Dn
 	bnes	dest_mem
 //
 // Destination is Dn.  Get register number from d0. Data is on
 // the stack at (a7). D1 has size: 1=byte,2=word,4=long/single
 //
 // NOTE(review): in this code the data was stored in L_SCR1 above and
 // nothing is left on the stack at this point; the sentence about
 // (a7) looks stale -- confirm against reg_dest.
 //
 // The register number is combined with a size code for reg_dest:
 // +0x10 for long, +8 for word, nothing added for byte.
 	andil	#7,%d0		//isolate register number
 	cmpil	#4,%d1
 	beqs	op_long		//the most frequent case
 	cmpil	#2,%d1
 	bnes	op_con
 	orl	#8,%d0
 	bras	op_con
 op_long:
 	orl	#0x10,%d0
 op_con:
 	movel	%d0,%d1		//format size:reg for reg_dest
 	bral	reg_dest	//call to reg_dest returns to caller
 //				;of operr_store
 //
 // Destination is memory.  Get <ea> from integer exception frame
 // and call mem_write.
 //
 dest_mem:
 	leal	L_SCR1(%a6),%a0	//put ptr to write data in a0
 	movel	EXC_EA(%a6),%a1	//put user destination address in a1
 	movel	%d1,%d0		//put size in d0
 	bsrl	mem_write
 	rts
 //
 // Check the upper 32 bits of the FPTEMP mantissa for $ffffffff and
 // the FPTEMP sign/exponent word for $c000 or $bfff.  If the mantissa
 // test and one of the exponent tests succeed, the operr was signalled
 // incorrectly: return d0 = 0.  Otherwise it is a real operr: return
 // d0 = 1.  Nothing other than d0 is modified.
 //
 check_upper:
 	cmpil	#0xffffffff,FPTEMP_HI(%a6) //check if upper mantissa lword is all 1's
 	bnes	true_operr	//if not all 1's then was true operr
 	cmpiw	#0xc000,FPTEMP_EX(%a6) //check if incorrectly signalled
 	beqs	not_true_operr	//branch if not true operr
 	cmpiw	#0xbfff,FPTEMP_EX(%a6) //check if incorrectly signalled
 	beqs	not_true_operr	//branch if not true operr
 true_operr:
 	movel	#1,%d0		//signal real operr
 	rts
 not_true_operr:
 	clrl	%d0		//signal no real operr
 	rts
 //
 // End_operr tests for operr enabled.  If not, it cleans up the stack
 // and does an rte.  If enabled, it cleans up the stack and branches
 // to the kernel operr handler with only the integer exception
 // frame on the stack and the fpu in the original exceptional state
 // with correct data written to the destination.
 //
 end_operr:
 // Reached after a result has been stored for a real operr.  If the
 // operr enable bit in the saved FPCR is clear, continue at
 // not_enabled; otherwise restore all saved state, drop the work
 // frame and enter real_operr.
 	btstb		#operr_bit,FPCR_ENABLE(%a6)
 	beqs		not_enabled
 enabled:
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
 	frestore	(%a7)+
 	unlk		%a6
 	bral		real_operr
 not_enabled:
 //
 // It is possible to have either inex2 or inex1 exceptions with the
 // operr.  If the inex enable bit is set in the FPCR, and either
 // inex2 or inex1 occurred, we must clean up and branch to the
 // real inex handler.
 //
 ck_inex:
 // d0 = enable byte AND exception byte, reduced to its two low bits;
 // zero means no enabled inexact exception is pending.
 	moveb	FPCR_ENABLE(%a6),%d0
 	andb	FPSR_EXCEPT(%a6),%d0
 	andib	#0x3,%d0
 	beq	operr_exit
 //
 // Inexact enabled and reported, and we must take an inexact exception.
 //
 take_inex:
 // Rewrite the low byte of the stacked vector word to INEX_VEC, copy
 // the saved FPSR into FPSR_SHADOW and set sx_mask in the E_BYTE
 // longword of the fsave frame, then restore everything and enter
 // real_inex.
 	moveb		#INEX_VEC,EXC_VEC+1(%a6)
 	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
 	orl		#sx_mask,E_BYTE(%a6)
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
 	frestore	(%a7)+
 	unlk		%a6
 	bral		real_inex
 //
 // Since operr is only an E1 exception, there is no need to frestore
 // any state back to the fpu.
 //
 operr_exit:
 // Normal completion: restore the saved registers and FPU control
 // registers and leave through fpsp_done.  No frestore is issued; the
 // fsave frame is simply discarded when unlk resets the stack pointer.
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
 	unlk		%a6
 	bral		fpsp_done
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/x_ovfl.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_ovfl.s
@@ -0,0 +1,186 @@
 //
 //	x_ovfl.sa 3.5 7/1/91
 //
 //	fpsp_ovfl --- FPSP handler for overflow exception
 //
 //	Overflow occurs when a floating-point intermediate result is
 //	too large to be represented in a floating-point data register,
 //	or when storing to memory, the contents of a floating-point
 //	data register are too large to be represented in the
 //	destination format.
 //		
 // Trap disabled results
 //
 // If the instruction is move_out, then garbage is stored in the
 // destination.  If the instruction is not move_out, then the
 // destination is not affected.  For 68881 compatibility, the
 // following values should be stored at the destination, based
 // on the current rounding mode:
 //
 //  RN	Infinity with the sign of the intermediate result.
 //  RZ	Largest magnitude number, with the sign of the
 //	intermediate result.
 //  RM   For pos overflow, the largest pos number. For neg overflow,
 //	-infinity
 //  RP   For pos overflow, +infinity. For neg overflow, the largest
 //	neg number
 //
 // Trap enabled results
 // All trap disabled code applies.  In addition the exceptional
 // operand needs to be made available to the users exception handler
 // with a bias of $6000 subtracted from the exponent.
 //
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 X_OVFL:	//idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	ovf_r_x2
 	|xref	ovf_r_x3
 	|xref	store
 	|xref	real_ovfl
 	|xref	real_inex
 	|xref	fpsp_done
 	|xref	g_opcls
 	|xref	b1238_fix
 	.global	fpsp_ovfl
 //
 // fpsp_ovfl --- entry point of the overflow exception handler.
 // Builds the local frame, saves the fpu state and the registers the
 // handler uses, lets ovf_adj compute and store the result, and then
 // either exits to real_ovfl (overflow trap enabled) or falls to
 // ck_inex to decide between real_inex and fpsp_done.
 //
 fpsp_ovfl:
 	link		%a6,#-LOCAL_SIZE	//allocate the FPSP local frame
 	fsave		-(%a7)		//push the fpu internal state frame
 	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)	//save d0-d1/a0-a1
 	fmovemx	%fp0-%fp3,USER_FP0(%a6)	//save fp0-fp3
 	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)	//save fp control regs
 //
 //	The 040 doesn't set the AINEX bit in the FPSR, the following
 //	line temporarily rectifies this error.
 //
 	bsetb	#ainex_bit,FPSR_AEXCEPT(%a6)
 //
 	bsrl	ovf_adj		//compute the overflow result & store it
 //
 //	if overflow traps not enabled check for inexact exception
 //
 	btstb	#ovfl_bit,FPCR_ENABLE(%a6)
 	beqs	ck_inex	
 //
 // Overflow trap enabled.  For an E3 exception, clear the dirty bit
 // of the destination register in the frame and call b1238_fix
 // before restoring.
 //
 	btstb		#E3,E_BYTE(%a6)
 	beqs		no_e3_1		//E3 clear: skip the E3-only fixups
 	bfextu		CMDREG3B(%a6){#6:#3},%d0	//get dest reg no
 	bclrb		%d0,FPR_DIRTY_BITS(%a6)	//clr dest dirty bit
 	bsrl		b1238_fix
 	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	//copy saved FPSR to FPSR_SHADOW
 	orl		#sx_mask,E_BYTE(%a6)	//set the sx_mask bit(s) in E_BYTE
 no_e3_1:
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	frestore	(%a7)+		//hand the fsave frame back to the fpu
 	unlk		%a6
 	bral		real_ovfl	//jump (not call) to the kernel handler
 //
 // It is possible to have either inex2 or inex1 exceptions with the
 // ovfl.  If the inex enable bit is set in the FPCR, and either
 // inex2 or inex1 occurred, we must clean up and branch to the
 // real inex handler.
 //
 //
 // NOTE: in this handler only the INEX2 enable bit of the FPCR is
 // tested; the check against FPSR_EXCEPT used by the other handlers
 // is commented out below.
 //
 ck_inex:
 //	move.b		FPCR_ENABLE(%a6),%d0
 //	and.b		FPSR_EXCEPT(%a6),%d0
 //	andi.b		#$3,%d0
 	btstb		#inex2_bit,FPCR_ENABLE(%a6)
 	beqs		ovfl_exit	//inex2 not enabled: normal exit
 //
 // Inexact enabled, and we must take an inexact exception.
 //
 take_inex:
 	btstb		#E3,E_BYTE(%a6)
 	beqs		no_e3_2		//E3 clear: skip the E3-only fixups
 	bfextu		CMDREG3B(%a6){#6:#3},%d0	//get dest reg no
 	bclrb		%d0,FPR_DIRTY_BITS(%a6)	//clr dest dirty bit
 	bsrl		b1238_fix
 	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	//copy saved FPSR to FPSR_SHADOW
 	orl		#sx_mask,E_BYTE(%a6)	//set the sx_mask bit(s) in E_BYTE
 no_e3_2:
 	moveb		#INEX_VEC,EXC_VEC+1(%a6)	//overwrite vector byte with INEX_VEC
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	frestore	(%a7)+		//hand the fsave frame back to the fpu
 	unlk		%a6
 	bral		real_inex	//jump (not call) to the kernel handler
 //
 // Normal exit (no trap to take).  The E3 case must frestore the
 // modified frame; the E1 case leaves without an frestore.
 //
 ovfl_exit:
 	bclrb	#E3,E_BYTE(%a6)	//test and clear E3 bit
 	beqs	e1_set		//E3 was clear: take the E1 exit
 //
 // Clear dirty bit on dest register in the frame before branching
 // to b1238_fix.
 //
 	bfextu		CMDREG3B(%a6){#6:#3},%d0	//get dest reg no
 	bclrb		%d0,FPR_DIRTY_BITS(%a6)	//clr dest dirty bit
 	bsrl		b1238_fix		//test for bug1238 case
 	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	//copy saved FPSR to FPSR_SHADOW
 	orl		#sx_mask,E_BYTE(%a6)	//set the sx_mask bit(s) in E_BYTE
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	frestore	(%a7)+		//hand the fsave frame back to the fpu
 	unlk		%a6
 	bral		fpsp_done
 //
 // E1 exit: no frestore; unlk reloads a7 from a6, which discards the
 // fsave frame pushed at entry.
 //
 e1_set:
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	unlk		%a6
 	bral		fpsp_done
 //
 //	ovf_adj
 //
 //
 // ovf_adj selects the exceptional operand (WBTEMP for E3, ETEMP
 // otherwise), moves its sign from the exponent word into LOCAL_SGN,
 // calls ovf_r_x3 (opclass 3) or ovf_r_x2 (all others) to build the
 // result, and leaves through store.  store is reached with bral, so
 // its rts returns directly to the caller of ovf_adj.
 //
 ovf_adj:
 //
 // Have a0 point to the correct operand. 
 //
 	btstb	#E3,E_BYTE(%a6)	//test E3 bit
 	beqs	ovf_e1
 	lea	WBTEMP(%a6),%a0
 	bras	ovf_com
 ovf_e1:
 	lea	ETEMP(%a6),%a0
 ovf_com:
 	bclrb	#sign_bit,LOCAL_EX(%a0)	//strip sign from exponent word
 	sne	LOCAL_SGN(%a0)	//LOCAL_SGN = $ff if the sign was set, else 0
 	bsrl	g_opcls		//returns opclass in d0
 	cmpiw	#3,%d0		//check for opclass3
 	bnes	not_opc011
 //
 // FPSR_CC is saved and restored because ovf_r_x3 affects it. The
 // CCs are defined to be 'not affected' for the opclass3 instruction.
 //
 	moveb	FPSR_CC(%a6),L_SCR1(%a6)
 	bsrl	ovf_r_x3	//returns a0 pointing to result
 	moveb	L_SCR1(%a6),FPSR_CC(%a6)
 	bral	store		//stores to memory or register
 not_opc011:
 	bsrl	ovf_r_x2	//returns a0 pointing to result
 	bral	store		//stores to memory or register
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/x_snan.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_snan.s
@@ -0,0 +1,277 @@
 //
 //	x_snan.sa 3.3 7/1/91
 //
 // fpsp_snan --- FPSP handler for signalling NAN exception
 //
 // SNAN for float -> integer conversions (integer conversion of
 // an SNAN) is a non-maskable run-time exception.
 //
 // For trap disabled the 040 does the following:
 // If the dest data format is s, d, or x, then the SNAN bit in the NAN
 // is set to one and the resulting non-signaling NAN (truncated if
 // necessary) is transferred to the dest.  If the dest format is b, w,
 // or l, then garbage is written to the dest (actually the upper 32 bits
 // of the mantissa are sent to the integer unit).
 //
 // For trap enabled the 040 does the following:
 // If the inst is move_out, then the results are the same as for trap 
 // disabled with the exception posted.  If the instruction is not move_
 // out, the dest. is not modified, and the exception is posted.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 X_SNAN:	//idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	get_fline
 	|xref	mem_write
 	|xref	real_snan
 	|xref	real_inex
 	|xref	fpsp_done
 	|xref	reg_dest
 	.global	fpsp_snan
 //
 // fpsp_snan --- entry point of the signalling NAN exception handler.
 // Saves state, then branches to ena when the SNAN trap is enabled;
 // otherwise calls move_out and falls into ck_inex.
 //
 fpsp_snan:
 	link		%a6,#-LOCAL_SIZE	//allocate the FPSP local frame
 	fsave		-(%a7)		//push the fpu internal state frame
 	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)	//save d0-d1/a0-a1
 	fmovemx	%fp0-%fp3,USER_FP0(%a6)	//save fp0-fp3
 	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)	//save fp control regs
 //
 // Check if trap enabled
 //
 	btstb		#snan_bit,FPCR_ENABLE(%a6)
 	bnes		ena		//If enabled, then branch
 	bsrl		move_out	//else SNAN disabled
 //
 // It is possible to have an inex1 exception with the
 // snan.  If the inex enable bit is set in the FPCR, and either
 // inex2 or inex1 occurred, we must clean up and branch to the
 // real inex handler.
 //
 ck_inex:
 	moveb	FPCR_ENABLE(%a6),%d0	//d0 = FPCR exception enable byte
 	andb	FPSR_EXCEPT(%a6),%d0	//keep enabled bits that are also reported
 	andib	#0x3,%d0		//look at the low two bits only (inex2/inex1)
 	beq	end_snan		//none enabled and reported: normal exit
 //
 // Inexact enabled and reported, and we must take an inexact exception.
 //
 take_inex:
 	moveb		#INEX_VEC,EXC_VEC+1(%a6)	//overwrite vector byte with INEX_VEC
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	frestore	(%a7)+		//hand the fsave frame back to the fpu
 	unlk		%a6
 	bral		real_inex	//jump (not call) to the kernel handler
 //
 // SNAN is enabled.  Check if inst is move_out.
 // Make any corrections to the 040 output as necessary.
 //
 //
 // ena: for a move_out instruction the destination is written first
 // (move_out); any other instruction goes to not_out, which sets the
 // N condition code bit and then rejoins at report_snan.
 //
 // report_snan: grow the fsave frame on the stack into a busy-size
 // frame (zero-filled, original version byte kept), restore the
 // saved registers and jump to real_snan.
 //
 ena:
 	btstb		#5,CMDREG1B(%a6) //if set, inst is move out
 	beq		not_out
 	bsrl		move_out
 report_snan:
 	moveb		(%a7),VER_TMP(%a6)	//remember the frame's version byte
 	cmpib		#VER_40,(%a7)	//test for orig unimp frame
 	bnes		ck_rev
 	moveql		#13,%d0		//need to zero 14 lwords
 	bras		rep_con
 ck_rev:
 	moveql		#11,%d0		//need to zero 12 lwords
 rep_con:
 	clrl		(%a7)		//clear the current top lword first
 loop1:
 	clrl		-(%a7)		//clear and dec a7
 	dbra		%d0,loop1	//runs d0+1 times
 	moveb		VER_TMP(%a6),(%a7) //format a busy frame
 	moveb		#BUSY_SIZE-4,1(%a7)	//write busy size
 	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	//copy saved FPSR to FPSR_SHADOW
 	orl		#sx_mask,E_BYTE(%a6)	//set the sx_mask bit(s) in E_BYTE
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	frestore	(%a7)+		//hand the rebuilt frame to the fpu
 	unlk		%a6
 	bral		real_snan	//jump (not call) to the kernel handler
 //
 // Exit snan handler by expanding the unimp frame into a busy frame
 //
 //
 // Same frame expansion as report_snan, but E1 is cleared first and
 // the exit is through fpsp_done instead of real_snan.
 //
 end_snan:
 	bclrb		#E1,E_BYTE(%a6)	//clear the E1 exception bit
 	moveb		(%a7),VER_TMP(%a6)	//remember the frame's version byte
 	cmpib		#VER_40,(%a7)	//test for orig unimp frame
 	bnes		ck_rev2
 	moveql		#13,%d0		//need to zero 14 lwords
 	bras		rep_con2
 ck_rev2:
 	moveql		#11,%d0		//need to zero 12 lwords
 rep_con2:
 	clrl		(%a7)		//clear the current top lword first
 loop2:
 	clrl		-(%a7)		//clear and dec a7
 	dbra		%d0,loop2	//runs d0+1 times
 	moveb		VER_TMP(%a6),(%a7) //format a busy frame
 	moveb		#BUSY_SIZE-4,1(%a7) //write busy size
 	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	//copy saved FPSR to FPSR_SHADOW
 	orl		#sx_mask,E_BYTE(%a6)	//set the sx_mask bit(s) in E_BYTE
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	frestore	(%a7)+		//hand the rebuilt frame to the fpu
 	unlk		%a6
 	bral		fpsp_done
 //
 // Move_out 
 //
 //
 // Dispatch on the 3-bit format field of CMDREG1B.  Only the integer
 // destination formats are handled here (0 = long, 4 = word,
 // 6 = byte); for any other value the routine returns without
 // writing anything.  a0 is loaded with the destination <ea> for the
 // sto_* routines.
 //
 move_out:
 	movel		EXC_EA(%a6),%a0	//get <ea> from exc frame
 	bfextu		CMDREG1B(%a6){#3:#3},%d0 //move rx field to d0{2:0}
 	cmpil		#0,%d0		//check for long
 	beqs		sto_long	//branch if move_out long
 	cmpil		#4,%d0		//check for word
 	beqs		sto_word	//branch if move_out word
 	cmpil		#6,%d0		//check for byte
 	beqs		sto_byte	//branch if move_out byte
 //
 // Not byte, word or long
 //
 	rts
 //
 // sto_long / sto_word / sto_byte --- integer move_out of an SNAN
 //
 // The three entry points differ only in the byte count placed in d0
 // (4, 2 or 1); everything else is shared in sto_snan_com.
 //
 // Input:
 //	a0 = user destination <ea>; 0 means the destination is a
 //	     data register
 //	a6 = FPSP local frame
 // Output:
 //	The most significant 4, 2 or 1 byte(s) of the upper mantissa
 //	lword of ETEMP, with bit 30 set, are written to the
 //	destination (through mem_write for memory, through wrt_dn
 //	for a data register).
 //	d0, d1, a0 and a1 are used as scratch.
 //
 sto_long:
 	moveql		#4,%d0		//load byte count for long
 	bras		sto_snan_com
 sto_word:
 	moveql		#2,%d0		//load byte count for word
 	bras		sto_snan_com
 sto_byte:
 	moveql		#1,%d0		//load byte count for byte
 //
 // Get the 32 most significant bits of etemp mantissa
 //
 sto_snan_com:
 	movel		ETEMP_HI(%a6),%d1
 //
 // Set the SNAN bit (bit 30 of the upper mantissa lword), which
 // makes the NAN non-signaling
 //
 	bsetl		#30,%d1
 //
 // Store to the users destination address
 //
 	tstl		%a0		//check if <ea> is 0
 	beqs		wrt_dn		//destination is a data register
 	movel		%d1,-(%a7)	//move the snan onto the stack
 	movel		%a0,%a1		//load dest addr into a1
 	movel		%a7,%a0		//src = most significant byte of the snan
 	bsrl		mem_write	//write d0 bytes of it to user memory
 	movel		(%a7)+,%d1	//clear off stack
 	rts
 //
 //	wrt_dn --- write to a data register
 //
 //	We get here with D1 containing the data to write and D0 the
 //	number of bytes to write: 1=byte,2=word,4=long.
 //
 //
 // Parks the data in L_SCR1, fetches the f-line word to obtain the
 // data register number (its low three bits), then builds the
 // size:register code in d1 for reg_dest: register number alone for
 // byte, +0x8 for word, +0x10 for long.  reg_dest is reached with
 // bral, so its rts returns to the caller of the sto_* routine.
 //
 wrt_dn:
 	movel		%d1,L_SCR1(%a6)	//data
 	movel		%d0,-(%a7)	//size
 	bsrl		get_fline	//returns fline word in d0
 	movel		%d0,%d1
 	andil		#0x7,%d1		//d1 now holds register number
 	movel		(%sp)+,%d0	//get original size
 	cmpil		#4,%d0
 	beqs		wrt_long
 	cmpil		#2,%d0
 	bnes		wrt_byte
 wrt_word:
 	orl		#0x8,%d1	//size code for word
 	bral		reg_dest
 wrt_long:
 	orl		#0x10,%d1	//size code for long
 	bral		reg_dest
 wrt_byte:
 	bral		reg_dest	//size code for byte is 0: d1 unchanged
 //
 // Check if it is a src nan or dst nan
 //
 //
 // Not a move_out.  Pick the operand whose sign is reported: the
 // destination (FPTEMP) when it is a NAN whose bit 6 of FPTEMP_HI is
 // clear, otherwise the source (ETEMP).  The N bit of FPSR_CC is
 // then set or cleared from bit 15 of that operand's exponent word,
 // and control rejoins at report_snan.
 //
 not_out:
 	movel		DTAG(%a6),%d0	
 	bfextu		%d0{#0:#3},%d0	//isolate dtag in lsbs
 	cmpib		#3,%d0		//check for nan in destination
 	bnes		issrc		//dest is not a nan: use the source
 dst_nan:
 	btstb		#6,FPTEMP_HI(%a6) //check if dest nan is an snan
 	bnes		issrc		//bit set: dest is not an snan, use the source
 	movew		FPTEMP_EX(%a6),%d0	//dest snan: take its exponent word
 	bras		cont
 issrc:
 	movew		ETEMP_EX(%a6),%d0	//take the source exponent word
 cont:
 	btstl		#15,%d0		//test for sign of snan
 	beqs		clr_neg
 	bsetb		#neg_bit,FPSR_CC(%a6)	//negative: set N
 	bra		report_snan
 clr_neg:
 	bclrb		#neg_bit,FPSR_CC(%a6)	//positive: clear N
 	bra		report_snan
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/x_store.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_store.s
@@ -0,0 +1,256 @@
 //
 //	x_store.sa 3.2 1/24/91
 //
 //	store --- store operand to memory or register
 //
 //	Used by underflow and overflow handlers.
 //
 //	a0 = points to fp value to be stored.
 //	a6 = points to the FPSP local stack frame.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 X_STORE:	//idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 //
 // fpreg_mask --- indexed by fp register number 0-7; gives the
 // one-bit dynamic register mask that sto_fp passes to fmovemx
 // (entry 0 = 0x80 ... entry 7 = 0x01).
 //
 fpreg_mask:
 	.byte	0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01
 	.include "fpsp.defs"
 	|xref	mem_write
 	|xref	get_fline
 	|xref	g_opcls
 	|xref	g_dfmtou
 	|xref	reg_dest
 	.global	dest_ext
 	.global	dest_dbl
 	.global	dest_sgl
 	.global	store
 //
 // store --- write the value at (a0) to its destination.
 //
 // Input:
 //	a0 = pointer to the value; its sign is held in LOCAL_SGN(a0)
 //	a6 = FPSP local frame
 //
 // E3 exception: the destination is the fp register named in
 // CMDREG3B.  E1 exception: see E1_sto.  For a register destination
 // the sign is put back into the exponent word, the value is loaded
 // into the register with a dynamic-mask fmovemx, and if the
 // register is fp0-fp3 the new value is also copied to the matching
 // USER_FPn slot.
 //
 store:
 	btstb	#E3,E_BYTE(%a6)
 	beqs	E1_sto
 E3_sto:
 	movel	CMDREG3B(%a6),%d0
 	bfextu	%d0{#6:#3},%d0		//isolate dest. reg from cmdreg3b
 sto_fp:
 	lea	fpreg_mask,%a1
 	moveb	(%a1,%d0.w),%d0		//convert reg# to dynamic register mask
 	tstb	LOCAL_SGN(%a0)
 	beqs	is_pos
 	bsetb	#sign_bit,LOCAL_EX(%a0)	//negative: put sign back in exponent word
 is_pos:
 	fmovemx (%a0),%d0		//move to correct register
 //
 //	if fp0-fp3 is being modified, we must put a copy
 //	in the USER_FPn variable on the stack because all exception
 //	handlers restore fp0-fp3 from there.
 //
 	cmpb	#0x80,%d0		//mask for fp0?
 	bnes	not_fp0
 	fmovemx %fp0-%fp0,USER_FP0(%a6)
 	rts
 not_fp0:
 	cmpb	#0x40,%d0		//mask for fp1?
 	bnes	not_fp1
 	fmovemx %fp1-%fp1,USER_FP1(%a6)
 	rts
 not_fp1:
 	cmpb	#0x20,%d0		//mask for fp2?
 	bnes	not_fp2
 	fmovemx %fp2-%fp2,USER_FP2(%a6)
 	rts
 not_fp2:
 	cmpb	#0x10,%d0		//mask for fp3?
 	bnes	not_fp3
 	fmovemx %fp3-%fp3,USER_FP3(%a6)
 	rts
 not_fp3:
 	rts			//fp4-fp7: no stack copy needed
 //
 // E1 exception.  Opclass 3 stores to the user's <ea> in the
 // destination format returned by g_dfmtou; every other opclass
 // stores to the fp register named in CMDREG1B through sto_fp.
 // For opclass 3, a1 takes over the source pointer and a0 the
 // destination address; a format that is neither extended nor
 // single falls through into dest_dbl.
 //
 E1_sto:
 	bsrl	g_opcls		//returns opclass in d0
 	cmpib	#3,%d0
 	beq	opc011		//branch if opclass 3
 	movel	CMDREG1B(%a6),%d0
 	bfextu	%d0{#6:#3},%d0	//extract destination register
 	bras	sto_fp
 opc011:
 	bsrl	g_dfmtou	//returns dest format in d0
 //				;ext=00, sgl=01, dbl=10
 	movel	%a0,%a1		//save source addr in a1
 	movel	EXC_EA(%a6),%a0	//get the address
 	cmpil	#0,%d0		//if dest format is extended
 	beq	dest_ext	//then branch
 	cmpil	#1,%d0		//if dest format is single
 	beqs	dest_sgl	//then branch
 //
 //	fall through to dest_dbl
 //
 //
 //	dest_dbl --- write double precision value to user space
 //
 //Input
 //	a0 -> destination address
 //	a1 -> source in extended precision
 //Output
 //	a0 -> destroyed
 //	a1 -> destroyed
 //	d0 -> 0
 //
 //Changes extended precision to double precision.
 // Note: no attempt is made to round the extended value to double.
 //	dbl_sign = ext_sign
 //	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)
 //	get rid of ext integer bit
 //	dbl_mant = ext_mant{62:11}
 //
 //	    	---------------   ---------------    ---------------
 //  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |
 //	    	---------------   ---------------    ---------------
 //	   	 95	    64    63 62	      32      31     11	  0
 //				     |			     |
 //				     |			     |
 //				     |			     |
 //		 	             v   		     v
 //	    		      ---------------   ---------------
 //  double   ->  	      |s|exp| mant  |   |  mant       |
 //	    		      ---------------   ---------------
 //	   	 	      63     51   32   31	       0
 //
 //
 // The double is assembled in place over the extended operand: the
 // high lword (sign, exponent, upper 20 mantissa bits) is written at
 // LOCAL_EX(a1) and the low lword at LOCAL_HI(a1); a1 is then passed
 // to mem_write as the source of 8 bytes.
 // NOTE(review): this relies on LOCAL_EX and LOCAL_HI being adjacent
 // lwords starting at (a1) -- confirm against fpsp.defs.
 //
 dest_dbl:
 	clrl	%d0		//clear d0
 	movew	LOCAL_EX(%a1),%d0	//get exponent
 	subw	#0x3fff,%d0	//subtract extended precision bias
 	cmpw	#0x4000,%d0	//check if inf
 	beqs	inf		//if so, special case
 	addw	#0x3ff,%d0	//add double precision bias
 	swap	%d0		//d0 now in upper word
 	lsll	#4,%d0		//d0 now in proper place for dbl prec exp
 	tstb	LOCAL_SGN(%a1)	
 	beqs	get_mant	//if positive, go process mantissa
 	bsetl	#31,%d0		//if negative, put in sign information
 //				; before continuing
 	bras	get_mant	//go process mantissa
 inf:
 	movel	#0x7ff00000,%d0	//load dbl inf exponent
 	clrl	LOCAL_HI(%a1)	//clear msb
 	tstb	LOCAL_SGN(%a1)
 	beqs	dbl_inf		//if positive, go ahead and write it
 	bsetl	#31,%d0		//if negative put in sign information
 dbl_inf:
 	movel	%d0,LOCAL_EX(%a1)	//put the new exp back on the stack
 	bras	dbl_wrt
 get_mant:
 	movel	LOCAL_HI(%a1),%d1	//get ms mantissa
 	bfextu	%d1{#1:#20},%d1	//get upper 20 bits of ms
 //				;(the integer bit is skipped)
 	orl	%d1,%d0		//put these bits in ms word of double
 	movel	%d0,LOCAL_EX(%a1)	//put the new exp back on the stack
 	movel	LOCAL_HI(%a1),%d1	//get ms mantissa
 	movel	#21,%d0		//load shift count
 	lsll	%d0,%d1		//put lower 11 bits in upper bits
 	movel	%d1,LOCAL_HI(%a1)	//build lower lword in memory
 	movel	LOCAL_LO(%a1),%d1	//get ls mantissa
 	bfextu	%d1{#0:#21},%d0	//get ls 21 bits of double
 	orl	%d0,LOCAL_HI(%a1)	//put them in double result
 dbl_wrt:
 	movel	#0x8,%d0		//byte count for double precision number
 	exg	%a0,%a1		//a0=supervisor source, a1=user dest
 	bsrl	mem_write	//move the number to the user's memory
 	rts
 //
 //	dest_sgl --- write single precision value to user space
 //
 //Input
 //	a0 -> destination address
 //	a1 -> source in extended precision
 //
 //Output
 //	a0 -> destroyed
 //	a1 -> destroyed
 //	d0 -> 0
 //
 //Changes extended precision to single precision.
 //	sgl_sign = ext_sign
 //	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)
 //	get rid of ext integer bit
 //	sgl_mant = ext_mant{62:40}
 //
 //	    	---------------   ---------------    ---------------
 //  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |
 //	    	---------------   ---------------    ---------------
 //	   	 95	    64    63 62	   40 32      31     12	  0
 //				     |	   |
 //				     |	   |
 //				     |	   |
 //		 	             v     v
 //	    		      ---------------
 //  single   ->  	      |s|exp| mant  |
 //	    		      ---------------
 //	   	 	      31     22     0
 //
 //
 // The single is assembled in d0 (sign, 8-bit exponent, upper 23
 // mantissa bits) and parked in L_SCR1.  A non-zero destination
 // address is written through mem_write; a zero address means the
 // destination is a data register, handled by sgl_Dn.
 //
 dest_sgl:
 	clrl	%d0
 	movew	LOCAL_EX(%a1),%d0	//get exponent
 	subw	#0x3fff,%d0	//subtract extended precision bias
 	cmpw	#0x4000,%d0	//check if inf
 	beqs	sinf		//if so, special case
 	addw	#0x7f,%d0		//add single precision bias
 	swap	%d0		//put exp in upper word of d0
 	lsll	#7,%d0		//shift it into single exp bits
 	tstb	LOCAL_SGN(%a1)	
 	beqs	get_sman	//if positive, continue
 	bsetl	#31,%d0		//if negative, put in sign first
 	bras	get_sman	//get mantissa
 sinf:
 	movel	#0x7f800000,%d0	//load single inf exp to d0
 	tstb	LOCAL_SGN(%a1)
 	beqs	sgl_wrt		//if positive, continue
 	bsetl	#31,%d0		//if negative, put in sign info
 	bras	sgl_wrt
 get_sman:
 	movel	LOCAL_HI(%a1),%d1	//get ms mantissa
 	bfextu	%d1{#1:#23},%d1	//get upper 23 bits of ms
 //				;(the integer bit is skipped)
 	orl	%d1,%d0		//put these bits in ms word of single
 sgl_wrt:
 	movel	%d0,L_SCR1(%a6)	//park the finished single in L_SCR1
 	movel	#0x4,%d0		//byte count for single precision number
 	tstl	%a0		//users destination address
 	beqs	sgl_Dn		//destination is a data register
 	exg	%a0,%a1		//a1=user dest (a0 is reloaded below)
 	leal	L_SCR1(%a6),%a0	//point a0 to data
 	bsrl	mem_write	//move the number to the user's memory
 	rts
 sgl_Dn:
 	bsrl	get_fline	//returns fline word in d0
 	andw	#0x7,%d0		//isolate register number
 	movel	%d0,%d1		//d1 has size:reg formatted for reg_dest
 	orl	#0x10,%d1		//reg_dest wants size added to reg#
 //				;(0x10 is the same code wrt_long in
 //				;x_snan.s uses for a 4-byte write)
 	bral	reg_dest	//rts in reg_dest will
 //				;return to caller of dest_sgl
 //
 //	dest_ext --- write extended precision value to user space
 //
 //Input
 //	a0 -> destination address
 //	a1 -> source in extended precision, sign held in LOCAL_SGN
 //
 // The sign is merged back into the exponent word and the LOCAL_SGN
 // byte is cleared, then 12 bytes are written with mem_write.
 //
 dest_ext:
 	tstb	LOCAL_SGN(%a1)	//put back sign into exponent word
 	beqs	dstx_cont
 	bsetb	#sign_bit,LOCAL_EX(%a1)
 dstx_cont:
 	clrb	LOCAL_SGN(%a1)	//clear out the sign byte
 	movel	#0x0c,%d0		//byte count for extended number
 	exg	%a0,%a1		//a0=supervisor source, a1=user dest
 	bsrl	mem_write	//move the number to the user's memory
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/x_unfl.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_unfl.s
@@ -0,0 +1,269 @@
 //
 //	x_unfl.sa 3.4 7/1/91
 //
 //	fpsp_unfl --- FPSP handler for underflow exception
 //
 // Trap disabled results
 //	For 881/2 compatibility, sw must denormalize the intermediate 
 // result, then store the result.  Denormalization is accomplished 
 // by taking the intermediate result (which is always normalized) and 
 // shifting the mantissa right while incrementing the exponent until 
 // it is equal to the denormalized exponent for the destination 
 // format.  After denormalization, the result is rounded to the 
 // destination format.
 //		
 // Trap enabled results
 // 	All trap disabled code applies.	In addition the exceptional 
 // operand needs to be made available to the user with a bias of $6000 
 // added to the exponent.
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 X_UNFL:	//idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	denorm
 	|xref	round
 	|xref	store
 	|xref	g_rndpr
 	|xref	g_opcls
 	|xref	g_dfmtou
 	|xref	real_unfl
 	|xref	real_inex
 	|xref	fpsp_done
 	|xref	b1238_fix
 	.global	fpsp_unfl
 //
 // fpsp_unfl --- entry point of the underflow exception handler.
 // Builds the local frame, saves the fpu state and the registers the
 // handler uses, lets unf_res produce and store the result, and then
 // either exits to real_unfl (underflow trap enabled) or falls to
 // ck_inex to decide between real_inex and fpsp_done.
 //
 fpsp_unfl:
 	link		%a6,#-LOCAL_SIZE	//allocate the FPSP local frame
 	fsave		-(%a7)		//push the fpu internal state frame
 	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)	//save d0-d1/a0-a1
 	fmovemx	%fp0-%fp3,USER_FP0(%a6)	//save fp0-fp3
 	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)	//save fp control regs
 //
 	bsrl		unf_res	//denormalize, round & store interm op
 //
 // If underflow exceptions are not enabled, check for inexact
 // exception
 //
 	btstb		#unfl_bit,FPCR_ENABLE(%a6)
 	beqs		ck_inex
 	btstb		#E3,E_BYTE(%a6)
 	beqs		no_e3_1		//E3 clear: skip the E3-only fixups
 //
 // Clear dirty bit on dest register in the frame before branching
 // to b1238_fix.
 //
 	bfextu		CMDREG3B(%a6){#6:#3},%d0	//get dest reg no
 	bclrb		%d0,FPR_DIRTY_BITS(%a6)	//clr dest dirty bit
 	bsrl		b1238_fix		//test for bug1238 case
 	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	//copy saved FPSR to FPSR_SHADOW
 	orl		#sx_mask,E_BYTE(%a6)	//set the sx_mask bit(s) in E_BYTE
 no_e3_1:
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	frestore	(%a7)+		//hand the fsave frame back to the fpu
 	unlk		%a6
 	bral		real_unfl	//jump (not call) to the kernel handler
 //
 // It is possible to have either inex2 or inex1 exceptions with the
 // unfl.  If the inex enable bit is set in the FPCR, and either
 // inex2 or inex1 occurred, we must clean up and branch to the
 // real inex handler.
 //
 ck_inex:
 	moveb		FPCR_ENABLE(%a6),%d0	//d0 = FPCR exception enable byte
 	andb		FPSR_EXCEPT(%a6),%d0	//keep enabled bits that are also reported
 	andib		#0x3,%d0	//look at the low two bits only (inex2/inex1)
 	beqs		unfl_done	//none enabled and reported: normal exit
 //
 // Inexact enabled and reported, and we must take an inexact exception
 //	
 take_inex:
 	btstb		#E3,E_BYTE(%a6)
 	beqs		no_e3_2		//E3 clear: skip the E3-only fixups
 //
 // Clear dirty bit on dest register in the frame before branching
 // to b1238_fix.
 //
 	bfextu		CMDREG3B(%a6){#6:#3},%d0	//get dest reg no
 	bclrb		%d0,FPR_DIRTY_BITS(%a6)	//clr dest dirty bit
 	bsrl		b1238_fix		//test for bug1238 case
 	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	//copy saved FPSR to FPSR_SHADOW
 	orl		#sx_mask,E_BYTE(%a6)	//set the sx_mask bit(s) in E_BYTE
 no_e3_2:
 	moveb		#INEX_VEC,EXC_VEC+1(%a6)	//overwrite vector byte with INEX_VEC
 	moveml         USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx        USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml        USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	frestore        (%a7)+		//hand the fsave frame back to the fpu
 	unlk            %a6
 	bral		real_inex	//jump (not call) to the kernel handler
 //
 // Normal exit (no trap to take).  The E3 case must frestore the
 // modified frame; the E1 case leaves without an frestore.
 //
 unfl_done:
 	bclrb		#E3,E_BYTE(%a6)	//test and clear E3 bit
 	beqs		e1_set		//E3 was clear: take the E1 exit
 //
 // Clear dirty bit on dest register in the frame before branching
 // to b1238_fix.
 //
 	bfextu		CMDREG3B(%a6){#6:#3},%d0		//get dest reg no
 	bclrb		%d0,FPR_DIRTY_BITS(%a6)	//clr dest dirty bit
 	bsrl		b1238_fix		//test for bug1238 case
 	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	//copy saved FPSR to FPSR_SHADOW
 	orl		#sx_mask,E_BYTE(%a6)	//set the sx_mask bit(s) in E_BYTE
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	frestore	(%a7)+		//hand the fsave frame back to the fpu
 	unlk		%a6
 	bral		fpsp_done
 //
 // E1 exit: no frestore; unlk reloads a7 from a6, which discards the
 // fsave frame pushed at entry.
 //
 e1_set:
 	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	//reload d0-d1/a0-a1
 	fmovemx	USER_FP0(%a6),%fp0-%fp3	//reload fp0-fp3
 	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	//reload fp control regs
 	unlk		%a6
 	bral		fpsp_done
 //
 //	unf_res --- underflow result calculation
 //
 //
 // Pushes a lword holding the rounding precision in its upper word
 // (lower word zero), selects the exceptional operand, moves the
 // operand's sign into LOCAL_SGN, denormalizes it, merges the FPCR
 // rounding mode with the pushed precision and rounds.  Execution
 // then continues into unf_store.
 //
 unf_res:
 	bsrl		g_rndpr		//returns RND_PREC in d0 0=ext,
 //					;1=sgl, 2=dbl
 //					;we need the RND_PREC in the
 //					;upper word for round
 	movew		#0,-(%a7)	//lower word of the pushed lword
 	movew		%d0,-(%a7)	//copy RND_PREC to stack
 //
 //
 // If the exception bit set is E3, the exceptional operand from the
 // fpu is in WBTEMP; else it is in FPTEMP.
 //
 	btstb		#E3,E_BYTE(%a6)
 	beqs		unf_E1
 unf_E3:
 	lea		WBTEMP(%a6),%a0	//a0 now points to operand
 //
 // Test for fsgldiv and fsglmul.  If the inst was one of these, then
 // force the precision to extended for the denorm routine.  Use
 // the user's precision for the round routine.
 //
 // NOTE(review): the code below clears d0 (0=ext) for denorm but
 // overwrites the pushed precision with 1 (sgl), so round is given
 // single rather than the user's precision -- confirm which of the
 // two descriptions is intended.
 //
 	movew		CMDREG3B(%a6),%d1	//check for fsgldiv or fsglmul
 	andiw		#0x7f,%d1
 	cmpiw		#0x30,%d1		//check for sgldiv
 	beqs		unf_sgl
 	cmpiw		#0x33,%d1		//check for sglmul
 	bnes		unf_cont	//if not, use fpcr prec in round
 unf_sgl:
 	clrl		%d0
 	movew		#0x1,(%a7)	//override g_rndpr precision
 //					;force single
 	bras		unf_cont
 unf_E1:
 	lea		FPTEMP(%a6),%a0	//a0 now points to operand
 unf_cont:
 	bclrb		#sign_bit,LOCAL_EX(%a0)	//clear sign bit
 	sne		LOCAL_SGN(%a0)		//store sign
 //						;($ff if it was set, else 0)
 	bsrl		denorm		//returns denorm, a0 points to it
 //
 // WARNING:
 //				;d0 has guard,round sticky bit
 //				;make sure that it is not corrupted
 //				;before it reaches the round subroutine
 //				;also ensure that a0 isn't corrupted
 //
 // Set up d1 for round subroutine d1 contains the PREC/MODE
 // information respectively on upper/lower register halves.
 //
 	bfextu		FPCR_MODE(%a6){#2:#2},%d1	//get mode from FPCR
 //						;mode in lower d1
 	addl		(%a7)+,%d1		//merge PREC/MODE
 //						;(pops the lword pushed above)
 //
 // WARNING: a0 and d0 are assumed to be intact between the denorm and
 // round subroutines. All code between these two subroutines
 // must not corrupt a0 and d0.
 //
 //
 // Perform Round	
 //	Input:		a0 points to input operand
 //			d0{31:29} has guard, round, sticky
 //			d1{01:00} has rounding mode
 //			d1{17:16} has rounding precision
 //	Output:		a0 points to rounded operand
 //
 	bsrl		round		//returns rounded denorm at (a0)
 //
 // Differentiate between store to memory vs. store to register
 //
 //
 // Opclass 3 means the result goes to memory; any other opclass
 // goes to an fp register (not_opc011).
 //
 unf_store:
 	bsrl		g_opcls		//returns opclass in d0{2:0}
 	cmpib		#0x3,%d0
 	bnes		not_opc011
 //
 // At this point, a store to memory is pending
 //
 opc011:
 	bsrl		g_dfmtou	//returns dest format in d0
 	tstb		%d0
 	beqs		ext_opc011	//If extended, do not subtract
 // 				;If destination format is sgl/dbl, 
 	tstb		LOCAL_HI(%a0)	//If rounded result is normal,don't
 //					;subtract
 	bmis		ext_opc011	//msb of mantissa set: normalized
 	subqw		#1,LOCAL_EX(%a0)	//account for denorm bias vs.
 //				;normalized bias
 //				;          normalized   denormalized
 //				;single       $7f           $7e
 //				;double       $3ff          $3fe
 //
 ext_opc011:
 	bsrl		store		//stores to memory
 	bras		unf_done	//finish up
 //
 // At this point, a store to a float register is pending
 //
 //
 // Register destination: after the store, set Z in FPSR_CC when both
 // mantissa lwords of the result are zero and N when the sign bit of
 // its exponent word is set, then fall into unf_done.
 //
 not_opc011:
 	bsrl		store	//stores to float register
 //				;a0 is not corrupted on a store to a
 //				;float register.
 //
 // Set the condition codes according to result
 //
 	tstl		LOCAL_HI(%a0)	//check upper mantissa
 	bnes		ck_sgn
 	tstl		LOCAL_LO(%a0)	//check lower mantissa
 	bnes		ck_sgn
 	bsetb		#z_bit,FPSR_CC(%a6) //set condition codes if zero
 ck_sgn:
 	btstb 		#sign_bit,LOCAL_EX(%a0)	//check the sign bit
 	beqs		unf_done
 	bsetb		#neg_bit,FPSR_CC(%a6)	//negative result: set N
 //
 // Finish.  
 //
 //
 // The accrued underflow bit is set only when inex2 is also set in
 // the FPSR exception byte.  Returns to the caller of unf_res.
 //
 unf_done:
 	btstb		#inex2_bit,FPSR_EXCEPT(%a6)
 	beqs		no_aunfl	//result exact: leave aunfl alone
 	bsetb		#aunfl_bit,FPSR_AEXCEPT(%a6)
 no_aunfl:
 	rts
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/x_unimp.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_unimp.s
@@ -0,0 +1,77 @@
 //
 //	x_unimp.sa 3.3 7/1/91
 //
 //	fpsp_unimp --- FPSP handler for unimplemented instruction	
 //	exception.
 //
 // Invoked when the user program encounters a floating-point
 // op-code that hardware does not support.  Trap vector# 11
 // (See table 8-1 MC68030 User's Manual).
 //
 // 
 // Note: An fsave for an unimplemented inst. will create a short
 // fsave stack.
 //
 //  Input: 1. Six word stack frame for unimplemented inst, four word
 //            for illegal
 //            (See table 8-7 MC68030 User's Manual).
 //         2. Unimp (short) fsave state frame created here by fsave
 //            instruction.
 //
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 X_UNIMP:	//idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	get_op
 	|xref	do_func
 	|xref	sto_res
 	|xref	gen_except
 	|xref	fpsp_fmt_error
 	.global	fpsp_unimp
 	.global	uni_2
 //
 // fpsp_unimp --- entry point of the unimplemented instruction
 // handler.  uni_2 is a second global entry point for callers that
 // have already done the link and fsave themselves.
 //
 // After saving state and checking the fsave frame version, the
 // handler fetches the operand(s) (get_op), emulates the instruction
 // (do_func), stores the result unless STORE_FLG was set (sto_res),
 // and leaves through gen_except.
 //
 fpsp_unimp:
 	link		%a6,#-LOCAL_SIZE	//allocate the FPSP local frame
 	fsave		-(%a7)		//push the fpu internal state frame
 uni_2:
 	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)	//save d0-d1/a0-a1
 	fmovemx	%fp0-%fp3,USER_FP0(%a6)	//save fp0-fp3
 	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)	//save fp control regs
 	moveb		(%a7),%d0		//test for valid version num
 	andib		#0xf0,%d0		//test for $4x
 	cmpib		#VER_4,%d0	//must be $4x or exit
 	bnel		fpsp_fmt_error
 //
 //	Temporary D25B Fix
 //	The following lines are used to ensure that the FPSR
 //	exception byte and condition codes are clear before proceeding
 //
 	movel		USER_FPSR(%a6),%d0
 	andl		#0xFF00FF,%d0	//keep only bits 23-16 and 7-0
 //					;(accrued exceptions are in 7-0)
 	movel		%d0,USER_FPSR(%a6)
 	fmovel		#0,%FPSR //clear all user bits
 	fmovel		#0,%FPCR	//clear all user exceptions for FPSP
 	clrb		UFLG_TMP(%a6)	//clr flag for unsupp data
 	bsrl		get_op		//go get operand(s)
 	clrb		STORE_FLG(%a6)
 	bsrl		do_func		//do the function
 	fsave		-(%a7)		//capture possible exc state
 	tstb		STORE_FLG(%a6)
 	bnes		no_store	//if STORE_FLG is set, no store
 	bsrl		sto_res		//store the result in user space
 no_store:
 	bral		gen_except	//post any exceptions and return
 	|end
--- a/c/src/lib/libcpu/m68k/m68040/fpsp/x_unsupp.s
+++ b/c/src/lib/libcpu/m68k/m68040/fpsp/x_unsupp.s
@@ -0,0 +1,83 @@
 //
 //	x_unsupp.sa 3.3 7/1/91
 //
 //	fpsp_unsupp --- FPSP handler for unsupported data type exception
 //
 // Trap vector #55	(See table 8-1 MC68030 User's Manual).
 // Invoked when the user program encounters a data format (packed) or a
 // data type (denormalized or unnormalized numbers) that the hardware
 // does not support.
 // Normalizes denorms and unnorms, unpacks packed numbers then stores 
 // them back into the machine to let the 040 finish the operation.  
 //
 // Unsupp calls two routines:
 // 	1. get_op -  gets the operand(s)
 // 	2. res_func - restore the function back into the 040 or
 // 			if fmove.p fpm,<ea> then pack source (fpm)
 // 			and store in users memory <ea>.
 //
 //  Input: Long fsave stack frame
 //
 //
 //		Copyright (C) Motorola, Inc. 1990
 //			All Rights Reserved
 //
 //	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
 //	The copyright notice above does not evidence any  
 //	actual or intended publication of such source code.
 X_UNSUPP:	//idnt    2,1 | Motorola 040 Floating Point Software Package
 	|section	8
 	.include "fpsp.defs"
 	|xref	get_op
 	|xref	res_func
 	|xref	gen_except
 	|xref	fpsp_fmt_error
 	.global	fpsp_unsupp
 //
 // fpsp_unsupp --- handler entry for the unsupported data type exception.
 //
 // Flow: build a local frame, fsave the FPU state, save the user's
 // integer scratch registers, fp0-fp3 and FP control registers into the
 // frame, remember and validate the fsave frame version byte, sanitize
 // the saved FPSR (with a different mask when bit 5 of CMDREG1B is
 // set), then call get_op and res_func, push a longword carrying the
 // saved version byte, and leave through gen_except.  There is no
 // unlk/rte in this routine; the exit is the branch to gen_except.
 //
 fpsp_unsupp:
 //
 	link		%a6,#-LOCAL_SIZE	//a6 = frame pointer, LOCAL_SIZE bytes of locals
 	fsave		-(%a7)			//push the FPU state frame
 	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)	//save user integer scratch regs
 	fmovemx	%fp0-%fp3,USER_FP0(%a6)		//save user fp0-fp3
 	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)	//save user FP control regs
 	moveb		(%a7),VER_TMP(%a6) //save version number
 	moveb		(%a7),%d0		//test for valid version num
 	andib		#0xf0,%d0		//test for $4x
 	cmpib		#VER_4,%d0	//must be $4x or exit
 	bnel		fpsp_fmt_error
 	fmovel		#0,%FPSR		//clear all user status bits
 	fmovel		#0,%FPCR		//clear all user control bits
 //
 //	The following lines are used to ensure that the FPSR
 //	exception byte and condition codes are clear before proceeding,
 //	except in the case of fmove, which leaves the cc's intact.
 //
 //	Both paths work on the saved copy in USER_FPSR, not the live
 //	FPSR (which was already zeroed above).
 //
 unsupp_con:
 	movel		USER_FPSR(%a6),%d1
 	btst		#5,CMDREG1B(%a6)	//looking for fmove out
 	bne		fmove_con	//bit 5 set: use the fmove mask
 	andl		#0xFF00FF,%d1	//clear all but aexcs and qbyte
 	bras		end_fix
 fmove_con:
 //	0x0FFF40FF keeps bits 27-16, bit 14 and bits 7-0
 	andl		#0x0FFF40FF,%d1	//clear all but cc's, snan bit, aexcs, and qbyte
 end_fix:
 	movel		%d1,USER_FPSR(%a6)	//write the masked FPSR back to the frame
 	st		UFLG_TMP(%a6)	//set flag for unsupp data
 	bsrl		get_op		//everything okay, go get operand(s)
 	bsrl		res_func	//fix up stack frame so can restore it
 	clrl		-(%a7)		//push a zero longword ...
 	moveb		VER_TMP(%a6),(%a7) //move idle fmt word to top of stack
 	bral		gen_except	//exit path; no return to this routine
 //
 	|end