Module Name: src Committed By: uwe Date: Thu Aug 4 03:20:10 UTC 2011
Modified Files: src/sys/lib/libkern/arch/sh3: Makefile.inc Added Files: src/sys/lib/libkern/arch/sh3: sdivsi3_i4i.S udivsi3_i4i.S Log Message: For unsigned integer division gcc used to emit a call to __udivsi3 "millicode" function that uses compiler-private ABI. Newer gcc uses heavily tuned __udivsi3_i4i that is NOT compatible with __udivsi3 because it's expected to clobber different registers. We don't want to link the kernel against libgcc and we don't have resources to write heavily tuned version ourselves, so clone __udivsi3 but adjust it to conform to the __udivsi3_i4i clobber spec. Ditto for signed division. You can make gcc use old routines with -mdiv=call-div1 to avoid a few extra instructions to save/restore the right registers in the signed division function. To generate a diff of this commit: cvs rdiff -u -r1.18 -r1.19 src/sys/lib/libkern/arch/sh3/Makefile.inc cvs rdiff -u -r0 -r1.1 src/sys/lib/libkern/arch/sh3/sdivsi3_i4i.S \ src/sys/lib/libkern/arch/sh3/udivsi3_i4i.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/lib/libkern/arch/sh3/Makefile.inc diff -u src/sys/lib/libkern/arch/sh3/Makefile.inc:1.18 src/sys/lib/libkern/arch/sh3/Makefile.inc:1.19 --- src/sys/lib/libkern/arch/sh3/Makefile.inc:1.18 Fri Aug 14 19:23:54 2009 +++ src/sys/lib/libkern/arch/sh3/Makefile.inc Thu Aug 4 03:20:09 2011 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile.inc,v 1.18 2009/08/14 19:23:54 dsl Exp $ +# $NetBSD: Makefile.inc,v 1.19 2011/08/04 03:20:09 uwe Exp $ SRCS+= ffs.S SRCS+= memset.S @@ -6,4 +6,7 @@ SRCS+= ashiftrt.S ashlsi3.S ashrsi3.S lshrsi3.S movstr.S movstr_i4.S SRCS+= movstrSI.S movstrSI12_i4.S mulsi3.S sdivsi3.S udivsi3.S +# newer gcc uses different integer division millicode by default +SRCS+= sdivsi3_i4i.S udivsi3_i4i.S + NO_SRCS+= bswap64.c Added files: Index: src/sys/lib/libkern/arch/sh3/sdivsi3_i4i.S diff -u /dev/null src/sys/lib/libkern/arch/sh3/sdivsi3_i4i.S:1.1 --- /dev/null Thu Aug 4 03:20:10 2011 +++ src/sys/lib/libkern/arch/sh3/sdivsi3_i4i.S Thu Aug 4 03:20:09 2011 @@ -0,0 +1,85 @@ +/* $NetBSD: sdivsi3_i4i.S,v 1.1 2011/08/04 03:20:09 uwe Exp $ */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)udivsi3.s 5.1 (Berkeley) 5/15/90 + */ + +#include <machine/asm.h> +#if defined(LIBC_SCCS) + RCSID("$NetBSD: sdivsi3_i4i.S,v 1.1 2011/08/04 03:20:09 uwe Exp $") +#endif + +/* See comments in udivsi3_i4i.S */ + +#ifdef __ELF__ + .hidden __sdivsi3_i4i +#endif + + +/* r0 <= r4 / r5 (signed; r0 = 0 if r5 == 0). General registers written: r0, r1; r2/r3 are used as scratch but parked in macl/mach and restored before returning. */ +NENTRY(__sdivsi3_i4i) + mov r4, r0 /* r0 = working copy of the dividend */ + mov r5, r1 /* r1 = working copy of the divisor */ + + tst r1, r1 + bt .L_div_by_zero /* zero divisor: leave before r2/r3/macl/mach are touched */ + + !! 
this version cannot clobber r2 and r3, but can clobber macl/mach + lds r2, macl /* park the caller's r2 in macl */ + lds r3, mach /* park the caller's r3 in mach */ + + mov #0, r2 + div0s r2, r0 + subc r3, r3 + subc r2, r0 + div0s r1, r3 +#define DIVSTEP rotcl r0; div1 r1, r3 + /* repeat 32 times */ + DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; + DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; + DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; + DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; +#undef DIVSTEP + rotcl r0 + + addc r2, r0 /* r2 is still 0 here, so this adds only the carry (T) to r0 */ + + sts mach, r3 /* restore the caller's r3 */ + sts macl, r2 /* restore the caller's r2 */ + + rts + nop + +.L_div_by_zero: + rts + mov #0, r0 /* rts delay slot: result is 0 for a zero divisor */ Index: src/sys/lib/libkern/arch/sh3/udivsi3_i4i.S diff -u /dev/null src/sys/lib/libkern/arch/sh3/udivsi3_i4i.S:1.1 --- /dev/null Thu Aug 4 03:20:10 2011 +++ src/sys/lib/libkern/arch/sh3/udivsi3_i4i.S Thu Aug 4 03:20:09 2011 @@ -0,0 +1,102 @@ +/* $NetBSD: udivsi3_i4i.S,v 1.1 2011/08/04 03:20:09 uwe Exp $ */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)udivsi3.s 5.1 (Berkeley) 5/15/90 + */ + +#include <machine/asm.h> +#if defined(LIBC_SCCS) + RCSID("$NetBSD: udivsi3_i4i.S,v 1.1 2011/08/04 03:20:09 uwe Exp $") +#endif + +/* + * XXX: uwe: this is a kludge... + * + * For unsigned integer division gcc used to emit a call to __udivsi3 + * "millicode" function that uses compiler-private ABI. Newer gcc + * uses heavily tuned __udivsi3_i4i that is NOT compatible with + * __udivsi3 because it's expected to clobber different registers. We + * don't want to link the kernel against libgcc and we don't have + * resources to write heavily tuned version ourselves, so clone + * __udivsi3 but adjust it to conform to the __udivsi3_i4i clobber + * spec. + */ + +/* + * IMPORTANT: This function is special. + * + * This function is an auxiliary function that is referenced by the + * code generated by gcc for integer division. But gcc does NOT treat + * a call to this function as an ordinary function call w.r.t. the set + * of registers this call clobbers. See the definition of "udivsi3_i1" + * in gcc/config/sh/sh.md. + * + * Any call to this function MUST NOT clobber any registers besides r4 + * and r0, where the result is returned. 
At the time of the call the + * r4 contains the first argument, so we are only left with r0, and we + * cannot do anything meaningful using only one register. The + * consequences are: + * + * . this function cannot have _PROF_PROLOGUE + * . this function cannot be called via PLT + */ + + +#ifdef __ELF__ + .hidden __udivsi3_i4i +#endif + + +/* r0 <= r4 / r5 (unsigned; r0 = 0 if r5 == 0). General registers written by the code below: r0 and r1 only; r4 and r5 are read but never written. NOTE(review): the "besides r4 and r0" wording in the block comment above looks inherited from udivsi3.S and does not match this variant -- confirm the exact clobber set against the udivsi3_i4i patterns in gcc/config/sh/sh.md. */ +NENTRY(__udivsi3_i4i) + tst r5, r5 + bt .L_div_by_zero /* zero divisor: return 0 instead of dividing */ + + mov r4, r1 ! cannot clobber r4 in this version + mov #0, r0 + div0u +#define DIVSTEP rotcl r1; div1 r5, r0 + /* repeat 32 times */ + DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; + DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; + DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; + DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; +#undef DIVSTEP + rotcl r1 + + rts + mov r1, r0 /* rts delay slot: hand the quotient accumulated in r1 back in r0 */ + +.L_div_by_zero: + rts + mov #0, r0 /* rts delay slot: result is 0 for a zero divisor */