Module Name:    src
Committed By:   uwe
Date:           Thu Aug  4 03:20:10 UTC 2011

Modified Files:
        src/sys/lib/libkern/arch/sh3: Makefile.inc
Added Files:
        src/sys/lib/libkern/arch/sh3: sdivsi3_i4i.S udivsi3_i4i.S

Log Message:
For unsigned integer division gcc used to emit a call to __udivsi3
"millicode" function that uses compiler-private ABI.  Newer gcc uses
heavily tuned __udivsi3_i4i that is NOT compatible with __udivsi3
because it's expected to clobber different registers.  We don't want
to link the kernel against libgcc and we don't have resources to write
heavily tuned version ourselves, so clone __udivsi3 but adjust it to
conform to the __udivsi3_i4i clobber spec.

Ditto for signed division.

You can make gcc use old routines with -mdiv=call-div1 to avoid the few
extra instructions needed to save/restore the right registers in the
signed division function.


To generate a diff of this commit:
cvs rdiff -u -r1.18 -r1.19 src/sys/lib/libkern/arch/sh3/Makefile.inc
cvs rdiff -u -r0 -r1.1 src/sys/lib/libkern/arch/sh3/sdivsi3_i4i.S \
    src/sys/lib/libkern/arch/sh3/udivsi3_i4i.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/lib/libkern/arch/sh3/Makefile.inc
diff -u src/sys/lib/libkern/arch/sh3/Makefile.inc:1.18 src/sys/lib/libkern/arch/sh3/Makefile.inc:1.19
--- src/sys/lib/libkern/arch/sh3/Makefile.inc:1.18	Fri Aug 14 19:23:54 2009
+++ src/sys/lib/libkern/arch/sh3/Makefile.inc	Thu Aug  4 03:20:09 2011
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile.inc,v 1.18 2009/08/14 19:23:54 dsl Exp $
+#	$NetBSD: Makefile.inc,v 1.19 2011/08/04 03:20:09 uwe Exp $
 
 SRCS+=	ffs.S
 SRCS+=	memset.S
@@ -6,4 +6,7 @@
 SRCS+=	ashiftrt.S ashlsi3.S ashrsi3.S lshrsi3.S movstr.S movstr_i4.S
 SRCS+=	movstrSI.S movstrSI12_i4.S mulsi3.S sdivsi3.S udivsi3.S
 
+# newer gcc uses different integer division millicode by default
+SRCS+=  sdivsi3_i4i.S udivsi3_i4i.S
+
 NO_SRCS+= bswap64.c

Added files:

Index: src/sys/lib/libkern/arch/sh3/sdivsi3_i4i.S
diff -u /dev/null src/sys/lib/libkern/arch/sh3/sdivsi3_i4i.S:1.1
--- /dev/null	Thu Aug  4 03:20:10 2011
+++ src/sys/lib/libkern/arch/sh3/sdivsi3_i4i.S	Thu Aug  4 03:20:09 2011
@@ -0,0 +1,85 @@
+/*	$NetBSD: sdivsi3_i4i.S,v 1.1 2011/08/04 03:20:09 uwe Exp $	*/
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)udivsi3.s	5.1 (Berkeley) 5/15/90
+ */
+
+#include <machine/asm.h>
+#if defined(LIBC_SCCS)
+	RCSID("$NetBSD: sdivsi3_i4i.S,v 1.1 2011/08/04 03:20:09 uwe Exp $")
+#endif
+
+/* See comments in udivsi3_i4i.S */
+
+#ifdef __ELF__
+	.hidden __sdivsi3_i4i
+#endif
+
+
+/* r0 <= r4 / r5 (signed); r0 = 0 if r5 == 0.  Scratch: r1, macl, mach. */
+NENTRY(__sdivsi3_i4i)
+	mov	r4, r0		/* r0 = dividend, becomes the quotient */
+	mov	r5, r1		/* r1 = divisor */
+
+	tst	r1, r1		/* T = (divisor == 0) */
+	bt	.L_div_by_zero	/* taken before r2/r3 are touched */
+
+	!! this version cannot clobber r2 and r3, but can clobber macl/mach
+	lds	r2, macl	/* stash caller r2 in macl */
+	lds	r3, mach	/* stash caller r3 in mach */
+
+	mov	#0, r2		/* r2 = 0, constant operand for subc/addc */
+	div0s	r2, r0		/* T = sign bit of the dividend */
+	subc	r3, r3		/* r3 = -T: sign extension of the dividend */
+	subc	r2, r0		/* negative dividend: subtract 1 */
+	div0s	r1, r3		/* set up Q, M, T from dividend/divisor signs */
+#define DIVSTEP	rotcl r0; div1 r1, r3
+	/* repeat 32 times */
+	DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP;
+	DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP;
+	DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP;
+	DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP;
+#undef DIVSTEP
+	rotcl	r0		/* shift in the last quotient bit */
+
+	addc	r2, r0		/* r0 += T: fix up a negative quotient */
+
+	sts	mach, r3	/* restore caller r3 */
+	sts	macl, r2	/* restore caller r2 */
+
+	rts
+	 nop			/* delay slot */
+
+.L_div_by_zero:
+	rts
+	 mov	#0, r0		/* delay slot: division by zero yields 0 */
Index: src/sys/lib/libkern/arch/sh3/udivsi3_i4i.S
diff -u /dev/null src/sys/lib/libkern/arch/sh3/udivsi3_i4i.S:1.1
--- /dev/null	Thu Aug  4 03:20:10 2011
+++ src/sys/lib/libkern/arch/sh3/udivsi3_i4i.S	Thu Aug  4 03:20:09 2011
@@ -0,0 +1,102 @@
+/*	$NetBSD: udivsi3_i4i.S,v 1.1 2011/08/04 03:20:09 uwe Exp $	*/
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)udivsi3.s	5.1 (Berkeley) 5/15/90
+ */
+
+#include <machine/asm.h>
+#if defined(LIBC_SCCS)
+	RCSID("$NetBSD: udivsi3_i4i.S,v 1.1 2011/08/04 03:20:09 uwe Exp $")
+#endif
+
+/*
+ * XXX: uwe: this is a kludge...
+ *
+ * For unsigned integer division gcc used to emit a call to __udivsi3
+ * "millicode" function that uses compiler-private ABI.  Newer gcc
+ * uses heavily tuned __udivsi3_i4i that is NOT compatible with
+ * __udivsi3 because it's expected to clobber different registers.  We
+ * don't want to link the kernel against libgcc and we don't have
+ * resources to write heavily tuned version ourselves, so clone
+ * __udivsi3 but adjust it to conform to the __udivsi3_i4i clobber
+ * spec.
+ */
+
+/*
+ * IMPORTANT: This function is special.
+ *
+ * This function is an auxiliary function that is referenced by the
+ * code generated by gcc for integer division.  But gcc does NOT treat
+ * a call to this function as an ordinary function call w.r.t. the set
+ * of registers this call clobbers.  See the definition of "udivsi3_i1"
+ * in gcc/config/sh/sh.md.
+ *
+ * Any call to this function MUST NOT clobber any registers besides r4
+ * and r0, where the result is returned.  At the time of the call the
+ * r4 contains the first argument, so we are only left with r0, and we
+ * cannot do anything meaningful using only one register.  The
+ * consequences are:
+ *
+ * . this function cannot have _PROF_PROLOGUE
+ * . this function cannot be called via PLT
+ */
+
+
+#ifdef __ELF__
+	.hidden __udivsi3_i4i
+#endif
+
+
+/* r0 <= r4 / r5 (unsigned); r0 = 0 if r5 == 0.  r4, r5 kept; scratch: r1. */
+NENTRY(__udivsi3_i4i)
+	tst	r5, r5		/* T = (divisor == 0) */
+	bt	.L_div_by_zero
+
+	mov	r4, r1		! cannot clobber r4 in this version
+	mov	#0, r0		/* r0 = 0: upper word / partial remainder */
+	div0u			/* clear Q, M and T for unsigned division */
+#define DIVSTEP	rotcl r1; div1 r5, r0
+	/* repeat 32 times */
+	DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP;
+	DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP;
+	DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP;
+	DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP; DIVSTEP;
+#undef DIVSTEP
+	rotcl	r1		/* shift in the last quotient bit */
+
+	rts
+	 mov	r1, r0		/* delay slot: return the quotient in r0 */
+
+.L_div_by_zero:
+	rts
+	 mov	#0, r0		/* delay slot: division by zero yields 0 */

Reply via email to