Doug Gilmore's post earlier today on an optimizer change in the Open64
compiler suite to speed integer division reminded me that it is worth
posting pointers to published research on that problem from the last
couple of decades, with the most recent from January 2011. The
entries below are culled from the bibliography of floating-point
arithmetic
http://www.math.utah.edu/pub/tex/bib/index-table-f.html#fparith
and sorted from oldest (first) to newest (last), with cross-referenced
proceedings volumes at the end:
@String{j-AUSTRALIAN-COMP-J = "Australian Computer Journal"}
@String{j-COMP-J = "The Computer Journal"}
@String{j-SIGPLAN = "ACM SIGPLAN Notices"}
@String{pub-AW = "Ad{\-d}i{\-s}on-Wes{\-l}ey"}
@String{pub-AW:adr = "Reading, MA, USA"}
@String{pub-IEEE = "IEEE Computer Society Press"}
@String{pub-IEEE:adr = "1109 Spring Street, Suite 300, Silver
Spring, MD 20910, USA"}
@String{pub-SV = "Spring{\-}er-Ver{\-}lag"}
@String{pub-SV:adr = "Berlin, Germany~/ Heidelberg,
Germany~/ London, UK~/ etc."}
@Article{Vowels:1992:D,
author = "R. A. Vowels",
title = "Division by 10",
journal = j-AUSTRALIAN-COMP-J,
volume = "24",
number = "3",
pages = "81--85",
month = aug,
year = "1992",
CODEN = "ACMJB2",
ISSN = "0004-8917",
bibdate = "Tue Dec 12 09:27:13 MST 1995",
abstract = "Division of a binary integer and a binary
floating-point mantissa by 10 can be performed with
shifts and adds, yielding a significant improvement in
hardware execution time, and in software execution time
if no hardware divide instruction is available. Several
algorithms are given, appropriate to specific machine
word sizes, hardware and hardware instructions
available, and depending on whether a remainder is
required. The integer division algorithms presented
here contain a new strategy that produces the correct
quotient directly, without the need for the
supplementary correction required of
previously-published algorithms. The algorithms are
competitive in time with binary coded decimal (BCD)
divide by 10. Both the integer and floating-point
algorithms are an order of magnitude faster than
conventional division.",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., R. Melbourne Inst. of Technol.
Ltd., Vic., Australia",
classification = "C7310 (Mathematics)",
keywords = "Binary integer; decimal floating-point arithmetic;
Floating-point algorithms; Hardware execution time;
Integer division algorithms; Software execution time",
pubcountry = "Australia",
thesaurus = "Digital arithmetic; Mathematics computing",
}
@Article{Granlund:1994:DII,
author = "Torbj{\"o}rn Granlund and Peter L. Montgomery",
title = "Division by invariant integers using multiplication",
journal = j-SIGPLAN,
volume = "29",
number = "6",
pages = "61--72",
month = jun,
year = "1994",
CODEN = "SINODQ",
DOI = "http://doi.acm.org/10.1145/178243.178249",
ISBN = "0-89791-598-4",
ISBN-13 = "978-0-89791-598-4",
ISSN = "0362-1340",
bibdate = "Sun Dec 14 09:16:51 MST 2003",
bibsource = "http://portal.acm.org/;
http://www.acm.org/pubs/contents/proceedings/pldi/178243/index.html",
URL = "ftp://ftp.cwi.nl/pub/pmontgom/divcnst.psa4.gz;
ftp://ftp.cwi.nl/pub/pmontgom/divcnst.psl.gz;
http://www.acm.org:80/pubs/citations/proceedings/pldi/178243/p61-granlund/",
abstract = "Integer division remains expensive on today's
processors as the cost of integer multiplication
declines. We present code sequences for division by
arbitrary nonzero integer constants and run-time
invariants using integer multiplication. The algorithms
assume a two's complement architecture. Most also
require that the upper half of an integer product be
quickly accessible. We treat unsigned division, signed
division where the quotient rounds towards zero, signed
division where the quotient rounds towards $-\infty$,
and division where the result is known a priori to be
exact. We give some implementation results using the C
compiler GCC.",
acknowledgement = ack-nhfb,
affiliation = "Cygnus Support, Mountain View, CA, USA",
annote = "Published as part of the Proceedings of PLDI'94.",
classification = "C5230 (Digital arithmetic methods); C6110 (Systems
analysis and programming); C6150C (Compilers,
interpreters and other processors)",
confdate = "20-24 June 1994",
conflocation = "Orlando, FL, USA",
confsponsor = "ACM",
conftitle = "ACM SIGPLAN '94 Conference on Programming Language
Design and Implementation (PLDI)",
corpsource = "Cygnus Support, Mountain View, CA, USA",
keywords = "algorithms; Arbitrary nonzero integer constants;
arbitrary nonzero integer constants; C compiler; Code
sequences; code sequences; digital arithmetic; Floating
point arithmetic; floating point arithmetic; GCC;
Integer division; integer division; Integer
multiplication; integer multiplication; Invariant
integers; invariant integers; mathematics computing;
Multiplication; multiplication; performance; program;
program compilers; programming; reduced instruction set
computing; RISC processors; Run-time invariants;
run-time invariants; Signed division; signed division;
Two's complement architecture; two's complement
architecture; Unsigned division; unsigned division",
sponsororg = "ACM",
subject = "{\bf G.1.0} Mathematics of Computing, NUMERICAL
ANALYSIS, General, Computer arithmetic. {\bf F.2.1}
Theory of Computation, ANALYSIS OF ALGORITHMS AND
PROBLEM COMPLEXITY, Numerical Algorithms and Problems.
{\bf D.3.4} Software, PROGRAMMING LANGUAGES,
Processors, Compilers.",
thesaurus = "Digital arithmetic; Mathematics computing; Program
compilers; Programming; Reduced instruction set
computing",
treatment = "T Theoretical or Mathematical",
}
@InProceedings{Sheldon:2003:SRI,
author = "Jeffrey Sheldon and Walter Lee and Ben Greenwald and
Saman Amarasinghe",
title = "Strength Reduction of Integer Division and Modulo
Operations",
crossref = "Dietz:2003:LCP",
pages = "254--273",
year = "2003",
bibdate = "Fri Jun 24 12:09:31 2005",
URL = "http://www.springerlink.com/link.asp?id=3hfwyuyjxkf23nd2;
http://www.springerlink.com/openurl.asp?genre=article&issn=0302-9743&volume=2624&spage=254",
abstract = "Integer division, modulo, and remainder operations are
expressive and useful operations. They are logical
candidates to express complex data accesses such as the
wrap-around behavior in queues using ring buffers. In
addition, they appear frequently in address
computations as a result of compiler optimizations that
improve data locality, perform data distribution, or
enable parallelization. Experienced application
programmers, however, avoid them because they are slow.
Furthermore, while advances in both hardware and
software have improved the performance of many parts of
a program, few are applicable to division and modulo
operations. This trend makes these operations
increasingly detrimental to program performance. This
paper describes a suite of optimizations for
eliminating division, modulo, and remainder operations
from programs. These techniques are analogous to
strength reduction techniques used for multiplications.
In addition to some algebraic simplifications, we
present a set of optimization techniques that
eliminates division and modulo operations that are
functions of loop induction variables and loop
constants. The optimizations rely on algebra, integer
programming, and loop transformations.",
acknowledgement = ack-nhfb,
}
@Book{Warren:2003:HD,
author = "Henry S. Warren",
title = "Hacker's delight",
publisher = pub-AW,
address = pub-AW:adr,
pages = "xiv + 306",
year = "2003",
ISBN = "0-201-91465-4",
ISBN-13 = "978-0-201-91465-8",
LCCN = "QA76.6 .W375 2003",
bibdate = "Tue Jan 03 18:20:34 2006",
bibsource = "z3950.loc.gov:7090/Voyager",
note = "While this book does not specifically address
computational aspects of floating-point arithmetic
(apart from the nine-page Chapter 15), it has extensive
coverage of, and clever algorithms for, integer
arithmetic operations that are fundamental for
implementing hardware floating-arithmetic and software
multiple-precision arithmetic.",
URL =
"http://www.awprofessional.com/bookstore/product.asp?isbn=0201914654;
http://www.hackersdelight.org/;
http://www.hackersdelight.org/hackerTOC.pdf;
http://www.informit.com/content/images/chap3_0201914654/elementLinks/0201914654.pdf",
...
}
@InProceedings{Robison:2005:BUD,
author = "Arch Robison",
title = "{$N$}-Bit Unsigned Division Via {$N$}-Bit
Multiply-Add",
crossref = "Montuschi:2005:PIS",
pages = "??--??",
year = "2005",
bibdate = "Wed Jun 22 07:02:55 2005",
URL = "http://arith17.polito.it/final/paper-104.pdf",
abstract = "Integer division on modern processors is expensive
compared to multiplication. Previous algorithms for
performing unsigned division by an invariant divisor,
via reciprocal approximation, suffer in the worst case
from a common requirement for $n+1$ bit multiplication,
which typically must be synthesized from $n$-bit
multiplication and extra arithmetic operations. This
paper presents, and proves, a hybrid of previous
algorithms that replaces $n+1$ bit multiplication with
a single fused multiply-add operation on $n$-bit
operands, thus reducing any $n$-bit unsigned division
to the upper $n$ bits of a multiply-add, followed by a
single right shift. An additional benefit is that the
prerequisite calculations are simple and fast. On the
Itanium 2 processor, the technique is advantageous for
as few as two quotients that share a common run-time
divisor.",
acknowledgement = ack-nhfb,
keywords = "ARITH-17",
pagecount = "9",
}
@Article{Cavagnino:2008:EAI,
author = "D. Cavagnino and A. E. Werbrouck",
title = "Efficient Algorithms for Integer Division by Constants
Using Multiplication",
journal = j-COMP-J,
volume = "51",
number = "4",
pages = "470--480",
month = jul,
year = "2008",
CODEN = "CMPJA6",
DOI = "http://dx.doi.org/10.1093/comjnl/bxm082",
ISSN = "0010-4620",
bibdate = "Wed Apr 28 14:33:34 MDT 2010",
bibsource =
"http://comjnl.oxfordjournals.org/content/vol51/issue4/index.dtl",
URL =
"http://comjnl.oxfordjournals.org/cgi/content/abstract/51/4/470;
http://comjnl.oxfordjournals.org/cgi/content/full/51/4/470;
http://comjnl.oxfordjournals.org/cgi/reprint/51/4/470",
abstract = "We present a complete analysis of the integer division
of a single unsigned dividend word by a single unsigned
divisor word based on double-word multiplication of the
dividend by an inverse of the divisor. The well-known
advantage of this method yields run-time efficiency, if
the inverse of the divisor can be calculated at compile
time, since multiplication is much faster than division
in arithmetic units. Our analysis leads to the
discovery of a limit to the straightforward application
of this method in the form of a critical dividend,
which fortunately associates with a minority of the
possible divisors (20\%) and defines only a small upper
part of the available dividend space. We present two
algorithms for ascertaining whether a critical dividend
exists and, if so, its value along with a circumvention
of this limit. For completeness, we include an
algorithm for integer division of a unsigned
double-word dividend by an unsigned single-word divisor
in which the quotient is not limited to a single word
and the remainder is an intrinsic part of the result.",
acknowledgement = ack-nhfb,
keywords = "integer division; multiplicative inverse; division by
multiplication; efficiency; integer constants",
remark = "See \cite{Cavagnino:2011:AAD}.",
}
@Article{Cavagnino:2011:AAD,
author = "D. Cavagnino and A. E. Werbrouck",
title = "An Analysis of Associated Dividends in the {DBM}
Algorithm for Division by Constants Using
Multiplication",
journal = j-COMP-J,
volume = "54",
number = "1",
pages = "148--156",
month = jan,
year = "2011",
CODEN = "CMPJA6",
DOI = "http://dx.doi.org/10.1093/comjnl/bxp117",
ISSN = "0010-4620",
bibdate = "Tue Dec 21 19:26:47 MST 2010",
bibsource = "http://comjnl.oxfordjournals.org/content/54/1.toc",
URL =
"http://comjnl.oxfordjournals.org/content/54/1/148.full.pdf+html",
abstract = "When a compiler encounters a fixed integer divisor,
which is not a power of 2, it can calculate its inverse
to be multiplied by the run-time integer dividends to
obtain the quotients, using our very efficient,
recently published [Cavagnino, D. and Werbrouck, A.E.
(2008) {\em Efficient algorithms for integer division
by constants using multiplication}. Comp. J., 51,
470--480] division by multiplication algorithms.
Essentially our algorithms permit a complete partition
of a defined number space into non-adverse and adverse
divisors on the basis of whether a dividend associated
with each divisor is, or is not, greater than the
maximum dividend size. In this paper, we demonstrate
useful relations between the dividends associated with
all divisors and with their multiples by positive
powers",
acknowledgement = ack-nhfb,
fjournal = "Computer Journal",
keywords = "dividend distribution; division by multiplication
(DBM); integer division; multiplicative division",
onlinedate = "December 30, 2009",
remark = "See \cite{Cavagnino:2008:EAI}.",
}
@Proceedings{Dietz:2003:LCP,
editor = "Henry G. Dietz",
booktitle = "{Languages and Compilers for Parallel Computing: 14th
International Workshop, LCPC 2001, Cumberland Falls,
KY, USA, August 1--3, 2001: Revised Papers}",
title = "{Languages and Compilers for Parallel Computing: 14th
International Workshop, LCPC 2001, Cumberland Falls,
KY, USA, August 1--3, 2001: Revised Papers}",
volume = "2624",
publisher = pub-SV,
address = pub-SV:adr,
pages = "ix + 444",
year = "2003",
CODEN = "LNCSD9",
DOI = "????",
ISBN = "3-540-04029-3 (paperback)",
ISBN-13 = "978-3-540-04029-3 (paperback)",
ISSN = "0302-9743",
LCCN = "QA76.58 .W656 2001",
bibdate = "Thu Aug 21 09:09:03 MDT 2003",
note = "The 14th workshop on Languages and Compilers for
Parallel Computing, LCPC 2001, was organized and hosted
by the Electrical and Computer Engineering Department
of the University of Kentucky, Lexington, KY, USA.",
series = ser-LNCS,
URL =
"http://link.springer-ny.com/link/service/series/0558/tocs/t2624.htm;
http://www.springerlink.com/openurl.asp?genre=issue&issn=0302-9743&volume=2624",
acknowledgement = ack-nhfb,
keywords = "compilers (computer programs); parallel processing
(electronic computers); programming languages
(electronic computers)",
}
@Proceedings{Montuschi:2005:PIS,
editor = "Paolo Montuschi and Eric (Eric Mark) Schwarz",
booktitle = "{Proceedings of the 17th IEEE Symposium on Computer
Arithmetic, ARITH-17 2005, June 27--29, 2005, Cape Cod,
Massachusetts, USA}",
title = "{Proceedings of the 17th IEEE Symposium on Computer
Arithmetic, ARITH-17 2005, June 27--29, 2005, Cape Cod,
Massachusetts, USA}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xii + 298",
year = "2005",
ISBN = "0-7695-2366-8",
ISBN-13 = "978-0-7695-2366-8",
LCCN = "QA76.9.C62 .S95 2005",
bibdate = "Thu Sep 14 12:30:26 2006",
bibsource = "http://arith17.polito.it/",
acknowledgement = ack-nhfb,
keywords = "ARITH-17",
}
-------------------------------------------------------------------------------
- Nelson H. F. Beebe Tel: +1 801 581 5254 -
- University of Utah FAX: +1 801 581 4148 -
- Department of Mathematics, 110 LCB Internet e-mail: [email protected] -
- 155 S 1400 E RM 233 [email protected] [email protected] -
- Salt Lake City, UT 84112-0090, USA URL: http://www.math.utah.edu/~beebe/ -
-------------------------------------------------------------------------------
------------------------------------------------------------------------------
Create and publish websites with WebMatrix
Use the most popular FREE web apps or write code yourself;
WebMatrix provides all the features you need to develop and
publish your website. http://p.sf.net/sfu/ms-webmatrix-sf
_______________________________________________
Open64-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/open64-devel