Doug Gilmore's post earlier today on an optimizer change in the Open64 compiler suite to speed integer division reminded me that it is worth posting pointers to published research on that problem from the last couple of decades, with the most recent from January 2011. The entries below are culled from the bibliography of floating-point arithmetic
http://www.math.utah.edu/pub/tex/bib/index-table-f.html#fparith and sorted from oldest (first) to newest (last), with cross-referenced proceedings volumes at the end: @String{j-AUSTRALIAN-COMP-J = "Australian Computer Journal"} @String{j-COMP-J = "The Computer Journal"} @String{j-SIGPLAN = "ACM SIGPLAN Notices"} @String{pub-AW = "Ad{\-d}i{\-s}on-Wes{\-l}ey"} @String{pub-AW:adr = "Reading, MA, USA"} @String{pub-IEEE = "IEEE Computer Society Press"} @String{pub-IEEE:adr = "1109 Spring Street, Suite 300, Silver Spring, MD 20910, USA"} @String{pub-SV = "Spring{\-}er-Ver{\-}lag"} @String{pub-SV:adr = "Berlin, Germany~/ Heidelberg, Germany~/ London, UK~/ etc."} @Article{Vowels:1992:D, author = "R. A. Vowels", title = "Division by 10", journal = j-AUSTRALIAN-COMP-J, volume = "24", number = "3", pages = "81--85", month = aug, year = "1992", CODEN = "ACMJB2", ISSN = "0004-8917", bibdate = "Tue Dec 12 09:27:13 MST 1995", abstract = "Division of a binary integer and a binary floating-point mantissa by 10 can be performed with shifts and adds, yielding a significant improvement in hardware execution time, and in software execution time if no hardware divide instruction is available. Several algorithms are given, appropriate to specific machine word sizes, hardware and hardware instructions available, and depending on whether a remainder is required. The integer division algorithms presented here contain a new strategy that produces the correct quotient directly, without the need for the supplementary correction required of previously-published algorithms. The algorithms are competitive in time with binary coded decimal (BCD) divide by 10. Both the integer and floating-point algorithms are an order of magnitude faster than conventional division.", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., R. Melbourne Inst. of Technol. Ltd., Vic., Australia", classification = "C7310 (Mathematics)", keywords = "Binary integer; decimal floating-point arithmetic; Floating-point algorithms; Hardware execution time; Integer division algorithms; Software execution time", pubcountry = "Australia", thesaurus = "Digital arithmetic; Mathematics computing", } @Article{Granlund:1994:DII, author = "Torbj{\"o}rn Granlund and Peter L. Montgomery", title = "Division by invariant integers using multiplication", journal = j-SIGPLAN, volume = "29", number = "6", pages = "61--72", month = jun, year = "1994", CODEN = "SINODQ", DOI = "http://doi.acm.org/10.1145/178243.178249", ISBN = "0-89791-598-4", ISBN-13 = "978-0-89791-598-4", ISSN = "0362-1340", bibdate = "Sun Dec 14 09:16:51 MST 2003", bibsource = "http://portal.acm.org/; http://www.acm.org/pubs/contents/proceedings/pldi/178243/index.html", URL = "ftp://ftp.cwi.nl/pub/pmontgom/divcnst.psa4.gz; ftp://ftp.cwi.nl/pub/pmontgom/divcnst.psl.gz; http://www.acm.org:80/pubs/citations/proceedings/pldi/178243/p61-granlund/", abstract = "Integer division remains expensive on today's processors as the cost of integer multiplication declines. We present code sequences for division by arbitrary nonzero integer constants and run-time invariants using integer multiplication. The algorithms assume a two's complement architecture. Most also require that the upper half of an integer product be quickly accessible. We treat unsigned division, signed division where the quotient rounds towards zero, signed division where the quotient rounds towards $-\infty$, and division where the result is known a priori to be exact. We give some implementation results using the C compiler GCC.", acknowledgement = ack-nhfb, affiliation = "Cygnus Support, Mountain View, CA, USA", annote = "Published as part of the Proceedings of PLDI'94.", classification = "C5230 (Digital arithmetic methods); C6110 (Systems analysis and programming); C6150C (Compilers, interpreters and other processors)", confdate = "20-24 June 1994", conflocation = "Orlando, FL, USA", confsponsor = "ACM", conftitle = "ACM SIGPLAN '94 Conference on Programming Language Design and Implementation (PLDI)", corpsource = "Cygnus Support, Mountain View, CA, USA", keywords = "algorithms; Arbitrary nonzero integer constants; arbitrary nonzero integer constants; C compiler; Code sequences; code sequences; digital arithmetic; Floating point arithmetic; floating point arithmetic; GCC; Integer division; integer division; Integer multiplication; integer multiplication; Invariant integers; invariant integers; mathematics computing; Multiplication; multiplication; performance; program; program compilers; programming; reduced instruction set computing; RISC processors; Run-time invariants; run-time invariants; Signed division; signed division; Two's complement architecture; two's complement architecture; Unsigned division; unsigned division", sponsororg = "ACM", subject = "{\bf G.1.0} Mathematics of Computing, NUMERICAL ANALYSIS, General, Computer arithmetic. {\bf F.2.1} Theory of Computation, ANALYSIS OF ALGORITHMS AND PROBLEM COMPLEXITY, Numerical Algorithms and Problems. {\bf D.3.4} Software, PROGRAMMING LANGUAGES, Processors, Compilers.", thesaurus = "Digital arithmetic; Mathematics computing; Program compilers; Programming; Reduced instruction set computing", treatment = "T Theoretical or Mathematical", } @InProceedings{Sheldon:2003:SRI, author = "Jeffrey Sheldon and Walter Lee and Ben Greenwald and Saman Amarasinghe", title = "Strength Reduction of Integer Division and Modulo Operations", crossref = "Dietz:2003:LCP", pages = "254--273", year = "2003", bibdate = "Fri Jun 24 12:09:31 2005", URL = "http://www.springerlink.com/link.asp?id=3hfwyuyjxkf23nd2; http://www.springerlink.com/openurl.asp?genre=article&issn=0302-9743&volume=2624&spage=254", abstract = "Integer division, modulo, and remainder operations are expressive and useful operations. They are logical candidates to express complex data accesses such as the wrap-around behavior in queues using ring buffers. In addition, they appear frequently in address computations as a result of compiler optimizations that improve data locality, perform data distribution, or enable parallelization. Experienced application programmers, however, avoid them because they are slow. Furthermore, while advances in both hardware and software have improved the performance of many parts of a program, few are applicable to division and modulo operations. This trend makes these operations increasingly detrimental to program performance. This paper describes a suite of optimizations for eliminating division, modulo, and remainder operations from programs. These techniques are analogous to strength reduction techniques used for multiplications. In addition to some algebraic simplifications, we present a set of optimization techniques that eliminates division and modulo operations that are functions of loop induction variables and loop constants. The optimizations rely on algebra, integer programming, and loop transformations.", acknowledgement = ack-nhfb, } @Book{Warren:2003:HD, author = "Henry S. Warren", title = "Hacker's delight", publisher = pub-AW, address = pub-AW:adr, pages = "xiv + 306", year = "2003", ISBN = "0-201-91465-4", ISBN-13 = "978-0-201-91465-8", LCCN = "QA76.6 .W375 2003", bibdate = "Tue Jan 03 18:20:34 2006", bibsource = "z3950.loc.gov:7090/Voyager", note = "While this book does not specifically address computational aspects of floating-point arithmetic (apart from the nine-page Chapter 15), it has extensive coverage of, and clever algorithms for, integer arithmetic operations that are fundamental for implementing hardware floating-arithmetic and software multiple-precision arithmetic.", URL = "http://www.awprofessional.com/bookstore/product.asp?isbn=0201914654; http://www.hackersdelight.org/; http://www.hackersdelight.org/hackerTOC.pdf; http://www.informit.com/content/images/chap3_0201914654/elementLinks/0201914654.pdf", ... } @InProceedings{Robison:2005:BUD, author = "Arch Robison", title = "{$N$}-Bit Unsigned Division Via {$N$}-Bit Multiply-Add", crossref = "Montuschi:2005:PIS", pages = "??--??", year = "2005", bibdate = "Wed Jun 22 07:02:55 2005", URL = "http://arith17.polito.it/final/paper-104.pdf", abstract = "Integer division on modern processors is expensive compared to multiplication. Previous algorithms for performing unsigned division by an invariant divisor, via reciprocal approximation, suffer in the worst case from a common requirement for $n+1$ bit multiplication, which typically must be synthesized from $n$-bit multiplication and extra arithmetic operations. This paper presents, and proves, a hybrid of previous algorithms that replaces $n+1$ bit multiplication with a single fused multiply-add operation on $n$-bit operands, thus reducing any $n$-bit unsigned division to the upper $n$ bits of a multiply-add, followed by a single right shift. An additional benefit is that the prerequisite calculations are simple and fast. On the Itanium 2 processor, the technique is advantageous for as few as two quotients that share a common run-time divisor.", acknowledgement = ack-nhfb, keywords = "ARITH-17", pagecount = "9", } @Article{Cavagnino:2008:EAI, author = "D. Cavagnino and A. E. Werbrouck", title = "Efficient Algorithms for Integer Division by Constants Using Multiplication", journal = j-COMP-J, volume = "51", number = "4", pages = "470--480", month = jul, year = "2008", CODEN = "CMPJA6", DOI = "http://dx.doi.org/10.1093/comjnl/bxm082", ISSN = "0010-4620", bibdate = "Wed Apr 28 14:33:34 MDT 2010", bibsource = "http://comjnl.oxfordjournals.org/content/vol51/issue4/index.dtl", URL = "http://comjnl.oxfordjournals.org/cgi/content/abstract/51/4/470; http://comjnl.oxfordjournals.org/cgi/content/full/51/4/470; http://comjnl.oxfordjournals.org/cgi/reprint/51/4/470", abstract = "We present a complete analysis of the integer division of a single unsigned dividend word by a single unsigned divisor word based on double-word multiplication of the dividend by an inverse of the divisor. The well-known advantage of this method yields run-time efficiency, if the inverse of the divisor can be calculated at compile time, since multiplication is much faster than division in arithmetic units. Our analysis leads to the discovery of a limit to the straightforward application of this method in the form of a critical dividend, which fortunately associates with a minority of the possible divisors (20\%) and defines only a small upper part of the available dividend space. We present two algorithms for ascertaining whether a critical dividend exists and, if so, its value along with a circumvention of this limit. For completeness, we include an algorithm for integer division of a unsigned double-word dividend by an unsigned single-word divisor in which the quotient is not limited to a single word and the remainder is an intrinsic part of the result.", acknowledgement = ack-nhfb, keywords = "integer division; multiplicative inverse; division by multiplication; efficiency; integer constants", remark = "See \cite{Cavagnino:2011:AAD}.", } @Article{Cavagnino:2011:AAD, author = "D. Cavagnino and A. E. Werbrouck", title = "An Analysis of Associated Dividends in the {DBM} Algorithm for Division by Constants Using Multiplication", journal = j-COMP-J, volume = "54", number = "1", pages = "148--156", month = jan, year = "2011", CODEN = "CMPJA6", DOI = "http://dx.doi.org/10.1093/comjnl/bxp117", ISSN = "0010-4620", bibdate = "Tue Dec 21 19:26:47 MST 2010", bibsource = "http://comjnl.oxfordjournals.org/content/54/1.toc", URL = "http://comjnl.oxfordjournals.org/content/54/1/148.full.pdf+html", abstract = "When a compiler encounters a fixed integer divisor, which is not a power of 2, it can calculate its inverse to be multiplied by the run-time integer dividends to obtain the quotients, using our very efficient, recently published [Cavagnino, D. and Werbrouck, A.E. (2008) {\em Efficient algorithms for integer division by constants using multiplication}. Comp. J., 51, 470--480] division by multiplication algorithms. Essentially our algorithms permit a complete partition of a defined number space into non-adverse and adverse divisors on the basis of whether a dividend associated with each divisor is, or is not, greater than the maximum dividend size. In this paper, we demonstrate useful relations between the dividends associated with all divisors and with their multiples by positive powers", acknowledgement = ack-nhfb, fjournal = "Computer Journal", keywords = "dividend distribution; division by multiplication (DBM); integer division; multiplicative division", onlinedate = "December 30, 2009", remark = "See \cite{Cavagnino:2008:EAI}.", } @Proceedings{Dietz:2003:LCP, editor = "Henry G. Dietz", booktitle = "{Languages and Compilers for Parallel Computing: 14th International Workshop, LCPC 2001, Cumberland Falls, KY, USA, August 1--3, 2001: Revised Papers}", title = "{Languages and Compilers for Parallel Computing: 14th International Workshop, LCPC 2001, Cumberland Falls, KY, USA, August 1--3, 2001: Revised Papers}", volume = "2624", publisher = pub-SV, address = pub-SV:adr, pages = "ix + 444", year = "2003", CODEN = "LNCSD9", DOI = "????", ISBN = "3-540-04029-3 (paperback)", ISBN-13 = "978-3-540-04029-3 (paperback)", ISSN = "0302-9743", LCCN = "QA76.58 .W656 2001", bibdate = "Thu Aug 21 09:09:03 MDT 2003", note = "The 14th workshop on Languages and Compilers for Parallel Computing, LCPC 2001, was organized and hosted by the Electrical and Computer Engineering Department of the University of Kentucky, Lexington, KY, USA.", series = ser-LNCS, URL = "http://link.springer-ny.com/link/service/series/0558/tocs/t2624.htm; http://www.springerlink.com/openurl.asp?genre=issue&issn=0302-9743&volume=2624", acknowledgement = ack-nhfb, keywords = "compilers (computer programs); parallel processing (electronic computers); programming languages (electronic computers)", } @Proceedings{Montuschi:2005:PIS, editor = "Paolo Montuschi and Eric (Eric Mark) Schwarz", booktitle = "{Proceedings of the 17th IEEE Symposium on Computer Arithmetic, ARITH-17 2005, June 27--29, 2005, Cape Cod, Massachusetts, USA}", title = "{Proceedings of the 17th IEEE Symposium on Computer Arithmetic, ARITH-17 2005, June 27--29, 2005, Cape Cod, Massachusetts, USA}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xii + 298", year = "2005", ISBN = "0-7695-2366-8", ISBN-13 = "978-0-7695-2366-8", LCCN = "QA76.9.C62 .S95 2005", bibdate = "Thu Sep 14 12:30:26 2006", bibsource = "http://arith17.polito.it/", acknowledgement = ack-nhfb, keywords = "ARITH-17", } ------------------------------------------------------------------------------- - Nelson H. F. Beebe Tel: +1 801 581 5254 - - University of Utah FAX: +1 801 581 4148 - - Department of Mathematics, 110 LCB Internet e-mail: be...@math.utah.edu - - 155 S 1400 E RM 233 be...@acm.org be...@computer.org - - Salt Lake City, UT 84112-0090, USA URL: http://www.math.utah.edu/~beebe/ - ------------------------------------------------------------------------------- ------------------------------------------------------------------------------ Create and publish websites with WebMatrix Use the most popular FREE web apps or write code yourself; WebMatrix provides all the features you need to develop and publish your website. http://p.sf.net/sfu/ms-webmatrix-sf _______________________________________________ Open64-devel mailing list Open64-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/open64-devel