[Open64-devel] [open64-devel] faster integer division

Nelson H. F. Beebe Thu, 31 Mar 2011 16:17:05 -0700

Doug Gilmore's post earlier today on an optimizer change in the Open64
compiler suite to speed integer division reminded me that it is worth
posting pointers to published research on that problem from the last
couple of decades, with the most recent from January 2011.  The
entries below are culled from the bibliography of floating-point
arithmetic


        http://www.math.utah.edu/pub/tex/bib/index-table-f.html#fparith

and sorted from oldest (first) to newest (last), with cross-referenced
proceedings volumes at the end:

@String{j-AUSTRALIAN-COMP-J     = "Australian Computer Journal"}
@String{j-COMP-J                = "The Computer Journal"}
@String{j-SIGPLAN               = "ACM SIGPLAN Notices"}

@String{pub-AW                  = "Ad{\-d}i{\-s}on-Wes{\-l}ey"}
@String{pub-AW:adr              = "Reading, MA, USA"}

@String{pub-IEEE                = "IEEE Computer Society Press"}
@String{pub-IEEE:adr            = "1109 Spring Street, Suite 300, Silver
                                   Spring, MD 20910, USA"}

@String{pub-SV                  = "Spring{\-}er-Ver{\-}lag"}
@String{pub-SV:adr              = "Berlin, Germany~/ Heidelberg,
                                  Germany~/ London, UK~/ etc."}

@Article{Vowels:1992:D,
  author =       "R. A. Vowels",
  title =        "Division by 10",
  journal =      j-AUSTRALIAN-COMP-J,
  volume =       "24",
  number =       "3",
  pages =        "81--85",
  month =        aug,
  year =         "1992",
  CODEN =        "ACMJB2",
  ISSN =         "0004-8917",
  bibdate =      "Tue Dec 12 09:27:13 MST 1995",
  abstract =     "Division of a binary integer and a binary
                 floating-point mantissa by 10 can be performed with
                 shifts and adds, yielding a significant improvement in
                 hardware execution time, and in software execution time
                 if no hardware divide instruction is available. Several
                 algorithms are given, appropriate to specific machine
                 word sizes, hardware and hardware instructions
                 available, and depending on whether a remainder is
                 required. The integer division algorithms presented
                 here contain a new strategy that produces the correct
                 quotient directly, without the need for the
                 supplementary correction required of
                 previously-published algorithms. The algorithms are
                 competitive in time with binary coded decimal (BCD)
                 divide by 10. Both the integer and floating-point
                 algorithms are an order of magnitude faster than
                 conventional division.",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Sci., R. Melbourne Inst. of Technol.
                 Ltd., Vic., Australia",
  classification = "C7310 (Mathematics)",
  keywords =     "Binary integer; decimal floating-point arithmetic;
                 Floating-point algorithms; Hardware execution time;
                 Integer division algorithms; Software execution time",
  pubcountry =   "Australia",
  thesaurus =    "Digital arithmetic; Mathematics computing",
}

@Article{Granlund:1994:DII,
  author =       "Torbj{\"o}rn Granlund and Peter L. Montgomery",
  title =        "Division by invariant integers using multiplication",
  journal =      j-SIGPLAN,
  volume =       "29",
  number =       "6",
  pages =        "61--72",
  month =        jun,
  year =         "1994",
  CODEN =        "SINODQ",
  DOI =          "http://doi.acm.org/10.1145/178243.178249";,
  ISBN =         "0-89791-598-4",
  ISBN-13 =      "978-0-89791-598-4",
  ISSN =         "0362-1340",
  bibdate =      "Sun Dec 14 09:16:51 MST 2003",
  bibsource =    "http://portal.acm.org/;
                 
http://www.acm.org/pubs/contents/proceedings/pldi/178243/index.html";,
  URL =          "ftp://ftp.cwi.nl/pub/pmontgom/divcnst.psa4.gz;
                 ftp://ftp.cwi.nl/pub/pmontgom/divcnst.psl.gz;
                 
http://www.acm.org:80/pubs/citations/proceedings/pldi/178243/p61-granlund/";,
  abstract =     "Integer division remains expensive on today's
                 processors as the cost of integer multiplication
                 declines. We present code sequences for division by
                 arbitrary nonzero integer constants and run-time
                 invariants using integer multiplication. The algorithms
                 assume a two's complement architecture. Most also
                 require that the upper half of an integer product be
                 quickly accessible. We treat unsigned division, signed
                 division where the quotient rounds towards zero, signed
                 division where the quotient rounds towards $-\infty$,
                 and division where the result is known a priori to be
                 exact. We give some implementation results using the C
                 compiler GCC.",
  acknowledgement = ack-nhfb,
  affiliation =  "Cygnus Support, Mountain View, CA, USA",
  annote =       "Published as part of the Proceedings of PLDI'94.",
  classification = "C5230 (Digital arithmetic methods); C6110 (Systems
                 analysis and programming); C6150C (Compilers,
                 interpreters and other processors)",
  confdate =     "20-24 June 1994",
  conflocation = "Orlando, FL, USA",
  confsponsor =  "ACM",
  conftitle =    "ACM SIGPLAN '94 Conference on Programming Language
                 Design and Implementation (PLDI)",
  corpsource =   "Cygnus Support, Mountain View, CA, USA",
  keywords =     "algorithms; Arbitrary nonzero integer constants;
                 arbitrary nonzero integer constants; C compiler; Code
                 sequences; code sequences; digital arithmetic; Floating
                 point arithmetic; floating point arithmetic; GCC;
                 Integer division; integer division; Integer
                 multiplication; integer multiplication; Invariant
                 integers; invariant integers; mathematics computing;
                 Multiplication; multiplication; performance; program;
                 program compilers; programming; reduced instruction set
                 computing; RISC processors; Run-time invariants;
                 run-time invariants; Signed division; signed division;
                 Two's complement architecture; two's complement
                 architecture; Unsigned division; unsigned division",
  sponsororg =   "ACM",
  subject =      "{\bf G.1.0} Mathematics of Computing, NUMERICAL
                 ANALYSIS, General, Computer arithmetic. {\bf F.2.1}
                 Theory of Computation, ANALYSIS OF ALGORITHMS AND
                 PROBLEM COMPLEXITY, Numerical Algorithms and Problems.
                 {\bf D.3.4} Software, PROGRAMMING LANGUAGES,
                 Processors, Compilers.",
  thesaurus =    "Digital arithmetic; Mathematics computing; Program
                 compilers; Programming; Reduced instruction set
                 computing",
  treatment =    "T Theoretical or Mathematical",
}

@InProceedings{Sheldon:2003:SRI,
  author =       "Jeffrey Sheldon and Walter Lee and Ben Greenwald and
                 Saman Amarasinghe",
  title =        "Strength Reduction of Integer Division and Modulo
                 Operations",
  crossref =     "Dietz:2003:LCP",
  pages =        "254--273",
  year =         "2003",
  bibdate =      "Fri Jun 24 12:09:31 2005",
  URL =          "http://www.springerlink.com/link.asp?id=3hfwyuyjxkf23nd2;
                 
http://www.springerlink.com/openurl.asp?genre=article&issn=0302-9743&volume=2624&spage=254";,
  abstract =     "Integer division, modulo, and remainder operations are
                 expressive and useful operations. They are logical
                 candidates to express complex data accesses such as the
                 wrap-around behavior in queues using ring buffers. In
                 addition, they appear frequently in address
                 computations as a result of compiler optimizations that
                 improve data locality, perform data distribution, or
                 enable parallelization. Experienced application
                 programmers, however, avoid them because they are slow.
                 Furthermore, while advances in both hardware and
                 software have improved the performance of many parts of
                 a program, few are applicable to division and modulo
                 operations. This trend makes these operations
                 increasingly detrimental to program performance. This
                 paper describes a suite of optimizations for
                 eliminating division, modulo, and remainder operations
                 from programs. These techniques are analogous to
                 strength reduction techniques used for multiplications.
                 In addition to some algebraic simplifications, we
                 present a set of optimization techniques that
                 eliminates division and modulo operations that are
                 functions of loop induction variables and loop
                 constants. The optimizations rely on algebra, integer
                 programming, and loop transformations.",
  acknowledgement = ack-nhfb,
}

@Book{Warren:2003:HD,
  author =       "Henry S. Warren",
  title =        "Hacker's delight",
  publisher =    pub-AW,
  address =      pub-AW:adr,
  pages =        "xiv + 306",
  year =         "2003",
  ISBN =         "0-201-91465-4",
  ISBN-13 =      "978-0-201-91465-8",
  LCCN =         "QA76.6 .W375 2003",
  bibdate =      "Tue Jan 03 18:20:34 2006",
  bibsource =    "z3950.loc.gov:7090/Voyager",
  note =         "While this book does not specifically address
                 computational aspects of floating-point arithmetic
                 (apart from the nine-page Chapter 15), it has extensive
                 coverage of, and clever algorithms for, integer
                 arithmetic operations that are fundamental for
                 implementing hardware floating-arithmetic and software
                 multiple-precision arithmetic.",
  URL =          
"http://www.awprofessional.com/bookstore/product.asp?isbn=0201914654;
                 http://www.hackersdelight.org/;
                 http://www.hackersdelight.org/hackerTOC.pdf;
                 
http://www.informit.com/content/images/chap3_0201914654/elementLinks/0201914654.pdf";,
  ...
}

@InProceedings{Robison:2005:BUD,
  author =       "Arch Robison",
  title =        "{$N$}-Bit Unsigned Division Via {$N$}-Bit
                 Multiply-Add",
  crossref =     "Montuschi:2005:PIS",
  pages =        "??--??",
  year =         "2005",
  bibdate =      "Wed Jun 22 07:02:55 2005",
  URL =          "http://arith17.polito.it/final/paper-104.pdf";,
  abstract =     "Integer division on modern processors is expensive
                 compared to multiplication. Previous algorithms for
                 performing unsigned division by an invariant divisor,
                 via reciprocal approximation, suffer in the worst case
                 from a common requirement for $n+1$ bit multiplication,
                 which typically must be synthesized from $n$-bit
                 multiplication and extra arithmetic operations. This
                 paper presents, and proves, a hybrid of previous
                 algorithms that replaces $n+1$ bit multiplication with
                 a single fused multiply-add operation on $n$-bit
                 operands, thus reducing any $n$-bit unsigned division
                 to the upper $n$ bits of a multiply-add, followed by a
                 single right shift. An additional benefit is that the
                 prerequisite calculations are simple and fast. On the
                 Itanium 2 processor, the technique is advantageous for
                 as few as two quotients that share a common run-time
                 divisor.",
  acknowledgement = ack-nhfb,
  keywords =     "ARITH-17",
  pagecount =    "9",
}

@Article{Cavagnino:2008:EAI,
  author =       "D. Cavagnino and A. E. Werbrouck",
  title =        "Efficient Algorithms for Integer Division by Constants
                 Using Multiplication",
  journal =      j-COMP-J,
  volume =       "51",
  number =       "4",
  pages =        "470--480",
  month =        jul,
  year =         "2008",
  CODEN =        "CMPJA6",
  DOI =          "http://dx.doi.org/10.1093/comjnl/bxm082";,
  ISSN =         "0010-4620",
  bibdate =      "Wed Apr 28 14:33:34 MDT 2010",
  bibsource =    
"http://comjnl.oxfordjournals.org/content/vol51/issue4/index.dtl";,
  URL =          
"http://comjnl.oxfordjournals.org/cgi/content/abstract/51/4/470;
                 http://comjnl.oxfordjournals.org/cgi/content/full/51/4/470;
                 http://comjnl.oxfordjournals.org/cgi/reprint/51/4/470";,
  abstract =     "We present a complete analysis of the integer division
                 of a single unsigned dividend word by a single unsigned
                 divisor word based on double-word multiplication of the
                 dividend by an inverse of the divisor. The well-known
                 advantage of this method yields run-time efficiency, if
                 the inverse of the divisor can be calculated at compile
                 time, since multiplication is much faster than division
                 in arithmetic units. Our analysis leads to the
                 discovery of a limit to the straightforward application
                 of this method in the form of a critical dividend,
                 which fortunately associates with a minority of the
                 possible divisors (20\%) and defines only a small upper
                 part of the available dividend space. We present two
                 algorithms for ascertaining whether a critical dividend
                 exists and, if so, its value along with a circumvention
                 of this limit. For completeness, we include an
                 algorithm for integer division of a unsigned
                 double-word dividend by an unsigned single-word divisor
                 in which the quotient is not limited to a single word
                 and the remainder is an intrinsic part of the result.",
  acknowledgement = ack-nhfb,
  keywords =     "integer division; multiplicative inverse; division by
                 multiplication; efficiency; integer constants",
  remark =       "See \cite{Cavagnino:2011:AAD}.",
}

@Article{Cavagnino:2011:AAD,
  author =       "D. Cavagnino and A. E. Werbrouck",
  title =        "An Analysis of Associated Dividends in the {DBM}
                 Algorithm for Division by Constants Using
                 Multiplication",
  journal =      j-COMP-J,
  volume =       "54",
  number =       "1",
  pages =        "148--156",
  month =        jan,
  year =         "2011",
  CODEN =        "CMPJA6",
  DOI =          "http://dx.doi.org/10.1093/comjnl/bxp117";,
  ISSN =         "0010-4620",
  bibdate =      "Tue Dec 21 19:26:47 MST 2010",
  bibsource =    "http://comjnl.oxfordjournals.org/content/54/1.toc";,
  URL =          
"http://comjnl.oxfordjournals.org/content/54/1/148.full.pdf+html";,
  abstract =     "When a compiler encounters a fixed integer divisor,
                 which is not a power of 2, it can calculate its inverse
                 to be multiplied by the run-time integer dividends to
                 obtain the quotients, using our very efficient,
                 recently published [Cavagnino, D. and Werbrouck, A.E.
                 (2008) {\em Efficient algorithms for integer division
                 by constants using multiplication}. Comp. J., 51,
                 470--480] division by multiplication algorithms.
                 Essentially our algorithms permit a complete partition
                 of a defined number space into non-adverse and adverse
                 divisors on the basis of whether a dividend associated
                 with each divisor is, or is not, greater than the
                 maximum dividend size. In this paper, we demonstrate
                 useful relations between the dividends associated with
                 all divisors and with their multiples by positive
                 powers",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Journal",
  keywords =     "dividend distribution; division by multiplication
                 (DBM); integer division; multiplicative division",
  onlinedate =   "December 30, 2009",
  remark =       "See \cite{Cavagnino:2008:EAI}.",
}


@Proceedings{Dietz:2003:LCP,
  editor =       "Henry G. Dietz",
  booktitle =    "{Languages and Compilers for Parallel Computing: 14th
                 International Workshop, LCPC 2001, Cumberland Falls,
                 KY, USA, August 1--3, 2001: Revised Papers}",
  title =        "{Languages and Compilers for Parallel Computing: 14th
                 International Workshop, LCPC 2001, Cumberland Falls,
                 KY, USA, August 1--3, 2001: Revised Papers}",
  volume =       "2624",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "ix + 444",
  year =         "2003",
  CODEN =        "LNCSD9",
  DOI =          "????",
  ISBN =         "3-540-04029-3 (paperback)",
  ISBN-13 =      "978-3-540-04029-3 (paperback)",
  ISSN =         "0302-9743",
  LCCN =         "QA76.58 .W656 2001",
  bibdate =      "Thu Aug 21 09:09:03 MDT 2003",
  note =         "The 14th workshop on Languages and Compilers for
                 Parallel Computing, LCPC 2001, was organized and hosted
                 by the Electrical and Computer Engineering Department
                 of the University of Kentucky, Lexington, KY, USA.",
  series =       ser-LNCS,
  URL =          
"http://link.springer-ny.com/link/service/series/0558/tocs/t2624.htm;
                 
http://www.springerlink.com/openurl.asp?genre=issue&issn=0302-9743&volume=2624";,
  acknowledgement = ack-nhfb,
  keywords =     "compilers (computer programs); parallel processing
                 (electronic computers); programming languages
                 (electronic computers)",
}

@Proceedings{Montuschi:2005:PIS,
  editor =       "Paolo Montuschi and Eric (Eric Mark) Schwarz",
  booktitle =    "{Proceedings of the 17th IEEE Symposium on Computer
                 Arithmetic, ARITH-17 2005, June 27--29, 2005, Cape Cod,
                 Massachusetts, USA}",
  title =        "{Proceedings of the 17th IEEE Symposium on Computer
                 Arithmetic, ARITH-17 2005, June 27--29, 2005, Cape Cod,
                 Massachusetts, USA}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xii + 298",
  year =         "2005",
  ISBN =         "0-7695-2366-8",
  ISBN-13 =      "978-0-7695-2366-8",
  LCCN =         "QA76.9.C62 .S95 2005",
  bibdate =      "Thu Sep 14 12:30:26 2006",
  bibsource =    "http://arith17.polito.it/";,
  acknowledgement = ack-nhfb,
  keywords =     "ARITH-17",
}

-------------------------------------------------------------------------------
- Nelson H. F. Beebe                    Tel: +1 801 581 5254                  -
- University of Utah                    FAX: +1 801 581 4148                  -
- Department of Mathematics, 110 LCB    Internet e-mail: be...@math.utah.edu  -
- 155 S 1400 E RM 233                       be...@acm.org  be...@computer.org -
- Salt Lake City, UT 84112-0090, USA    URL: http://www.math.utah.edu/~beebe/ -
-------------------------------------------------------------------------------

------------------------------------------------------------------------------
Create and publish websites with WebMatrix
Use the most popular FREE web apps or write code yourself; 
WebMatrix provides all the features you need to develop and 
publish your website. http://p.sf.net/sfu/ms-webmatrix-sf
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

[Open64-devel] [open64-devel] faster integer division

Reply via email to