Modified: lucene/hadoop/branches/branch-0.15/docs/linkmap.pdf URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/docs/linkmap.pdf?rev=611895&r1=611894&r2=611895&view=diff ============================================================================== --- lucene/hadoop/branches/branch-0.15/docs/linkmap.pdf (original) +++ lucene/hadoop/branches/branch-0.15/docs/linkmap.pdf Mon Jan 14 11:16:43 2008 @@ -5,10 +5,10 @@ /Producer (FOP 0.20.5) >> endobj 5 0 obj -<< /Length 357 /Filter [ /ASCII85Decode /FlateDecode ] +<< /Length 807 /Filter [ /ASCII85Decode /FlateDecode ] >> stream -GauI0_,>n>&;KY!METC?CGrC"Nj`F.NYeV(W<]6G1/^A&*T(0o)+Zp_BQ?ZCAG=hhs0WHJ>9?gp&kjq=\>2I.5nM+G'#`g5(eF;:#g<UH]j9#u/%AG;)K*4.U3]$MbmIdn4:EnQ1r1dMX;T+jcNKHEPnq3rPum.l5-:kLX86_r0p''/O]3Qe#m5V<OW1G%7,)fBqF^G*>hr$Mh_DcrLsR56#bIK6F`53R[DXtdUdiT?fEY&K$uH3ZGS9>i8i:[EMAIL PROTECTED]&tLL\*oq4a5lV52V!1.TC[]-^*T]rUc(4)(7LD>dW)PJ3tGFOJ<P&[llGkcDt]YAB9Cf7*hB0p>c~> +GatUr9i'Ou&;KZL'gBJ8f:Vju0$k?8W*7hE4d;se'/+-`NL.Qap"q]g!BZqc.I9[=*a>',)/[EMAIL PROTECTED]>[EMAIL PROTECTED]&pDBfQds2?+eF"p%D`[T"'[EMAIL PROTECTED])0lWc8<WZs8!g6igP^]VaMm$d=P$bA51?p%[:t]<DifJ9PrZXu.?EP7P/%nh-^X:$CL(nkEYOOf+$L8r_b%GEfh$)[EMAIL PROTECTED]"d?U>,!VO,nihWVWeH:2%mX-isBn#_(:"D^EJ9F'.cUr>Bl)LH]IedPZlgD3#[JM]`#K&N8HT?$&2SP`\(PUdh!e5&.CpX7bB(*]AV"XdI$i*-B-CG;DcOp6OST??+cL4349KjoY2&Qh]HD#T]G-B.S_/+nQ8/n31V9]"JuiC=.Li/=sFrlS;[EMAIL PROTECTED];@p3:1COG'<ULMQQJ0#8o=iWNREtcIX"2\8$n,T?0G8/dBQU([EMAIL PROTECTED];ZE,mPRXpC&N]JI,rU0:13O<nIJh>[EMAIL PROTECTED]&<`2Q0,[EMAIL PROTECTED])<[EMAIL PROTECTED]"HMZQHW-IV\TA3o\03&i,4Q&:+*)P]t4%_:?['UmcS:dp.c'qb%9,Pqn#h0=oJJd^>J.\J#$1,O2F>WoB:X:;[EMAIL PROTECTED]'[EMAIL PROTECTED],miTtGD#9#)fq*<a:[EMAIL PROTECTED]'#2"RXJeBQdD];2"X4Dp>%jJsj%7l>[EMAIL PROTECTED]"53="'`54FX0WD6iR[Sm7:F~> endstream endobj 6 0 obj @@ -17,143 +17,78 @@ /MediaBox [ 0 0 612 792 ] /Resources 3 0 R /Contents 5 0 R -/Annots 7 0 R >> endobj 7 0 obj -[ -8 0 R -] -endobj -8 0 obj -<< /Type /Annot -/Subtype /Link -/Rect [ 102.0 559.666 195.992 547.666 ] -/C [ 0 0 0 ] -/Border [ 0 0 0 ] -/A 9 0 R -/H /I ->> -endobj -10 0 obj -<< /Length 822 /Filter [ /ASCII85Decode /FlateDecode ] - >> -stream -Gatn%?#Q2d'Sc)P'u"K;iqhDKR3p+gC"&LSBc#M\\R6%L&lLEcp?uc>$DoCd&kd+_/H!4XbW.lM3qTRU&mtnU#SR`"XbQq_?OR!Hf*%i[;ZX`upj%GL-(#9NY?utJ0%4*n-f+5[IPCV+b"M4CKV,pgU%_q`CZ1=\.nU]OK`J3"IRY`;$ksW8H#)l[#/UA((8qa23(1Djq*bQR)-j<=X625.:E,[;2r(>kc]=Ai[Zl8r5%dmc8sNigK7NT9bLGbM5_u@)OFTa1j>2a!Nb'83H[B#?V"nFCSkiNPa">T3ni]*405t8boJYR87pS6SqIq-V5Tn%?B9]$Ra6820rnDP\.ZNjhUC&fBO#8<J>[EMAIL PROTECTED]']ibcP$<g?,j?$0tjW@"JcZY>4,le.T+lkh,WcADE"[EMAIL PROTECTED])9NC;5Ygbup^Lt6<PCH+qq_&1hk,UPg^?`:dS/WlX9O=MERn-a*.:[EMAIL PROTECTED],L[F^bV3LVBb"d8A:ZA''RbrSZ%W^7X(t1Ha;baEi3i:qjS.(\1O7YW,#UP'=(/KqP(_``5'[EMAIL PROTECTED]"MOJmqS,291(lQXU-/3CPg055,I^.a7_88Uf*[TP,-g-^C/qmX2V^44-OOOt$R,\ZpgRQ`_V;Sk[+;R7D3D/$OZ69TcSSK9!^,sMhE[M!jK*[KQdnA:8k,O/38lbf&:^K\,5q[j(C^-J05#PphDmS#d2Eu5/mHtLcG/%*HZIuAE\1Q>^(=P/8h\;LVZZAZSjmXk737;fc['D&",B4[?0LUcUJd+O^r?-mh_:97+C3ctkCZ[2mZW`)X~> -endstream -endobj -11 0 obj -<< /Type /Page -/Parent 1 0 R -/MediaBox [ 0 0 612 792 ] -/Resources 3 0 R -/Contents 10 0 R ->> -endobj -13 0 obj -<< - /Title (\376\377\0\61\0\40\0\124\0\141\0\142\0\154\0\145\0\40\0\157\0\146\0\40\0\103\0\157\0\156\0\164\0\145\0\156\0\164\0\163) - /Parent 12 0 R - /A 9 0 R ->> endobj -14 0 obj << /Type /Font /Subtype /Type1 /Name /F3 /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding >> endobj -15 0 obj +8 0 obj << /Type /Font /Subtype /Type1 /Name /F5 /BaseFont /Times-Roman /Encoding /WinAnsiEncoding >> endobj -16 0 obj +9 0 obj << /Type /Font /Subtype /Type1 /Name /F6 /BaseFont /Times-Italic /Encoding /WinAnsiEncoding >> endobj -17 0 obj +10 0 obj << /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj -18 0 obj +11 0 obj << /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Helvetica-Oblique /Encoding /WinAnsiEncoding >> endobj -19 0 obj -<< /Type /Font -/Subtype /Type1 -/Name /F7 -/BaseFont /Times-Bold -/Encoding /WinAnsiEncoding >> -endobj 1 0 obj << /Type /Pages -/Count 2 -/Kids [6 0 R 11 0 R ] >> +/Count 1 +/Kids [6 0 R ] >> endobj 2 0 obj << /Type /Catalog /Pages 1 0 R - /Outlines 12 0 R - /PageMode /UseOutlines >> endobj 3 0 obj << -/Font << /F3 14 0 R /F5 15 0 R /F1 17 0 R /F6 16 0 R /F2 18 0 R /F7 19 0 R >> +/Font << /F3 7 0 R /F5 8 0 R /F1 10 0 R /F6 9 0 R /F2 11 0 R >> /ProcSet [ /PDF /ImageC /Text ] >> endobj -9 0 obj -<< -/S /GoTo -/D [11 0 R /XYZ 85.0 659.0 null] ->> -endobj -12 0 obj -<< - /First 13 0 R - /Last 13 0 R ->> endobj xref -0 20 +0 12 0000000000 65535 f -0000002665 00000 n -0000002730 00000 n -0000002822 00000 n +0000001630 00000 n +0000001688 00000 n +0000001738 00000 n 0000000015 00000 n 0000000071 00000 n -0000000519 00000 n -0000000639 00000 n -0000000664 00000 n -0000002956 00000 n -0000000799 00000 n -0000001713 00000 n -0000003019 00000 n -0000001821 00000 n -0000001998 00000 n -0000002111 00000 n -0000002221 00000 n -0000002332 00000 n -0000002440 00000 n -0000002556 00000 n +0000000969 00000 n +0000001075 00000 n +0000001187 00000 n +0000001296 00000 n +0000001406 00000 n +0000001514 00000 n trailer << -/Size 20 +/Size 12 /Root 2 0 R /Info 4 0 R >> startxref -3070 +1858 %%EOF
Modified: lucene/hadoop/branches/branch-0.15/docs/mapred_tutorial.html URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/docs/mapred_tutorial.html?rev=611895&r1=611894&r2=611895&view=diff ============================================================================== --- lucene/hadoop/branches/branch-0.15/docs/mapred_tutorial.html (original) +++ lucene/hadoop/branches/branch-0.15/docs/mapred_tutorial.html Mon Jan 14 11:16:43 2008 @@ -1,9 +1,9 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> <html> <head> <META http-equiv="Content-Type" content="text/html; charset=UTF-8"> <meta content="Apache Forrest" name="Generator"> -<meta name="Forrest-version" content="0.7"> +<meta name="Forrest-version" content="0.8"> <meta name="Forrest-skin-name" content="pelt"> <title>Hadoop Map-Reduce Tutorial</title> <link type="text/css" href="skin/basic.css" rel="stylesheet"> @@ -16,46 +16,91 @@ <body onload="init()"> <script type="text/javascript">ndeSetTextSize();</script> <div id="top"> +<!--+ + |breadtrail + +--> <div class="breadtrail"> <a href="http://www.apache.org/">Apache</a> > <a href="http://lucene.apache.org/">Lucene</a> > <a href="http://lucene.apache.org/hadoop/">Hadoop</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script> </div> +<!--+ + |header + +--> <div class="header"> +<!--+ + |start group logo + +--> <div class="grouplogo"> <a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a> </div> +<!--+ + |end group logo + +--> +<!--+ + |start Project Logo + +--> <div class="projectlogo"> <a href="http://lucene.apache.org/hadoop/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a> </div> +<!--+ + |end Project Logo + +--> +<!--+ + |start Search + +--> <div class="searchbox"> <form action="http://www.google.com/search" method="get" class="roundtopsmall"> <input value="lucene.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google"> - <input attr="value" name="Search" value="Search" type="submit"> + <input name="Search" value="Search" type="submit"> </form> </div> +<!--+ + |end search + +--> +<!--+ + |start Tabs + +--> <ul id="tabs"> <li> -<a class="base-not-selected" href="http://lucene.apache.org/hadoop/">Project</a> +<a class="unselected" href="http://lucene.apache.org/hadoop/">Project</a> </li> <li> -<a class="base-not-selected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a> +<a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a> </li> <li class="current"> -<a class="base-selected" href="index.html">Hadoop 0.15 Documentation</a> +<a class="selected" href="index.html">Hadoop 0.15 Documentation</a> </li> </ul> +<!--+ + |end Tabs + +--> </div> </div> <div id="main"> <div id="publishedStrip"> +<!--+ + |start Subtabs + +--> <div id="level2tabs"></div> +<!--+ + |end Endtabs + +--> <script type="text/javascript"><!-- -document.write("<text>Last Published:</text> " + document.lastModified); +document.write("Last Published: " + document.lastModified); // --></script> </div> +<!--+ + |breadtrail + +--> <div class="breadtrail"> - + </div> +<!--+ + |start Menu, mainarea + +--> +<!--+ + |start Menu + +--> <div id="menu"> <div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div> <div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;"> @@ -93,8 +138,17 @@ <div id="credit"></div> <div id="roundbottom"> <img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div> +<!--+ + |alternative credits + +--> <div id="credit2"></div> </div> +<!--+ + |end Menu + +--> +<!--+ + |start content + +--> <div id="content"> <div title="Portable Document Format" class="pdflink"> <a class="dida" href="mapred_tutorial.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br> @@ -211,13 +265,13 @@ <a href="#Example%3A+WordCount+v2.0">Example: WordCount v2.0</a> <ul class="minitoc"> <li> -<a href="#Source+Code-N10A91">Source Code</a> +<a href="#Source+Code-N10AB2">Source Code</a> </li> <li> <a href="#Sample+Runs">Sample Runs</a> </li> <li> -<a href="#Salient+Points">Salient Points</a> +<a href="#Highlights">Highlights</a> </li> </ul> </li> @@ -354,7 +408,12 @@ <p> <span class="codefrag">WordCount</span> is a simple application that counts the number of occurences of each word in a given input set.</p> -<a name="N100DA"></a><a name="Source+Code"></a> +<p>This works with a + <a href="quickstart.html#Standalone+Operation">local-standalone</a>, + <a href="quickstart.html#SingleNodeSetup">pseudo-distributed</a> or + <a href="quickstart.html#Fully-Distributed+Operation">fully-distributed</a> + Hadoop installation.</p> +<a name="N100E9"></a><a name="Source+Code"></a> <h3 class="h4">Source Code</h3> <table class="ForrestTable" cellspacing="1" cellpadding="4"> @@ -385,7 +444,7 @@ <td colspan="1" rowspan="1">3.</td> <td colspan="1" rowspan="1"> - <span class="codefrag">import java.io.Exception;</span> + <span class="codefrag">import java.io.IOException;</span> </td> </tr> @@ -480,7 +539,7 @@ <td colspan="1" rowspan="1"> <span class="codefrag"> - public static class MapClass extends MapReduceBase + public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { </span> </td> @@ -794,7 +853,7 @@ <td colspan="1" rowspan="1">45.</td> <td colspan="1" rowspan="1"> - <span class="codefrag">conf.setMapperClass(MapClass.class);</span> + <span class="codefrag">conf.setMapperClass(Map.class);</span> </td> </tr> @@ -858,7 +917,7 @@ <td colspan="1" rowspan="1">52.</td> <td colspan="1" rowspan="1"> - <span class="codefrag">conf.setInputPath(new Path(args[1]));</span> + <span class="codefrag">conf.setInputPath(new Path(args[0]));</span> </td> </tr> @@ -868,7 +927,7 @@ <td colspan="1" rowspan="1">53.</td> <td colspan="1" rowspan="1"> - <span class="codefrag">conf.setOutputPath(new Path(args[2]));</span> + <span class="codefrag">conf.setOutputPath(new Path(args[1]));</span> </td> </tr> @@ -917,20 +976,23 @@ </tr> </table> -<a name="N1045C"></a><a name="Usage"></a> +<a name="N1046B"></a><a name="Usage"></a> <h3 class="h4">Usage</h3> <p>Assuming <span class="codefrag">HADOOP_HOME</span> is the root of the installation and <span class="codefrag">HADOOP_VERSION</span> is the Hadoop version installed, compile <span class="codefrag">WordCount.java</span> and create a jar:</p> <p> +<span class="codefrag">$ mkdir wordcount_classes</span> +<br> + <span class="codefrag"> $ javac -classpath ${HADOOP_HOME}/hadoop-${HADOOP_VERSION}-core.jar - WordCount.java + -d wordcount_classes WordCount.java </span> <br> -<span class="codefrag">$ jar -cvf /usr/joe/wordcount.jar WordCount.class</span> +<span class="codefrag">$ jar -cvf /usr/joe/wordcount.jar -C wordcount_classes/ .</span> </p> <p>Assuming that:</p> @@ -1009,7 +1071,7 @@ <br> </p> -<a name="N104D8"></a><a name="Walk-through"></a> +<a name="N104EB"></a><a name="Walk-through"></a> <h3 class="h4">Walk-through</h3> <p>The <span class="codefrag">WordCount</span> application is quite straight-forward.</p> <p>The <span class="codefrag">Mapper</span> implementation (lines 14-26), via the @@ -1119,7 +1181,7 @@ </div> -<a name="N1058F"></a><a name="Map-Reduce+-+User+Interfaces"></a> +<a name="N105A2"></a><a name="Map-Reduce+-+User+Interfaces"></a> <h2 class="h3">Map-Reduce - User Interfaces</h2> <div class="section"> <p>This section provides a reasonable amount of detail on every user-facing @@ -1138,12 +1200,12 @@ <p>Finally, we will wrap up by discussing some useful features of the framework such as the <span class="codefrag">DistributedCache</span>, <span class="codefrag">IsolationRunner</span> etc.</p> -<a name="N105C8"></a><a name="Payload"></a> +<a name="N105DB"></a><a name="Payload"></a> <h3 class="h4">Payload</h3> <p>Applications typically implement the <span class="codefrag">Mapper</span> and <span class="codefrag">Reducer</span> interfaces to provide the <span class="codefrag">map</span> and <span class="codefrag">reduce</span> methods. These form the core of the job.</p> -<a name="N105DD"></a><a name="Mapper"></a> +<a name="N105F0"></a><a name="Mapper"></a> <h4>Mapper</h4> <p> <a href="api/org/apache/hadoop/mapred/Mapper.html"> @@ -1199,7 +1261,7 @@ <a href="api/org/apache/hadoop/io/compress/CompressionCodec.html"> CompressionCodec</a> to be used via the <span class="codefrag">JobConf</span>. </p> -<a name="N10657"></a><a name="How+Many+Maps%3F"></a> +<a name="N1066A"></a><a name="How+Many+Maps%3F"></a> <h5>How Many Maps?</h5> <p>The number of maps is usually driven by the total size of the inputs, that is, the total number of blocks of the input files.</p> @@ -1212,7 +1274,7 @@ <a href="api/org/apache/hadoop/mapred/JobConf.html#setNumMapTasks(int)"> setNumMapTasks(int)</a> (which only provides a hint to the framework) is used to set it even higher.</p> -<a name="N1066F"></a><a name="Reducer"></a> +<a name="N10682"></a><a name="Reducer"></a> <h4>Reducer</h4> <p> <a href="api/org/apache/hadoop/mapred/Reducer.html"> @@ -1235,18 +1297,18 @@ <p> <span class="codefrag">Reducer</span> has 3 primary phases: shuffle, sort and reduce. </p> -<a name="N1069F"></a><a name="Shuffle"></a> +<a name="N106B2"></a><a name="Shuffle"></a> <h5>Shuffle</h5> <p>Input to the <span class="codefrag">Reducer</span> is the sorted output of the mappers. In this phase the framework fetches the relevant partition of the output of all the mappers, via HTTP.</p> -<a name="N106AC"></a><a name="Sort"></a> +<a name="N106BF"></a><a name="Sort"></a> <h5>Sort</h5> <p>The framework groups <span class="codefrag">Reducer</span> inputs by keys (since different mappers may have output the same key) in this stage.</p> <p>The shuffle and sort phases occur simultaneously; while map-outputs are being fetched they are merged.</p> -<a name="N106BB"></a><a name="Secondary+Sort"></a> +<a name="N106CE"></a><a name="Secondary+Sort"></a> <h5>Secondary Sort</h5> <p>If equivalence rules for grouping the intermediate keys are required to be different from those for grouping keys before @@ -1257,7 +1319,7 @@ JobConf.setOutputKeyComparatorClass(Class)</a> can be used to control how intermediate keys are grouped, these can be used in conjunction to simulate <em>secondary sort on values</em>.</p> -<a name="N106D4"></a><a name="Reduce"></a> +<a name="N106E7"></a><a name="Reduce"></a> <h5>Reduce</h5> <p>In this phase the <a href="api/org/apache/hadoop/mapred/Reducer.html#reduce(K2, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)"> @@ -1273,7 +1335,7 @@ progress, set application-level status messages and update <span class="codefrag">Counters</span>, or just indicate that they are alive.</p> <p>The output of the <span class="codefrag">Reducer</span> is <em>not sorted</em>.</p> -<a name="N10702"></a><a name="How+Many+Reduces%3F"></a> +<a name="N10715"></a><a name="How+Many+Reduces%3F"></a> <h5>How Many Reduces?</h5> <p>The right number of reduces seems to be <span class="codefrag">0.95</span> or <span class="codefrag">1.75</span> multiplied by (<<em>no. of nodes</em>> * @@ -1288,7 +1350,7 @@ <p>The scaling factors above are slightly less than whole numbers to reserve a few reduce slots in the framework for speculative-tasks and failed tasks.</p> -<a name="N10727"></a><a name="Reducer+NONE"></a> +<a name="N1073A"></a><a name="Reducer+NONE"></a> <h5>Reducer NONE</h5> <p>It is legal to set the number of reduce-tasks to <em>zero</em> if no reduction is desired.</p> @@ -1298,7 +1360,7 @@ setOutputPath(Path)</a>. The framework does not sort the map-outputs before writing them out to the <span class="codefrag">FileSystem</span>. </p> -<a name="N10742"></a><a name="Partitioner"></a> +<a name="N10755"></a><a name="Partitioner"></a> <h4>Partitioner</h4> <p> <a href="api/org/apache/hadoop/mapred/Partitioner.html"> @@ -1312,7 +1374,7 @@ <p> <a href="api/org/apache/hadoop/mapred/lib/HashPartitioner.html"> HashPartitioner</a> is the default <span class="codefrag">Partitioner</span>.</p> -<a name="N10761"></a><a name="Reporter"></a> +<a name="N10774"></a><a name="Reporter"></a> <h4>Reporter</h4> <p> <a href="api/org/apache/hadoop/mapred/Reporter.html"> @@ -1331,7 +1393,7 @@ </p> <p>Applications can also update <span class="codefrag">Counters</span> using the <span class="codefrag">Reporter</span>.</p> -<a name="N1078B"></a><a name="OutputCollector"></a> +<a name="N1079E"></a><a name="OutputCollector"></a> <h4>OutputCollector</h4> <p> <a href="api/org/apache/hadoop/mapred/OutputCollector.html"> @@ -1342,7 +1404,7 @@ <p>Hadoop Map-Reduce comes bundled with a <a href="api/org/apache/hadoop/mapred/lib/package-summary.html"> library</a> of generally useful mappers, reducers, and partitioners.</p> -<a name="N107A6"></a><a name="Job+Configuration"></a> +<a name="N107B9"></a><a name="Job+Configuration"></a> <h3 class="h4">Job Configuration</h3> <p> <a href="api/org/apache/hadoop/mapred/JobConf.html"> @@ -1397,7 +1459,7 @@ <a href="api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String, java.lang.String)">set(String, String)</a>/<a href="api/org/apache/hadoop/conf/Configuration.html#get(java.lang.String, java.lang.String)">get(String, String)</a> to set/get arbitrary parameters needed by applications. However, use the <span class="codefrag">DistributedCache</span> for large amounts of (read-only) data.</p> -<a name="N1082C"></a><a name="Job+Submission+and+Monitoring"></a> +<a name="N1083F"></a><a name="Job+Submission+and+Monitoring"></a> <h3 class="h4">Job Submission and Monitoring</h3> <p> <a href="api/org/apache/hadoop/mapred/JobClient.html"> @@ -1433,7 +1495,7 @@ <p>Normally the user creates the application, describes various facets of the job via <span class="codefrag">JobConf</span>, and then uses the <span class="codefrag">JobClient</span> to submit the job and monitor its progress.</p> -<a name="N1086A"></a><a name="Job+Control"></a> +<a name="N1087D"></a><a name="Job+Control"></a> <h4>Job Control</h4> <p>Users may need to chain map-reduce jobs to accomplish complex tasks which cannot be done via a single map-reduce job. This is fairly @@ -1469,7 +1531,7 @@ </li> </ul> -<a name="N10894"></a><a name="Job+Input"></a> +<a name="N108A7"></a><a name="Job+Input"></a> <h3 class="h4">Job Input</h3> <p> <a href="api/org/apache/hadoop/mapred/InputFormat.html"> @@ -1512,7 +1574,7 @@ <a href="api/org/apache/hadoop/mapred/TextInputFormat.html"> TextInputFormat</a> is the default <span class="codefrag">InputFormat</span>. </p> -<a name="N108E9"></a><a name="InputSplit"></a> +<a name="N108FC"></a><a name="InputSplit"></a> <h4>InputSplit</h4> <p> <a href="api/org/apache/hadoop/mapred/InputSplit.html"> @@ -1526,7 +1588,7 @@ FileSplit</a> is the default <span class="codefrag">InputSplit</span>. It sets <span class="codefrag">map.input.file</span> to the path of the input file for the logical split.</p> -<a name="N1090E"></a><a name="RecordReader"></a> +<a name="N10921"></a><a name="RecordReader"></a> <h4>RecordReader</h4> <p> <a href="api/org/apache/hadoop/mapred/RecordReader.html"> @@ -1538,7 +1600,7 @@ for processing. <span class="codefrag">RecordReader</span> thus assumes the responsibility of processing record boundaries and presents the tasks with keys and values.</p> -<a name="N10931"></a><a name="Job+Output"></a> +<a name="N10944"></a><a name="Job+Output"></a> <h3 class="h4">Job Output</h3> <p> <a href="api/org/apache/hadoop/mapred/OutputFormat.html"> @@ -1563,7 +1625,7 @@ <p> <span class="codefrag">TextOutputFormat</span> is the default <span class="codefrag">OutputFormat</span>.</p> -<a name="N1095A"></a><a name="Task+Side-Effect+Files"></a> +<a name="N1096D"></a><a name="Task+Side-Effect+Files"></a> <h4>Task Side-Effect Files</h4> <p>In some applications, component tasks need to create and/or write to side-files, which differ from the actual job-output files.</p> @@ -1589,7 +1651,7 @@ JobConf.getOutputPath()</a>, and the framework will promote them similarly for succesful task-attempts, thus eliminating the need to pick unique paths per task-attempt.</p> -<a name="N1098F"></a><a name="RecordWriter"></a> +<a name="N109A2"></a><a name="RecordWriter"></a> <h4>RecordWriter</h4> <p> <a href="api/org/apache/hadoop/mapred/RecordWriter.html"> @@ -1597,9 +1659,9 @@ pairs to an output file.</p> <p>RecordWriter implementations write the job outputs to the <span class="codefrag">FileSystem</span>.</p> -<a name="N109A6"></a><a name="Other+Useful+Features"></a> +<a name="N109B9"></a><a name="Other+Useful+Features"></a> <h3 class="h4">Other Useful Features</h3> -<a name="N109AC"></a><a name="Counters"></a> +<a name="N109BF"></a><a name="Counters"></a> <h4>Counters</h4> <p> <span class="codefrag">Counters</span> represent global counters, defined either by @@ -1613,7 +1675,7 @@ Reporter.incrCounter(Enum, long)</a> in the <span class="codefrag">map</span> and/or <span class="codefrag">reduce</span> methods. These counters are then globally aggregated by the framework.</p> -<a name="N109D7"></a><a name="DistributedCache"></a> +<a name="N109EA"></a><a name="DistributedCache"></a> <h4>DistributedCache</h4> <p> <a href="api/org/apache/hadoop/filecache/DistributedCache.html"> @@ -1645,7 +1707,7 @@ <span class="codefrag">DistributedCache</span> tracks the modification timestamps of the cached files. Clearly the cache files should not be modified by the application or externally while the job is executing.</p> -<a name="N10A11"></a><a name="Tool"></a> +<a name="N10A24"></a><a name="Tool"></a> <h4>Tool</h4> <p>The <a href="api/org/apache/hadoop/util/Tool.html">Tool</a> interface supports the handling of generic Hadoop command-line options. @@ -1685,7 +1747,7 @@ </span> </p> -<a name="N10A43"></a><a name="IsolationRunner"></a> +<a name="N10A56"></a><a name="IsolationRunner"></a> <h4>IsolationRunner</h4> <p> <a href="api/org/apache/hadoop/mapred/IsolationRunner.html"> @@ -1709,7 +1771,7 @@ <p> <span class="codefrag">IsolationRunner</span> will run the failed task in a single jvm, which can be in the debugger, over precisely the same input.</p> -<a name="N10A76"></a><a name="JobControl"></a> +<a name="N10A89"></a><a name="JobControl"></a> <h4>JobControl</h4> <p> <a href="api/org/apache/hadoop/mapred/jobcontrol/package-summary.html"> @@ -1718,12 +1780,17 @@ </div> -<a name="N10A85"></a><a name="Example%3A+WordCount+v2.0"></a> +<a name="N10A98"></a><a name="Example%3A+WordCount+v2.0"></a> <h2 class="h3">Example: WordCount v2.0</h2> <div class="section"> <p>Here is a more complete <span class="codefrag">WordCount</span> which uses many of the - features provided by the Map-Reduce framework we discussed so far:</p> -<a name="N10A91"></a><a name="Source+Code-N10A91"></a> + features provided by the Map-Reduce framework we discussed so far.</p> +<p>This needs the HDFS to be up and running, especially for the + <span class="codefrag">DistributedCache</span>-related features. Hence it only works with a + <a href="quickstart.html#SingleNodeSetup">pseudo-distributed</a> or + <a href="quickstart.html#Fully-Distributed+Operation">fully-distributed</a> + Hadoop installation.</p> +<a name="N10AB2"></a><a name="Source+Code-N10AB2"></a> <h3 class="h4">Source Code</h3> <table class="ForrestTable" cellspacing="1" cellpadding="4"> @@ -1858,7 +1925,7 @@ <td colspan="1" rowspan="1"> <span class="codefrag"> - public static class MapClass extends MapReduceBase + public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { </span> </td> @@ -2018,7 +2085,7 @@ <td colspan="1" rowspan="1">32.</td> <td colspan="1" rowspan="1"> - <span class="codefrag">Path[] patternsFiles = new Path[0];</span> + <span class="codefrag">if (job.getBoolean("wordcount.skip.patterns", false)) {</span> </td> </tr> @@ -2027,8 +2094,8 @@ <td colspan="1" rowspan="1">33.</td> <td colspan="1" rowspan="1"> - - <span class="codefrag">try {</span> + + <span class="codefrag">Path[] patternsFiles = new Path[0];</span> </td> </tr> @@ -2038,6 +2105,16 @@ <td colspan="1" rowspan="1">34.</td> <td colspan="1" rowspan="1"> + <span class="codefrag">try {</span> + </td> + +</tr> + +<tr> + +<td colspan="1" rowspan="1">35.</td> + <td colspan="1" rowspan="1"> + <span class="codefrag"> patternsFiles = DistributedCache.getLocalCacheFiles(job); </span> @@ -2047,9 +2124,9 @@ <tr> -<td colspan="1" rowspan="1">35.</td> +<td colspan="1" rowspan="1">36.</td> <td colspan="1" rowspan="1"> - + <span class="codefrag">} catch (IOException ioe) {</span> </td> @@ -2057,9 +2134,9 @@ <tr> -<td colspan="1" rowspan="1">36.</td> +<td colspan="1" rowspan="1">37.</td> <td colspan="1" rowspan="1"> - + <span class="codefrag"> System.err.println("Caught exception while getting cached files: " + StringUtils.stringifyException(ioe)); @@ -2070,9 +2147,9 @@ <tr> -<td colspan="1" rowspan="1">37.</td> +<td colspan="1" rowspan="1">38.</td> <td colspan="1" rowspan="1"> - + <span class="codefrag">}</span> </td> @@ -2080,9 +2157,9 @@ <tr> -<td colspan="1" rowspan="1">38.</td> +<td colspan="1" rowspan="1">39.</td> <td colspan="1" rowspan="1"> - + <span class="codefrag">for (Path patternsFile : patternsFiles) {</span> </td> @@ -2090,9 +2167,9 @@ <tr> -<td colspan="1" rowspan="1">39.</td> +<td colspan="1" rowspan="1">40.</td> <td colspan="1" rowspan="1"> - + <span class="codefrag">parseSkipFile(patternsFile);</span> </td> @@ -2100,7 +2177,17 @@ <tr> -<td colspan="1" rowspan="1">40.</td> +<td colspan="1" rowspan="1">41.</td> + <td colspan="1" rowspan="1"> + + <span class="codefrag">}</span> + </td> + +</tr> + +<tr> + +<td colspan="1" rowspan="1">42.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2110,7 +2197,7 @@ <tr> -<td colspan="1" rowspan="1">41.</td> +<td colspan="1" rowspan="1">43.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2120,14 +2207,14 @@ <tr> -<td colspan="1" rowspan="1">42.</td> +<td colspan="1" rowspan="1">44.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">43.</td> +<td colspan="1" rowspan="1">45.</td> <td colspan="1" rowspan="1"> <span class="codefrag">private void parseSkipFile(Path patternsFile) {</span> @@ -2137,7 +2224,7 @@ <tr> -<td colspan="1" rowspan="1">44.</td> +<td colspan="1" rowspan="1">46.</td> <td colspan="1" rowspan="1"> <span class="codefrag">try {</span> @@ -2147,7 +2234,7 @@ <tr> -<td colspan="1" rowspan="1">45.</td> +<td colspan="1" rowspan="1">47.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2160,7 +2247,7 @@ <tr> -<td colspan="1" rowspan="1">46.</td> +<td colspan="1" rowspan="1">48.</td> <td colspan="1" rowspan="1"> <span class="codefrag">String pattern = null;</span> @@ -2170,7 +2257,7 @@ <tr> -<td colspan="1" rowspan="1">47.</td> +<td colspan="1" rowspan="1">49.</td> <td colspan="1" rowspan="1"> <span class="codefrag">while ((pattern = fis.readLine()) != null) {</span> @@ -2180,7 +2267,7 @@ <tr> -<td colspan="1" rowspan="1">48.</td> +<td colspan="1" rowspan="1">50.</td> <td colspan="1" rowspan="1"> <span class="codefrag">patternsToSkip.add(pattern);</span> @@ -2190,7 +2277,7 @@ <tr> -<td colspan="1" rowspan="1">49.</td> +<td colspan="1" rowspan="1">51.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2200,7 +2287,7 @@ <tr> -<td colspan="1" rowspan="1">50.</td> +<td colspan="1" rowspan="1">52.</td> <td colspan="1" rowspan="1"> <span class="codefrag">} catch (IOException ioe) {</span> @@ -2210,7 +2297,7 @@ <tr> -<td colspan="1" rowspan="1">51.</td> +<td colspan="1" rowspan="1">53.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2225,7 +2312,7 @@ <tr> -<td colspan="1" rowspan="1">52.</td> +<td colspan="1" rowspan="1">54.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2235,7 +2322,7 @@ <tr> -<td colspan="1" rowspan="1">53.</td> +<td colspan="1" rowspan="1">55.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2245,14 +2332,14 @@ <tr> -<td colspan="1" rowspan="1">54.</td> +<td colspan="1" rowspan="1">56.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">55.</td> +<td colspan="1" rowspan="1">57.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2266,7 +2353,7 @@ <tr> -<td colspan="1" rowspan="1">56.</td> +<td colspan="1" rowspan="1">58.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2280,14 +2367,14 @@ <tr> -<td colspan="1" rowspan="1">57.</td> +<td colspan="1" rowspan="1">59.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">58.</td> +<td colspan="1" rowspan="1">60.</td> <td colspan="1" rowspan="1"> <span class="codefrag">for (String pattern : patternsToSkip) {</span> @@ -2297,7 +2384,7 @@ <tr> -<td colspan="1" rowspan="1">59.</td> +<td colspan="1" rowspan="1">61.</td> <td colspan="1" rowspan="1"> <span class="codefrag">line = line.replaceAll(pattern, "");</span> @@ -2307,7 +2394,7 @@ <tr> -<td colspan="1" rowspan="1">60.</td> +<td colspan="1" rowspan="1">62.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2317,14 +2404,14 @@ <tr> -<td colspan="1" rowspan="1">61.</td> +<td colspan="1" rowspan="1">63.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">62.</td> +<td colspan="1" rowspan="1">64.</td> <td colspan="1" rowspan="1"> <span class="codefrag">StringTokenizer tokenizer = new StringTokenizer(line);</span> @@ -2334,7 +2421,7 @@ <tr> -<td colspan="1" rowspan="1">63.</td> +<td colspan="1" rowspan="1">65.</td> <td colspan="1" rowspan="1"> <span class="codefrag">while (tokenizer.hasMoreTokens()) {</span> @@ -2344,7 +2431,7 @@ <tr> -<td colspan="1" rowspan="1">64.</td> +<td colspan="1" rowspan="1">66.</td> <td colspan="1" rowspan="1"> <span class="codefrag">word.set(tokenizer.nextToken());</span> @@ -2354,7 +2441,7 @@ <tr> -<td colspan="1" rowspan="1">65.</td> +<td colspan="1" rowspan="1">67.</td> <td colspan="1" rowspan="1"> <span class="codefrag">output.collect(word, one);</span> @@ -2364,7 +2451,7 @@ <tr> -<td colspan="1" rowspan="1">66.</td> +<td colspan="1" rowspan="1">68.</td> <td colspan="1" rowspan="1"> <span class="codefrag">reporter.incrCounter(Counters.INPUT_WORDS, 1);</span> @@ -2374,7 +2461,7 @@ <tr> -<td colspan="1" rowspan="1">67.</td> +<td colspan="1" rowspan="1">69.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2384,14 +2471,14 @@ <tr> -<td colspan="1" rowspan="1">68.</td> +<td colspan="1" rowspan="1">70.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">69.</td> +<td colspan="1" rowspan="1">71.</td> <td colspan="1" rowspan="1"> <span class="codefrag">if ((++numRecords % 100) == 0) {</span> @@ -2401,7 +2488,7 @@ <tr> -<td colspan="1" rowspan="1">70.</td> +<td colspan="1" rowspan="1">72.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2415,7 +2502,7 @@ <tr> -<td colspan="1" rowspan="1">71.</td> +<td colspan="1" rowspan="1">73.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2425,7 +2512,7 @@ <tr> -<td colspan="1" rowspan="1">72.</td> +<td colspan="1" rowspan="1">74.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2435,7 +2522,7 @@ <tr> -<td colspan="1" rowspan="1">73.</td> +<td colspan="1" rowspan="1">75.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2445,14 +2532,14 @@ <tr> -<td colspan="1" rowspan="1">74.</td> +<td colspan="1" rowspan="1">76.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">75.</td> +<td colspan="1" rowspan="1">77.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2465,7 +2552,7 @@ <tr> -<td colspan="1" rowspan="1">76.</td> +<td colspan="1" rowspan="1">78.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2479,7 +2566,7 @@ <tr> -<td colspan="1" rowspan="1">77.</td> +<td colspan="1" rowspan="1">79.</td> <td colspan="1" rowspan="1"> <span class="codefrag">int sum = 0;</span> @@ -2489,7 +2576,7 @@ <tr> -<td colspan="1" rowspan="1">78.</td> +<td colspan="1" rowspan="1">80.</td> <td colspan="1" rowspan="1"> <span class="codefrag">while (values.hasNext()) {</span> @@ -2499,7 +2586,7 @@ <tr> -<td colspan="1" rowspan="1">79.</td> +<td colspan="1" rowspan="1">81.</td> <td colspan="1" rowspan="1"> <span class="codefrag">sum += values.next().get();</span> @@ -2509,7 +2596,7 @@ <tr> -<td colspan="1" rowspan="1">80.</td> +<td colspan="1" rowspan="1">82.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2519,7 +2606,7 @@ <tr> -<td colspan="1" rowspan="1">81.</td> +<td colspan="1" rowspan="1">83.</td> <td colspan="1" rowspan="1"> <span class="codefrag">output.collect(key, new IntWritable(sum));</span> @@ -2529,7 +2616,7 @@ <tr> -<td colspan="1" rowspan="1">82.</td> +<td colspan="1" rowspan="1">84.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2539,7 +2626,7 @@ <tr> -<td colspan="1" rowspan="1">83.</td> +<td colspan="1" rowspan="1">85.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2549,14 +2636,14 @@ <tr> -<td colspan="1" rowspan="1">84.</td> +<td colspan="1" rowspan="1">86.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">85.</td> +<td colspan="1" rowspan="1">87.</td> <td colspan="1" rowspan="1"> <span class="codefrag">public int run(String[] args) throws Exception {</span> @@ -2566,7 +2653,7 @@ <tr> -<td colspan="1" rowspan="1">86.</td> +<td colspan="1" rowspan="1">88.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2578,7 +2665,7 @@ <tr> -<td colspan="1" rowspan="1">87.</td> +<td colspan="1" rowspan="1">89.</td> <td colspan="1" rowspan="1"> <span class="codefrag">conf.setJobName("wordcount");</span> @@ -2588,14 +2675,14 @@ <tr> -<td colspan="1" rowspan="1">88.</td> +<td colspan="1" rowspan="1">90.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">89.</td> +<td colspan="1" rowspan="1">91.</td> <td colspan="1" rowspan="1"> <span class="codefrag">conf.setOutputKeyClass(Text.class);</span> @@ -2605,7 +2692,7 @@ <tr> -<td colspan="1" rowspan="1">90.</td> +<td colspan="1" rowspan="1">92.</td> <td colspan="1" rowspan="1"> <span class="codefrag">conf.setOutputValueClass(IntWritable.class);</span> @@ -2615,24 +2702,24 @@ <tr> -<td colspan="1" rowspan="1">91.</td> +<td colspan="1" rowspan="1">93.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">92.</td> +<td colspan="1" rowspan="1">94.</td> <td colspan="1" rowspan="1"> - <span class="codefrag">conf.setMapperClass(MapClass.class);</span> + <span class="codefrag">conf.setMapperClass(Map.class);</span> </td> </tr> <tr> -<td colspan="1" rowspan="1">93.</td> +<td colspan="1" rowspan="1">95.</td> <td colspan="1" rowspan="1"> <span class="codefrag">conf.setCombinerClass(Reduce.class);</span> @@ -2642,7 +2729,7 @@ <tr> -<td colspan="1" rowspan="1">94.</td> +<td colspan="1" rowspan="1">96.</td> <td colspan="1" rowspan="1"> <span class="codefrag">conf.setReducerClass(Reduce.class);</span> @@ -2652,14 +2739,14 @@ <tr> -<td colspan="1" rowspan="1">95.</td> +<td colspan="1" rowspan="1">97.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">96.</td> +<td colspan="1" rowspan="1">98.</td> <td colspan="1" rowspan="1"> <span class="codefrag">conf.setInputFormat(TextInputFormat.class);</span> @@ -2669,7 +2756,7 @@ <tr> -<td colspan="1" rowspan="1">97.</td> +<td colspan="1" rowspan="1">99.</td> <td colspan="1" rowspan="1"> <span class="codefrag">conf.setOutputFormat(TextOutputFormat.class);</span> @@ -2679,14 +2766,14 @@ <tr> -<td colspan="1" rowspan="1">98.</td> +<td colspan="1" rowspan="1">100.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">99.</td> +<td colspan="1" rowspan="1">101.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2698,7 +2785,7 @@ <tr> -<td colspan="1" rowspan="1">100.</td> +<td colspan="1" rowspan="1">102.</td> <td colspan="1" rowspan="1"> <span class="codefrag">for (int i=0; i < args.length; ++i) {</span> @@ -2708,17 +2795,17 @@ <tr> -<td colspan="1" rowspan="1">101.</td> +<td colspan="1" rowspan="1">103.</td> <td colspan="1" rowspan="1"> - <span class="codefrag">if ("-skip".equals(args[i]) {</span> + <span class="codefrag">if ("-skip".equals(args[i])) {</span> </td> </tr> <tr> -<td colspan="1" rowspan="1">102.</td> +<td colspan="1" rowspan="1">104.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2730,7 +2817,19 @@ <tr> -<td colspan="1" rowspan="1">103.</td> +<td colspan="1" rowspan="1">105.</td> + <td colspan="1" rowspan="1"> + + <span class="codefrag"> + conf.setBoolean("wordcount.skip.patterns", true); + </span> + </td> + +</tr> + +<tr> + +<td colspan="1" rowspan="1">106.</td> <td colspan="1" rowspan="1"> <span class="codefrag">} else {</span> @@ -2740,7 +2839,7 @@ <tr> -<td colspan="1" rowspan="1">104.</td> +<td colspan="1" rowspan="1">107.</td> <td colspan="1" rowspan="1"> <span class="codefrag">other_args.add(args[i]);</span> @@ -2750,7 +2849,7 @@ <tr> -<td colspan="1" rowspan="1">105.</td> +<td colspan="1" rowspan="1">108.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2760,7 +2859,7 @@ <tr> -<td colspan="1" rowspan="1">106.</td> +<td colspan="1" rowspan="1">109.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2770,41 +2869,41 @@ <tr> -<td colspan="1" rowspan="1">107.</td> +<td colspan="1" rowspan="1">110.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">108.</td> +<td colspan="1" rowspan="1">111.</td> <td colspan="1" rowspan="1"> - <span class="codefrag">conf.setInputPath(new Path(other_args[0]));</span> + <span class="codefrag">conf.setInputPath(new Path(other_args.get(0)));</span> </td> </tr> <tr> -<td colspan="1" rowspan="1">109.</td> +<td colspan="1" rowspan="1">112.</td> <td colspan="1" rowspan="1"> - <span class="codefrag">conf.setOutputPath(new Path(other_args[1]));</span> + <span class="codefrag">conf.setOutputPath(new Path(other_args.get(1)));</span> </td> </tr> <tr> -<td colspan="1" rowspan="1">110.</td> +<td colspan="1" rowspan="1">113.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">111.</td> +<td colspan="1" rowspan="1">114.</td> <td colspan="1" rowspan="1"> <span class="codefrag">JobClient.runJob(conf);</span> @@ -2814,7 +2913,7 @@ <tr> -<td colspan="1" rowspan="1">112.</td> +<td colspan="1" rowspan="1">115.</td> <td colspan="1" rowspan="1"> <span class="codefrag">return 0;</span> @@ -2824,7 +2923,7 @@ <tr> -<td colspan="1" rowspan="1">113.</td> +<td colspan="1" rowspan="1">116.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2834,14 +2933,14 @@ <tr> -<td colspan="1" rowspan="1">114.</td> +<td colspan="1" rowspan="1">117.</td> <td colspan="1" rowspan="1"></td> </tr> <tr> -<td colspan="1" rowspan="1">115.</td> +<td colspan="1" rowspan="1">118.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2853,7 +2952,7 @@ <tr> -<td colspan="1" rowspan="1">116.</td> +<td colspan="1" rowspan="1">119.</td> <td colspan="1" rowspan="1"> <span class="codefrag"> @@ -2866,7 +2965,7 @@ <tr> -<td colspan="1" rowspan="1">117.</td> +<td colspan="1" rowspan="1">120.</td> <td colspan="1" rowspan="1"> <span class="codefrag">System.exit(res);</span> @@ -2876,7 +2975,7 @@ <tr> -<td colspan="1" rowspan="1">118.</td> +<td colspan="1" rowspan="1">121.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> @@ -2886,7 +2985,7 @@ <tr> -<td colspan="1" rowspan="1">119.</td> +<td colspan="1" rowspan="1">122.</td> <td colspan="1" rowspan="1"> <span class="codefrag">}</span> </td> @@ -2895,13 +2994,13 @@ <tr> -<td colspan="1" rowspan="1">120.</td> +<td colspan="1" rowspan="1">123.</td> <td colspan="1" rowspan="1"></td> </tr> </table> -<a name="N111C3"></a><a name="Sample+Runs"></a> +<a name="N11214"></a><a name="Sample+Runs"></a> <h3 class="h4">Sample Runs</h3> <p>Sample text-files as input:</p> <p> @@ -2928,7 +3027,7 @@ <span class="codefrag">$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file02</span> <br> -<span class="codefrag">Hello Hadoop, Goodbye the Hadoop.</span> +<span class="codefrag">Hello Hadoop, Goodbye to hadoop.</span> </p> <p>Run the application:</p> @@ -2958,9 +3057,6 @@ <span class="codefrag">Hadoop, 1</span> <br> -<span class="codefrag">Hadoop. 1</span> -<br> - <span class="codefrag">Hello 2</span> <br> @@ -2970,7 +3066,10 @@ <span class="codefrag">World, 1</span> <br> -<span class="codefrag">the 1</span> +<span class="codefrag">hadoop. 1</span> +<br> + +<span class="codefrag">to 1</span> <br> </p> @@ -2992,7 +3091,7 @@ <span class="codefrag">\!</span> <br> -<span class="codefrag">the</span> +<span class="codefrag">to</span> <br> </p> @@ -3021,7 +3120,7 @@ <span class="codefrag">Goodbye 1</span> <br> -<span class="codefrag">Hadoop 2</span> +<span class="codefrag">Hadoop 1</span> <br> <span class="codefrag">Hello 2</span> @@ -3029,6 +3128,9 @@ <span class="codefrag">World 2</span> <br> + +<span class="codefrag">hadoop 1</span> +<br> </p> <p>Run it once more, this time switch-off case-sensitivity:</p> @@ -3066,8 +3168,8 @@ <br> </p> -<a name="N11293"></a><a name="Salient+Points"></a> -<h3 class="h4">Salient Points</h3> +<a name="N112E8"></a><a name="Highlights"></a> +<h3 class="h4">Highlights</h3> <p>The second version of <span class="codefrag">WordCount</span> improves upon the previous one by using some features offered by the Map-Reduce framework: </p> @@ -3076,26 +3178,26 @@ <li> Demonstrates how applications can access configuration parameters in the <span class="codefrag">configure</span> method of the <span class="codefrag">Mapper</span> (and - <span class="codefrag">Reducer</span>) implementations (lines 28-41). + <span class="codefrag">Reducer</span>) implementations (lines 28-43). </li> <li> Demonstrates how the <span class="codefrag">DistributedCache</span> can be used to distribute read-only data needed by the jobs. Here it allows the user - to specify word-patterns to skip while counting (line 102). + to specify word-patterns to skip while counting (line 104). </li> <li> Demonstrates the utility of the <span class="codefrag">Tool</span> interface and the <span class="codefrag">GenericOptionsParser</span> to handle generic Hadoop - command-line options (lines 85-86, 116). + command-line options (lines 87-116, 119). </li> <li> - Demonstrates how applications can use <span class="codefrag">Counters</span> (line 66) + Demonstrates how applications can use <span class="codefrag">Counters</span> (line 68) and how they can set application-specific status information via the <span class="codefrag">Reporter</span> instance passed to the <span class="codefrag">map</span> (and - <span class="codefrag">reduce</span>) method (line 70). + <span class="codefrag">reduce</span>) method (line 72). </li> </ul> @@ -3111,18 +3213,27 @@ </div> +<!--+ + |end content + +--> <div class="clearboth"> </div> </div> <div id="footer"> +<!--+ + |start bottomstrip + +--> <div class="lastmodified"> <script type="text/javascript"><!-- -document.write("<text>Last Published:</text> " + document.lastModified); +document.write("Last Published: " + document.lastModified); // --></script> </div> <div class="copyright"> Copyright © 2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a> </div> +<!--+ + |end bottomstrip + +--> </div> </body> </html>