Dear Wiki user, You have subscribed to a wiki page or wiki category on "Hama Wiki" for change notification.
The "WriteHamaGraphFile" page has been changed by thomasjungblut: http://wiki.apache.org/hama/WriteHamaGraphFile?action=diff&rev1=1&rev2=2 # to be extended {{{ - Path txtPath = new Path( - "/tmp/web-Google.txt"); + Path txtPath = new Path("/tmp/web-Google.txt"); - Path input = new Path( - "/tmp/pagerankin.seq"); + Path input = new Path("/tmp/pagerankin.seq"); - HamaConfiguration conf = new HamaConfiguration(new Configuration()); + HamaConfiguration conf = new HamaConfiguration(new Configuration()); - FileSystem fileSystem = FileSystem.get(conf); - HashMultimap<Integer, Integer> map = HashMultimap.create(); + HashMultimap<Integer, Integer> map = HashMultimap.create(); - BufferedReader br = new BufferedReader(new InputStreamReader( + BufferedReader br = new BufferedReader(new InputStreamReader( - fileSystem.open(txtPath))); + fileSystem.open(txtPath))); - String line = null; + String line = null; - while ((line = br.readLine()) != null) { + while ((line = br.readLine()) != null) { - String[] split = line.split("\t"); + String[] split = line.split("\t"); - map.put(Integer.parseInt(split[0]), Integer.parseInt(split[1])); + map.put(Integer.parseInt(split[0]), Integer.parseInt(split[1])); - } + } + - Set<Entry<Integer, Collection<Integer>>> entries = map.asMap().entrySet(); + Set<Entry<Integer, Collection<Integer>>> entries = map.asMap().entrySet(); + + Path input = new Path( + "/Users/thomas.jungblut/Downloads/web_in/pagerankin.seq"); - VertexWritable.CONFIGURATION = conf; + VertexWritable.CONFIGURATION = conf; - SequenceFile.Writer writer = new SequenceFile.Writer(fileSystem, conf, + SequenceFile.Writer writer = new SequenceFile.Writer(fileSystem, conf, input, VertexWritable.class, VertexArrayWritable.class); - for (Entry<Integer, Collection<Integer>> entry : entries) { - VertexWritable<IntWritable, DoubleWritable> key = new VertexWritable<IntWritable, DoubleWritable>( + VertexWritable<Text, DoubleWritable> key = new VertexWritable<Text, DoubleWritable>( - new DoubleWritable(0.0d), new IntWritable(entry.getKey()), + new DoubleWritable(0.0d), new Text(entry.getKey() + ""), Text.class, - IntWritable.class, DoubleWritable.class); + DoubleWritable.class); ArrayList<Integer> arrayList = new ArrayList<Integer>(entry.getValue()); @SuppressWarnings("unchecked") - VertexWritable<IntWritable, NullWritable>[] adjacents = new VertexWritable[entry + VertexWritable<Text, NullWritable>[] adjacents = new VertexWritable[entry .getValue().size()]; for (int i = 0; i < adjacents.length; i++) { - adjacents[i] = new VertexWritable<IntWritable, NullWritable>( + adjacents[i] = new VertexWritable<Text, NullWritable>( - NullWritable.get(), new IntWritable(arrayList.get(i)), + NullWritable.get(), new Text(arrayList.get(i) + ""), Text.class, - IntWritable.class, NullWritable.class); + NullWritable.class); } VertexArrayWritable val = new VertexArrayWritable(); val.set(adjacents); writer.append(key, val); } writer.close(); + }}} + + Now we can setup the job with the following code + + {{{ + GraphJob pageJob = new GraphJob(conf, PageRank.class); + pageJob.setJobName("Pagerank"); + pageJob.set("hama.graph.repair", "true"); + + pageJob.setVertexClass(PageRankVertex.class); + pageJob.setInputPath(input); + pageJob.setOutputPath(new Path("/tmp/pagerank-output/")); + + // set the defaults + pageJob.setMaxIteration(30); + // we need to include a vertex in its adjacency list, + // otherwise the pagerank result has a constant loss + pageJob.set("hama.graph.self.ref", "true"); + pageJob.setAggregatorClass(AverageAggregator.class); + 
Now we can set up the job with the following code:

{{{
GraphJob pageJob = new GraphJob(conf, PageRank.class);
pageJob.setJobName("Pagerank");
pageJob.set("hama.graph.repair", "true");

pageJob.setVertexClass(PageRankVertex.class);
pageJob.setInputPath(input);
pageJob.setOutputPath(new Path("/tmp/pagerank-output/"));

// set the defaults
pageJob.setMaxIteration(30);
// we need to include a vertex in its adjacency list,
// otherwise the pagerank result has a constant loss
pageJob.set("hama.graph.self.ref", "true");
pageJob.setAggregatorClass(AverageAggregator.class);

pageJob.setVertexIDClass(Text.class);
pageJob.setVertexValueClass(DoubleWritable.class);
pageJob.setEdgeValueClass(NullWritable.class);

pageJob.setInputFormat(SequenceFileInputFormat.class);
pageJob.setPartitioner(HashPartitioner.class);
pageJob.setOutputFormat(SequenceFileOutputFormat.class);
pageJob.setOutputKeyClass(Text.class);
pageJob.setOutputValueClass(DoubleWritable.class);

long startTime = System.currentTimeMillis();
if (pageJob.waitForCompletion(true)) {
  System.out.println("Job Finished in "
      + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}
}}}

You should see the algorithm converge relatively fast; in my case it converged after nine supersteps.
If you read the results back from the sequence file output, you will see the following top-ranked sites:

{{{
885605 = 0.00149900065779375
846221 = 0.0010280702392776039
557124 = 8.654234880507804E-4
537039 = 6.634317501245855E-4
163075 = 6.529762251084758E-4
597621 = 6.503367245789417E-4
41909 = 5.845160681337011E-4
551829 = 5.702205338951212E-4
504140 = 5.507901000809657E-4
765334 = 5.432108978490109E-4
486980 = 5.394792436341423E-4
}}}
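Reading the output back is a plain sequence file scan over Text/DoubleWritable pairs. A minimal sketch, assuming a single output part file named "part-00000" (the file name is an assumption; check what the job actually wrote into the output directory):

{{{
// Minimal sketch for printing the computed ranks.
// The part file name "part-00000" is an assumption.
FileSystem fs = FileSystem.get(conf);
SequenceFile.Reader reader = new SequenceFile.Reader(fs,
    new Path("/tmp/pagerank-output/part-00000"), conf);
Text vertexID = new Text();
DoubleWritable rank = new DoubleWritable();
while (reader.next(vertexID, rank)) {
  System.out.println(vertexID + " = " + rank.get());
}
reader.close();
}}}

Sorting these pairs by rank in descending order yields a listing like the one above.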