Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Hama Wiki" for change 
notification.

The "WriteHamaGraphFile" page has been changed by thomasjungblut:
http://wiki.apache.org/hama/WriteHamaGraphFile?action=diff&rev1=1&rev2=2

  
  # to be extended
  {{{
-  Path txtPath = new Path(
-         "/tmp/web-Google.txt");
+     Path txtPath = new Path("/tmp/web-Google.txt");
-  Path input = new Path(
-         "/tmp/pagerankin.seq");
+     Path input = new Path("/tmp/pagerankin.seq");
-  HamaConfiguration conf = new HamaConfiguration(new Configuration());
+     HamaConfiguration conf = new HamaConfiguration(new Configuration());
-  FileSystem fileSystem = FileSystem.get(conf);
+     FileSystem fileSystem = FileSystem.get(conf);
-  HashMultimap<Integer, Integer> map = HashMultimap.create();
+     HashMultimap<Integer, Integer> map = HashMultimap.create();
-  BufferedReader br = new BufferedReader(new InputStreamReader(
+     BufferedReader br = new BufferedReader(new InputStreamReader(
-       fileSystem.open(txtPath)));
+         fileSystem.open(txtPath)));
-  String line = null;
+     String line = null;
-  while ((line = br.readLine()) != null) {
+     while ((line = br.readLine()) != null) {
-    String[] split = line.split("\t");
+       String[] split = line.split("\t");
-    map.put(Integer.parseInt(split[0]), Integer.parseInt(split[1]));
+       map.put(Integer.parseInt(split[0]), Integer.parseInt(split[1]));
-  }
+     }
+ 
-  Set<Entry<Integer, Collection<Integer>>> entries = map.asMap().entrySet();
+     Set<Entry<Integer, Collection<Integer>>> entries = map.asMap().entrySet();
+ 
-  VertexWritable.CONFIGURATION = conf;
+     VertexWritable.CONFIGURATION = conf;
-  SequenceFile.Writer writer = new SequenceFile.Writer(fileSystem, conf,
+     SequenceFile.Writer writer = new SequenceFile.Writer(fileSystem, conf,
          input, VertexWritable.class, VertexArrayWritable.class);
- 
      for (Entry<Integer, Collection<Integer>> entry : entries) {
-       VertexWritable<IntWritable, DoubleWritable> key = new VertexWritable<IntWritable, DoubleWritable>(
+       VertexWritable<Text, DoubleWritable> key = new VertexWritable<Text, DoubleWritable>(
-           new DoubleWritable(0.0d), new IntWritable(entry.getKey()),
+           new DoubleWritable(0.0d), new Text(entry.getKey() + ""), Text.class,
-           IntWritable.class, DoubleWritable.class);
+           DoubleWritable.class);
        ArrayList<Integer> arrayList = new ArrayList<Integer>(entry.getValue());
        @SuppressWarnings("unchecked")
-       VertexWritable<IntWritable, NullWritable>[] adjacents = new VertexWritable[entry
+       VertexWritable<Text, NullWritable>[] adjacents = new VertexWritable[entry
            .getValue().size()];
        for (int i = 0; i < adjacents.length; i++) {
-         adjacents[i] = new VertexWritable<IntWritable, NullWritable>(
+         adjacents[i] = new VertexWritable<Text, NullWritable>(
-             NullWritable.get(), new IntWritable(arrayList.get(i)),
+             NullWritable.get(), new Text(arrayList.get(i) + ""), Text.class,
-             IntWritable.class, NullWritable.class);
+             NullWritable.class);
        }
        VertexArrayWritable val = new VertexArrayWritable();
        val.set(adjacents);
        writer.append(key, val);
      }
      writer.close();
+ 
  }}}
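+ 
+ The snippet above assumes the input is a plain text edge list with one edge per line: a source vertex id and a destination vertex id separated by a single tab, which is what the line.split("\t") and Integer.parseInt calls expect. A purely illustrative sample (the ids below are made up) could look like this; note that the web-Google.txt download typically starts with a few comment lines prefixed with #, which need to be removed or skipped before parsing:
+ 
+ {{{
+ 0	11342
+ 0	824020
+ 11342	0
+ 11342	27469
+ }}}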
  
+ 
+ Now we can set up the job with the following code:
+ 
+ {{{
+     GraphJob pageJob = new GraphJob(conf, PageRank.class);
+     pageJob.setJobName("Pagerank");
+     pageJob.set("hama.graph.repair", "true");
+ 
+     pageJob.setVertexClass(PageRankVertex.class);
+     pageJob.setInputPath(input);
+     pageJob.setOutputPath(new Path("/tmp/pagerank-output/"));
+ 
+     // set the defaults
+     pageJob.setMaxIteration(30);
+     // we need to include a vertex in its adjacency list,
+     // otherwise the pagerank result has a constant loss
+     pageJob.set("hama.graph.self.ref", "true");
+     pageJob.setAggregatorClass(AverageAggregator.class);
+ 
+     pageJob.setVertexIDClass(Text.class);
+     pageJob.setVertexValueClass(DoubleWritable.class);
+     pageJob.setEdgeValueClass(NullWritable.class);
+ 
+     pageJob.setInputFormat(SequenceFileInputFormat.class);
+     pageJob.setPartitioner(HashPartitioner.class);
+     pageJob.setOutputFormat(SequenceFileOutputFormat.class);
+     pageJob.setOutputKeyClass(Text.class);
+     pageJob.setOutputValueClass(DoubleWritable.class);
+ 
+     long startTime = System.currentTimeMillis();
+     if (pageJob.waitForCompletion(true)) {
+       System.out.println("Job Finished in "
+           + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
+     }
+ }}}
+ 
+ You should see the algorithm converge relatively fast; in my case it converged after nine supersteps.
+ If you read the results back from the sequence file output (a sketch of how to do this follows the listing below), you will see the following top-ranked sites:
+ 
+ {{{
+ 885605 = 0.00149900065779375
+ 846221 = 0.0010280702392776039
+ 557124 = 8.654234880507804E-4
+ 537039 = 6.634317501245855E-4
+ 163075 = 6.529762251084758E-4
+ 597621 = 6.503367245789417E-4
+ 41909 = 5.845160681337011E-4
+ 551829 = 5.702205338951212E-4
+ 504140 = 5.507901000809657E-4
+ 765334 = 5.432108978490109E-4
+ 486980 = 5.394792436341423E-4
+ }}}
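+ 
+ A minimal sketch of reading the output back, assuming the /tmp/pagerank-output/ path and the Text/DoubleWritable output types from the job setup above (the class name ReadPageRankOutput is made up for illustration):
+ 
+ {{{
+ import java.util.ArrayList;
+ import java.util.Collections;
+ import java.util.Comparator;
+ import java.util.List;
+ import java.util.AbstractMap.SimpleEntry;
+ import java.util.Map.Entry;
+ 
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileStatus;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.io.DoubleWritable;
+ import org.apache.hadoop.io.SequenceFile;
+ import org.apache.hadoop.io.Text;
+ 
+ public class ReadPageRankOutput {
+ 
+   public static void main(String[] args) throws Exception {
+     Configuration conf = new Configuration();
+     FileSystem fileSystem = FileSystem.get(conf);
+     Path outputDir = new Path("/tmp/pagerank-output/");
+ 
+     // collect (vertex id, rank) pairs from every part file in the output directory
+     List<Entry<String, Double>> ranks = new ArrayList<Entry<String, Double>>();
+     for (FileStatus status : fileSystem.listStatus(outputDir)) {
+       // skip sub directories and bookkeeping files like _SUCCESS or _logs
+       if (status.isDir() || status.getPath().getName().startsWith("_")) {
+         continue;
+       }
+       SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem,
+           status.getPath(), conf);
+       Text key = new Text();
+       DoubleWritable value = new DoubleWritable();
+       while (reader.next(key, value)) {
+         ranks.add(new SimpleEntry<String, Double>(key.toString(), value.get()));
+       }
+       reader.close();
+     }
+ 
+     // sort by rank, highest first, and print the top ten vertices
+     Collections.sort(ranks, new Comparator<Entry<String, Double>>() {
+       public int compare(Entry<String, Double> a, Entry<String, Double> b) {
+         return b.getValue().compareTo(a.getValue());
+       }
+     });
+     for (int i = 0; i < Math.min(10, ranks.size()); i++) {
+       System.out.println(ranks.get(i).getKey() + " = " + ranks.get(i).getValue());
+     }
+   }
+ }
+ }}}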
+ 
