Author: siren
Date: Sat Jan 19 00:59:29 2008
New Revision: 613378

URL: http://svn.apache.org/viewvc?rev=613378&view=rev
Log:
NUTCH-580 Remove deprecated hadoop api calls (FS)

Added:
    lucene/nutch/trunk/src/java/org/apache/nutch/util/HadoopFSUtil.java   (with props)
Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher2.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java
    lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
    lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Sat Jan 19 00:59:29 2008
@@ -191,6 +191,8 @@
 
 66. NUTCH-584 - urls missing from fetchlist (Ruslan Ermilov, ab)
 
+67. NUTCH-580 - Remove deprecated hadoop api calls (FS) (siren)
+
 
 Release 0.9 - 2007-04-02
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java Sat Jan 19 00:59:29 2008
@@ -32,6 +32,7 @@
 import org.apache.nutch.indexer.DeleteDuplicates;
 import org.apache.nutch.indexer.IndexMerger;
 import org.apache.nutch.indexer.Indexer;
+import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
 
@@ -131,9 +132,9 @@
       linkDbTool.invert(linkDb, segments, true, true, false); // invert links
 
       // index, dedup & merge
-      indexer.index(indexes, crawlDb, linkDb, fs.listPaths(segments));
+      indexer.index(indexes, crawlDb, linkDb, fs.listPaths(segments, HadoopFSUtil.getPassAllFilter()));
       dedup.dedup(new Path[] { indexes });
-      merger.merge(fs.listPaths(indexes), index, tmpDir);
+      merger.merge(fs.listPaths(indexes, HadoopFSUtil.getPassAllFilter()), index, tmpDir);
     } else {
       LOG.warn("No URLs to fetch - check your seed list and URL filters.");
     }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java Sat Jan 19 00:59:29 2008
@@ -31,6 +31,7 @@
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.ToolBase;
 
+import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.nutch.util.LockUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
@@ -181,15 +182,7 @@
       } else if (args[i].equals("-noAdditions")) {
         additionsAllowed = false;
       } else if (args[i].equals("-dir")) {
-        Path[] paths = fs.listPaths(new Path(args[++i]), new PathFilter() {
-          public boolean accept(Path dir) {
-            try {
-              return fs.isDirectory(dir);
-            } catch (IOException ioe) {
-              return false;
-            }
-          }
-        });
+        Path[] paths = fs.listPaths(new Path(args[++i]), HadoopFSUtil.getPassDirectoriesFilter(fs));
         dirs.addAll(Arrays.asList(paths));
       } else {
         dirs.add(new Path(args[i]));

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Sat Jan 19 00:59:29 2008
@@ -36,6 +36,7 @@
 import org.apache.nutch.net.URLFilters;
 import org.apache.nutch.net.URLNormalizers;
 import org.apache.nutch.parse.*;
+import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.nutch.util.LockUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
@@ -146,14 +147,7 @@
 
   public void invert(Path linkDb, final Path segmentsDir, boolean normalize, boolean filter, boolean force) throws IOException {
     final FileSystem fs = FileSystem.get(getConf());
-    Path[] files = fs.listPaths(segmentsDir, new PathFilter() {
-      public boolean accept(Path f) {
-        try {
-          if (fs.isDirectory(f)) return true;
-        } catch (IOException ioe) {};
-        return false;
-      }
-    });
+    Path[] files = fs.listPaths(segmentsDir, HadoopFSUtil.getPassDirectoriesFilter(fs));
     invert(linkDb, files, normalize, filter, force);
   }
 
@@ -283,7 +277,7 @@
         Path[] files = fs.listPaths(segDir, new PathFilter() {
           public boolean accept(Path f) {
             try {
-              if (fs.isDirectory(f)) return true;
+              if (fs.getFileStatus(f).isDir()) return true;
             } catch (IOException ioe) {};
             return false;
           }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Sat Jan 19 00:59:29 2008
@@ -63,7 +63,7 @@
       FileSystem fs = FileSystem.get(job);
       InputSplit[] splits = new InputSplit[files.length];
       for (int i = 0; i < files.length; i++) {
-        splits[i] = new FileSplit(files[i], 0, fs.getLength(files[i]), job);
+        splits[i] = new FileSplit(files[i], 0, fs.getFileStatus(files[i]).getLen(), job);
       }
       return splits;
     }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher2.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher2.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher2.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher2.java Sat Jan 19 00:59:29 2008
@@ -95,7 +95,7 @@
       FileSplit[] splits = new FileSplit[files.length];
       FileSystem fs = FileSystem.get(job);
       for (int i = 0; i < files.length; i++) {
-        splits[i] = new FileSplit(files[i], 0, fs.getLength(files[i]), job);
+        splits[i] = new FileSplit(files[i], 0, fs.getFileStatus(files[i]).getLen(), job);
       }
       return splits;
     }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java Sat Jan 19 00:59:29 2008
@@ -19,6 +19,7 @@
 
 import java.io.*;
 import org.apache.lucene.store.*;
+import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.conf.Configuration;
 
@@ -40,7 +41,7 @@
       create();
     }
 
-    if (!fs.isDirectory(directory))
+    if (!fs.getFileStatus(directory).isDir())
       throw new IOException(directory + " not a directory");
   }
 
@@ -49,11 +50,11 @@
       fs.mkdirs(directory);
     }
 
-    if (!fs.isDirectory(directory))
+    if (!fs.getFileStatus(directory).isDir())
       throw new IOException(directory + " not a directory");
 
     // clear old files
-    Path[] files = fs.listPaths(directory);
+    Path[] files = fs.listPaths(directory, HadoopFSUtil.getPassAllFilter());
     for (int i = 0; i < files.length; i++) {
       if (!fs.delete(files[i]))
         throw new IOException("Cannot delete " + files[i]);
@@ -61,7 +62,7 @@
   }
 
   public String[] list() throws IOException {
-    Path[] files = fs.listPaths(directory);
+    Path[] files = fs.listPaths(directory, HadoopFSUtil.getPassAllFilter());
     if (files == null) return null;
 
     String[] result = new String[files.length];
@@ -84,7 +85,7 @@
   }
 
   public long fileLength(String name) throws IOException {
-    return fs.getLength(new Path(directory, name));
+    return fs.getFileStatus(new Path(directory, name)).getLen();
   }
 
   public void deleteFile(String name) throws IOException {
@@ -157,7 +158,7 @@
 
     public DfsIndexInput(Path path, int ioFileBufferSize) throws IOException {
       descriptor = new Descriptor(path,ioFileBufferSize);
-      length = fs.getLength(path);
+      length = fs.getFileStatus(path).getLen();
     }
 
     protected void readInternal(byte[] b, int offset, int len)

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java Sat Jan 19 00:59:29 2008
@@ -29,6 +29,7 @@
 import org.apache.hadoop.util.ToolBase;
 import org.apache.hadoop.conf.*;
 
+import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.nutch.util.LogUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
@@ -135,7 +136,7 @@
     Path outputIndex = new Path(args[i++]);
 
     for (; i < args.length; i++) {
-      indexDirs.addAll(Arrays.asList(fs.listPaths(new Path(args[i]))));
+      indexDirs.addAll(Arrays.asList(fs.listPaths(new Path(args[i]), HadoopFSUtil.getPassAllFilter())));
     }
 
     //

Modified: 
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java Sat Jan 19 00:59:29 2008
@@ -82,7 +82,7 @@
   }
 
   private Directory getDirectory(Path file) throws IOException {
-    if ("local".equals(this.fs.getName())) {
+    if ("file".equals(this.fs.getUri().getScheme())) {
       Path qualified = file.makeQualified(FileSystem.getLocal(conf));
       File fsLocal = new File(qualified.toUri());
       return FSDirectory.getDirectory(fsLocal.getAbsolutePath(), false);

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java Sat Jan 19 00:59:29 2008
@@ -31,6 +31,7 @@
 import org.apache.nutch.parse.*;
 import org.apache.nutch.indexer.*;
 import org.apache.nutch.crawl.Inlinks;
+import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
 /** 
@@ -121,8 +122,8 @@
       }
       
       Vector vDirs=new Vector();
-      Path [] directories = fs.listPaths(indexesDir);
-      for(int i = 0; i < fs.listPaths(indexesDir).length; i++) {
+      Path [] directories = fs.listPaths(indexesDir, HadoopFSUtil.getPassDirectoriesFilter(fs));
+      for(int i = 0; i < directories.length; i++) {
         Path indexdone = new Path(directories[i], Indexer.DONE_NAME);
         if(fs.isFile(indexdone)) {
           vDirs.add(directories[i]);

Modified: 
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Sat Jan 19 00:59:29 2008
@@ -29,7 +29,6 @@
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
@@ -60,6 +59,7 @@
 import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseText;
 import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
 
@@ -626,15 +626,7 @@
     boolean normalize = false;
     for (int i = 1; i < args.length; i++) {
       if (args[i].equals("-dir")) {
-        Path[] files = fs.listPaths(new Path(args[++i]), new PathFilter() {
-          public boolean accept(Path f) {
-            try {
-              if (fs.isDirectory(f)) return true;
-            } catch (IOException e) {}
-            ;
-            return false;
-          }
-        });
+        Path[] files = fs.listPaths(new Path(args[++i]), HadoopFSUtil.getPassDirectoriesFilter(fs));
         for (int j = 0; j < files.length; j++)
           segs.add(files[j]);
       } else if (args[i].equals("-filter")) {

Modified: 
lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Sat Jan 19 00:59:29 2008
@@ -40,7 +40,6 @@
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
@@ -64,6 +63,7 @@
 import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseText;
 import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.nutch.util.LogUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
@@ -220,7 +220,7 @@
 
     // remove the old file
     fs.delete(dumpFile);
-    Path[] files = fs.listPaths(tempDir);
+    Path[] files = fs.listPaths(tempDir, HadoopFSUtil.getPassAllFilter());
 
     PrintWriter writer = null;
     int currentRecordNumber = 0;
@@ -451,7 +451,7 @@
     }
     stats.generated = cnt;
     Path fetchDir = new Path(segment, CrawlDatum.FETCH_DIR_NAME);
-    if (fs.exists(fetchDir) && fs.isDirectory(fetchDir)) {
+    if (fs.exists(fetchDir) && fs.getFileStatus(fetchDir).isDir()) {
       cnt = 0L;
       long start = Long.MAX_VALUE;
       long end = Long.MIN_VALUE;
@@ -470,7 +470,7 @@
       stats.fetched = cnt;
     }
     Path parseDir = new Path(segment, ParseData.DIR_NAME);
-    if (fs.exists(fetchDir) && fs.isDirectory(fetchDir)) {
+    if (fs.exists(fetchDir) && fs.getFileStatus(fetchDir).isDir()) {
       cnt = 0L;
       long errors = 0L;
       ParseData value = new ParseData();
@@ -559,14 +559,7 @@
           if (args[i] == null) continue;
           if (args[i].equals("-dir")) {
             Path dir = new Path(args[++i]);
-            Path[] files = fs.listPaths(dir, new PathFilter() {
-              public boolean accept(Path pathname) {
-                try {
-                  if (fs.isDirectory(pathname)) return true;
-                } catch (IOException e) {};
-                return false;
-              }
-            });
+            Path[] files = fs.listPaths(dir, HadoopFSUtil.getPassDirectoriesFilter(fs));
             if (files != null && files.length > 0) {
               dirs.addAll(Arrays.asList(files));
             }

Added: lucene/nutch/trunk/src/java/org/apache/nutch/util/HadoopFSUtil.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/HadoopFSUtil.java?rev=613378&view=auto
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/HadoopFSUtil.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/HadoopFSUtil.java Sat Jan 19 00:59:29 2008
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.util;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+
+public class HadoopFSUtil {
+
+    /**
+     * Returns PathFilter that passes all paths through.
+     */
+    public static PathFilter getPassAllFilter() {
+        return new PathFilter() {
+            public boolean accept(Path arg0) {
+                return true;
+            }
+        };
+    }
+
+    /**
+     * Returns PathFilter that passes directories through.
+     */
+    public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
+        return new PathFilter() {
+            public boolean accept(final Path path) {
+                try {
+                    return fs.getFileStatus(path).isDir();
+                } catch (IOException ioe) {
+                    return false;
+                }
+            }
+
+        };
+    }
+
+}

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/HadoopFSUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java?rev=613378&r1=613377&r2=613378&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java Sat Jan 19 00:59:29 2008
@@ -42,7 +42,7 @@
     if (fs.exists(lockFile)) {
       if(!accept)
         throw new IOException("lock file " + lockFile + " already exists.");
-      if (fs.isDirectory(lockFile))
+      if (fs.getFileStatus(lockFile).isDir())
         throw new IOException("lock file " + lockFile + " already exists and is a directory.");
       // do nothing - the file already exists.
     } else {
@@ -63,7 +63,7 @@
    */
   public static boolean removeLockFile(FileSystem fs, Path lockFile) throws IOException {
     if (!fs.exists(lockFile)) return false;
-    if (fs.isDirectory(lockFile))
+    if (fs.getFileStatus(lockFile).isDir())
       throw new IOException("lock file " + lockFile + " exists but is a directory!");
     return fs.delete(lockFile);
   }


Reply via email to