Repository: mahout
Updated Branches:
  refs/heads/master edc0c69ac -> 7552c55e2


MAHOUT-1775 FileNotFoundException caused by aborting the process of downloading 
Wikipedia dataset, closes apache/mahout#162


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/7552c55e
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/7552c55e
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/7552c55e

Branch: refs/heads/master
Commit: 7552c55e2c0705ebc5fd43cbb2026b4dbf46f1ec
Parents: edc0c69
Author: smarthi <[email protected]>
Authored: Sun Oct 25 00:29:47 2015 -0400
Committer: smarthi <[email protected]>
Committed: Sun Oct 25 00:29:47 2015 -0400

----------------------------------------------------------------------
 examples/bin/classify-wikipedia.sh | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/7552c55e/examples/bin/classify-wikipedia.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh
index 470a81c..68487dc 100755
--- a/examples/bin/classify-wikipedia.sh
+++ b/examples/bin/classify-wikipedia.sh
@@ -63,6 +63,8 @@ if [ "x$alg" != "xclean" ]; then
   mkdir -p ${WORK_DIR}
     if [ ! -e ${WORK_DIR}/wikixml ]; then
         mkdir -p ${WORK_DIR}/wikixml
+    fi
+    if [ ! -e ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml.bz2 ]; then
         echo "Downloading wikipedia XML dump"
         ########################################################   
         #  Datasets: uncomment and run "clean" to change dataset   
@@ -74,10 +76,11 @@ if [ "x$alg" != "xclean" ]; then
         ######### full wikipedia dump: 10G zipped
         #curl http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2 -o ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml.bz2
         ########################################################
-      
-      echo "Extracting..."
+    fi
+    if [ ! -e ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml ]; then
+        echo "Extracting..."
        
-      cd ${WORK_DIR}/wikixml && bunzip2 enwiki-latest-pages-articles.xml.bz2 && cd .. && cd ..
+        cd ${WORK_DIR}/wikixml && bunzip2 enwiki-latest-pages-articles.xml.bz2 && cd .. && cd ..
     fi
 
 echo $START_PATH
@@ -186,4 +189,4 @@ elif [ "x$alg" == "xclean" ]; then
   rm -rf $WORK_DIR
   $DFSRM $WORK_DIR
 fi
-# Remove the work directory
\ No newline at end of file
+# Remove the work directory

Reply via email to