Repository: mahout Updated Branches: refs/heads/master edc0c69ac -> 7552c55e2
MAHOUT-1775 FileNotFoundException caused by aborting the process of downloading Wikipedia dataset, closes apache/mahout#162

Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/7552c55e
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/7552c55e
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/7552c55e

Branch: refs/heads/master
Commit: 7552c55e2c0705ebc5fd43cbb2026b4dbf46f1ec
Parents: edc0c69
Author: smarthi <[email protected]>
Authored: Sun Oct 25 00:29:47 2015 -0400
Committer: smarthi <[email protected]>
Committed: Sun Oct 25 00:29:47 2015 -0400

----------------------------------------------------------------------
 examples/bin/classify-wikipedia.sh | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/mahout/blob/7552c55e/examples/bin/classify-wikipedia.sh
----------------------------------------------------------------------
diff --git a/examples/bin/classify-wikipedia.sh b/examples/bin/classify-wikipedia.sh
index 470a81c..68487dc 100755
--- a/examples/bin/classify-wikipedia.sh
+++ b/examples/bin/classify-wikipedia.sh
@@ -63,6 +63,8 @@ if [ "x$alg" != "xclean" ]; then
   mkdir -p ${WORK_DIR}
   if [ ! -e ${WORK_DIR}/wikixml ]; then
     mkdir -p ${WORK_DIR}/wikixml
+  fi
+  if [ ! -e ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml.bz2 ]; then
     echo "Downloading wikipedia XML dump"
     ########################################################
     # Datasets: uncomment and run "clean" to change dataset
@@ -74,10 +76,11 @@ if [ "x$alg" != "xclean" ]; then
     ######### full wikipedia dump: 10G zipped
     #curl http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2 -o ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml.bz2
     ########################################################
-
-    echo "Extracting..."
+  fi
+  if [ ! -e ${WORK_DIR}/wikixml/enwiki-latest-pages-articles.xml ]; then
+    echo "Extracting..."
 
-    cd ${WORK_DIR}/wikixml && bunzip2 enwiki-latest-pages-articles.xml.bz2 && cd .. && cd ..
+    cd ${WORK_DIR}/wikixml && bunzip2 enwiki-latest-pages-articles.xml.bz2 && cd .. && cd ..
   fi
 
 echo $START_PATH
@@ -186,4 +189,4 @@ elif [ "x$alg" == "xclean" ]; then
   rm -rf $WORK_DIR
   $DFSRM $WORK_DIR
 fi
-# Remove the work directory
\ No newline at end of file
+# Remove the work directory
