svn commit: r377494 - in /lucene/nutch/trunk/src/plugin: parse-msexcel/ parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ parse-mspowerpoint/ parse-mspowerpoint/src/java/org/apache/nutch/parse/ms

2006-02-13 Thread jerome
Author: jerome
Date: Mon Feb 13 13:28:13 2006
New Revision: 377494

URL: http://svn.apache.org/viewcvs?rev=377494&view=rev
Log:
Make use of lib-parsems in word, powerpoint and excel parsers

Removed:

lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/PropertiesReaderListener.java

lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PowerPointDocumentException.java

lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java
Modified:
lucene/nutch/trunk/src/plugin/parse-msexcel/build.xml
lucene/nutch/trunk/src/plugin/parse-msexcel/plugin.xml

lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java

lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/MSExcelParser.java

lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/package.html
lucene/nutch/trunk/src/plugin/parse-mspowerpoint/build.xml
lucene/nutch/trunk/src/plugin/parse-mspowerpoint/plugin.xml

lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java

lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java

lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/package.html
lucene/nutch/trunk/src/plugin/parse-msword/build.xml
lucene/nutch/trunk/src/plugin/parse-msword/plugin.xml

lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/FastSavedException.java

lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java

lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/PasswordProtectedException.java

lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java

lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/package.html

Modified: lucene/nutch/trunk/src/plugin/parse-msexcel/build.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msexcel/build.xml?rev=377494&r1=377493&r2=377494&view=diff
==
--- lucene/nutch/trunk/src/plugin/parse-msexcel/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-msexcel/build.xml Mon Feb 13 13:28:13 
2006
@@ -2,19 +2,23 @@
 
 <project name="parse-msexcel" default="jar">
 
-   <import file="../build-plugin.xml" />
+  <import file="../build-plugin.xml" />
 
   <path id="plugin.deps">
 <fileset dir="../lib-jakarta-poi/lib">
   <include name="*.jar" />
 </fileset>
+<fileset dir="../../../build/lib-parsems">
+  <include name="*.jar" />
+</fileset>
   </path>
 
-   <!-- for junit test -->
-   <mkdir dir="${build.test}/data" />
-   <copy todir="${build.test}/data">
-   <fileset dir="sample">
-   <include name="*.xls" />
-   </fileset>
-   </copy>
+  <!-- for junit test -->
+  <mkdir dir="${build.test}/data" />
+  <copy todir="${build.test}/data">
+<fileset dir="sample">
+  <include name="*.xls" />
+</fileset>
+  </copy>
+
 </project>

Modified: lucene/nutch/trunk/src/plugin/parse-msexcel/plugin.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msexcel/plugin.xml?rev=377494&r1=377493&r2=377494&view=diff
==
--- lucene/nutch/trunk/src/plugin/parse-msexcel/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-msexcel/plugin.xml Mon Feb 13 13:28:13 
2006
@@ -14,6 +14,7 @@
<requires>
  <import plugin="nutch-extensionpoints"/>
  <import plugin="lib-jakarta-poi"/>
+ <import plugin="lib-parsems"/>
</requires>
 
<extension id="org.apache.nutch.parse.msexcel"

Modified: 
lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java?rev=377494&r1=377493&r2=377494&view=diff
==
--- 
lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-msexcel/src/java/org/apache/nutch/parse/msexcel/ExcelExtractor.java
 Mon Feb 13 13:28:13 2006
@@ -16,17 +16,17 @@
 package org.apache.nutch.parse.msexcel;
 
 // JDK imports
-import java.io.IOException;
 import java.io.InputStream;
-import java.util.Date;
-import java.util.Properties;
 
 // Jakarta POI imports
 import org.apache.poi.hssf.usermodel.HSSFCell;
 import org.apache.poi.hssf.usermodel.HSSFRow;
 import org.apache.poi.hssf.usermodel.HSSFSheet;
 import 

svn commit: r377501 - in /lucene/nutch/trunk: ./ src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/ src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/

2006-02-13 Thread jerome
Author: jerome
Date: Mon Feb 13 13:43:15 2006
New Revision: 377501

URL: http://svn.apache.org/viewcvs?rev=377501&view=rev
Log:
Javadoc updates for ms parsers

Added:

lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/package.html
   (with props)
Modified:
lucene/nutch/trunk/build.xml
lucene/nutch/trunk/default.properties

lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/MSBaseParser.java

lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/FilteredStringWriter.java

Modified: lucene/nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/build.xml?rev=377501&r1=377500&r2=377501&view=diff
==
--- lucene/nutch/trunk/build.xml (original)
+++ lucene/nutch/trunk/build.xml Mon Feb 13 13:43:15 2006
@@ -249,6 +249,7 @@
 
<packageset dir="${src.dir}"/>
<packageset dir="${plugins.dir}/lib-http/src/java"/>
+   <packageset dir="${plugins.dir}/lib-parsems/src/java"/>
<packageset dir="${plugins.dir}/ontology/src/java"/>
<packageset dir="${plugins.dir}/protocol-file/src/java"/>
<packageset dir="${plugins.dir}/protocol-ftp/src/java"/>

Modified: lucene/nutch/trunk/default.properties
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/default.properties?rev=377501&r1=377500&r2=377501&view=diff
==
--- lucene/nutch/trunk/default.properties (original)
+++ lucene/nutch/trunk/default.properties Mon Feb 13 13:43:15 2006
@@ -68,6 +68,7 @@
 plugin.msword=org.apache.nutch.parse.msword*
 # Unfortunately, ontology on core and plugin uses the same package:
 # plugin.ontology=org.apache.nutch.ontology*
+plugin.parsems=org.apache.nutch.parse.ms*
 plugin.pdf=org.apache.nutch.parse.pdf*
 plugin.rss=org.apache.nutch.parse.rss*
 plugin.rtf=org.apache.nutch.parse.rtf*
@@ -95,6 +96,7 @@
${plugin.msexcel}:\
${plugin.mspowerpoint}:\
${plugin.msword}:\
+   ${plugin.parsems}:\
${plugin.pdf}:\
${plugin.rss}:\
${plugin.rtf}:\

Modified: 
lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/MSBaseParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/MSBaseParser.java?rev=377501&r1=377500&r2=377501&view=diff
==
--- 
lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/MSBaseParser.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/MSBaseParser.java
 Mon Feb 13 13:43:15 2006
@@ -56,7 +56,7 @@
 
   /**
* Parses a Content with a specific {@link MSExtractor Microsoft 
document
-   * extractor.
+   * extractor}.
*/
   protected Parse getParse(MSExtractor extractor, Content content) {
 

Added: 
lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/package.html
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/package.html?rev=377501&view=auto
==
--- 
lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/package.html
 (added)
+++ 
lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/package.html
 Mon Feb 13 13:43:15 2006
@@ -0,0 +1,5 @@
+<html>
+<body>
+<p>Common API for Microsoft&copy; documents parsing.</p>
+</body>
+</html>

Propchange: 
lucene/nutch/trunk/src/plugin/lib-parsems/src/java/org/apache/nutch/parse/ms/package.html
--
svn:eol-style = native

Modified: 
lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/FilteredStringWriter.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/FilteredStringWriter.java?rev=377501&r1=377500&r2=377501&view=diff
==
--- 
lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/FilteredStringWriter.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/FilteredStringWriter.java
 Mon Feb 13 13:43:15 2006
@@ -23,7 +23,6 @@
  * 
  * @author Stephan Strittmatter - http://www.sybit.de
  * @version 1.0
- * @create 19.01.2005
  */
 public class FilteredStringWriter extends StringWriter {
 
@@ -67,4 +66,4 @@
   super.write(ch);
 }
   }
-}
\ No newline at end of file
+}