Added: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java?view=auto&rev=567100
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java (added)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java Fri Aug 17 11:17:06 2007
@@ -0,0 +1,236 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.StringTokenizer;
+
+import junit.framework.TestCase;
+import org.apache.tika.config.Content;
+import org.apache.tika.config.LiusConfig;
+import org.apache.tika.exception.LiusException;
+import org.apache.tika.log.LiusLogger;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserFactory;
+import org.apache.tika.utils.Utils;
+import net.hedges.mimeinfo.MimeInfoException;
+
+/**
+ * JUnit tests that look up a parser for each sample document in the
+ * <code>testFiles</code> directory and, for some formats, print or check
+ * the extracted content.
+ * <p>
+ * Test methods declare <code>throws Exception</code> so that any failure
+ * while locating or running a parser is reported by JUnit as an error
+ * instead of being printed to the console and silently ignored.
+ *
+ * @author Rida Benjelloun ([EMAIL PROTECTED])
+ */
+public class TestParsers extends TestCase {
+
+    private LiusConfig tc;
+
+    private File classDir;
+
+    private String config;
+
+    /**
+     * Resolves the configuration files relative to the parent directory of
+     * the first class path entry, loads the parser configuration from
+     * <code>config/config.xml</code> and hands the log4j properties path to
+     * {@link LiusLogger}.
+     */
+    public void setUp() {
+        String sep = File.separator;
+        StringTokenizer st = new StringTokenizer(
+                System.getProperty("java.class.path"), File.pathSeparator);
+
+        // The first class path entry is taken as the test class directory.
+        classDir = new File(st.nextToken());
+
+        config = classDir.getParent() + sep + "config" + sep + "config.xml";
+
+        // NOTE(review): this path uses "Config" while the one above uses
+        // "config"; on a case-sensitive file system these are different
+        // directories. Confirm which spelling is intended.
+        String log4j = classDir.getParent() + sep + "Config" + sep + "log4j"
+                + sep + "log4j.properties";
+
+        tc = LiusConfig.getInstance(config);
+
+        LiusLogger.setLoggerConfigFile(log4j);
+
+    }
+
+    /*
+     * public void testConfig(){ TikaConfig tc =
+     * TikaConfig.getInstance("C:\\tika\\config\\tikaConfig2.xml"); ParserConfig
+     * pc = tc.getParserConfig("text/html"); assertEquals("parse-html",
+     * pc.getName()); }
+     */
+
+    /**
+     * Looks up the parser for a sample document.
+     *
+     * @param fileName name of a file inside the <code>testFiles</code>
+     *            directory that sits next to the class directory
+     * @return the parser selected by {@link ParserFactory}; never
+     *         <code>null</code>, because a missing parser fails the test
+     * @throws Exception if the parser factory fails for this file
+     */
+    private Parser getTestParser(String fileName) throws Exception {
+        File testFile = new File(classDir.getParent() + File.separator
+                + "testFiles" + File.separator + fileName);
+        Parser parser = ParserFactory.getParser(testFile, tc);
+        assertNotNull("No parser returned for " + fileName, parser);
+        return parser;
+    }
+
+    /** Checks that a parser can be obtained for the sample PDF document. */
+    public void testPDFExtraction() throws Exception {
+        getTestParser("testPDF.PDF");
+    }
+
+    /** Checks that a parser can be obtained for the sample text document. */
+    public void testTXTExtraction() throws Exception {
+        getTestParser("testTXT.txt");
+    }
+
+    /** Checks that a parser can be obtained for the sample RTF document. */
+    public void testRTFExtraction() throws Exception {
+        getTestParser("testRTF.rtf");
+    }
+
+    /** Checks that a parser can be obtained for the sample XML document. */
+    public void testXMLExtraction() throws Exception {
+        getTestParser("testXML.xml");
+    }
+
+    /** Prints the string content extracted from the sample PowerPoint file. */
+    public void testPPTExtraction() throws Exception {
+        Parser parser = getTestParser("testPPT.ppt");
+        System.out.println(parser.getStrContent());
+    }
+
+    /** Prints the string content extracted from the sample Word document. */
+    public void testWORDxtraction() throws Exception {
+        Parser parser = getTestParser("testWORD.doc");
+        System.out.println(parser.getStrContent());
+    }
+
+    /** Prints the MIME type and contents of the sample Excel file. */
+    public void testEXCELExtraction() throws Exception {
+        printContentsInfo(getTestParser("testEXCEL.xls"));
+    }
+
+    /** Prints the MIME type and contents of the sample OpenDocument file. */
+    public void testOOExtraction() throws Exception {
+        printContentsInfo(getTestParser("testOO2.odt"));
+    }
+
+    /**
+     * Checks the <code>title</code> content extracted from the sample HTML
+     * document, then prints its MIME type and contents.
+     */
+    public void testHTMLExtraction() throws Exception {
+        Parser parser = getTestParser("testHTML.html");
+        assertEquals("Title : Test Indexation Html",
+                parser.getContent("title").getValue());
+        printContentsInfo(parser);
+    }
+
+    /**
+     * Prints the MIME type and the structured contents reported by the
+     * given parser, followed by a separator line.
+     *
+     * @param parser parser whose results are printed
+     */
+    private void printContentsInfo(Parser parser) {
+        String mimeType = parser.getMimeType();
+        System.out.println("Mime : " + mimeType);
+        // The string content is requested but not printed; the call is kept
+        // in case it has side effects on the parser - TODO confirm.
+        parser.getStrContent();
+        Collection<Content> structuredContent = parser.getContents();
+        Utils.print(structuredContent);
+        System.out.println("==============");
+    }
+
+}

Propchange: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/tika/trunk/src/test/resources/testHTML.html
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/testHTML.html?view=auto&rev=567100
==============================================================================
--- incubator/tika/trunk/src/test/resources/testHTML.html (added)
+++ incubator/tika/trunk/src/test/resources/testHTML.html Fri Aug 17 11:17:06 2007
@@ -0,0 +1,9 @@
+<html>
+       <head>
+               <title>Title : Test Indexation Html</title>     
+       </head>
+       <body>
+               <h1>Test Indexation Html</h1>
+               <p>Indexation du fichier</p>
+       </body> 
+</html>
\ No newline at end of file

Propchange: incubator/tika/trunk/src/test/resources/testHTML.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/tika/trunk/src/test/resources/testRTF.rtf
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/testRTF.rtf?view=auto&rev=567100
==============================================================================
--- incubator/tika/trunk/src/test/resources/testRTF.rtf (added)
+++ incubator/tika/trunk/src/test/resources/testRTF.rtf Fri Aug 17 11:17:06 2007
@@ -0,0 +1,17 @@
+{\rtf1\ansi\ansicpg1252\uc1\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang1036\deflangfe1036{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose
 02020603050405020304}Times New Roman;}{\f37\froman\fcharset238\fprq2 Times New 
Roman CE;}
+{\f38\froman\fcharset204\fprq2 Times New Roman 
Cyr;}{\f40\froman\fcharset161\fprq2 Times New Roman 
Greek;}{\f41\froman\fcharset162\fprq2 Times New Roman 
Tur;}{\f42\froman\fcharset177\fprq2 Times New Roman (Hebrew);}
+{\f43\froman\fcharset178\fprq2 Times New Roman 
(Arabic);}{\f44\froman\fcharset186\fprq2 Times New Roman 
Baltic;}{\f45\froman\fcharset163\fprq2 Times New Roman 
(Vietnamese);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;
+\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;
+\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{\ql 
\li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 
\fs24\lang1036\langfe1036\cgrid\langnp1036\langfenp1036 \snext0 
Normal;}{\*\cs10 \additive \ssemihidden 
+Default Paragraph 
Font;}{\*\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv
 
+\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 
\fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden 
Normal Table;}}{\*\latentstyles\lsdstimax156\lsdlockeddef0}{\*\rsidtbl 
\rsid2954171\rsid10375891}
+{\*\generator Microsoft Word 11.0.6568;}{\info{\title Test d\'92indexation 
Word}{\author Bibliotheque}{\operator 
Bibliotheque}{\creatim\yr2006\mo5\dy18\hr12\min19}{\revtim\yr2006\mo5\dy18\hr12\min19}{\version2}{\edmins0}{\nofpages1}{\nofwords3}
+{\nofchars21}{\*\company Universite 
Laval}{\nofcharsws23}{\vern24579}}\paperw11906\paperh16838\margl1417\margr1417\margt1417\margb1417
 
+\deftab708\widowctrl\ftnbj\aenddoc\hyphhotz425\noxlattoyen\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1417\dgvorigin1417\dghshow1\dgvshow1
+\jexpand\viewkind1\viewscale100\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct\asianbrkrule\nojkernpunct\rsidroot2954171
 \fet0
+\sectd 
\linex0\headery708\footery708\colsx708\endnhere\sectlinegrid360\sectdefaultcl\sftnbj
 {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta 
.}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3
+\pndec\pnstart1\pnindent720\pnhang {\pntxta 
.}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta 
)}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta 
)}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}
+{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta 
)}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta 
)}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta 
)}}\pard\plain 
+\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 
\fs24\lang1036\langfe1036\cgrid\langnp1036\langfenp1036 {\insrsid2954171 Test 
d\rquote indexation Word
+\par 
+\par }}
\ No newline at end of file

Added: incubator/tika/trunk/src/test/resources/testTXT.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/testTXT.txt?view=auto&rev=567100
==============================================================================
--- incubator/tika/trunk/src/test/resources/testTXT.txt (added)
+++ incubator/tika/trunk/src/test/resources/testTXT.txt Fri Aug 17 11:17:06 2007
@@ -0,0 +1,2 @@
+Test d'indexation de Txt
+http://www.apache.org

Propchange: incubator/tika/trunk/src/test/resources/testTXT.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/tika/trunk/src/test/resources/testXML.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/testXML.xml?view=auto&rev=567100
==============================================================================
--- incubator/tika/trunk/src/test/resources/testXML.xml (added)
+++ incubator/tika/trunk/src/test/resources/testXML.xml Fri Aug 17 11:17:06 2007
@@ -0,0 +1,31 @@
+<oaidc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/">
+
+       <dc:title>Archimède et Lius</dc:title>
+
+       <dc:creator>Rida Benjelloun</dc:creator>
+
+       <dc:subject>Java</dc:subject>
+
+       <dc:subject>XML</dc:subject>
+
+       <dc:subject>XSLT</dc:subject>
+
+       <dc:subject>JDOM</dc:subject>
+ 
+       <dc:subject>Indexation</dc:subject>
+
+       <dc:description>Framework d'indexation des documents XML, HTML, PDF etc.. </dc:description>
+
+       <dc:identifier>http://www.apache.org</dc:identifier>
+
+       <dc:date>2000-12</dc:date>
+
+       <dc:type>test</dc:type>
+
+       <dc:format>application/msword</dc:format>
+
+       <dc:language>Fr</dc:language>
+
+       <dc:rights>Non restreint</dc:rights>    
+
+</oaidc:dc>
\ No newline at end of file

Propchange: incubator/tika/trunk/src/test/resources/testXML.xml
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to