Added: 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecognizer.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecognizer.java?rev=777643&view=auto
==============================================================================
--- 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecognizer.java
 (added)
+++ 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecognizer.java
 Fri May 22 18:21:59 2009
@@ -0,0 +1,55 @@
+/**
+*******************************************************************************
+* Copyright (C) 2005, International Business Machines Corporation and         *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+package org.apache.tika.parser.txt;
+
+/**
+ * Abstract class for recognizing a single charset.
+ * Part of the implementation of ICU's CharsetDetector.
+ * 
+ * Each specific charset that can be recognized will have an instance
+ * of some subclass of this class.  All interaction between the overall
+ * CharsetDetector and the stuff specific to an individual charset happens
+ * via the interface provided here.
+ * 
+ * Instances of CharsetRecognizer DO NOT have or maintain 
+ * state pertaining to a specific match or detect operation.
+ * They WILL be shared by multiple instances of CharsetDetector.
+ * They encapsulate const charset-specific information.
+ * 
+ * @internal
+ */
+abstract class CharsetRecognizer {
+    /**
+     * Get the IANA name of this charset.
+     * @return the charset name.
+     */
+    abstract String      getName();
+    
+    /**
+     * Get the ISO language code for this charset (<code>null</code> by default).
+     * @return the language code, or <code>null</code> if the language cannot be determined.
+     */
+    public   String      getLanguage()
+    {
+        return null;
+    }
+    
+    /**
+     * Test the match of this charset with the input text data
+     *      which is obtained via the CharsetDetector object.
+     * 
+     * @param det  The CharsetDetector, which contains the input text
+     *             to be checked for being in this charset.
+     * @return     Two values packed into one int:
+     *             <br/>
+     *             bits 0-7:  the match confidence, ranging from 0-100
+     *             <br/>
+     *             bits 8-15: The match reason, an enum-like value.
+     */
+    abstract int         match(CharsetDetector det);
+
+}

Modified: 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=777643&r1=777642&r2=777643&view=diff
==============================================================================
--- 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
 (original)
+++ 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
 Fri May 22 18:21:59 2009
@@ -28,9 +28,6 @@
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
-import com.ibm.icu.text.CharsetDetector;
-import com.ibm.icu.text.CharsetMatch;
-
 /**
  * Text parser
  */

Modified: lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/LICENSE.txt
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/LICENSE.txt?rev=777643&r1=777642&r2=777643&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/LICENSE.txt 
(original)
+++ lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/LICENSE.txt Fri 
May 22 18:21:59 2009
@@ -200,3 +200,43 @@
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
+
+
+
+APACHE TIKA SUBCOMPONENTS
+
+Apache Tika includes a number of subcomponents with separate copyright notices
+and license terms. Your use of these subcomponents is subject to the terms and
+conditions of the following licenses.
+
+Charset detection code from ICU4J (http://site.icu-project.org/)
+
+    Copyright (c) 1995-2009 International Business Machines Corporation
+    and others
+
+    All rights reserved.
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, and/or sell copies of the Software, and to permit persons
+    to whom the Software is furnished to do so, provided that the above
+    copyright notice(s) and this permission notice appear in all copies
+    of the Software and that both the above copyright notice(s) and this
+    permission notice appear in supporting documentation.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+    IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+    BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+    OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+    WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+    ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+    SOFTWARE.
+
+    Except as contained in this notice, the name of a copyright holder shall
+    not be used in advertising or otherwise to promote the sale, use or other
+    dealings in this Software without prior written authorization of the
+    copyright holder.

Modified: lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/NOTICE.txt
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/NOTICE.txt?rev=777643&r1=777642&r2=777643&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/NOTICE.txt 
(original)
+++ lucene/tika/trunk/tika-parsers/src/main/resources/META-INF/NOTICE.txt Fri 
May 22 18:21:59 2009
@@ -3,3 +3,7 @@
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
+
+This product includes software developed by the following copyright owners:
+
+Copyright (c) 1995-2009 International Business Machines Corporation and others


Reply via email to