Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -34,54 +37,52 @@ import org.apache.tika.metadata.Property */ @SuppressWarnings("deprecation") public class MetadataAwareLuceneIndexer { + private Tika tika; - private Tika tika; - - private IndexWriter writer; - - public MetadataAwareLuceneIndexer(IndexWriter writer, Tika tika) { - this.writer = writer; - this.tika = tika; - } - - public void indexContentSpecificMet(File file) throws Exception { - Metadata met = new Metadata(); - try (InputStream is = new FileInputStream(file)) { - tika.parse(is, met); - Document document = new Document(); - for (String key : met.names()) { - String[] values = met.getValues(key); - for (String val : values) { - document.add(new Field(key, val, Store.YES, Index.ANALYZED)); - } - writer.addDocument(document); - } - } - } - - public void indexWithDublinCore(File file) throws Exception { - Metadata met = new Metadata(); - met.add(Metadata.CREATOR, "Manning"); - met.add(Metadata.CREATOR, "Tika in Action"); - met.set(Metadata.DATE, new Date()); - met.set(Metadata.FORMAT, tika.detect(file)); - met.set(DublinCore.SOURCE, file.toURI().toURL().toString()); - met.add(Metadata.SUBJECT, "File"); - met.add(Metadata.SUBJECT, "Indexing"); - met.add(Metadata.SUBJECT, "Metadata"); - met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public", - "private"), "public"); - try (InputStream is = new FileInputStream(file)) { - tika.parse(is, met); - Document document = new Document(); - for (String key : met.names()) { - String[] values = met.getValues(key); - for (String val : values) { - document.add(new Field(key, val, Store.YES, Index.ANALYZED)); - } - writer.addDocument(document); - } - } - } + private IndexWriter writer; + public MetadataAwareLuceneIndexer(IndexWriter writer, Tika tika) { + this.writer = writer; + this.tika = tika; + } + + public void indexContentSpecificMet(File file) throws Exception { + Metadata met = new Metadata(); + try (InputStream is = new FileInputStream(file)) { + tika.parse(is, met); + Document document = new Document(); + for (String key : met.names()) { + String[] values = met.getValues(key); + for (String val : values) { + document.add(new Field(key, val, Store.YES, Index.ANALYZED)); + } + writer.addDocument(document); + } + } + } + + public void indexWithDublinCore(File file) throws Exception { + Metadata met = new Metadata(); + met.add(Metadata.CREATOR, "Manning"); + met.add(Metadata.CREATOR, "Tika in Action"); + met.set(Metadata.DATE, new Date()); + met.set(Metadata.FORMAT, tika.detect(file)); + met.set(DublinCore.SOURCE, file.toURI().toURL().toString()); + met.add(Metadata.SUBJECT, "File"); + met.add(Metadata.SUBJECT, "Indexing"); + met.add(Metadata.SUBJECT, "Metadata"); + met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public", + "private"), "public"); + try (InputStream is = new FileInputStream(file)) { + tika.parse(is, met); + Document document = new Document(); + for (String key : met.names()) { + String[] values = met.getValues(key); + for (String val : values) { + document.add(new Field(key, val, Store.YES, Index.ANALYZED)); + } + writer.addDocument(document); + } + } + } }
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -38,23 +41,23 @@ import static java.nio.charset.StandardC * Demonstrates how to call the different components within Tika: its * {@link Detector} framework (aka MIME identification and repository), its * {@link Parser} interface, its {@link LanguageIdentifier} and other goodies. - * + * <p> * It also shows the "easy way" via {@link AutoDetectParser} */ public class MyFirstTika { public static void main(String[] args) throws Exception { String filename = args[0]; TikaConfig tikaConfig = TikaConfig.getDefaultConfig(); - + Metadata metadata = new Metadata(); String text = parseUsingComponents(filename, tikaConfig, metadata); System.out.println("Parsed Metadata: "); System.out.println(metadata); System.out.println("Parsed Text: "); System.out.println(text); - + System.out.println("-------------------------"); - + metadata = new Metadata(); text = parseUsingAutoDetect(filename, tikaConfig, metadata); System.out.println("Parsed Metadata: "); @@ -62,19 +65,20 @@ public class MyFirstTika { System.out.println("Parsed Text: "); System.out.println(text); } - - public static String parseUsingAutoDetect(String filename, TikaConfig tikaConfig, - Metadata metadata) throws Exception { + + public static String parseUsingAutoDetect(String filename, TikaConfig tikaConfig, + Metadata metadata) throws Exception { System.out.println("Handling using AutoDetectParser: [" + filename + "]"); - + AutoDetectParser parser = new AutoDetectParser(tikaConfig); ContentHandler handler = new BodyContentHandler(); TikaInputStream stream = TikaInputStream.get(new File(filename), metadata); parser.parse(stream, handler, metadata, new ParseContext()); return handler.toString(); } - public static String parseUsingComponents(String filename, TikaConfig tikaConfig, - Metadata metadata) throws Exception { + + public static String parseUsingComponents(String filename, TikaConfig tikaConfig, + Metadata metadata) throws Exception { MimeTypes mimeRegistry = tikaConfig.getMimeRepository(); System.out.println("Examining: [" + filename + "]"); @@ -106,7 +110,7 @@ public class MyFirstTika { // Have the file parsed to get the content and metadata ContentHandler handler = new BodyContentHandler(); parser.parse(stream, handler, metadata, new ParseContext()); - + return handler.toString(); } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java Mon Sep 21 17:19:26 2015 @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.tika.example; import java.io.IOException; @@ -44,7 +45,7 @@ public class ParsingExample { /** * Example of how to use Tika's parseToString method to parse the content of a file, * and return any text found. - * + * <p> * Note: Tika.parseToString() will extract content from the outer container * document and any embedded/attached documents. * @@ -60,20 +61,20 @@ public class ParsingExample { /** * Example of how to use Tika to parse a file when you do not know its file type * ahead of time. - * + * <p> * AutoDetectParser attempts to discover the file's type automatically, then call * the exact Parser built for that file type. - * + * <p> * The stream to be parsed by the Parser. In this case, we get a file from the * resources folder of this project. - * + * <p> * Handlers are used to get the exact information you want out of the host of * information gathered by Parsers. The body content handler, intuitively, extracts * everything that would go between HTML body tags. - * + * <p> * The Metadata object will be filled by the Parser with Metadata discovered about * the file being parsed. - * + * <p> * Note: This example will extract content from the outer document and all * embedded documents. However, if you choose to use a {@link ParseContext}, * make sure to set a {@link Parser} or else embedded content will not be @@ -154,7 +155,6 @@ public class ParsingExample { */ public List<Metadata> recursiveParserWrapperExample() throws IOException, SAXException, TikaException { - Parser p = new AutoDetectParser(); ContentHandlerFactory factory = new BasicContentHandlerFactory( BasicContentHandlerFactory.HANDLER_TYPE.HTML, -1); @@ -187,7 +187,7 @@ public class ParsingExample { */ public String serializedRecursiveParserWrapperExample() throws IOException, SAXException, TikaException { - List metadataList = recursiveParserWrapperExample(); + List<Metadata> metadataList = recursiveParserWrapperExample(); StringWriter writer = new StringWriter(); JsonMetadataList.toJson(metadataList, writer); return writer.toString(); @@ -195,7 +195,6 @@ public class ParsingExample { /** - * * @param outputPath -- output directory to place files * @return list of files created * @throws IOException @@ -207,9 +206,9 @@ public class ParsingExample { InputStream stream = ParsingExample.class.getResourceAsStream("test_recursive_embedded.docx"); ExtractEmbeddedFiles ex = new ExtractEmbeddedFiles(); ex.extract(stream, outputPath); - List<Path> ret = new ArrayList<Path>(); + List<Path> ret = new ArrayList<>(); try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(outputPath)) { - for (Path entry: dirStream) { + for (Path entry : dirStream) { ret.add(entry); } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,15 +20,13 @@ package org.apache.tika.example; import java.security.Key; public class Pharmacy { + private static Key key = null; - private static Key key = null; - - public static Key getKey() { - return key; - } - - public static void setKey(Key key) { - Pharmacy.key = key; - } - + public static Key getKey() { + return key; + } + + public static void setKey(Key key) { + Pharmacy.key = key; + } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -26,27 +29,24 @@ import org.apache.tika.sax.TeeContentHan import org.xml.sax.ContentHandler; public class PrescriptionParser extends XMLParser { + private static final long serialVersionUID = 7690682277511967388L; - private static final long serialVersionUID = 7690682277511967388L; - - @Override - protected ContentHandler getContentHandler(ContentHandler handler, - Metadata metadata, ParseContext context) { - String xpd = "http://example.com/2011/xpd"; - - ContentHandler doctor = new ElementMetadataHandler(xpd, "doctor", - metadata, "xpd:doctor"); - ContentHandler patient = new ElementMetadataHandler(xpd, "patient", - metadata, "xpd:patient"); - - return new TeeContentHandler(super.getContentHandler(handler, metadata, - context), doctor, patient); - } - - @Override - public Set<MediaType> getSupportedTypes(ParseContext context) { - return Collections.singleton(MediaType - .application("x-prescription+xml")); - } - + @Override + protected ContentHandler getContentHandler(ContentHandler handler, + Metadata metadata, ParseContext context) { + String xpd = "http://example.com/2011/xpd"; + + ContentHandler doctor = new ElementMetadataHandler(xpd, "doctor", + metadata, "xpd:doctor"); + ContentHandler patient = new ElementMetadataHandler(xpd, "patient", + metadata, "xpd:patient"); + + return new TeeContentHandler(super.getContentHandler(handler, metadata, + context), doctor, patient); + } + + @Override + public Set<MediaType> getSupportedTypes(ParseContext context) { + return Collections.singleton(MediaType.application("x-prescription+xml")); + } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -35,114 +38,108 @@ import org.apache.tika.metadata.DublinCo import org.apache.tika.metadata.Metadata; /** - * * Builds on top of the LuceneIndexer and the Metadata discussions in Chapter 6 * to output an RSS (or RDF) feed of files crawled by the LuceneIndexer within * the last N minutes. */ @SuppressWarnings("deprecation") public class RecentFiles { + private IndexReader reader; - private IndexReader reader; - - private SimpleDateFormat rssDateFormat = new SimpleDateFormat( - "E, dd MMM yyyy HH:mm:ss z", Locale.getDefault()); - - public String generateRSS(File indexFile) throws CorruptIndexException, - IOException { - StringBuffer output = new StringBuffer(); - output.append(getRSSHeaders()); - IndexSearcher searcher = null; - try { - reader = IndexReader.open(new SimpleFSDirectory(indexFile)); - searcher = new IndexSearcher(reader); - GregorianCalendar gc = new java.util.GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); - gc.setTime(new Date()); - String nowDateTime = ISO8601.format(gc); - gc.add(java.util.GregorianCalendar.MINUTE, -5); - String fiveMinsAgo = ISO8601.format(gc); - TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(), - fiveMinsAgo, nowDateTime, true, true); - TopScoreDocCollector collector = TopScoreDocCollector.create(20, - true); - searcher.search(query, collector); - ScoreDoc[] hits = collector.topDocs().scoreDocs; - for (int i = 0; i < hits.length; i++) { - Document doc = searcher.doc(hits[i].doc); - output.append(getRSSItem(doc)); - } - - } finally { - if (reader != null) reader.close(); - if (searcher != null) searcher.close(); - } - - output.append(getRSSFooters()); - return output.toString(); - } - - public String getRSSItem(Document doc) { - StringBuffer output = new StringBuffer(); - output.append("<item>"); - output.append(emitTag("guid", doc.get(DublinCore.SOURCE.getName()), - "isPermalink", "true")); - output.append(emitTag("title", doc.get(Metadata.TITLE), null, null)); - output.append(emitTag("link", doc.get(DublinCore.SOURCE.getName()), - null, null)); - output.append(emitTag("author", doc.get(Metadata.CREATOR), null, null)); - for (String topic : doc.getValues(Metadata.SUBJECT)) { - output.append(emitTag("category", topic, null, null)); - } - output.append(emitTag("pubDate", rssDateFormat.format(ISO8601.parse(doc - .get(Metadata.DATE.toString()))), null, null)); - output.append(emitTag("description", doc.get(Metadata.TITLE), null, - null)); - output.append("</item>"); - return output.toString(); - } - - public String getRSSHeaders() { - StringBuffer output = new StringBuffer(); - output.append("<?xml version=\"1.0\" encoding=\"utf-8\">"); - output.append("<rss version=\"2.0\">"); - output.append(" <channel>"); - output.append(" <title>Tika in Action: Recent Files Feed." - + "</title>"); - output.append(" <description>Chapter 6 Examples demonstrating " - + "use of Tika Metadata for RSS.</description>"); - output.append(" <link>tikainaction.rss</link>"); - output.append(" <lastBuildDate>" + rssDateFormat.format(new Date()) - + "</lastBuildDate>"); - output.append(" <generator>Manning Publications: Tika in Action" - + "</generator>"); - output.append(" <copyright>All Rights Reserved</copyright>"); - return output.toString(); - } - - public String getRSSFooters() { - StringBuffer output = new StringBuffer(); - output.append(" </channel>"); - return output.toString(); - } - - private String emitTag(String tagName, String value, String attributeName, - String attributeValue) { - StringBuffer output = new StringBuffer(); - output.append("<"); - output.append(tagName); - if (attributeName != null) { - output.append(" "); - output.append(attributeName); - output.append("=\""); - output.append(attributeValue); - output.append("\""); - } - output.append(">"); - output.append(value); - output.append("</"); - output.append(tagName); - output.append(">"); - return output.toString(); - } + private SimpleDateFormat rssDateFormat = new SimpleDateFormat( + "E, dd MMM yyyy HH:mm:ss z", Locale.getDefault()); + public String generateRSS(File indexFile) throws CorruptIndexException, + IOException { + StringBuffer output = new StringBuffer(); + output.append(getRSSHeaders()); + IndexSearcher searcher = null; + try { + reader = IndexReader.open(new SimpleFSDirectory(indexFile)); + searcher = new IndexSearcher(reader); + GregorianCalendar gc = new java.util.GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); + gc.setTime(new Date()); + String nowDateTime = ISO8601.format(gc); + gc.add(java.util.GregorianCalendar.MINUTE, -5); + String fiveMinsAgo = ISO8601.format(gc); + TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(), + fiveMinsAgo, nowDateTime, true, true); + TopScoreDocCollector collector = TopScoreDocCollector.create(20, + true); + searcher.search(query, collector); + ScoreDoc[] hits = collector.topDocs().scoreDocs; + for (int i = 0; i < hits.length; i++) { + Document doc = searcher.doc(hits[i].doc); + output.append(getRSSItem(doc)); + } + + } finally { + if (reader != null) reader.close(); + if (searcher != null) searcher.close(); + } + + output.append(getRSSFooters()); + return output.toString(); + } + + public String getRSSItem(Document doc) { + StringBuilder output = new StringBuilder(); + output.append("<item>"); + output.append(emitTag("guid", doc.get(DublinCore.SOURCE.getName()), + "isPermalink", "true")); + output.append(emitTag("title", doc.get(Metadata.TITLE), null, null)); + output.append(emitTag("link", doc.get(DublinCore.SOURCE.getName()), + null, null)); + output.append(emitTag("author", doc.get(Metadata.CREATOR), null, null)); + for (String topic : doc.getValues(Metadata.SUBJECT)) { + output.append(emitTag("category", topic, null, null)); + } + output.append(emitTag("pubDate", rssDateFormat.format(ISO8601.parse(doc + .get(Metadata.DATE.toString()))), null, null)); + output.append(emitTag("description", doc.get(Metadata.TITLE), null, + null)); + output.append("</item>"); + return output.toString(); + } + + public String getRSSHeaders() { + StringBuilder output = new StringBuilder(); + output.append("<?xml version=\"1.0\" encoding=\"utf-8\">"); + output.append("<rss version=\"2.0\">"); + output.append(" <channel>"); + output.append(" <title>Tika in Action: Recent Files Feed.</title>"); + output.append(" <description>Chapter 6 Examples demonstrating " + + "use of Tika Metadata for RSS.</description>"); + output.append(" <link>tikainaction.rss</link>"); + output.append(" <lastBuildDate>"); + output.append(rssDateFormat.format(new Date())); + output.append("</lastBuildDate>"); + output.append(" <generator>Manning Publications: Tika in Action</generator>"); + output.append(" <copyright>All Rights Reserved</copyright>"); + return output.toString(); + } + + public String getRSSFooters() { + return " </channel>"; + } + + private String emitTag(String tagName, String value, String attributeName, + String attributeValue) { + StringBuilder output = new StringBuilder(); + output.append("<"); + output.append(tagName); + if (attributeName != null) { + output.append(" "); + output.append(attributeName); + output.append("=\""); + output.append(attributeValue); + output.append("\""); + } + output.append(">"); + output.append(value); + output.append("</"); + output.append(tagName); + output.append(">"); + return output.toString(); + } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -43,100 +46,92 @@ import static java.nio.charset.StandardC * Demonstrates Tika and its ability to sense symlinks. */ public class RollbackSoftware { - - public static void main(String[] args) throws Exception { - RollbackSoftware r = new RollbackSoftware(); - r.rollback(new File(args[0])); - } - - public void rollback(File deployArea) throws IOException, SAXException, - TikaException { - LinkContentHandler handler = new LinkContentHandler(); - Metadata met = new Metadata(); - DeploymentAreaParser parser = new DeploymentAreaParser(); - parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath(), UTF_8), - handler, met); - List<Link> links = handler.getLinks(); - if (links.size() < 2) - throw new IOException("Must have installed at least 2 versions!"); - Collections.sort(links, new Comparator<Link>() { - public int compare(Link o1, Link o2) { - return o1.getText().compareTo(o2.getText()); - } - }); - - this.updateVersion(links.get(links.size() - 2).getText()); - - } - - private void updateVersion(String version) { - System.out.println("Rolling back to version: [" + version + "]"); - } - - class DeploymentAreaParser implements Parser { - - private static final long serialVersionUID = -2356647405087933468L; - - /* - * (non-Javadoc) - * - * @see org.apache.tika.parser.Parser#getSupportedTypes( - * org.apache.tika.parser.ParseContext) - */ - public Set<MediaType> getSupportedTypes(ParseContext context) { - return Collections.unmodifiableSet(new HashSet<MediaType>(Arrays - .asList(MediaType.TEXT_PLAIN))); - } - - /* - * (non-Javadoc) - * - * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, - * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata) - */ - public void parse(InputStream is, ContentHandler handler, - Metadata metadata) throws IOException, SAXException, - TikaException { - parse(is, handler, metadata, new ParseContext()); - } - - /* - * (non-Javadoc) - * - * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, - * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, - * org.apache.tika.parser.ParseContext) - */ - - public void parse(InputStream is, ContentHandler handler, - Metadata metadata, ParseContext context) throws IOException, - SAXException, TikaException { - - File deployArea = new File(IOUtils.toString(is, UTF_8)); - File[] versions = deployArea.listFiles(new FileFilter() { - - public boolean accept(File pathname) { - return !pathname.getName().startsWith("current"); - } - }); - - XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, - metadata); - xhtml.startDocument(); - for (File v : versions) { - if (isSymlink(v)) - continue; - xhtml.startElement("a", "href", v.toURI().toURL().toExternalForm()); - xhtml.characters(v.getName()); - xhtml.endElement("a"); - } - - } - - } - - private boolean isSymlink(File f) throws IOException { - return !f.getAbsolutePath().equals(f.getCanonicalPath()); - } - + public static void main(String[] args) throws Exception { + RollbackSoftware r = new RollbackSoftware(); + r.rollback(new File(args[0])); + } + + public void rollback(File deployArea) throws IOException, SAXException, + TikaException { + LinkContentHandler handler = new LinkContentHandler(); + Metadata met = new Metadata(); + DeploymentAreaParser parser = new DeploymentAreaParser(); + parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath(), UTF_8), + handler, met); + List<Link> links = handler.getLinks(); + if (links.size() < 2) + throw new IOException("Must have installed at least 2 versions!"); + Collections.sort(links, new Comparator<Link>() { + public int compare(Link o1, Link o2) { + return o1.getText().compareTo(o2.getText()); + } + }); + + this.updateVersion(links.get(links.size() - 2).getText()); + } + + private void updateVersion(String version) { + System.out.println("Rolling back to version: [" + version + "]"); + } + + class DeploymentAreaParser implements Parser { + private static final long serialVersionUID = -2356647405087933468L; + + /* + * (non-Javadoc) + * + * @see org.apache.tika.parser.Parser#getSupportedTypes( + * org.apache.tika.parser.ParseContext) + */ + public Set<MediaType> getSupportedTypes(ParseContext context) { + return Collections.unmodifiableSet(new HashSet<MediaType>(Arrays + .asList(MediaType.TEXT_PLAIN))); + } + + /* + * (non-Javadoc) + * + * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, + * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata) + */ + public void parse(InputStream is, ContentHandler handler, + Metadata metadata) throws IOException, SAXException, + TikaException { + parse(is, handler, metadata, new ParseContext()); + } + + /* + * (non-Javadoc) + * + * @see org.apache.tika.parser.Parser#parse(java.io.InputStream, + * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata, + * org.apache.tika.parser.ParseContext) + */ + public void parse(InputStream is, ContentHandler handler, + Metadata metadata, ParseContext context) throws IOException, + SAXException, TikaException { + + File deployArea = new File(IOUtils.toString(is, UTF_8)); + File[] versions = deployArea.listFiles(new FileFilter() { + public boolean accept(File pathname) { + return !pathname.getName().startsWith("current"); + } + }); + + XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, + metadata); + xhtml.startDocument(); + for (File v : versions) { + if (isSymlink(v)) + continue; + xhtml.startElement("a", "href", v.toURI().toURL().toExternalForm()); + xhtml.characters(v.getName()); + xhtml.endElement("a"); + } + } + } + + private boolean isSymlink(File f) throws IOException { + return !f.getAbsolutePath().equals(f.getCanonicalPath()); + } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,20 +18,19 @@ package org.apache.tika.example; import java.io.File; + import org.apache.tika.Tika; public class SimpleTextExtractor { - - public static void main(String[] args) throws Exception { - // Create a Tika instance with the default configuration - Tika tika = new Tika(); - - // Parse all given files and print out the extracted - // text content - for (String file : args) { - String text = tika.parseToString(new File(file)); - System.out.print(text); - } - } - + public static void main(String[] args) throws Exception { + // Create a Tika instance with the default configuration + Tika tika = new Tika(); + + // Parse all given files and print out the extracted + // text content + for (String file : args) { + String text = tika.parseToString(new File(file)); + System.out.print(text); + } + } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -19,14 +22,12 @@ import java.io.File; import org.apache.tika.Tika; public class SimpleTypeDetector { + public static void main(String[] args) throws Exception { + Tika tika = new Tika(); - public static void main(String[] args) throws Exception { - Tika tika = new Tika(); - - for (String file : args) { - String type = tika.detect(new File(file)); - System.out.println(file + ": " + type); - } - } - + for (String file : args) { + String type = tika.detect(new File(file)); + System.out.println(file + ": " + type); + } + } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -26,14 +29,12 @@ import org.springframework.context.suppo import static java.nio.charset.StandardCharsets.UTF_8; public class SpringExample { - - public static void main(String[] args) throws Exception { - ApplicationContext context = new ClassPathXmlApplicationContext( - new String[] { "org/apache/tika/example/spring.xml" }); - Parser parser = context.getBean("tika", Parser.class); - parser.parse(new ByteArrayInputStream("Hello, World!".getBytes(UTF_8)), - new WriteOutContentHandler(System.out), new Metadata(), - new ParseContext()); - } - + public static void main(String[] args) throws Exception { + ApplicationContext context = new ClassPathXmlApplicationContext( + new String[]{"org/apache/tika/example/spring.xml"}); + Parser parser = context.getBean("tika", Parser.class); + parser.parse(new ByteArrayInputStream("Hello, World!".getBytes(UTF_8)), + new WriteOutContentHandler(System.out), new Metadata(), + new ParseContext()); + } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -52,149 +55,147 @@ import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; public class TIAParsingExample { - - public static String parseToStringExample() throws Exception { - File document = new File("example.doc"); - String content = new Tika().parseToString(document); - System.out.print(content); - return content; - } - - public static void parseToReaderExample() throws Exception { - File document = new File("example.doc"); - try (Reader reader = new Tika().parse(document)) { - char[] buffer = new char[1000]; - int n = reader.read(buffer); - while (n != -1) { - System.out.append(CharBuffer.wrap(buffer, 0, n)); - n = reader.read(buffer); - } - } - } - - public static void parseFileInputStream(String filename) throws Exception { - Parser parser = new AutoDetectParser(); - ContentHandler handler = new DefaultHandler(); - Metadata metadata = new Metadata(); - ParseContext context = new ParseContext(); - try (InputStream stream = new FileInputStream(new File(filename))) { - parser.parse(stream, handler, metadata, context); - } - } - - public static void parseURLStream(String address) throws Exception { - Parser parser = new AutoDetectParser(); - ContentHandler handler = new DefaultHandler(); - Metadata metadata = new Metadata(); - ParseContext context = new ParseContext(); - try (InputStream stream = new GZIPInputStream(new URL(address).openStream())) { - parser.parse(stream, handler, metadata, context); - } - } - - public static void parseTikaInputStream(String filename) throws Exception { - Parser parser = new AutoDetectParser(); - ContentHandler handler = new DefaultHandler(); - Metadata metadata = new Metadata(); - ParseContext context = new ParseContext(); - try (InputStream stream = TikaInputStream.get(new File(filename))) { - parser.parse(stream, handler, metadata, context); - } - } - - public static File tikaInputStreamGetFile(String filename) throws Exception { - try (InputStream stream = TikaInputStream.get(new File(filename))) { - TikaInputStream tikaInputStream = TikaInputStream.get(stream); - File file = tikaInputStream.getFile(); - return file; - } - } - - public static void useHtmlParser() throws Exception { - InputStream stream = new ByteArrayInputStream(new byte[0]); - ContentHandler handler = new DefaultHandler(); - Metadata metadata = new Metadata(); - ParseContext context = new ParseContext(); - Parser parser = new HtmlParser(); - parser.parse(stream, handler, metadata, context); - } - - public static void useCompositeParser() throws Exception { - InputStream stream = new ByteArrayInputStream(new byte[0]); - ContentHandler handler = new DefaultHandler(); - ParseContext context = new ParseContext(); - Map<MediaType, Parser> parsersByType = new HashMap<MediaType, Parser>(); - parsersByType.put(MediaType.parse("text/html"), new HtmlParser()); - parsersByType.put(MediaType.parse("application/xml"), new XMLParser()); - - CompositeParser parser = new CompositeParser(); - parser.setParsers(parsersByType); - parser.setFallback(new TXTParser()); - - Metadata metadata = new Metadata(); - metadata.set(Metadata.CONTENT_TYPE, "text/html"); - parser.parse(stream, handler, metadata, context); - } - - public static void useAutoDetectParser() throws Exception { - InputStream stream = new ByteArrayInputStream(new byte[0]); - ContentHandler handler = new DefaultHandler(); - Metadata metadata = new Metadata(); - ParseContext context = new ParseContext(); - Parser parser = new AutoDetectParser(); - parser.parse(stream, handler, metadata, context); - } - - public static void testTeeContentHandler(String filename) throws Exception { - InputStream stream = new ByteArrayInputStream(new byte[0]); - Metadata metadata = new Metadata(); - ParseContext context = new ParseContext(); - Parser parser = new AutoDetectParser(); - LinkContentHandler linkCollector = new LinkContentHandler(); - try (OutputStream output = new FileOutputStream(new File(filename))) { - ContentHandler handler = new TeeContentHandler( - new BodyContentHandler(output), linkCollector); - parser.parse(stream, handler, metadata, context); - } - } - - public static void testLocale() throws Exception { - InputStream stream = new ByteArrayInputStream(new byte[0]); - ContentHandler handler = new DefaultHandler(); - Metadata metadata = new Metadata(); - Parser parser = new AutoDetectParser(); - ParseContext context = new ParseContext(); - context.set(Locale.class, Locale.ENGLISH); - parser.parse(stream, handler, metadata, context); - } - - public static void testHtmlMapper() throws Exception { - InputStream stream = new ByteArrayInputStream(new byte[0]); - ContentHandler handler = new DefaultHandler(); - Metadata metadata = new Metadata(); - Parser parser = new AutoDetectParser(); - ParseContext context = new ParseContext(); - context.set(HtmlMapper.class, new IdentityHtmlMapper()); - parser.parse(stream, handler, metadata, context); - } - - public static void testCompositeDocument() throws Exception { - InputStream stream = new ByteArrayInputStream(new byte[0]); - ContentHandler handler = new DefaultHandler(); - Metadata metadata = new Metadata(); - Parser parser = new AutoDetectParser(); - ParseContext context = new ParseContext(); - context.set(Parser.class, new ParserDecorator(parser) { - private static final long serialVersionUID = 4424210691523343833L; - - @Override - public void parse(InputStream stream, ContentHandler handler, - Metadata metadata, ParseContext context) - throws IOException, SAXException, TikaException { - // custom processing of the component document - } - }); - parser.parse(stream, handler, metadata, context); - } - + public static String parseToStringExample() throws Exception { + File document = new File("example.doc"); + String content = new Tika().parseToString(document); + System.out.print(content); + return content; + } + + public static void parseToReaderExample() throws Exception { + File document = new File("example.doc"); + try (Reader reader = new Tika().parse(document)) { + char[] buffer = new char[1000]; + int n = reader.read(buffer); + while (n != -1) { + System.out.append(CharBuffer.wrap(buffer, 0, n)); + n = reader.read(buffer); + } + } + } + + public static void parseFileInputStream(String filename) throws Exception { + Parser parser = new AutoDetectParser(); + ContentHandler handler = new DefaultHandler(); + Metadata metadata = new Metadata(); + ParseContext context = new ParseContext(); + try (InputStream stream = new FileInputStream(new File(filename))) { + parser.parse(stream, handler, metadata, context); + } + } + + public static void parseURLStream(String address) throws Exception { + Parser parser = new AutoDetectParser(); + ContentHandler handler = new DefaultHandler(); + Metadata metadata = new Metadata(); + ParseContext context = new ParseContext(); + try (InputStream stream = new GZIPInputStream(new URL(address).openStream())) { + parser.parse(stream, handler, metadata, context); + } + } + + public static void parseTikaInputStream(String filename) throws Exception { + Parser parser = new AutoDetectParser(); + ContentHandler handler = new DefaultHandler(); + Metadata metadata = new Metadata(); + ParseContext context = new ParseContext(); + try (InputStream stream = TikaInputStream.get(new File(filename))) { + parser.parse(stream, handler, metadata, context); + } + } + + public static File tikaInputStreamGetFile(String filename) throws Exception { + try (InputStream stream = TikaInputStream.get(new File(filename))) { + TikaInputStream tikaInputStream = TikaInputStream.get(stream); + File file = tikaInputStream.getFile(); + return file; + } + } + + public static void useHtmlParser() throws Exception { + InputStream stream = new ByteArrayInputStream(new byte[0]); + ContentHandler handler = new DefaultHandler(); + Metadata metadata = new Metadata(); + ParseContext context = new ParseContext(); + Parser parser = new HtmlParser(); + parser.parse(stream, handler, metadata, context); + } + + public static void useCompositeParser() throws Exception { + InputStream stream = new ByteArrayInputStream(new byte[0]); + ContentHandler handler = new DefaultHandler(); + ParseContext context = new ParseContext(); + Map<MediaType, Parser> parsersByType = new HashMap<MediaType, Parser>(); + parsersByType.put(MediaType.parse("text/html"), new HtmlParser()); + parsersByType.put(MediaType.parse("application/xml"), new XMLParser()); + + CompositeParser parser = new CompositeParser(); + parser.setParsers(parsersByType); + parser.setFallback(new TXTParser()); + + Metadata metadata = new Metadata(); + metadata.set(Metadata.CONTENT_TYPE, "text/html"); + parser.parse(stream, handler, metadata, context); + } + + public static void useAutoDetectParser() throws Exception { + InputStream stream = new ByteArrayInputStream(new byte[0]); + ContentHandler handler = new DefaultHandler(); + Metadata metadata = new Metadata(); + ParseContext context = new ParseContext(); + Parser parser = new AutoDetectParser(); + parser.parse(stream, handler, metadata, context); + } + + public static void testTeeContentHandler(String filename) throws Exception { + InputStream stream = new ByteArrayInputStream(new byte[0]); + Metadata metadata = new Metadata(); + ParseContext context = new ParseContext(); + Parser parser = new AutoDetectParser(); + LinkContentHandler linkCollector = new LinkContentHandler(); + try (OutputStream output = new FileOutputStream(new File(filename))) { + ContentHandler handler = new TeeContentHandler( + new BodyContentHandler(output), linkCollector); + parser.parse(stream, handler, metadata, context); + } + } + + public static void testLocale() throws Exception { + InputStream stream = new ByteArrayInputStream(new byte[0]); + ContentHandler handler = new DefaultHandler(); + Metadata metadata = new Metadata(); + Parser parser = new AutoDetectParser(); + ParseContext context = new ParseContext(); + context.set(Locale.class, Locale.ENGLISH); + parser.parse(stream, handler, metadata, context); + } + + public static void testHtmlMapper() throws Exception { + InputStream stream = new ByteArrayInputStream(new byte[0]); + ContentHandler handler = new DefaultHandler(); + Metadata metadata = new Metadata(); + Parser parser = new AutoDetectParser(); + ParseContext context = new ParseContext(); + context.set(HtmlMapper.class, new IdentityHtmlMapper()); + parser.parse(stream, handler, metadata, context); + } + + public static void testCompositeDocument() throws Exception { + InputStream stream = new ByteArrayInputStream(new byte[0]); + ContentHandler handler = new DefaultHandler(); + Metadata metadata = new Metadata(); + Parser parser = new AutoDetectParser(); + ParseContext context = new ParseContext(); + context.set(Parser.class, new ParserDecorator(parser) { + private static final long serialVersionUID = 4424210691523343833L; + + @Override + public void parse(InputStream stream, ContentHandler handler, + Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException { + // custom processing of the component document + } + }); + parser.parse(stream, handler, metadata, context); + } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -25,87 +28,80 @@ import org.apache.tika.exception.TikaExc import org.apache.tika.metadata.Metadata; /** - * * Generates document summaries for corpus analysis in the Open Relevance * project. - * */ @SuppressWarnings("deprecation") public class TrecDocumentGenerator { - - public TrecDocument summarize(File file) throws FileNotFoundException, - IOException, TikaException { - Tika tika = new Tika(); - Metadata met = new Metadata(); - - String contents = tika.parseToString(new FileInputStream(file), met); - return new TrecDocument(met.get(Metadata.RESOURCE_NAME_KEY), contents, - met.getDate(Metadata.DATE)); - - } - - // copied from - // http://svn.apache.org/repos/asf/lucene/openrelevance/trunk/src/java/org/ - // apache/orp/util/TrecDocument.java - // since the ORP jars aren't published anywhere - class TrecDocument { - private CharSequence docname; - private CharSequence body; - private Date date; - - public TrecDocument(CharSequence docname, CharSequence body, Date date) { - this.docname = docname; - this.body = body; - this.date = date; - } - - public TrecDocument() { - } - - /** - * @return the docname - */ - public CharSequence getDocname() { - return docname; - } - - /** - * @param docname - * the docname to set - */ - public void setDocname(CharSequence docname) { - this.docname = docname; - } - - /** - * @return the body - */ - public CharSequence getBody() { - return body; - } - - /** - * @param body - * the body to set - */ - public void setBody(CharSequence body) { - this.body = body; - } - - /** - * @return the date - */ - public Date getDate() { - return date; - } - - /** - * @param date - * the date to set - */ - public void setDate(Date date) { - this.date = date; - } - } - + public TrecDocument summarize(File file) throws FileNotFoundException, + IOException, TikaException { + Tika tika = new Tika(); + Metadata met = new Metadata(); + + String contents = tika.parseToString(new FileInputStream(file), met); + return new TrecDocument(met.get(Metadata.RESOURCE_NAME_KEY), contents, + met.getDate(Metadata.DATE)); + + } + + // copied from + // http://svn.apache.org/repos/asf/lucene/openrelevance/trunk/src/java/org/ + // apache/orp/util/TrecDocument.java + // since the ORP jars aren't published anywhere + class TrecDocument { + private CharSequence docname; + private CharSequence body; + private Date date; + + public TrecDocument(CharSequence docname, CharSequence body, Date date) { + this.docname = docname; + this.body = body; + this.date = date; + } + + public TrecDocument() { + } + + /** + * @return the docname + */ + public CharSequence getDocname() { + return docname; + } + + /** + * @param docname the docname to set + */ + public void setDocname(CharSequence docname) { + this.docname = docname; + } + + /** + * @return the body + */ + public CharSequence getBody() { + return body; + } + + /** + * @param body the body to set + */ + public void setBody(CharSequence body) { + this.body = body; + } + + /** + * @return the date + */ + public Date getDate() { + return date; + } + + /** + * @param date the date to set + */ + public void setDate(Date date) { + this.date = date; + } + } } Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java (original) +++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -14,34 +17,29 @@ package org.apache.tika.example; -//JDK imports import java.io.IOException; import java.util.Collections; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; /** - * - * * Example code listing from Chapter 1. Lists a zip file's entries using JDK's * standard APIs. - * */ public class ZipListFiles { - public static void main(String[] args) throws Exception { - if (args.length > 0) { - for (String file : args) { - System.out.println("Files in " + file + " file:"); - listZipEntries(file); - } - } - } - - public static void listZipEntries(String path) throws IOException { - ZipFile zip = new ZipFile(path); - for (ZipEntry entry : Collections.list(zip.entries())) { - System.out.println(entry.getName()); - } - } + public static void main(String[] args) throws Exception { + if (args.length > 0) { + for (String file : args) { + System.out.println("Files in " + file + " file:"); + listZipEntries(file); + } + } + } -} \ No newline at end of file + public static void listZipEntries(String path) throws IOException { + ZipFile zip = new ZipFile(path); + for (ZipEntry entry : Collections.list(zip.entries())) { + System.out.println(entry.getName()); + } + } +} Modified: tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java?rev=1704368&r1=1704367&r2=1704368&view=diff ============================================================================== --- tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java (original) +++ tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java Mon Sep 21 17:19:26 2015 @@ -1,9 +1,12 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -14,18 +17,14 @@ package org.apache.tika.example; +import org.junit.Test; import static org.junit.Assert.assertEquals; -import org.junit.Test; - @SuppressWarnings("deprecation") public class AdvancedTypeDetectorTest { - - @Test - public void testDetectWithCustomConfig() throws Exception { - assertEquals("application/xml", - AdvancedTypeDetector.detectWithCustomConfig("pom.xml")); - } - + @Test + public void testDetectWithCustomConfig() throws Exception { + assertEquals("application/xml", AdvancedTypeDetector.detectWithCustomConfig("pom.xml")); + } }
