Author: jukka
Date: Wed Mar 26 09:33:33 2008
New Revision: 641384
URL: http://svn.apache.org/viewvc?rev=641384&view=rev
Log:
TIKA-97: Tika GUI
- New tabs for different views of the parser output
- Improved drag-and-drop support
- Improved error handling
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/gui/ParsingTransferHandler.java
incubator/tika/trunk/src/main/java/org/apache/tika/gui/TikaGUI.java
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/gui/ParsingTransferHandler.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/gui/ParsingTransferHandler.java?rev=641384&r1=641383&r2=641384&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/gui/ParsingTransferHandler.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/gui/ParsingTransferHandler.java Wed Mar 26 09:33:33 2008
@@ -21,41 +21,24 @@
import java.awt.datatransfer.Transferable;
import java.awt.event.InputEvent;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStream;
-import java.io.StringWriter;
import java.util.List;
import javax.swing.Icon;
import javax.swing.JComponent;
-import javax.swing.JEditorPane;
import javax.swing.TransferHandler;
-import javax.swing.table.DefaultTableModel;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.sax.SAXTransformerFactory;
-import javax.xml.transform.sax.TransformerHandler;
-import javax.xml.transform.stream.StreamResult;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.Parser;
-public class ParsingTransferHandler extends TransferHandler {
-
- private final Parser parser = new AutoDetectParser();
+/**
+ * Utility class that turns drag-and-drop events into Tika parse requests.
+ */
+class ParsingTransferHandler extends TransferHandler {
private final TransferHandler delegate;
- private final DefaultTableModel table;
-
- private final JEditorPane editor;
+ private final TikaGUI tika;
- public ParsingTransferHandler(
- TransferHandler delegate,
- DefaultTableModel table, JEditorPane editor) {
+ public ParsingTransferHandler(TransferHandler delegate, TikaGUI tika) {
this.delegate = delegate;
- this.table = table;
- this.editor = editor;
+ this.tika = tika;
}
public boolean canImport(JComponent component, DataFlavor[] flavors) {
@@ -73,37 +56,11 @@
List<?> files = (List<?>)
transferable.getTransferData(DataFlavor.javaFileListFlavor);
for (Object file : files) {
- importFile((File) file);
+ tika.importFile((File) file);
}
return true;
} catch (Exception e) {
- e.printStackTrace();
return false;
- }
- }
-
- private void importFile(File file) throws Exception {
- InputStream input = new FileInputStream(file);
- try {
- StringWriter writer = new StringWriter();
- Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName());
-
- SAXTransformerFactory factory = (SAXTransformerFactory)
- SAXTransformerFactory.newInstance();
- TransformerHandler handler = factory.newTransformerHandler();
- handler.getTransformer().setOutputProperty(
- OutputKeys.METHOD, "html");
- handler.setResult(new StreamResult(writer));
- parser.parse(input, handler, metadata);
-
- table.setRowCount(0);
- for (String name : metadata.names()) {
- table.addRow(new Object[] { name, metadata.get(name) });
- }
- editor.setText(writer.toString());
- } finally {
- input.close();
}
}
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/gui/TikaGUI.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/gui/TikaGUI.java?rev=641384&r1=641383&r2=641384&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/gui/TikaGUI.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/gui/TikaGUI.java Wed Mar 26 09:33:33 2008
@@ -16,52 +16,212 @@
*/
package org.apache.tika.gui;
+import java.awt.Dimension;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.Arrays;
+
import javax.swing.JEditorPane;
import javax.swing.JFrame;
+import javax.swing.JOptionPane;
import javax.swing.JScrollPane;
import javax.swing.JTabbedPane;
-import javax.swing.JTable;
+import javax.swing.ProgressMonitorInputStream;
import javax.swing.SwingUtilities;
import javax.swing.UIManager;
-import javax.swing.table.DefaultTableModel;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.sax.SAXTransformerFactory;
+import javax.xml.transform.sax.TransformerHandler;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.TeeContentHandler;
+import org.apache.tika.sax.WriteOutContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.tika.sax.xpath.MatchingContentHandler;
+import org.apache.tika.sax.xpath.XPathParser;
+import org.xml.sax.ContentHandler;
/**
- * Simple Swing GUI for Apache Tika. Opens a window with tabs for
- * "Text content" and "Metadata". You can drag and drop files on top
+ * Simple Swing GUI for Apache Tika. You can drag and drop files on top
* of the window to have them parsed.
*/
-public class TikaGUI implements Runnable {
+public class TikaGUI extends JFrame {
- public void run() {
- JFrame frame = new JFrame("Apache Tika");
- frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+ /**
+ * Main method. Sets the Swing look and feel to the operating system
+ * settings, and starts the Tika GUI with an {@link AutoDetectParser}
+ * instance as the default parser.
+ *
+ * @param args ignored
+ * @throws Exception if an error occurs
+ */
+ public static void main(String[] args) throws Exception {
+ UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
+ SwingUtilities.invokeLater(new Runnable() {
+ public void run() {
+ new TikaGUI(new AutoDetectParser()).setVisible(true);
+ }
+ });
+ }
- JTabbedPane tabs = new JTabbedPane();
- frame.add(tabs);
+ /**
+ * Configured parser instance.
+ */
+ private final Parser parser;
+
+ /**
+ * Tabs in the Tika GUI window.
+ */
+ private final JTabbedPane tabs;
+
+ /**
+ * Formatted XHTML output.
+ */
+ private final JEditorPane html;
+
+ /**
+ * Plain text output.
+ */
+ private final JEditorPane text;
+
+ /**
+ * Raw XHTML source.
+ */
+ private final JEditorPane xml;
+
+ /**
+ * Document metadata.
+ */
+ private final JEditorPane metadata;
+
+ /**
+ * Parsing errors.
+ */
+ private final JEditorPane errors;
+
+ public TikaGUI(Parser parser) {
+ super("Apache Tika");
+ setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+
+ tabs = new JTabbedPane();
+ add(tabs);
+
+ html = createEditor("Formatted text", "text/html");
+ text = createEditor("Plain text", "text/plain");
+ xml = createEditor("Structured text", "text/plain");
+ metadata = createEditor("Metadata", "text/plain");
+ errors = createEditor("Errors", "text/plain");
- JEditorPane editor = new JEditorPane();
- editor.setContentType("text/html");
- editor.setText("<center>Drop file here</center>");
- tabs.add("Text content", new JScrollPane(editor));
-
- DefaultTableModel model = new DefaultTableModel(
- new Object[][] { { "", "" } },
- new Object[] { "Name", "Value" });
- JTable table = new JTable(model);
- tabs.addTab("Metadata", new JScrollPane(table));
+ setPreferredSize(new Dimension(500, 400));
+ pack();
+
+ this.parser = parser;
+ }
+
+ public void importFile(File file) throws IOException {
+ InputStream input = new FileInputStream(file);
+ try {
+ StringWriter htmlBuffer = new StringWriter();
+ StringWriter textBuffer = new StringWriter();
+ StringWriter xmlBuffer = new StringWriter();
+ StringBuilder metadataBuffer = new StringBuilder();
+
+ ContentHandler handler = new TeeContentHandler(
+ getHtmlHandler(htmlBuffer),
+ getTextContentHandler(textBuffer),
+ getXmlContentHandler(xmlBuffer));
+ Metadata md = new Metadata();
+ md.set(Metadata.RESOURCE_NAME_KEY, file.getName());
+
+ input = new ProgressMonitorInputStream(
+ this, "Parsing file " + file.getName(), input);
+ parser.parse(input, handler, md);
+
+ String[] names = md.names();
+ Arrays.sort(names);
+ for (String name : names) {
+ metadataBuffer.append(name);
+ metadataBuffer.append(": ");
+ metadataBuffer.append(md.get(name));
+ metadataBuffer.append("\n");
+ }
+
+ setText(errors, "");
+ setText(metadata, metadataBuffer.toString());
+ setText(xml, xmlBuffer.toString());
+ setText(text, textBuffer.toString());
+ setText(html, htmlBuffer.toString());
+ tabs.setSelectedIndex(0);
+ } catch (Exception e) {
+ StringWriter writer = new StringWriter();
+ e.printStackTrace(new PrintWriter(writer));
+ setText(errors, writer.toString());
+ setText(metadata, "");
+ setText(xml, "");
+ setText(text, "");
+ setText(html, "");
+ tabs.setSelectedIndex(tabs.getTabCount() - 1);
+ JOptionPane.showMessageDialog(
+ this,
+ "Apache Tika was unable to parse the file "
+ + file.getName() + ".\n See the errors tab for"
+ + " the detailed stack trace of this error.",
+ "Parse error",
+ JOptionPane.ERROR_MESSAGE);
+ } finally {
+ input.close();
+ }
+ }
- table.setTransferHandler(new ParsingTransferHandler(
- table.getTransferHandler(), model, editor));
+ private JEditorPane createEditor(String title, String type) {
+ JEditorPane editor = new JEditorPane();
+ editor.setContentType(type);
editor.setTransferHandler(new ParsingTransferHandler(
- editor.getTransferHandler(), model, editor));
+ editor.getTransferHandler(), this));
+ tabs.add(title, new JScrollPane(editor));
+ return editor;
+ }
- frame.pack();
- frame.setVisible(true);
+ private void setText(JEditorPane editor, String text) {
+ editor.setText(text);
+ editor.setCaretPosition(0);
}
- public static void main(String[] args) throws Exception {
- UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
- SwingUtilities.invokeLater(new TikaGUI());
+ private ContentHandler getHtmlHandler(Writer writer)
+ throws TransformerConfigurationException {
+ SAXTransformerFactory factory = (SAXTransformerFactory)
+ SAXTransformerFactory.newInstance();
+ TransformerHandler handler = factory.newTransformerHandler();
+ handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
+ handler.setResult(new StreamResult(writer));
+ return handler;
+ }
+
+ private ContentHandler getTextContentHandler(Writer writer) {
+ XPathParser parser =
+ new XPathParser("xhtml", XHTMLContentHandler.XHTML);
+ return new MatchingContentHandler(
+ new WriteOutContentHandler(writer),
+ parser.parse("/xhtml:html/xhtml:body//text()"));
+ }
+
+ private ContentHandler getXmlContentHandler(Writer writer)
+ throws TransformerConfigurationException {
+ SAXTransformerFactory factory = (SAXTransformerFactory)
+ SAXTransformerFactory.newInstance();
+ TransformerHandler handler = factory.newTransformerHandler();
+ handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
+ handler.setResult(new StreamResult(writer));
+ return handler;
}
}