Author: tallison
Date: Fri Feb 13 12:43:56 2015
New Revision: 1659545
URL: http://svn.apache.org/r1659545
Log:
TIKA-1511, with new files added...doh
Added:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3Parser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3TableReader.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
Added:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java?rev=1659545&view=auto
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java
(added)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java
Fri Feb 13 12:43:56 2015
@@ -0,0 +1,188 @@
+package org.apache.tika.parser.jdbc;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+import org.apache.tika.io.IOExceptionWithCause;
+import org.apache.tika.metadata.Database;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Abstract class that handles iterating through tables within a database.
+ */
+abstract class AbstractDBParser extends AbstractParser {
+
+ private final static byte[] EMPTY_BYTE_ARR = new byte[0];
+
+ private Connection connection;
+
+ @Override
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ return null;
+ }
+
+ @Override
+ public void parse(InputStream stream, ContentHandler handler, Metadata
metadata, ParseContext context) throws IOException, SAXException, TikaException
{
+ connection = getConnection(stream, metadata, context);
+ XHTMLContentHandler xHandler = null;
+ List<String> tableNames = null;
+ try {
+ tableNames = getTableNames(connection, metadata, context);
+ } catch (SQLException e) {
+ throw new IOExceptionWithCause(e);
+ }
+ for (String tableName : tableNames) {
+ //add table names to parent metadata
+ metadata.add(Database.TABLE_NAME, tableName);
+ }
+ xHandler = new XHTMLContentHandler(handler, metadata);
+ xHandler.startDocument();
+
+ try {
+ for (String tableName : tableNames) {
+ JDBCTableReader tableReader = getTableReader(connection,
tableName, context);
+ xHandler.startElement("table", "name",
tableReader.getTableName());
+ xHandler.startElement("thead");
+ xHandler.startElement("tr");
+ for (String header : tableReader.getHeaders()) {
+ xHandler.startElement("th");
+ xHandler.characters(header);
+ xHandler.endElement("th");
+ }
+ xHandler.endElement("tr");
+ xHandler.endElement("thead");
+ xHandler.startElement("tbody");
+ while (tableReader.nextRow(xHandler, context)) {
+ //no-op
+ }
+ xHandler.endElement("tbody");
+ xHandler.endElement("table");
+ }
+ } finally {
+ if (xHandler != null) {
+ xHandler.endDocument();
+ }
+ try {
+ close();
+ } catch (SQLException e) {
+ //swallow
+ }
+ }
+ }
+
+ protected static EmbeddedDocumentExtractor
getEmbeddedDocumentExtractor(ParseContext context) {
+ return context.get(EmbeddedDocumentExtractor.class,
+ new ParsingEmbeddedDocumentExtractor(context));
+ }
+
+ /**
+ * Override this for any special handling of closing the connection.
+ *
+ * @throws java.sql.SQLException
+ * @throws java.io.IOException
+ */
+ protected void close() throws SQLException, IOException {
+ connection.close();
+ }
+
+ /**
+ * Override this for special configuration of the connection, such as
limiting
+ * the number of rows to be held in memory.
+ *
+ * @param stream stream to use
+ * @param metadata metadata that could be used in parameterizing the
connection
+ * @param context parsecontext that could be used in parameterizing the
connection
+ * @return connection
+ * @throws java.io.IOException
+ * @throws org.apache.tika.exception.TikaException
+ */
+ protected Connection getConnection(InputStream stream, Metadata metadata,
ParseContext context) throws IOException, TikaException {
+ String connectionString = getConnectionString(stream, metadata,
context);
+
+ Connection connection = null;
+ try {
+ Class.forName(getJDBCClassName());
+ } catch (ClassNotFoundException e) {
+ throw new TikaException(e.getMessage());
+ }
+ try{
+ connection = DriverManager.getConnection(connectionString);
+ } catch (SQLException e) {
+ throw new IOExceptionWithCause(e);
+ }
+ return connection;
+ }
+
+ /**
+ * Implement for db specific connection information, e.g.
"jdbc:sqlite:/docs/mydb.db"
+ * <p>
+ * Include any optimization settings, user name, password, etc.
+ * <p>
+ * @param stream stream for processing
+ * @param metadata metadata might be useful in determining connection info
+ * @param parseContext context to use to help create connectionString
+ * @return connection string to be used by {@link #getConnection}.
+ * @throws java.io.IOException
+ */
+ abstract protected String getConnectionString(InputStream stream,
+ Metadata metadata, ParseContext
parseContext) throws IOException;
+
+ /**
+ * JDBC class name, e.g. org.sqlite.JDBC
+ * @return jdbc class name
+ */
+ abstract protected String getJDBCClassName();
+
+ /**
+ *
+ * Returns the names of the tables to process
+ *
+ * @param connection Connection to use to make the sql call(s) to get the
names of the tables
+ * @param metadata Metadata to use (potentially) in decision about which
tables to extract
+ * @param context ParseContext to use (potentially) in decision about
which tables to extract
+ * @return
+ * @throws java.sql.SQLException
+ */
+ abstract protected List<String> getTableNames(Connection connection,
Metadata metadata,
+ ParseContext context) throws
SQLException;
+
+ /**
+ * Given a connection and a table name, return the JDBCTableReader for
this db.
+ *
+ * @param connection
+ * @param tableName
+ * @return
+ */
+ abstract protected JDBCTableReader getTableReader(Connection connection,
String tableName, ParseContext parseContext);
+
+}
Added:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java?rev=1659545&view=auto
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
(added)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
Fri Feb 13 12:43:56 2015
@@ -0,0 +1,300 @@
+package org.apache.tika.parser.jdbc;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Blob;
+import java.sql.Clob;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Types;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.io.FilenameUtils;
+import org.apache.tika.io.IOExceptionWithCause;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Database;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeType;
+import org.apache.tika.mime.MimeTypeException;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * General base class to iterate through rows of a JDBC table.
+ * <p>
+ * The query is run lazily on the first call to {@link #getHeaders()} or
+ * {@link #nextRow}. Each row is written as an XHTML table row; BLOB and CLOB
+ * cells are handed to the EmbeddedDocumentExtractor as embedded documents.
+ */
+class JDBCTableReader {
+
+    private final static Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
+    private final Connection connection;
+    private final String tableName;
+    //maximum number of characters read from a single CLOB
+    int maxClobLength = 1000000;
+    //current result set; null until the first query has been run
+    ResultSet results = null;
+    //number of rows written so far
+    int rows = 0;
+    private TikaConfig tikaConfig = null;
+    private Detector detector = null;
+    private MimeTypes mimeTypes = null;
+
+    /**
+     * @param connection connection used to query the table
+     * @param tableName name of the table to read
+     * @param context parse context; a TikaConfig is taken from it if present
+     */
+    public JDBCTableReader(Connection connection, String tableName, ParseContext context) {
+        this.connection = connection;
+        this.tableName = tableName;
+        this.tikaConfig = context.get(TikaConfig.class);
+    }
+
+    /**
+     * Writes the next row to the handler.
+     *
+     * @param handler handler that receives the row
+     * @param context parse context used for embedded documents
+     * @return false if there are no more rows, true if a row was written
+     * @throws IOException on IO problems or if a SQLException is thrown
+     * @throws SAXException if the handler fails
+     */
+    public boolean nextRow(ContentHandler handler, ParseContext context) throws IOException, SAXException {
+        //lazy initialization
+        if (results == null) {
+            reset();
+        }
+        try {
+            if (!results.next()) {
+                return false;
+            }
+        } catch (SQLException e) {
+            throw new IOExceptionWithCause(e);
+        }
+        try {
+            ResultSetMetaData meta = results.getMetaData();
+            handler.startElement(XHTMLContentHandler.XHTML, "tr", "tr", EMPTY_ATTRIBUTES);
+            for (int i = 1; i <= meta.getColumnCount(); i++) {
+                handler.startElement(XHTMLContentHandler.XHTML, "td", "td", EMPTY_ATTRIBUTES);
+                handleCell(meta, i, handler, context);
+                handler.endElement(XHTMLContentHandler.XHTML, "td", "td");
+            }
+            handler.endElement(XHTMLContentHandler.XHTML, "tr", "tr");
+        } catch (SQLException e) {
+            throw new IOExceptionWithCause(e);
+        }
+        rows++;
+        return true;
+    }
+
+    /**
+     * Dispatches one cell of the current row according to its java.sql.Types value.
+     */
+    private void handleCell(ResultSetMetaData rsmd, int i, ContentHandler handler, ParseContext context) throws SQLException, IOException, SAXException {
+        switch (rsmd.getColumnType(i)) {
+            case Types.BLOB:
+                handleBlob(tableName, rsmd.getColumnName(i), rows, results, i, handler, context);
+                break;
+            case Types.CLOB:
+                handleClob(tableName, rsmd.getColumnName(i), rows, results, i, handler, context);
+                break;
+            case Types.BOOLEAN:
+                handleBoolean(results.getBoolean(i), handler);
+                break;
+            case Types.DATE:
+                handleDate(results, i, handler);
+                break;
+            case Types.TIMESTAMP:
+                handleTimeStamp(results, i, handler);
+                break;
+            case Types.INTEGER:
+                handleInteger(rsmd.getColumnTypeName(i), results, i, handler);
+                break;
+            case Types.FLOAT:
+                //this is necessary to handle rounding issues in presentation
+                //Should we just use getString(i)?
+                addAllCharacters(Float.toString(results.getFloat(i)), handler);
+                break;
+            case Types.DOUBLE:
+                addAllCharacters(Double.toString(results.getDouble(i)), handler);
+                break;
+            default:
+                addAllCharacters(results.getString(i), handler);
+                break;
+        }
+    }
+
+    /**
+     * Returns the column names of the table, in column order.
+     *
+     * @return column names
+     * @throws IOException if the query fails
+     */
+    public List<String> getHeaders() throws IOException {
+        List<String> headers = new LinkedList<String>();
+        //lazy initialization
+        if (results == null) {
+            reset();
+        }
+        try {
+            ResultSetMetaData meta = results.getMetaData();
+            for (int i = 1; i <= meta.getColumnCount(); i++) {
+                headers.add(meta.getColumnName(i));
+            }
+        } catch (SQLException e) {
+            throw new IOExceptionWithCause(e);
+        }
+        return headers;
+    }
+
+    /**
+     * Writes an INTEGER cell as its decimal string.
+     * <p>
+     * NOTE: the value is read with getInt, which per the JDBC contract yields
+     * 0 for SQL NULL.
+     *
+     * @param columnTypeName db specific type name; unused here, available to subclasses
+     */
+    protected void handleInteger(String columnTypeName, ResultSet rs, int columnIndex, ContentHandler handler) throws SQLException, SAXException {
+        addAllCharacters(Integer.toString(rs.getInt(columnIndex)), handler);
+    }
+
+    private void handleBoolean(boolean aBoolean, ContentHandler handler) throws SAXException {
+        addAllCharacters(Boolean.toString(aBoolean), handler);
+    }
+
+
+    /**
+     * Hands a CLOB cell to the EmbeddedDocumentExtractor as a UTF-8 text
+     * document, reading at most {@link #maxClobLength} characters.
+     * A SQL NULL CLOB is skipped.
+     */
+    protected void handleClob(String tableName, String columnName, int rowNum,
+                              ResultSet resultSet, int columnIndex,
+                              ContentHandler handler, ParseContext context) throws SQLException, IOException, SAXException {
+        Clob clob = resultSet.getClob(columnIndex);
+        if (clob == null) {
+            //SQL NULL: nothing to extract
+            return;
+        }
+        long clobLength = clob.length();
+        boolean truncated = clobLength > maxClobLength;
+        int readSize = (int) Math.min(clobLength, (long) maxClobLength);
+
+        Metadata m = new Metadata();
+        m.set(Database.TABLE_NAME, tableName);
+        m.set(Database.COLUMN_NAME, columnName);
+        m.set(Database.PREFIX + "ROW_NUM", Integer.toString(rowNum));
+        m.set(Database.PREFIX + "IS_CLOB", "true");
+        m.set(Database.PREFIX + "CLOB_LENGTH", Long.toString(clobLength));
+        m.set(Database.PREFIX + "IS_CLOB_TRUNCATED", Boolean.toString(truncated));
+        m.set(Metadata.CONTENT_TYPE, "text/plain; charset=UTF-8");
+        m.set(Metadata.CONTENT_LENGTH, Integer.toString(readSize));
+        m.set(TikaMetadataKeys.RESOURCE_NAME_KEY,
+                //just in case something screwy is going on with the column name
+                FilenameUtils.normalize(FilenameUtils.getName(columnName + "_" + rowNum + ".txt")));
+
+        //is there a more efficient way to go from a Reader to an InputStream?
+        //Clob positions are 1-based: the first character is at position 1
+        String s = clob.getSubString(1, readSize);
+        EmbeddedDocumentExtractor ex = AbstractDBParser.getEmbeddedDocumentExtractor(context);
+        ex.parseEmbedded(new ByteArrayInputStream(s.getBytes("UTF-8")), handler, m, true);
+    }
+
+    /**
+     * Hands a BLOB cell to the EmbeddedDocumentExtractor, wrapped in a span
+     * element that records the column name and row number. The media type is
+     * detected from the content and used to pick a file extension for the
+     * resource name. A cell for which no stream is available is skipped.
+     */
+    protected void handleBlob(String tableName, String columnName, int rowNum, ResultSet resultSet, int columnIndex,
+                              ContentHandler handler, ParseContext context) throws SQLException, IOException, SAXException {
+        Metadata m = new Metadata();
+        m.set(Database.TABLE_NAME, tableName);
+        m.set(Database.COLUMN_NAME, columnName);
+        m.set(Database.PREFIX + "ROW_NUM", Integer.toString(rowNum));
+        m.set(Database.PREFIX + "IS_BLOB", "true");
+        EmbeddedDocumentExtractor ex = AbstractDBParser.getEmbeddedDocumentExtractor(context);
+        //no Blob is fetched here because not every driver supports getBlob();
+        //getInputStreamFromBlob decides how to read the cell
+        InputStream is = getInputStreamFromBlob(resultSet, columnIndex, null, m);
+        if (is == null) {
+            //SQL NULL: nothing to extract
+            return;
+        }
+        try {
+            is = TikaInputStream.get(is);
+
+            AttributesImpl attrs = new AttributesImpl();
+            attrs.addAttribute("", "type", "type", "CDATA", "blob");
+            attrs.addAttribute("", "column_name", "column_name", "CDATA", columnName);
+            attrs.addAttribute("", "row_number", "row_number", "CDATA", Integer.toString(rowNum));
+            handler.startElement("", "span", "span", attrs);
+            MediaType mediaType = getDetector().detect(is, new Metadata());
+            String extension = "";
+            try {
+                MimeType mimeType = getMimeTypes().forName(mediaType.toString());
+                m.set(Metadata.CONTENT_TYPE, mimeType.toString());
+                extension = mimeType.getExtension();
+            } catch (MimeTypeException e) {
+                //swallow: fall back to no content type and no extension
+            }
+            m.set(TikaMetadataKeys.RESOURCE_NAME_KEY,
+                    //just in case something screwy is going on with the column name
+                    FilenameUtils.normalize(FilenameUtils.getName(columnName + "_" + rowNum + extension)));
+
+            ex.parseEmbedded(is, handler, m, true);
+
+        } finally {
+            IOUtils.closeQuietly(is);
+        }
+        handler.endElement("", "span", "span");
+    }
+
+    /**
+     * Returns a stream over the content of a BLOB cell.
+     *
+     * @param resultSet result set positioned on the current row
+     * @param columnIndex index of the BLOB column
+     * @param blob blob to read; may be null, in which case it is fetched from the result set
+     * @param metadata metadata handed to TikaInputStream
+     * @return stream over the blob, or null if the cell is SQL NULL
+     * @throws SQLException if the blob cannot be read
+     */
+    protected InputStream getInputStreamFromBlob(ResultSet resultSet, int columnIndex, Blob blob, Metadata metadata) throws SQLException {
+        Blob source = (blob != null) ? blob : resultSet.getBlob(columnIndex);
+        if (source == null) {
+            return null;
+        }
+        return TikaInputStream.get(source, metadata);
+    }
+
+    protected void handleDate(ResultSet resultSet, int columnIndex, ContentHandler handler) throws SAXException, SQLException {
+        addAllCharacters(resultSet.getString(columnIndex), handler);
+    }
+
+    protected void handleTimeStamp(ResultSet resultSet, int columnIndex, ContentHandler handler) throws SAXException, SQLException {
+        addAllCharacters(resultSet.getString(columnIndex), handler);
+    }
+
+    /**
+     * Sends the string to the handler as character data.
+     *
+     * @param s text to write; null (e.g. from a SQL NULL cell) writes nothing
+     */
+    protected void addAllCharacters(String s, ContentHandler handler) throws SAXException {
+        if (s == null) {
+            return;
+        }
+        char[] chars = s.toCharArray();
+        handler.characters(chars, 0, chars.length);
+    }
+
+    /**
+     * Closes any open result set (and the statement that produced it), then
+     * re-runs the query and resets the row counter.
+     *
+     * @throws IOException if the query fails
+     */
+    void reset() throws IOException {
+
+        if (results != null) {
+            Statement previous = null;
+            try {
+                previous = results.getStatement();
+            } catch (SQLException e) {
+                //swallow: the statement is unavailable, e.g. result set already closed
+            }
+            closeQuietly(results, previous);
+            results = null;
+        }
+
+        //NOTE(review): the table name is concatenated unquoted into the SQL; names
+        //that need quoting, or hostile names from an untrusted db, are not handled here
+        String sql = "SELECT * from " + tableName;
+        Statement st = null;
+        try {
+            st = connection.createStatement();
+            results = st.executeQuery(sql);
+        } catch (SQLException e) {
+            //do not leak the statement if the query fails
+            closeQuietly(null, st);
+            throw new IOExceptionWithCause(e);
+        }
+        rows = 0;
+    }
+
+    /**
+     * Closes the result set and then the statement, ignoring nulls and SQLExceptions.
+     */
+    private static void closeQuietly(ResultSet rs, Statement st) {
+        if (rs != null) {
+            try {
+                rs.close();
+            } catch (SQLException e) {
+                //swallow
+            }
+        }
+        if (st != null) {
+            try {
+                st.close();
+            } catch (SQLException e) {
+                //swallow
+            }
+        }
+    }
+
+    public String getTableName() {
+        return tableName;
+    }
+
+
+    /**
+     * @return the TikaConfig from the parse context, or the default config if none was supplied
+     */
+    protected TikaConfig getTikaConfig() {
+        if (tikaConfig == null) {
+            tikaConfig = TikaConfig.getDefaultConfig();
+        }
+        return tikaConfig;
+    }
+
+    /**
+     * @return the detector of {@link #getTikaConfig()}, cached after the first call
+     */
+    protected Detector getDetector() {
+        if (detector == null) {
+            detector = getTikaConfig().getDetector();
+        }
+        return detector;
+    }
+
+    /**
+     * @return the mime repository of {@link #getTikaConfig()}, cached after the first call
+     */
+    protected MimeTypes getMimeTypes() {
+        if (mimeTypes == null) {
+            mimeTypes = getTikaConfig().getMimeRepository();
+        }
+        return mimeTypes;
+    }
+
+}
Added:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java?rev=1659545&view=auto
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
(added)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
Fri Feb 13 12:43:56 2015
@@ -0,0 +1,116 @@
+package org.apache.tika.parser.jdbc;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.tika.io.IOExceptionWithCause;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.sqlite.SQLiteConfig;
+
+/**
+ * This is the implementation of the db parser for SQLite.
+ * <p>
+ * This parser is internal only; it should not be registered in the services
+ * file or configured in the TikaConfig xml file.
+ */
+class SQLite3DBParser extends AbstractDBParser {
+
+ protected static final String SQLITE_CLASS_NAME = "org.sqlite.JDBC";
+
+ /**
+ *
+ * @param context context
+ * @return null (always)
+ */
+ @Override
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ return null;
+ }
+
+ @Override
+ protected Connection getConnection(InputStream stream, Metadata metadata,
ParseContext context) throws IOException {
+ String connectionString = getConnectionString(stream, metadata,
context);
+
+ Connection connection = null;
+ try {
+ Class.forName(getJDBCClassName());
+ } catch (ClassNotFoundException e) {
+ throw new IOExceptionWithCause(e);
+ }
+ try{
+ SQLiteConfig config = new SQLiteConfig();
+
+ //good habit, but effectively meaningless here
+ config.setReadOnly(true);
+ connection = config.createConnection(connectionString);
+
+ } catch (SQLException e) {
+ throw new IOException(e.getMessage());
+ }
+ return connection;
+ }
+
+ @Override
+ protected String getConnectionString(InputStream is, Metadata metadata,
ParseContext context) throws IOException {
+ File dbFile = TikaInputStream.get(is).getFile();
+ return "jdbc:sqlite:"+dbFile.getAbsolutePath();
+ }
+
+ @Override
+ protected String getJDBCClassName() {
+ return SQLITE_CLASS_NAME;
+ }
+
+ @Override
+ protected List<String> getTableNames(Connection connection, Metadata
metadata,
+ ParseContext context) throws
SQLException {
+ List<String> tableNames = new LinkedList<String>();
+
+ Statement st = null;
+ try {
+ st = connection.createStatement();
+ String sql = "SELECT name FROM sqlite_master WHERE type='table'";
+ ResultSet rs = st.executeQuery(sql);
+
+ while (rs.next()) {
+ tableNames.add(rs.getString(1));
+ }
+ } finally {
+ if (st != null)
+ st.close();
+ }
+ return tableNames;
+ }
+
+ @Override
+ public JDBCTableReader getTableReader(Connection connection, String
tableName, ParseContext context) {
+ return new SQLite3TableReader(connection, tableName, context);
+ }
+}
Added:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3Parser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3Parser.java?rev=1659545&view=auto
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3Parser.java
(added)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3Parser.java
Fri Feb 13 12:43:56 2015
@@ -0,0 +1,78 @@
+package org.apache.tika.parser.jdbc;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * This is the main class for parsing SQLite3 files. When {@link #parse} is called,
+ * this creates a new {@link org.apache.tika.parser.jdbc.SQLite3DBParser}.
+ * <p>
+ * Given potential conflicts of native libraries in web servers, users will
+ * need to add org.xerial's sqlite-jdbc jar to the class path for this parser
+ * to work. For development and testing, this jar is specified in tika-parsers'
+ * pom.xml, but it is currently set to "provided."
+ * <p>
+ * Note that this family of jdbc parsers is designed to treat each CLOB and each BLOB
+ * as embedded documents.
+ *
+ */
+public class SQLite3Parser extends AbstractParser {
+    /** Serial version UID */
+    private static final long serialVersionUID = -752276948656079347L;
+
+    private static final MediaType MEDIA_TYPE = MediaType.application("x-sqlite3");
+
+    //decided once per instance in the constructor, depending on driver availability
+    private final Set<MediaType> SUPPORTED_TYPES;
+
+    /**
+     * Checks to see if class is available for org.sqlite.JDBC.
+     * <p>
+     * If not, this class will return an empty set for getSupportedTypes()
+     */
+    public SQLite3Parser() {
+        Set<MediaType> tmp;
+        try {
+            Class.forName(SQLite3DBParser.SQLITE_CLASS_NAME);
+            tmp = Collections.singleton(MEDIA_TYPE);
+        } catch (ClassNotFoundException e) {
+            //driver not on the class path: advertise no types;
+            //typed emptySet() avoids the unchecked raw EMPTY_SET
+            tmp = Collections.<MediaType>emptySet();
+        }
+        SUPPORTED_TYPES = tmp;
+    }
+
+    /**
+     * @param context parse context (not used)
+     * @return the sqlite3 media type if the driver is available, otherwise an empty set
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Delegates to a fresh {@link SQLite3DBParser}; a new instance is created
+     * for every call.
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
+        SQLite3DBParser p = new SQLite3DBParser();
+        p.parse(stream, handler, metadata, context);
+    }
+}
Added:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3TableReader.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3TableReader.java?rev=1659545&view=auto
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3TableReader.java
(added)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3TableReader.java
Fri Feb 13 12:43:56 2015
@@ -0,0 +1,110 @@
+package org.apache.tika.parser.jdbc;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Blob;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Locale;
+
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+
+
+/**
+ * Concrete class for SQLLite table parsing. This overrides
+ * column type handling from JDBCRowHandler.
+ * <p>
+ * This class is not designed to be thread safe (because of DateFormat)!
+ * Need to call a new instance for each parse, as AbstractDBParser does.
+ * <p>
+ * For now, this silently skips cells of type CLOB, because xerial's jdbc
connector
+ * does not currently support them.
+ */
+class SQLite3TableReader extends JDBCTableReader {
+
+
+ DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
+
+ public SQLite3TableReader(Connection connection, String tableName,
ParseContext context) {
+ super(connection, tableName, context);
+ }
+
+
+ /**
+ * No-op for now in {@link SQLite3TableReader}.
+ *
+ * @param tableName
+ * @param fieldName
+ * @param rowNum
+ * @param resultSet
+ * @param columnIndex
+ * @param handler
+ * @param context
+ * @throws java.sql.SQLException
+ * @throws java.io.IOException
+ * @throws org.xml.sax.SAXException
+ */
+ @Override
+ protected void handleClob(String tableName, String fieldName, int rowNum,
+ ResultSet resultSet, int columnIndex,
+ ContentHandler handler, ParseContext context)
throws SQLException, IOException, SAXException {
+ //no-op for now.
+ }
+
+ /**
+ * The jdbc connection to Sqlite does not yet implement blob, have to
getBytes().
+ *
+ * @param resultSet resultSet
+ * @param columnIndex columnIndex for blob
+ * @return
+ * @throws java.sql.SQLException
+ */
+ @Override
+ protected InputStream getInputStreamFromBlob(ResultSet resultSet, int
columnIndex, Blob blob, Metadata m) throws SQLException {
+ return TikaInputStream.get(resultSet.getBytes(columnIndex), m);
+ }
+
+ @Override
+ protected void handleInteger(String columnTypeName, ResultSet rs, int
columnIndex,
+ ContentHandler handler) throws SQLException,
SAXException {
+ //As of this writing, with xerial's sqlite jdbc connector, a timestamp
is
+ //stored as a column of type Integer, but the columnTypeName is
TIMESTAMP, and the
+ //value is a string representing a Long.
+ if (columnTypeName.equals("TIMESTAMP")) {
+
addAllCharacters(parseDateFromLongString(rs.getString(columnIndex)), handler);
+ } else {
+ addAllCharacters(Integer.toString(rs.getInt(columnIndex)),
handler);
+ }
+
+ }
+
+ private String parseDateFromLongString(String longString) throws
SAXException {
+ java.sql.Date d = new java.sql.Date(Long.parseLong(longString));
+ return dateFormat.format(d);
+
+ }
+}
Added:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java?rev=1659545&view=auto
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
(added)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
Fri Feb 13 12:43:56 2015
@@ -0,0 +1,360 @@
+package org.apache.tika.parser.jdbc;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.extractor.EmbeddedResourceHandler;
+import org.apache.tika.extractor.ParserContainerExtractor;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Database;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.apache.tika.sax.BasicContentHandlerFactory;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.ToXMLContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class SQLite3ParserTest extends TikaTest {
+    //name of the sqlite test database and its location on the test classpath
+    private static final String TEST_FILE_NAME = "testSqlite3b.db";
+    private static final String TEST_FILE1 = "/test-documents/" + TEST_FILE_NAME;
+
+    /**
+     * Parses the test database from three kinds of input stream and checks,
+     * for each of them, the table markup, individual cell values, content of
+     * the embedded documents and the table names recorded in the metadata.
+     */
+    @Test
+    public void testBasic() throws Exception {
+        Parser p = new AutoDetectParser();
+
+        //test different types of input streams
+        //actual inputstream, memory buffered bytearray and literal file
+        InputStream[] streams = new InputStream[3];
+        streams[0] = getResourceAsStream(TEST_FILE1);
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        //this stream only feeds the in-memory copy, so close it right away
+        InputStream copySource = getResourceAsStream(TEST_FILE1);
+        try {
+            IOUtils.copy(copySource, bos);
+        } finally {
+            copySource.close();
+        }
+        streams[1] = new ByteArrayInputStream(bos.toByteArray());
+        streams[2] = TikaInputStream.get(getResourceAsFile(TEST_FILE1));
+        int tests = 0;
+        for (InputStream stream : streams) {
+            Metadata metadata = new Metadata();
+            metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+            //1) getXML closes the stream
+            //2) getXML runs recursively on the contents, so the embedded docs should show up
+            XMLResult result = getXML(stream, p, metadata);
+            String x = result.xml;
+            //first table name
+            assertContains("<table name=\"my_table1\"><thead><tr>\t<th>INT_COL</th>", x);
+            //non-ascii
+            assertContains("<td>普林斯顿大学</td>", x);
+            //boolean
+            assertContains("<td>true</td>\t<td>2015-01-02</td>", x);
+            //date test
+            assertContains("2015-01-04", x);
+            //timestamp test
+            assertContains("2015-01-03 15:17:03", x);
+            //first embedded doc's image tag
+            assertContains("alt=\"image1.png\"", x);
+            //second embedded doc's image tag
+            assertContains("alt=\"A description...\"", x);
+            //second table name
+            assertContains("<table name=\"my_table2\"><thead><tr>\t<th>INT_COL2</th>", x);
+
+            Metadata post = result.metadata;
+            String[] tableNames = post.getValues(Database.TABLE_NAME);
+            assertEquals(2, tableNames.length);
+            assertEquals("my_table1", tableNames[0]);
+            assertEquals("my_table2", tableNames[1]);
+            tests++;
+        }
+        //every kind of stream must have been exercised
+        assertEquals(3, tests);
+    }
+
+    //make sure that table cells and rows are properly marked to
+    //yield \t and \n at the appropriate places
+    @Test
+    public void testSpacesInBodyContentHandler() throws Exception {
+        Parser p = new AutoDetectParser();
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+        ContentHandler handler = new BodyContentHandler(-1);
+        ParseContext ctx = new ParseContext();
+        ctx.set(Parser.class, p);
+        //open the stream before entering the try block: if opening fails, the
+        //real failure is reported instead of a NullPointerException from finally
+        InputStream stream = getResourceAsStream(TEST_FILE1);
+        try {
+            p.parse(stream, handler, metadata, ctx);
+        } finally {
+            stream.close();
+        }
+        String s = handler.toString();
+        //cells of a row are separated by tabs
+        assertContains("0\t2.3\t2.4\tlorem", s);
+        //rows end with a newline
+        assertContains("tempor\n", s);
+    }
+
+    //test what happens if the user forgets to pass in a parser via context
+    //to handle embedded documents
+    @Test
+    public void testNotAddingEmbeddedParserToParseContext() throws Exception {
+        Parser p = new AutoDetectParser();
+
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+        ContentHandler handler = new ToXMLContentHandler();
+        InputStream is = getResourceAsStream(TEST_FILE1);
+        try {
+            //empty ParseContext on purpose: no parser is registered for embedded documents
+            p.parse(is, handler, metadata, new ParseContext());
+        } finally {
+            //release the resource stream whether or not parsing succeeded
+            is.close();
+        }
+        String xml = handler.toString();
+        //just includes headers for embedded documents
+        assertContains("<table name=\"my_table1\"><thead><tr>", xml);
+        assertContains("<td><span type=\"blob\" column_name=\"BYTES_COL\" row_number=\"0\">"
+                + "<div class=\"package-entry\"><h1>BYTES_COL_0.doc</h1>", xml);
+        //but no other content
+        assertNotContained("dog", xml);
+        //first embedded doc's image tag
+        assertNotContained("alt=\"image1.png\"", xml);
+        //second embedded doc's image tag
+        assertNotContained("alt=\"A description...\"", xml);
+    }
+
+    /**
+     * Runs the database through a RecursiveParserWrapper and checks the
+     * number of metadata entries, the text collected for the container and
+     * for the embedded documents, and the embedded resource path.
+     */
+    @Test
+    public void testRecursiveParserWrapper() throws Exception {
+        Parser p = new AutoDetectParser();
+
+        RecursiveParserWrapper wrapper =
+                new RecursiveParserWrapper(p, new BasicContentHandlerFactory(
+                        BasicContentHandlerFactory.HANDLER_TYPE.BODY, -1));
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+        InputStream is = getResourceAsStream(TEST_FILE1);
+        try {
+            wrapper.parse(is, new BodyContentHandler(-1), metadata, new ParseContext());
+        } finally {
+            //release the resource stream whether or not parsing succeeded
+            is.close();
+        }
+        List<Metadata> metadataList = wrapper.getMetadata();
+        assertEquals(5, metadataList.size());
+
+        //entry 0 is read once; the checks below look for text of both tables in it
+        String containerText = metadataList.get(0).get(RecursiveParserWrapper.TIKA_CONTENT);
+        //make sure the \t are inserted in a body handler
+        assertContains("0\t2.3\t2.4\tlorem", containerText);
+        assertContains("普林斯顿大学", containerText);
+        //make sure the \n is inserted
+        assertContains("do eiusmod tempor\n", containerText);
+
+        assertContains("The quick brown fox",
+                metadataList.get(2).get(RecursiveParserWrapper.TIKA_CONTENT));
+        assertContains("The quick brown fox",
+                metadataList.get(4).get(RecursiveParserWrapper.TIKA_CONTENT));
+
+        //confirm .doc was added to blob
+        assertEquals("testSqlite3b.db/BYTES_COL_0.doc/image1.png",
+                metadataList.get(1).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH));
+    }
+
+    @Test
+    public void testParserContainerExtractor() throws Exception {
+        //There should be 4 embedded documents:
+        //2x word files, one doc and one docx
+        //2x png files, the same image embedded in each of the doc and docx
+
+        ParserContainerExtractor ex = new ParserContainerExtractor();
+        ByteCopyingHandler byteCopier = new ByteCopyingHandler();
+        //keep a reference to the TikaInputStream so that it is always closed
+        TikaInputStream tis = TikaInputStream.get(getResourceAsStream(TEST_FILE1));
+        try {
+            ex.extract(tis, ex, byteCopier);
+        } finally {
+            tis.close();
+        }
+
+        assertEquals(4, byteCopier.bytes.size());
+        //entry 0 is compared byte by byte further down, so only entries 1 to 3
+        //are decoded to text (at most the first 1000 bytes of each)
+        String[] strings = new String[4];
+        for (int i = 1; i < byteCopier.bytes.size(); i++) {
+            byte[] byteArr = byteCopier.bytes.get(i);
+            strings[i] = new String(byteArr, 0, Math.min(byteArr.length, 1000), "UTF-8");
+        }
+        //D0 CF 11 E0 A1 B1 1A E1 followed by two zero bytes
+        byte[] oleBytes = new byte[]{
+                (byte) -48,
+                (byte) -49,
+                (byte) 17,
+                (byte) -32,
+                (byte) -95,
+                (byte) -79,
+                (byte) 26,
+                (byte) -31,
+                (byte) 0,
+                (byte) 0,
+        };
+        //test OLE
+        for (int i = 0; i < oleBytes.length; i++) {
+            assertEquals(oleBytes[i], byteCopier.bytes.get(0)[i]);
+        }
+        assertContains("PNG", strings[1]);
+        assertContains("PK", strings[2]);
+        assertContains("PNG", strings[3]);
+    }
+
+    //This confirms that reading the stream twice is not
+    //quadrupling the number of attachments.
+    @Test
+    public void testInputStreamReset() throws Exception {
+        //The handler records every embedded stream twice (once per read),
+        //so there should be 8 byte arrays:
+        //4x word files, two docs and two docxs
+        //4x png files, the same image embedded in each of the doc and docx
+
+        ParserContainerExtractor ex = new ParserContainerExtractor();
+        InputStreamResettingHandler byteCopier = new InputStreamResettingHandler();
+        InputStream is = getResourceAsStream(TEST_FILE1);
+        //keep a reference to the TikaInputStream so that it is always closed
+        TikaInputStream tis = TikaInputStream.get(is);
+        try {
+            ex.extract(tis, ex, byteCopier);
+            //NOTE(review): resets the raw resource stream after extraction;
+            //presumably checks that it can still be rewound -- confirm
+            is.reset();
+        } finally {
+            tis.close();
+        }
+        assertEquals(8, byteCopier.bytes.size());
+    }
+
+
+
+    /**
+     * Handler that reads each embedded stream twice, rewinding it to its mark
+     * after every read, and keeps a copy of the bytes from both reads.
+     */
+    public static class InputStreamResettingHandler implements EmbeddedResourceHandler {
+
+        public List<byte[]> bytes = new ArrayList<byte[]>();
+
+        @Override
+        public void handle(String filename, MediaType mediaType, InputStream stream) {
+            //wrap streams that cannot be marked
+            InputStream markable = stream.markSupported() ? stream : TikaInputStream.get(stream);
+            markable.mark(1000000);
+            try {
+                //two identical passes: copy everything, record it, rewind
+                for (int pass = 0; pass < 2; pass++) {
+                    ByteArrayOutputStream copy = new ByteArrayOutputStream();
+                    IOUtils.copy(markable, copy);
+                    bytes.add(copy.toByteArray());
+                    markable.reset();
+                }
+            } catch (IOException e) {
+                //swallow
+            }
+        }
+    }
+
+ //code used for creating the test file
+/*
+ private Connection getConnection(String dbFileName) throws Exception {
+ File testDirectory = new
File(this.getClass().getResource("/test-documents").toURI());
+ System.out.println("Writing to: " + testDirectory.getAbsolutePath());
+ File testDB = new File(testDirectory, dbFileName);
+ Connection c = null;
+ try {
+ Class.forName("org.sqlite.JDBC");
+ c = DriverManager.getConnection("jdbc:sqlite:" +
testDB.getAbsolutePath());
+ } catch ( Exception e ) {
+ System.err.println( e.getClass().getName() + ": " + e.getMessage()
);
+ System.exit(0);
+ }
+ return c;
+ }
+
+ @Test
+ public void testCreateDB() throws Exception {
+ Connection c = getConnection("testSQLLite3b.db");
+ Statement st = c.createStatement();
+ String sql = "DROP TABLE if exists my_table1";
+ st.execute(sql);
+ sql = "CREATE TABLE my_table1 (" +
+ "INT_COL INT PRIMARY KEY, "+
+ "FLOAT_COL FLOAT, " +
+ "DOUBLE_COL DOUBLE, " +
+ "CHAR_COL CHAR(30), "+
+ "VARCHAR_COL VARCHAR(30), "+
+ "BOOLEAN_COL BOOLEAN,"+
+ "DATE_COL DATE,"+
+ "TIME_STAMP_COL TIMESTAMP,"+
+ "BYTES_COL BYTES" +
+ ")";
+ st.execute(sql);
+ sql = "insert into my_table1 (INT_COL, FLOAT_COL, DOUBLE_COL,
CHAR_COL, " +
+ "VARCHAR_COL, BOOLEAN_COL, DATE_COL, TIME_STAMP_COL,
BYTES_COL) " +
+ "values (?,?,?,?,?,?,?,?,?)";
+ SimpleDateFormat f = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ java.util.Date d = f.parse("2015-01-03 15:17:03");
+ System.out.println(d.getTime());
+ long d1Long = 1420229823000L;// 2015-01-02 15:17:03
+ long d2Long = 1420316223000L;// 2015-01-03 15:17:03
+ PreparedStatement ps = c.prepareStatement(sql);
+ ps.setInt(1, 0);
+ ps.setFloat(2, 2.3f);
+ ps.setDouble(3, 2.4d);
+ ps.setString(4, "lorem");
+ ps.setString(5, "普林斯顿大学");
+ ps.setBoolean(6, true);
+ ps.setString(7, "2015-01-02");
+ ps.setString(8, "2015-01-03 15:17:03");
+// ps.setClob(9, new StringReader(clobString));
+ ps.setBytes(9,
getByteArray(this.getClass().getResourceAsStream("/test-documents/testWORD_1img.doc")));//contains
"quick brown fox"
+ ps.executeUpdate();
+ ps.clearParameters();
+
+ ps.setInt(1, 1);
+ ps.setFloat(2, 4.6f);
+ ps.setDouble(3, 4.8d);
+ ps.setString(4, "dolor");
+ ps.setString(5, "sit");
+ ps.setBoolean(6, false);
+ ps.setString(7, "2015-01-04");
+ ps.setString(8, "2015-01-03 15:17:03");
+ //ps.setClob(9, new StringReader("consectetur adipiscing elit"));
+ ps.setBytes(9,
getByteArray(this.getClass().getResourceAsStream("/test-documents/testWORD_1img.docx")));//contains
"The end!"
+
+ ps.executeUpdate();
+
+ //build table2
+ sql = "DROP TABLE if exists my_table2";
+ st.execute(sql);
+
+ sql = "CREATE TABLE my_table2 (" +
+ "INT_COL2 INT PRIMARY KEY, "+
+ "VARCHAR_COL2 VARCHAR(64))";
+ st.execute(sql);
+ sql = "INSERT INTO my_table2 values(0,'sed, do eiusmod tempor')";
+ st.execute(sql);
+ sql = "INSERT INTO my_table2 values(1,'incididunt \nut labore')";
+ st.execute(sql);
+
+ c.close();
+ }
+
+ private byte[] getByteArray(InputStream is) throws IOException {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ byte[] buff = new byte[1024];
+ for (int bytesRead; (bytesRead = is.read(buff)) != -1;) {
+ bos.write(buff, 0, bytesRead);
+ }
+ return bos.toByteArray();
+ }
+
+*/
+
+
+}