This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 479ae0bab TIKA-3836 -- add initial JDBCEmitter
479ae0bab is described below

commit 479ae0bab13d16d38a55247d1684e0e1bd74099b
Author: tballison <[email protected]>
AuthorDate: Fri Aug 12 15:15:36 2022 -0400

    TIKA-3836 -- add initial JDBCEmitter
---
 .../java/org/apache/tika/config/ConfigBase.java    |  16 +-
 tika-eval/tika-eval-app/pom.xml                    |   1 -
 tika-parent/pom.xml                                |   8 +-
 tika-pipes/tika-emitters/pom.xml                   |   1 +
 tika-pipes/tika-emitters/tika-emitter-jdbc/pom.xml |  28 +++
 .../tika/pipes/emitter/jdbc/JDBCEmitter.java       | 267 +++++++++++++++++++++
 .../tika/pipes/emitter/jdbc/JDBCEmitterTest.java   | 138 +++++++++++
 .../tika-config-jdbc-emitter-existing-table.xml    |  44 ++++
 .../resources/configs/tika-config-jdbc-emitter.xml |  51 ++++
 .../tika-pipes-iterator-jdbc/pom.xml               |   1 -
 10 files changed, 548 insertions(+), 7 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java 
b/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
index 04c6454a5..0d145c18a 100644
--- a/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
+++ b/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
@@ -331,11 +331,16 @@ public abstract class ConfigBase {
                     key = m.getNamedItem("from").getTextContent();
                 } else if (m.getNamedItem("key") != null) {
                     key = m.getNamedItem("key").getTextContent();
+                } else if (m.getNamedItem("k") != null) {
+                    key = m.getNamedItem("k").getTextContent();
                 }
+
                 if (m.getNamedItem("to") != null) {
                     value = m.getNamedItem("to").getTextContent();
                 } else if (m.getNamedItem("value") != null) {
                     value = m.getNamedItem("value").getTextContent();
+                } else if (m.getNamedItem("v") != null) {
+                    value = m.getNamedItem("v").getTextContent();
                 }
                 if (key == null) {
                     throw new TikaConfigException("must specify a 'key' or 
'from' value in a map " +
@@ -363,9 +368,14 @@ public abstract class ConfigBase {
         for (int i = 0; i < nodeList.getLength(); i++) {
             Node n = nodeList.item(i);
             if (n.getNodeType() == 1) {
-                if (n.hasAttributes() && 
n.getAttributes().getNamedItem("from") != null &&
-                        n.getAttributes().getNamedItem("to") != null) {
-                    return true;
+                if (n.hasAttributes()) {
+                    if (n.getAttributes().getNamedItem("from") != null &&
+                            n.getAttributes().getNamedItem("to") != null) {
+                        return true;
+                    } else if (n.getAttributes().getNamedItem("k") != null &&
+                            n.getAttributes().getNamedItem("v") != null) {
+                        return true;
+                    }
                 }
             }
         }
diff --git a/tika-eval/tika-eval-app/pom.xml b/tika-eval/tika-eval-app/pom.xml
index 9472f236d..8bcaea796 100644
--- a/tika-eval/tika-eval-app/pom.xml
+++ b/tika-eval/tika-eval-app/pom.xml
@@ -53,7 +53,6 @@
     <dependency>
       <groupId>com.h2database</groupId>
       <artifactId>h2</artifactId>
-      <version>${h2.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.poi</groupId>
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index ad2b808ad..654c99cd8 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -314,7 +314,6 @@
     <fakeload.version>0.4.0</fakeload.version>
     <geoapi.version>3.0.1</geoapi.version>
     <guava.version>31.1-jre</guava.version>
-    <h2.version>2.1.214</h2.version>
     <httpcomponents.version>4.5.13</httpcomponents.version>
     <httpcore.version>4.4.15</httpcore.version>
     <!-- versions greater than 62.2 are not compatible with icu4j handler in 
lucene 8.11.1 -->
@@ -322,7 +321,7 @@
     <imageio.version>1.4.0</imageio.version>
     <jackcess.version>4.0.1</jackcess.version>
     <jackcess.encrypt.version>4.0.1</jackcess.encrypt.version>
-    <jackrabbit.version>2.21.12</jackrabbit.version>
+    <jackrabbit.version>2.21.11</jackrabbit.version>
     <jackson.version>2.13.3</jackson.version>
     <javax.annotation.version>1.3.2</javax.annotation.version>
     <javax.jcr.version>2.0</javax.jcr.version>
@@ -476,6 +475,11 @@
         <artifactId>protobuf-java</artifactId>
         <version>3.21.5</version>
       </dependency>
+      <dependency>
+        <groupId>com.h2database</groupId>
+        <artifactId>h2</artifactId>
+        <version>2.1.214</version>
+      </dependency>
       <dependency>
         <groupId>com.ibm.icu</groupId>
         <artifactId>icu4j</artifactId>
diff --git a/tika-pipes/tika-emitters/pom.xml b/tika-pipes/tika-emitters/pom.xml
index c5777b20b..ea8e013aa 100644
--- a/tika-pipes/tika-emitters/pom.xml
+++ b/tika-pipes/tika-emitters/pom.xml
@@ -39,6 +39,7 @@
     <module>tika-emitter-opensearch</module>
     <module>tika-emitter-gcs</module>
     <module>tika-emitter-az-blob</module>
+    <module>tika-emitter-jdbc</module>
   </modules>
 
   <scm>
diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/pom.xml 
b/tika-pipes/tika-emitters/tika-emitter-jdbc/pom.xml
new file mode 100644
index 000000000..84ef9bc77
--- /dev/null
+++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/pom.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xmlns="http://maven.apache.org/POM/4.0.0"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <artifactId>tika-emitters</artifactId>
+    <groupId>org.apache.tika</groupId>
+    <version>2.4.2-SNAPSHOT</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>tika-emitter-jdbc</artifactId>
+  <name>Apache Tika jdbc emitter</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.h2database</groupId>
+      <artifactId>h2</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>
\ No newline at end of file
diff --git 
a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java
 
b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java
new file mode 100644
index 000000000..85b3449e2
--- /dev/null
+++ 
b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/main/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitter.java
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.emitter.jdbc;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Types;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.emitter.AbstractEmitter;
+import org.apache.tika.pipes.emitter.TikaEmitterException;
+import org.apache.tika.utils.StringUtils;
+
+/**
+ * This is only an initial, basic implementation of an emitter for JDBC.
+ * For now, it only processes the first metadata object in the list.
+ * <p>
+ * Later implementations may handle embedded files along the lines of
+ * the OpenSearch/Solr emitters.
+ */
+public class JDBCEmitter extends AbstractEmitter implements Initializable, 
Closeable {
+
+    private static final Logger LOGGER = 
LoggerFactory.getLogger(JDBCEmitter.class);
+    private static volatile boolean INITIALIZED = false;
+    //the "write" lock is used to make the connection and to configure the 
insertStatement
+    //the "read" lock is used for preparing the insert and inserting
+    private static ReadWriteLock READ_WRITE_LOCK = new 
ReentrantReadWriteLock();
+    private String connectionString;
+    private String insert;
+    private String createTable;
+    private String idColumn;
+    private Map<String, String> keys;
+    private Connection connection;
+    private PreparedStatement insertStatement;
+
+    @Field
+    public void setCreateTable(String createTable) {
+        this.createTable = createTable;
+    }
+
+    @Field
+    public void setInsert(String insert) {
+        this.insert = insert;
+    }
+
+    @Field
+    public void setIdColumn(String idColumn) {
+        this.idColumn = idColumn;
+    }
+
+    @Field
+    public void setConnection(String connectionString) {
+        this.connectionString = connectionString;
+    }
+
+    /**
+     * The implementation of keys should be a LinkedHashMap because
+     * order matters!
+     * <p>
+     * Key is the name of the metadata field, value is the type of column:
+     * boolean, string, int, long
+     *
+     * @param keys ordered map from metadata field name to column type
+     */
+    @Field
+    public void setKeys(Map<String, String> keys) {
+        this.keys = keys;
+    }
+
+    @Override
+    public void emit(String emitKey, List<Metadata> metadataList)
+            throws IOException, TikaEmitterException {
+        if (metadataList == null || metadataList.size() < 1) {
+            return;
+        }
+        //we aren't currently batching inserts
+        //because of risk of crashing in pipes handler.
+        READ_WRITE_LOCK.readLock().lock();
+        try {
+            try {
+                insertStatement.clearParameters();
+                int i = 0;
+                insertStatement.setString(++i, emitKey);
+                for (Map.Entry<String, String> e : keys.entrySet()) {
+                    updateValue(insertStatement, ++i, e.getKey(), 
e.getValue(), metadataList);
+                }
+                insertStatement.execute();
+            } finally {
+                READ_WRITE_LOCK.readLock().unlock();
+            }
+        } catch (SQLException e) {
+            try {
+                LOGGER.warn("problem during emit; going to try to reconnect", 
e);
+                //something went wrong
+                //try to reconnect
+                reconnect();
+            } catch (SQLException ex) {
+                throw new TikaEmitterException("Couldn't reconnect!", ex);
+            }
+            throw new TikaEmitterException("couldn't emit", e);
+        }
+    }
+
+    private void reconnect() throws SQLException {
+        SQLException ex = null;
+        try {
+            READ_WRITE_LOCK.writeLock().lock();
+            for (int i = 0; i < 3; i++) {
+                try {
+                    connection = DriverManager.getConnection(connectionString);
+                    insertStatement = connection.prepareStatement(insert);
+                    return;
+                } catch (SQLException e) {
+                    LOGGER.warn("couldn't reconnect to db", e);
+                    ex = e;
+                }
+            }
+        } finally {
+            READ_WRITE_LOCK.writeLock().unlock();
+        }
+        throw ex;
+    }
+
+    private void updateValue(PreparedStatement insertStatement, int i, String 
key, String type,
+                             List<Metadata> metadataList) throws SQLException {
+        //for now we're only taking the info from the container document.
+        Metadata metadata = metadataList.get(0);
+        String val = metadata.get(key);
+        switch (type) {
+            case "string":
+                updateString(insertStatement, i, val);
+                break;
+            case "bool":
+            case "boolean":
+                updateBoolean(insertStatement, i, val);
+                break;
+            case "int":
+            case "integer":
+                updateInteger(insertStatement, i, val);
+                break;
+            case "long":
+                updateLong(insertStatement, i, val);
+                break;
+            default:
+                throw new IllegalArgumentException("Can only process: 
'string', 'boolean', 'int' " +
+                        "and 'long' types so far.  Please open a ticket to 
request other types");
+        }
+    }
+
+    private void updateLong(PreparedStatement insertStatement, int i, String 
val)
+            throws SQLException {
+        if (StringUtils.isBlank(val)) {
+            insertStatement.setNull(i, Types.BIGINT);
+        } else {
+            insertStatement.setLong(i, Long.parseLong(val));
+        }
+    }
+
+    private void updateInteger(PreparedStatement insertStatement, int i, 
String val)
+            throws SQLException {
+        if (StringUtils.isBlank(val)) {
+            insertStatement.setNull(i, Types.INTEGER);
+        } else {
+            insertStatement.setInt(i, Integer.parseInt(val));
+        }
+    }
+
+    private void updateBoolean(PreparedStatement insertStatement, int i, 
String val)
+            throws SQLException {
+        if (StringUtils.isBlank(val)) {
+            insertStatement.setNull(i, Types.BOOLEAN);
+        } else {
+            insertStatement.setBoolean(i, Boolean.parseBoolean(val));
+        }
+    }
+
+    private void updateString(PreparedStatement insertStatement, int i, String 
val)
+            throws SQLException {
+        if (val == null) {
+            insertStatement.setNull(i, Types.VARCHAR);
+        } else {
+            insertStatement.setString(i, val);
+        }
+    }
+
+    @Override
+    public void initialize(Map<String, Param> params) throws 
TikaConfigException {
+
+        try {
+            connection = DriverManager.getConnection(connectionString);
+        } catch (SQLException e) {
+            throw new TikaConfigException("couldn't open connection: " + 
connectionString, e);
+        }
+        try {
+            READ_WRITE_LOCK.writeLock().lock();
+            if (!INITIALIZED && !StringUtils.isBlank(createTable)) {
+                try (Statement st = connection.createStatement()) {
+                    st.execute(createTable);
+                    INITIALIZED = true;
+                } catch (SQLException e) {
+                    throw new TikaConfigException("can't create table", e);
+                }
+            }
+        } finally {
+            READ_WRITE_LOCK.writeLock().unlock();
+        }
+        try {
+            insertStatement = connection.prepareStatement(insert);
+        } catch (SQLException e) {
+            throw new TikaConfigException("can't create insert statement", e);
+        }
+
+    }
+
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        //TODO: validate that required fields (connection, insert) are set
+    }
+
+    /*
+        TODO: This is currently not ever called.  We need to rework the 
PipesParser
+        to ensure that emitters are closed cleanly.
+     */
+    /**
+     * Closes the underlying JDBC connection.
+     *
+     * @throws IOException if the connection cannot be closed cleanly
+     */
+    @Override
+    public void close() throws IOException {
+        try {
+            connection.close();
+        } catch (SQLException e) {
+            throw new IOException(e);
+        }
+    }
+}
diff --git 
a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java
 
b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java
new file mode 100644
index 000000000..15ccc7e64
--- /dev/null
+++ 
b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/java/org/apache/tika/pipes/emitter/jdbc/JDBCEmitterTest.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.emitter.jdbc;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.pipes.emitter.Emitter;
+import org.apache.tika.pipes.emitter.EmitterManager;
+
+public class JDBCEmitterTest {
+
+    @Test
+    public void testBasic(@TempDir Path tmpDir) throws Exception {
+        Files.createDirectories(tmpDir.resolve("db"));
+        Path dbDir = tmpDir.resolve("db/h2");
+        Path config = tmpDir.resolve("tika-config.xml");
+        String connectionString = "jdbc:h2:file:" + dbDir.toAbsolutePath();
+
+        writeConfig("/configs/tika-config-jdbc-emitter.xml",
+                connectionString, config);
+
+        EmitterManager emitterManager = EmitterManager.load(config);
+        Emitter emitter = emitterManager.getEmitter();
+        List<String[]> data = new ArrayList<>();
+        data.add(new String[]{"k1", "true", "k2", "some string1", "k3", "4", 
"k4", "100"});
+        data.add(new String[]{"k1", "false", "k2", "some string2", "k3", "5", 
"k4", "101"});
+        data.add(new String[]{"k1", "true", "k2", "some string3", "k3", "6", 
"k4", "102"});
+        int id = 0;
+        for (String[] d : data) {
+            emitter.emit("id" + id++, m(d));
+        }
+
+        try (Connection connection = 
DriverManager.getConnection(connectionString)) {
+            try (Statement st = connection.createStatement()) {
+                try (ResultSet rs = st.executeQuery("select * from test")) {
+                    int rows = 0;
+                    while (rs.next()) {
+                        assertEquals("id" + rows, rs.getString(1));
+                        assertEquals(rows % 2 == 0, rs.getBoolean(2));
+                        assertEquals("some string" + (rows + 1), 
rs.getString(3));
+                        assertEquals(rows + 4, rs.getInt(4));
+                        assertEquals(100 + rows, rs.getLong(5));
+                        rows++;
+                    }
+                }
+            }
+        }
+    }
+
+    @Test
+    public void testTableExists(@TempDir Path tmpDir) throws Exception {
+        String createTable = "create table test (path varchar(512) primary 
key," +
+                "k1 boolean,k2 varchar(512),k3 integer,k4 long);";
+
+        Files.createDirectories(tmpDir.resolve("db"));
+        Path dbDir = tmpDir.resolve("db/h2");
+        Path config = tmpDir.resolve("tika-config.xml");
+        String connectionString = "jdbc:h2:file:" + dbDir.toAbsolutePath();
+        writeConfig("/configs/tika-config-jdbc-emitter-existing-table.xml",
+                connectionString, config);
+
+        try (Connection connection = 
DriverManager.getConnection(connectionString)) {
+            connection.createStatement().execute(createTable);
+        }
+        EmitterManager emitterManager = EmitterManager.load(config);
+        Emitter emitter = emitterManager.getEmitter();
+        List<String[]> data = new ArrayList<>();
+        data.add(new String[]{"k1", "true", "k2", "some string1", "k3", "4", 
"k4", "100"});
+        data.add(new String[]{"k1", "false", "k2", "some string2", "k3", "5", 
"k4", "101"});
+        data.add(new String[]{"k1", "true", "k2", "some string3", "k3", "6", 
"k4", "102"});
+        int id = 0;
+        for (String[] d : data) {
+            emitter.emit("id" + id++, m(d));
+        }
+
+        try (Connection connection = 
DriverManager.getConnection(connectionString)) {
+            try (Statement st = connection.createStatement()) {
+                try (ResultSet rs = st.executeQuery("select * from test")) {
+                    int rows = 0;
+                    while (rs.next()) {
+                        assertEquals("id" + rows, rs.getString(1));
+                        assertEquals(rows % 2 == 0, rs.getBoolean(2));
+                        assertEquals("some string" + (rows + 1), 
rs.getString(3));
+                        assertEquals(rows + 4, rs.getInt(4));
+                        assertEquals(100 + rows, rs.getLong(5));
+                        rows++;
+                    }
+                }
+            }
+        }
+
+    }
+
+    private void writeConfig(String srcConfig, String dbDir, Path config) 
throws IOException {
+        String xml = IOUtils.resourceToString(srcConfig, 
StandardCharsets.UTF_8);
+        xml = xml.replaceAll("CONNECTION_STRING", dbDir);
+        Files.write(config, xml.getBytes(StandardCharsets.UTF_8));
+    }
+
+    private List<Metadata> m(String... strings) {
+        Metadata metadata = new Metadata();
+        for (int i = 0; i < strings.length; i++) {
+            metadata.set(strings[i], strings[++i]);
+        }
+        return Collections.singletonList(metadata);
+    }
+}
diff --git 
a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml
 
b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml
new file mode 100644
index 000000000..040d98386
--- /dev/null
+++ 
b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<properties>
+  <emitters>
+    <emitter class="org.apache.tika.pipes.emitter.jdbc.JDBCEmitter">
+      <params>
+        <name>jdbc</name>
+        <connection>CONNECTION_STRING</connection>
+        <idColumn>path</idColumn>
+        <insert>insert into test (path, k1, k2, k3, k4) values (?,?,?,?,?);
+        </insert>
+        <!-- These are the keys in the metadata object.
+            The emitKey is added as the first element in the insert statement.
+            Then these values are added in order.
+            They must be in the order of the insert statement.
+            -->
+        <keys>
+          <key k="k1" v="boolean"/>
+          <key k="k2" v="string"/>
+          <key k="k3" v="int"/>
+          <key k="k4" v="long"/>
+        </keys>
+      </params>
+    </emitter>
+  </emitters>
+</properties>
\ No newline at end of file
diff --git 
a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml
 
b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml
new file mode 100644
index 000000000..04206740b
--- /dev/null
+++ 
b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<properties>
+  <emitters>
+    <emitter class="org.apache.tika.pipes.emitter.jdbc.JDBCEmitter">
+      <params>
+        <name>jdbc</name>
+        <connection>CONNECTION_STRING</connection>
+        <createTable>create table test
+          (path varchar(512) primary key,
+          k1 boolean,
+          k2 varchar(512),
+          k3 integer,
+          k4 long);
+        </createTable>
+        <idColumn>path</idColumn>
+        <insert>insert into test (path, k1, k2, k3, k4) values (?,?,?,?,?);
+        </insert>
+        <!-- These are the keys in the metadata object.
+            The emitKey is added as the first element in the insert statement.
+            Then these values are added in order.
+            They must be in the order of the insert statement.
+            -->
+        <keys>
+          <key k="k1" v="boolean"/>
+          <key k="k2" v="string"/>
+          <key k="k3" v="int"/>
+          <key k="k4" v="long"/>
+        </keys>
+      </params>
+    </emitter>
+  </emitters>
+</properties>
\ No newline at end of file
diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/pom.xml 
b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/pom.xml
index da762a062..2552fce6f 100644
--- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/pom.xml
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/pom.xml
@@ -41,7 +41,6 @@
     <dependency>
       <groupId>com.h2database</groupId>
       <artifactId>h2</artifactId>
-      <version>${h2.version}</version>
       <scope>test</scope>
     </dependency>
   </dependencies>

Reply via email to