jena git commit: JENA-1305: Added support for ElasticSearch V 5.2.1

osma Tue, 28 Mar 2017 07:51:28 -0700

Repository: jena
Updated Branches:
  refs/heads/master 347d7764d -> 1c1325c56



JENA-1305: Added support for ElasticSearch V 5.2.1


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/1c1325c5
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/1c1325c5
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/1c1325c5

Branch: refs/heads/master
Commit: 1c1325c5646f3fd908bf56db0480759a22dcd68c
Parents: 347d776
Author: Anuj Kumar <[email protected]>
Authored: Wed Mar 8 12:06:11 2017 +0100
Committer: Osma Suominen <[email protected]>
Committed: Tue Mar 28 17:49:36 2017 +0300

----------------------------------------------------------------------
 jena-parent/pom.xml                             |  14 +
 jena-text/pom.xml                               |  93 +++-
 .../main/java/examples/JenaESTextExample.java   |  99 ++++
 .../org/apache/jena/query/text/ESSettings.java  | 177 ++++++++
 .../jena/query/text/TextDatasetFactory.java     |  25 ++
 .../org/apache/jena/query/text/TextIndexES.java | 448 +++++++++++++++++++
 .../query/text/assembler/TextAssembler.java     |   2 +
 .../text/assembler/TextIndexESAssembler.java    | 114 +++++
 .../jena/query/text/assembler/TextVocab.java    |   8 +
 jena-text/src/main/resources/data-es.ttl        |  46 ++
 jena-text/src/main/resources/text-config-es.ttl |  64 +++
 .../org/apache/jena/query/text/TS_Text.java     |  14 +-
 .../apache/jena/query/text/it/BaseESTest.java   | 111 +++++
 .../jena/query/text/it/TextIndexESIT.java       | 306 +++++++++++++
 14 files changed, 1511 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-parent/pom.xml
----------------------------------------------------------------------
diff --git a/jena-parent/pom.xml b/jena-parent/pom.xml
index bd18208..59c69ff 100644
--- a/jena-parent/pom.xml
+++ b/jena-parent/pom.xml
@@ -72,6 +72,7 @@
 
     <ver.commons-codec>1.10</ver.commons-codec>
     <ver.lucene>6.4.1</ver.lucene>
+    <ver.elasticsearch>5.2.1</ver.elasticsearch>
     <ver.spatial4j>0.6</ver.spatial4j>
 
     <ver.mockito>1.9.5</ver.mockito>
@@ -275,6 +276,19 @@
         <version>${ver.spatial4j}</version>
       </dependency>
 
+      <!-- ES dependencies-->
+      <dependency>
+        <groupId>org.elasticsearch</groupId>
+        <artifactId>elasticsearch</artifactId>
+        <version>${ver.elasticsearch}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>org.elasticsearch.client</groupId>
+        <artifactId>transport</artifactId>
+        <version>${ver.elasticsearch}</version>
+      </dependency>
+
       <!-- Logging -->
       <dependency>
         <groupId>org.slf4j</groupId>

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/pom.xml
----------------------------------------------------------------------
diff --git a/jena-text/pom.xml b/jena-text/pom.xml
index 37010f1..03e90a6 100644
--- a/jena-text/pom.xml
+++ b/jena-text/pom.xml
@@ -81,6 +81,32 @@
       <artifactId>lucene-queryparser</artifactId>
     </dependency>
 
+      <dependency>
+          <groupId>org.elasticsearch</groupId>
+          <artifactId>elasticsearch</artifactId>
+      </dependency>
+
+      <dependency>
+          <groupId>org.elasticsearch.client</groupId>
+          <artifactId>transport</artifactId>
+      </dependency>
+
+      <dependency>
+          <groupId>junit</groupId>
+          <artifactId>junit</artifactId>
+      </dependency>
+
+      <dependency>
+          <groupId>org.apache.logging.log4j</groupId>
+          <artifactId>log4j-api</artifactId>
+          <version>2.7</version>
+      </dependency>
+      <dependency>
+          <groupId>org.apache.logging.log4j</groupId>
+          <artifactId>log4j-core</artifactId>
+          <version>2.7</version>
+      </dependency>
+
   </dependencies>
 
   <build>
@@ -112,11 +138,72 @@
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-surefire-plugin</artifactId>
         <configuration>
-          <includes>
-            <include>**/TS_*.java</include>
-          </includes>
+            <!-- Skip the default running of this plug-in (or everything is 
run twice...see below) -->
+            <skip>true</skip>
         </configuration>
+          <executions>
+              <execution>
+                  <id>unit-tests</id>
+                  <phase>test</phase>
+                  <goals>
+                      <goal>test</goal>
+                  </goals>
+                  <configuration>
+                      <skip>false</skip>
+                      <includes>
+                          <include>**/TS_*.java</include>
+                      </includes>
+                      <excludes>
+                          <exclude>**/*IT.java</exclude>
+                      </excludes>
+                  </configuration>
+              </execution>
+              <execution>
+                  <id>integration-tests</id>
+                  <phase>integration-test</phase>
+                  <goals>
+                      <goal>test</goal>
+                  </goals>
+                  <configuration>
+                      <skip>false</skip>
+                      <includes>
+                          <include>**/*IT.java</include>
+                      </includes>
+                  </configuration>
+              </execution>
+          </executions>
       </plugin>
+        <plugin>
+            <groupId>com.github.alexcojocaru</groupId>
+            <artifactId>elasticsearch-maven-plugin</artifactId>
+            <!-- REPLACE THE FOLLOWING WITH THE PLUGIN VERSION YOU NEED -->
+            <version>5.2</version>
+            <configuration>
+                <clusterName>elasticsearch</clusterName>
+                <transportPort>9500</transportPort>
+                <httpPort>9400</httpPort>
+            </configuration>
+            <executions>
+                <!--
+                    The elasticsearch maven plugin goals are by default bound 
to the
+                    pre-integration-test and post-integration-test phases
+                -->
+                <execution>
+                    <id>start-elasticsearch</id>
+                    <phase>pre-integration-test</phase>
+                    <goals>
+                        <goal>runforked</goal>
+                    </goals>
+                </execution>
+                <execution>
+                    <id>stop-elasticsearch</id>
+                    <phase>post-integration-test</phase>
+                    <goals>
+                        <goal>stop</goal>
+                    </goals>
+                </execution>
+            </executions>
+        </plugin>
 
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/main/java/examples/JenaESTextExample.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/examples/JenaESTextExample.java 
b/jena-text/src/main/java/examples/JenaESTextExample.java
new file mode 100644
index 0000000..3eb3042
--- /dev/null
+++ b/jena-text/src/main/java/examples/JenaESTextExample.java
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package examples;
+
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.*;
+import org.apache.jena.sparql.util.QueryExecUtils;
+
+/**
+ * Simple example class to test the {@link 
org.apache.jena.query.text.assembler.TextIndexESAssembler}
+ * For this class to work properly, an elasticsearch node should be up and 
running, otherwise it will fail.
+ * You can find the details of downloading and running an ElasticSearch 
version here: https://www.elastic.co/downloads/past-releases/elasticsearch-5-2-1
+ * Unzip the file in your favourite directory and then execute the appropriate 
file under the bin directory.
+ * It will take less than a minute.
+ * In order to visualize what is written in ElasticSearch, you need to 
download and run Kibana: https://www.elastic.co/downloads/kibana
+ * To run kibana, just go to the bin directory and execute the appropriate 
file.
+ * We need to resort to this mechanism as ElasticSearch has stopped supporting 
embedded ElasticSearch.
+ *
+ * In addition we can't have it in the test package because ElasticSearch
+ * detects the thread origin and stops us from instantiating a client.
+ */
+public class JenaESTextExample {
+
+    public static void main(String[] args) {
+
+        queryData(loadData(createAssembler()));
+    }
+
+
+    /**
+     * Assemble the example dataset described in {@code text-config-es.ttl}.
+     * @return the dataset assembled from the {@code #text_dataset} resource of that file
+     */
+    private static Dataset createAssembler() {
+        String assemblerFile = "text-config-es.ttl";
+        return DatasetFactory.assemble(assemblerFile,
+                "http://localhost/jena_example/#text_dataset") ;
+    }
+
+    private static Dataset loadData(Dataset ds) {
+        JenaTextExample1.loadData(ds, "data-es.ttl");
+        return ds;
+    }
+
+    /**
+     * Query Data
+     * @param ds
+     */
+    private static void queryData(Dataset ds) {
+//        JenaTextExample1.queryData(ds);
+        queryDataWithoutProperty(ds);
+
+
+    }
+
+    /**
+     * Build a sample {@code text:query} that names no property and hand it to
+     * {@link QueryExecUtils#executeQuery} inside a READ transaction on the given dataset.
+     * @param dataset the dataset to query
+     */
+    public static void queryDataWithoutProperty(Dataset dataset)
+    {
+        String pre = StrUtils.strjoinNL
+                ( "PREFIX : <http://example/>"
+                        , "PREFIX text: <http://jena.apache.org/text#>"
+                        , "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>") ;
+
+        // Alternative patterns that can be tried in place of the text:query line below:
+        //   " { ?s text:query (rdfs:comment 'this' 'lang:en') ;"
+        //   " { ?s text:query ('this' 'lang:en-GB') ;"
+        //   " { ?s text:query (rdfs:comment 'this' 'lang:en-GB') ;"
+        //   " { ?s text:query (rdfs:comment 'this' 'lang:*') ;"
+        //   " { ?s text:query (rdfs:comment 'this' 'lang:none') ;"
+        //   " { ?s text:query (rdfs:comment 'this') ;"
+        //   " { ?s text:query ('X1' 'lang:en') ;"
+        String qs = StrUtils.strjoinNL
+                ( "SELECT * "
+                        , " { ?s text:query ('this' 'lang:en*') ;"
+                        , "      rdfs:label ?label"
+                        , " }") ;
+
+        dataset.begin(ReadWrite.READ) ;
+        try {
+            Query q = QueryFactory.create(pre+"\n"+qs) ;
+            // Release the query execution before the transaction is ended
+            try (QueryExecution qexec = QueryExecutionFactory.create(q , dataset)) {
+                QueryExecUtils.executeQuery(q, qexec) ;
+            }
+        } finally { dataset.end() ; }
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/main/java/org/apache/jena/query/text/ESSettings.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/ESSettings.java 
b/jena-text/src/main/java/org/apache/jena/query/text/ESSettings.java
new file mode 100644
index 0000000..0c5a11e
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/ESSettings.java
@@ -0,0 +1,177 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jena.query.text;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Settings for ElasticSearch based indexing
+ */
+public class ESSettings {
+
+    /**
+     * Map of hosts and ports. The host could also be an IP Address
+     */
+    private Map<String,Integer> hostToPortMapping;
+
+    /**
+     * Name of the Cluster. Defaults to 'elasticsearch'
+     */
+    private String clusterName;
+
+    /**
+     * Number of shards. Defaults to '1'
+     */
+    private Integer shards;
+
+    /**
+     * Number of replicas. Defaults to '1'
+     */
+    private Integer replicas;
+
+    /**
+     * Name of the index. Defaults to 'jena-text'
+     */
+    private String indexName;
+
+
+    public Map<String, Integer> getHostToPortMapping() {
+        return hostToPortMapping;
+    }
+
+    public void setHostToPortMapping(Map<String, Integer> hostToPortMapping) {
+        this.hostToPortMapping = hostToPortMapping;
+    }
+
+    /**
+     * Create a new {@link Builder}.
+     * Static, so that no existing {@link ESSettings} instance is needed to obtain a builder.
+     * @return a new builder instance
+     */
+    public static ESSettings.Builder builder() {
+        return new ESSettings.Builder();
+    }
+
+    /**
+     * Convenient builder class for building ESSettings
+     */
+    public static class Builder {
+
+        ESSettings settings;
+
+        public Builder() {
+            this.settings = new ESSettings();
+            this.settings.setClusterName("elasticsearch");
+            this.settings.setShards(1);
+            this.settings.setReplicas(1);
+            this.settings.setHostToPortMapping(new HashMap<>());
+            this.settings.setIndexName("jena-text");
+        }
+
+
+        public Builder indexName(String indexName) {
+            if(indexName != null && !indexName.isEmpty()) {
+                this.settings.setIndexName(indexName);
+            }
+            return this;
+        }
+
+        public Builder clusterName(String clusterName) {
+            if(clusterName != null && !clusterName.isEmpty()) {
+                this.settings.setClusterName(clusterName);
+            }
+            return this;
+
+        }
+
+        public Builder shards(Integer shards) {
+            if (shards != null) {
+                this.settings.setShards(shards);
+            }
+            return this;
+        }
+
+        public Builder replicas(Integer replicas) {
+            if(replicas != null) {
+                this.settings.setReplicas(replicas);
+            }
+            return this;
+        }
+
+        public Builder hostAndPort(String host, Integer port) {
+            if(host != null && port != null) {
+                this.settings.getHostToPortMapping().put(host, port);
+            }
+            return this;
+
+        }
+
+        public Builder hostAndPortMap(Map<String, Integer> hostAndPortMap) {
+            if(hostAndPortMap != null) {
+                this.settings.getHostToPortMapping().putAll(hostAndPortMap);
+            }
+
+            return this;
+        }
+
+        public ESSettings build() {
+            return this.settings;
+        }
+
+    }
+
+    public String getClusterName() {
+        return clusterName;
+    }
+
+    public void setClusterName(String clusterName) {
+        this.clusterName = clusterName;
+    }
+
+    public Integer getShards() {
+        return shards;
+    }
+
+    public void setShards(Integer shards) {
+        this.shards = shards;
+    }
+
+    public Integer getReplicas() {
+        return replicas;
+    }
+
+    public void setReplicas(Integer replicas) {
+        this.replicas = replicas;
+    }
+
+    public String getIndexName() {
+        return indexName;
+    }
+
+    public void setIndexName(String indexName) {
+        this.indexName = indexName;
+    }
+
+
+    /**
+     * @return a human readable representation of all settings
+     */
+    @Override
+    public String toString() {
+        return "ESSettings{" +
+                "hostToPortMapping=" + hostToPortMapping +
+                ", clusterName='" + clusterName + '\'' +
+                ", shards=" + shards +
+                ", replicas=" + replicas +
+                ", indexName='" + indexName + '\'' +
+                '}';
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
----------------------------------------------------------------------
diff --git 
a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java 
b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
index c365fa5..5dba5a2 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java
@@ -164,5 +164,30 @@ public class TextDatasetFactory
         TextIndex index = createLuceneIndex(directory, config) ;
         return create(base, index, true) ;
     }
+
+    /**
+     * Create an ElasticSearch based Index and return a Dataset based on this 
index
+     * @param base the base {@link Dataset}
+     * @param config {@link TextIndexConfig} containing the {@link 
EntityDefinition}
+     * @param settings ElasticSearch specific settings for initializing and 
connecting to an ElasticSearch Cluster
+     * @return a {@link Dataset} created from the base dataset and the ElasticSearch text index
+     */
+    public static Dataset createES(Dataset base, TextIndexConfig config, 
ESSettings settings)
+    {
+        TextIndex index = createESIndex(config, settings) ;
+        return create(base, index, true) ;
+    }
+
+    /**
+     * Create an ElasticSearch based Index
+     * @param config {@link TextIndexConfig} containing the {@link 
EntityDefinition}
+     * @param settings ElasticSearch specific settings for initializing and 
connecting to an ElasticSearch Cluster
+     * @return a configured instance of TextIndexES
+     */
+    public static TextIndex createESIndex(TextIndexConfig config, ESSettings 
settings)
+    {
+        return new TextIndexES(config, settings) ;
+    }
+
 }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/main/java/org/apache/jena/query/text/TextIndexES.java
----------------------------------------------------------------------
diff --git 
a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexES.java 
b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexES.java
new file mode 100644
index 0000000..b85d4ed
--- /dev/null
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexES.java
@@ -0,0 +1,448 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import org.apache.commons.lang3.exception.ExceptionUtils;
+import org.apache.jena.graph.Node;
+import org.apache.jena.graph.NodeFactory;
+import org.apache.jena.sparql.util.NodeFactoryExtra;
+import 
org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
+import 
org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
+import org.elasticsearch.action.get.GetResponse;
+import org.elasticsearch.action.index.IndexRequest;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.update.UpdateRequest;
+import org.elasticsearch.action.update.UpdateResponse;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.client.transport.TransportClient;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.transport.InetSocketTransportAddress;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.index.engine.DocumentMissingException;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.script.Script;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.transport.client.PreBuiltTransportClient;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.InetAddress;
+import java.util.*;
+
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+
+/**
+ * Elastic Search Implementation of {@link TextIndex}
+ *
+ */
+public class TextIndexES implements TextIndex {
+
+    /**
+     * The definition of the Entity we are trying to Index
+     */
+    private final EntityDefinition docDef ;
+
+    /**
+     * Thread safe ElasticSearch Java Client to perform Index operations
+     */
+    private static Client client;
+
+    /**
+     * The name of the index. Defaults to 'jena-text'
+     */
+    private final String indexName;
+
+    /**
+     * The parameter representing the cluster name key
+     */
+    static final String CLUSTER_NAME_PARAM = "cluster.name";
+
+    /**
+     * The parameter representing the number of shards key
+     */
+    static final String NUM_OF_SHARDS_PARAM = "number_of_shards";
+
+    /**
+     * The parameter representing the number of replicas key
+     */
+    static final String NUM_OF_REPLICAS_PARAM = "number_of_replicas";
+
+    private static final String DASH = "-";
+
+    private static final String UNDERSCORE = "_";
+
+    private static final String COLON = ":";
+
+    private static final String ASTERISK = "*";
+
+    /**
+     * ES Script for adding/updating the document in the index.
+     * The main reason to use scripts is because we want to modify the values 
of the fields that contains an array of values
+     */
+    private static final String ADD_UPDATE_SCRIPT = "if((ctx._source == null) 
|| (ctx._source.<fieldName> == null) || (ctx._source.<fieldName>.empty == 
true)) " +
+            "{ctx._source.<fieldName>=[params.fieldValue] } else 
{ctx._source.<fieldName>.add(params.fieldValue)}";
+
+    /**
+     * ES Script for deleting a specific value in the field for the given 
document in the index.
+     * The main reason to use scripts is because we want to delete specific 
value of the field that contains an array of values
+     */
+    private static final String DELETE_SCRIPT = "if((ctx._source != null) && 
(ctx._source.<fieldToRemove> != null) && (ctx._source.<fieldToRemove>.empty != 
true) " +
+            "&& (ctx._source.<fieldToRemove>.indexOf(params.valueToRemove) >= 
0)) " +
+            
"{ctx._source.<fieldToRemove>.remove(ctx._source.<fieldToRemove>.indexOf(params.valueToRemove))}";
+
+    /**
+     * Number of maximum results to return in case no limit is specified on 
the search operation
+     */
+    static final Integer MAX_RESULTS = 10000;
+
+    private static final Logger LOGGER      = 
LoggerFactory.getLogger(TextIndexES.class) ;
+
+    /**
+     * Construct an instance of {@link TextIndexES} based on provided {@link 
TextIndexConfig} and {@link ESSettings}
+     * The constructor is responsible for initializing a {@link 
TransportClient} based on the provided configs
+     * and create index based on the provided {@link ESSettings}
+     * @param config an instance of {@link TextIndexConfig}
+     * @param esSettings an instance of {@link ESSettings}
+     */
+    public TextIndexES(TextIndexConfig config, ESSettings esSettings) {
+
+        this.indexName = esSettings.getIndexName();
+        this.docDef = config.getEntDef();
+        docDef.setLangField("lang");
+
+        try {
+            if(client == null) {
+
+                LOGGER.debug("Initializing the Elastic Search Java Client with 
settings: " + esSettings);
+                Settings settings = Settings.builder()
+                        .put(CLUSTER_NAME_PARAM, 
esSettings.getClusterName()).build();
+                List<InetSocketTransportAddress> addresses = new ArrayList<>();
+                for(String host: esSettings.getHostToPortMapping().keySet()) {
+                    InetSocketTransportAddress addr = new 
InetSocketTransportAddress(InetAddress.getByName(host), 
esSettings.getHostToPortMapping().get(host));
+                    addresses.add(addr);
+                }
+
+                InetSocketTransportAddress socketAddresses[] = new 
InetSocketTransportAddress[addresses.size()];
+                client = new 
PreBuiltTransportClient(settings).addTransportAddresses(addresses.toArray(socketAddresses));
+                LOGGER.debug("Successfully initialized the client");
+            }
+
+            IndicesExistsResponse exists = client.admin().indices().exists(new 
IndicesExistsRequest(indexName)).get();
+            if(!exists.isExists()) {
+                Settings indexSettings = Settings.builder()
+                        .put(NUM_OF_SHARDS_PARAM, esSettings.getShards())
+                        .put(NUM_OF_REPLICAS_PARAM, esSettings.getReplicas())
+                        .build();
+                LOGGER.debug("Index with name " + indexName + " does not exist 
yet. Creating one with settings: " + indexSettings.toString());
+                
client.admin().indices().prepareCreate(indexName).setSettings(indexSettings).get();
+            }
+        }catch (Exception e) {
+            throw new TextIndexException("Exception occurred while 
instantiating ElasticSearch Text Index", e);
+        }
+    }
+
+
+    /**
+     * Constructor used mainly for performing Integration tests
+     * @param config an instance of {@link TextIndexConfig}
+     * @param client an instance of {@link TransportClient}. The client should already have been initialized with an index
+     * @param indexName the name of the index that this instance operates on
+     */
+    public TextIndexES(TextIndexConfig config, Client client, String indexName) {
+        this.docDef = config.getEntDef();
+        //The client field is static: assigning it here replaces the client used by every TextIndexES instance
+        TextIndexES.client = client;
+        this.indexName = indexName;
+    }
+
+    /**
+     * We do not have any specific logic to perform before committing
+     */
+    @Override
+    public void prepareCommit() {
+        //Do Nothing
+
+    }
+
+    /**
+     * Commit happens in the individual get/add/delete operations
+     */
+    @Override
+    public void commit() {
+        // Do Nothing
+    }
+
+    /**
+     * We do not do rollback
+     */
+    @Override
+    public void rollback() {
+       //Do Nothing
+
+    }
+
+    /**
+     * We don't have resources that need to be closed explicitly
+     */
+    @Override
+    public void close() {
+        // Do Nothing
+
+    }
+
+    /**
+     * Update an Entity. Since we are doing Upserts in add entity anyways, we 
simply call {@link #addEntity(Entity)}
+     * method that takes care of updating the Entity as well.
+     * @param entity the entity to update.
+     */
+    @Override
+    public void updateEntity(Entity entity) {
+        //Since Add entity also updates the indexed document in case it 
already exists,
+        // we can simply call the addEntity from here.
+        addEntity(entity);
+    }
+
+
+    /**
+     * Add an Entity to the ElasticSearch Index.
+     * The entity will be added as a new document in ES, if it does not already exist.
+     * If the Entity exists, then the entity will simply be updated.
+     * The entity will never be replaced.
+     * Only the first field of the entity definition for which the entity holds a value is written by a call.
+     * @param entity the entity to add
+     * @throws TextIndexException if the entity could not be indexed, including the case where it holds
+     *         no value for any field of the entity definition
+     */
+    @Override
+    public void addEntity(Entity entity) {
+        LOGGER.debug("Adding/Updating the entity in ES");
+
+        //The field that has a not null value in the current Entity instance.
+        //Required, mainly for building a script for the update command.
+        String fieldToAdd = null;
+        String fieldValueToAdd = null;
+        try {
+            XContentBuilder builder = jsonBuilder()
+                    .startObject();
+
+            for(String field: docDef.fields()) {
+                if(entity.get(field) != null) {
+                    if(entity.getLanguage() != null && !entity.getLanguage().isEmpty()) {
+                        //We make sure that the field name contains all underscore and no dash (for eg. when the lang value is en-GB)
+                        //The reason to do this is because the script fails with exception in case we have "-" in field name.
+                        fieldToAdd = normalizeFieldName(field, entity.getLanguage());
+                    } else {
+                        fieldToAdd = field;
+                    }
+
+                    fieldValueToAdd = (String) entity.get(field);
+                    builder = builder.field(fieldToAdd, Arrays.asList(fieldValueToAdd));
+                    break;
+                } else {
+                    //We are making sure that the field is at-least added to the index.
+                    //This will help us tremendously when we are appending the data later in an already indexed document.
+                    builder = builder.field(field, Collections.emptyList());
+                }
+            }
+
+            if(fieldToAdd == null) {
+                //Without a field there is nothing to build the update script from. Fail with a clear cause
+                //instead of the NullPointerException that the script substitution would otherwise raise.
+                throw new IllegalArgumentException("Entity " + entity.getId() + " has no value for any indexed field");
+            }
+
+            builder = builder.endObject();
+            IndexRequest indexRequest = new IndexRequest(indexName, docDef.getEntityField(), entity.getId())
+                    .source(builder);
+
+            //Literal replacement: replaceAll would interpret '$' and '\' in the field name as regex replacement syntax
+            String addUpdateScript = ADD_UPDATE_SCRIPT.replace("<fieldName>", fieldToAdd);
+            Map<String, Object> params = new HashMap<>();
+            params.put("fieldValue", fieldValueToAdd);
+
+            UpdateRequest upReq = new UpdateRequest(indexName, docDef.getEntityField(), entity.getId())
+                    .script(new Script(Script.DEFAULT_SCRIPT_TYPE, Script.DEFAULT_SCRIPT_LANG, addUpdateScript, params))
+                    .upsert(indexRequest);
+
+            UpdateResponse response = client.update(upReq).get();
+
+            LOGGER.debug("Received the following Update response : " + response + " for the following entity: " + entity);
+
+        } catch(Exception e) {
+            throw new TextIndexException("Unable to Index the Entity in ElasticSearch.", e);
+        }
+    }
+
+    /**
+     * Delete the value of the entity from the existing document, if any.
+     * The document itself will never get deleted. Only the value will get deleted.
+     * Nothing is done when the entity holds no value for any field of the entity definition.
+     * @param entity entity whose value needs to be deleted
+     * @throws TextIndexException if the update fails for a reason other than the document being missing
+     */
+    @Override
+    public void deleteEntity(Entity entity) {
+
+        String fieldToRemove = null;
+        String valueToRemove = null;
+        for(String field : docDef.fields()) {
+            if(entity.get(field) != null) {
+                fieldToRemove = field;
+                if(entity.getLanguage()!= null && !entity.getLanguage().isEmpty()) {
+                    fieldToRemove = normalizeFieldName(fieldToRemove, entity.getLanguage());
+                }
+                valueToRemove = (String)entity.get(field);
+                break;
+            }
+        }
+
+        if(fieldToRemove != null && valueToRemove != null) {
+
+            LOGGER.debug("deleting content related to entity: " + entity.getId());
+            //Literal replacement: replaceAll would interpret '$' and '\' in the field name as regex replacement syntax
+            String deleteScript = DELETE_SCRIPT.replace("<fieldToRemove>", fieldToRemove);
+            Map<String,Object> params = new HashMap<>();
+            params.put("valueToRemove", valueToRemove);
+
+            UpdateRequest updateRequest = new UpdateRequest(indexName, docDef.getEntityField(), entity.getId())
+                    .script(new Script(Script.DEFAULT_SCRIPT_TYPE, Script.DEFAULT_SCRIPT_LANG,deleteScript,params));
+
+            try {
+                client.update(updateRequest).get();
+            }catch(Exception e) {
+                if( ExceptionUtils.getRootCause(e) instanceof DocumentMissingException) {
+                    //The {} placeholder is required, otherwise the entity argument is silently dropped from the message
+                    LOGGER.debug("Trying to delete values from a missing document. Ignoring deletion of entity: {}", entity);
+                } else {
+                    throw new TextIndexException("Unable to delete entity.", e);
+                }
+            }
+        }
+    }
+
+    /**
+     * Get an Entity given the subject Id
+     * @param uri the subject Id of the entity
+     * @return a map of field name and field values;
+     */
+    @Override
+    public Map<String, Node> get(String uri) {
+
+        GetResponse response;
+        Map<String, Node> result = new HashMap<>();
+
+        if(uri != null) {
+            response = client.prepareGet(indexName, docDef.getEntityField(), 
uri).get();
+            if(response != null && !response.isSourceEmpty()) {
+                String entityField = response.getId();
+                Node entity = NodeFactory.createURI(entityField) ;
+                result.put(docDef.getEntityField(), entity);
+                Map<String, Object> source = response.getSource();
+                for (String field: docDef.fields()) {
+                    Object fieldResponse = source.get(field);
+
+                    if(fieldResponse == null) {
+                        //We wont return it.
+                        continue;
+                    }
+                    else if(fieldResponse instanceof List<?>) {
+                        //We are storing the values of fields as a List always.
+                        //If there are values stored in the list, then we 
return the first value,
+                        // else we do not include the field in the returned 
Map of Field -> Node Mapping
+                        List<?> responseList = (List<?>)fieldResponse;
+                        if(responseList != null && responseList.size() > 0) {
+                            String fieldValue = (String)responseList.get(0);
+                            Node fieldNode = 
NodeFactoryExtra.createLiteralNode(fieldValue, null, null);
+                            result.put(field, fieldNode);
+                        }
+                    }
+                }
+            }
+        }
+
+        return result;
+    }
+
+
+    /**
+     * Query the index using {@code MAX_RESULTS} as the result limit.
+     * Delegates to the overload that takes an explicit limit.
+     */
+    @Override
+    public List<TextHit> query(Node property, String qs, String graphURI, String lang) {
+        final int defaultLimit = MAX_RESULTS;
+        return query(property, qs, graphURI, lang, defaultLimit);
+    }
+
+    /**
+     * Query the ElasticSearch for the given Node, with the given query String and limit.
+     * <p>
+     * Only document ids and scores are requested from the index (source fetching
+     * is switched off), so every returned hit carries a null literal.
+     *
+     * @param property the node property to make a search for; when null, no field name
+     *                 is passed on to the query-string construction
+     * @param qs the query string
+     * @param graphURI not consulted by this method
+     * @param lang the language handed to the query-string construction
+     * @param limit limit on the number of records to return
+     * @return List of {@link TextHit}s containing the documents that have been found
+     */
+    @Override
+    public List<TextHit> query(Node property, String qs, String graphURI, String lang, int limit) {
+        String fieldName = (property == null) ? null : property.getLocalName();
+        String esQuery = parse(fieldName, qs, lang);
+
+        LOGGER.debug("Querying ElasticSearch for QueryString: " + esQuery);
+        SearchResponse response = client.prepareSearch(indexName)
+                .setTypes(docDef.getEntityField())
+                .setQuery(QueryBuilders.queryStringQuery(esQuery))
+                // The source is skipped: only the document Id is of interest here,
+                // and leaving it out also speeds up the search.
+                .setFetchSource(false)
+                .setFrom(0).setSize(limit)
+                .get();
+
+        List<TextHit> hits = new ArrayList<>() ;
+        for (SearchHit match : response.getHits()) {
+            //It has been decided to return NULL literal values for now.
+            Node subject = TextQueryFuncs.stringToNode(match.getId());
+            hits.add(new TextHit(subject, match.getScore(), null));
+        }
+        return hits;
+    }
+
+    /**
+     * @return the entity definition held by this index
+     */
+    @Override
+    public EntityDefinition getDocDef() {
+        return this.docDef ;
+    }
+
+    /**
+     * Build the query string sent to ElasticSearch for a field/language combination.
+     * <ul>
+     *   <li>No field name (null or empty): the query string is used as given.</li>
+     *   <li>Field name, but language null or "none": the query is prefixed with the field name.</li>
+     *   <li>Field name and a specific language: the query is prefixed with the field name
+     *       followed by UNDERSCORE and the language, with DASH replaced by UNDERSCORE.</li>
+     *   <li>Field name and the ASTERISK language: the query is prefixed with the field name
+     *       followed by ASTERISK, unless the query already contains a backslash-escaped asterisk.</li>
+     * </ul>
+     * Finally every asterisk in the resulting string is prefixed with a backslash.
+     *
+     * @param fieldName name of the field to search in; may be null or empty
+     * @param qs the raw query string
+     * @param lang the language; may be null
+     * @return the query string to hand to ElasticSearch
+     */
+    private String parse(String fieldName, String qs, String lang) {
+        boolean hasField = fieldName != null && !fieldName.isEmpty();
+        if(hasField) {
+            boolean hasLang = lang != null && !lang.equals("none");
+            if(!hasLang) {
+                //No usable language, but field name is present
+                qs = fieldName + COLON + qs;
+            } else if(!ASTERISK.equals(lang)) {
+                String langField = fieldName + UNDERSCORE + lang.replaceAll(DASH, UNDERSCORE);
+                qs = langField + COLON + qs;
+            } else if(!qs.contains("\\*")) {
+                String anyLangField = fieldName + ASTERISK;
+                qs = anyLangField + COLON + qs;
+            }
+        }
+        //We do this to enable wild card search
+        return qs.replaceAll("\\*", "\\\\*");
+    }
+
+    /**
+     * Derive the language-specific field name: the field name, then UNDERSCORE,
+     * then the language with every DASH replaced by UNDERSCORE.
+     *
+     * @param fieldName the plain field name
+     * @param lang the language; callers are expected to have checked it is not null
+     * @return the language-specific field name
+     */
+    private String normalizeFieldName(String fieldName, String lang) {
+        StringBuilder normalized = new StringBuilder(fieldName);
+        String langSuffix = lang.replaceAll(DASH,UNDERSCORE);
+        normalized.append(UNDERSCORE);
+        normalized.append(langSuffix);
+        return normalized.toString();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
----------------------------------------------------------------------
diff --git 
a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
 
b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
index a14f8c6..80b2f7e 100644
--- 
a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
+++ 
b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
@@ -30,12 +30,14 @@ public class TextAssembler
         
         Assembler.general.implementWith(TextVocab.entityMap,        new 
EntityDefinitionAssembler()) ;
         Assembler.general.implementWith(TextVocab.textIndexLucene,  new 
TextIndexLuceneAssembler()) ;
+        Assembler.general.implementWith(TextVocab.textIndexES,  new 
TextIndexESAssembler()) ;
         Assembler.general.implementWith(TextVocab.standardAnalyzer, new 
StandardAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.simpleAnalyzer,   new 
SimpleAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.keywordAnalyzer,  new 
KeywordAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.lowerCaseKeywordAnalyzer, 
new LowerCaseKeywordAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.localizedAnalyzer, new 
LocalizedAnalyzerAssembler()) ;
         Assembler.general.implementWith(TextVocab.configurableAnalyzer, new 
ConfigurableAnalyzerAssembler()) ;
+
     }
 }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexESAssembler.java
----------------------------------------------------------------------
diff --git 
a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexESAssembler.java
 
b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexESAssembler.java
new file mode 100644
index 0000000..f677190
--- /dev/null
+++ 
b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexESAssembler.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler;
+
+import org.apache.jena.assembler.Assembler;
+import org.apache.jena.assembler.Mode;
+import org.apache.jena.assembler.assemblers.AssemblerBase;
+import org.apache.jena.query.text.*;
+import org.apache.jena.rdf.model.Resource;
+import org.apache.jena.sparql.util.graph.GraphUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.apache.jena.query.text.assembler.TextVocab.*;
+
+public class TextIndexESAssembler extends AssemblerBase {
+
+    private static Logger LOGGER      = 
LoggerFactory.getLogger(TextIndexESAssembler.class) ;
+
+    protected static final String COMMA = ",";
+    protected static final String COLON = ":";
+    /*
+    <#index> a :TextIndexES ;
+        text:serverList "127.0.0.1:9300,127.0.0.2:9400,127.0.0.3:9500" ; 
#Comma separated list of hosts:ports
+        text:clusterName "elasticsearch"
+        text:shards "1"
+        text:replicas "1"
+        text:entityMap <#endMap> ;
+        .
+    */
+    
+    @SuppressWarnings("resource")
+    @Override
+    public TextIndex open(Assembler a, Resource root, Mode mode) {
+        try {
+            String listOfHostsAndPorts = GraphUtils.getAsStringValue(root, 
pServerList) ;
+            if(listOfHostsAndPorts == null || listOfHostsAndPorts.isEmpty()) {
+                throw new TextIndexException("Mandatory property 
text:serverList (containing the comma-separated list of host:port) property is 
not specified. " +
+                        "An example value for the property: 127.0.0.1:9300");
+            }
+            String[] hosts = listOfHostsAndPorts.split(COMMA);
+            Map<String,Integer> hostAndPortMapping = new HashMap<>();
+            for(String host : hosts) {
+                String[] hostAndPort = host.split(COLON);
+                if(hostAndPort.length < 2) {
+                    LOGGER.error("Either the host or the port value is 
missing.Please specify the property in host:port format. " +
+                            "Both parts are mandatory. Ignoring this value. 
Moving to the next one.");
+                    continue;
+                }
+                hostAndPortMapping.put(hostAndPort[0], 
Integer.valueOf(hostAndPort[1]));
+            }
+
+            String clusterName = GraphUtils.getAsStringValue(root, 
pClusterName);
+            if(clusterName == null || clusterName.isEmpty()) {
+                LOGGER.warn("ClusterName property is not specified. Defaulting 
to 'elasticsearch'");
+                clusterName = "elasticsearch";
+            }
+
+            String numberOfShards = GraphUtils.getAsStringValue(root, pShards);
+            if(numberOfShards == null || numberOfShards.isEmpty()) {
+                LOGGER.warn("shards property is not specified. Defaulting to 
'1'");
+                numberOfShards = "1";
+            }
+
+            String replicationFactor = GraphUtils.getAsStringValue(root, 
pReplicas);
+            if(replicationFactor == null || replicationFactor.isEmpty()) {
+                LOGGER.warn("replicas property is not specified. Defaulting to 
'1'");
+                replicationFactor = "1";
+            }
+
+            String indexName = GraphUtils.getAsStringValue(root, pIndexName);
+            if(indexName == null || indexName.isEmpty()) {
+                LOGGER.warn("index Name property is not specified. Defaulting 
to 'jena-text'");
+                indexName = "jena-text";
+            }
+
+            Resource r = GraphUtils.getResourceValue(root, pEntityMap) ;
+            EntityDefinition docDef = (EntityDefinition)a.open(r) ;
+            TextIndexConfig config = new TextIndexConfig(docDef);
+
+            //We have to create an ES specific settings class in order to pass 
the Index Initialization specific properties.
+            ESSettings settings = new ESSettings().builder()
+                    .clusterName(clusterName)
+                    .hostAndPortMap(hostAndPortMapping)
+                    .shards(Integer.valueOf(numberOfShards))
+                    .replicas(Integer.valueOf(replicationFactor))
+                    .indexName(indexName)
+                    .build();
+
+            return TextDatasetFactory.createESIndex(config, settings) ;
+        } catch (Exception e) {
+            throw new TextIndexException("An exception occurred while trying 
to open/load the Assembler configuration. ", e);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git 
a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java 
b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 582f2ef..719d404 100644
--- 
a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ 
b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -78,5 +78,13 @@ public class TextVocab
     public static final Resource lowerCaseFilter    = Vocab.resource(NS, 
"LowerCaseFilter");
     public static final Resource asciiFoldingFilter = Vocab.resource(NS, 
"ASCIIFoldingFilter");
 
+    //Elasticsearch
+    public static final Resource textIndexES        = Vocab.resource(NS, 
"TextIndexES") ;
+    public static final Property pServerList        = Vocab.property(NS, 
"serverList");
+    public static final Property pClusterName       = Vocab.property(NS, 
"clusterName");
+    public static final Property pShards            = Vocab.property(NS, 
"shards");
+    public static final Property pReplicas          = Vocab.property(NS, 
"replicas");
+    public static final Property pIndexName          = Vocab.property(NS, 
"indexName");
+
 }
 

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/main/resources/data-es.ttl
----------------------------------------------------------------------
diff --git a/jena-text/src/main/resources/data-es.ttl 
b/jena-text/src/main/resources/data-es.ttl
new file mode 100644
index 0000000..8813e86
--- /dev/null
+++ b/jena-text/src/main/resources/data-es.ttl
@@ -0,0 +1,46 @@
+   # Licensed to the Apache Software Foundation (ASF) under one
+   # or more contributor license agreements.  See the NOTICE file
+   # distributed with this work for additional information
+   # regarding copyright ownership.  The ASF licenses this file
+   # to you under the Apache License, Version 2.0 (the
+   # "License"); you may not use this file except in compliance
+   # with the License.  You may obtain a copy of the License at
+   #
+   #     http://www.apache.org/licenses/LICENSE-2.0
+   #
+   # Unless required by applicable law or agreed to in writing, software
+   # distributed under the License is distributed on an "AS IS" BASIS,
+   # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   # See the License for the specific language governing permissions and
+   # limitations under the License.
+
+@prefix :        <http://example/> .
+@prefix xsd:     <http://www.w3.org/2001/XMLSchema#> .
+@prefix rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs:    <http://www.w3.org/2000/01/rdf-schema#> .
+
+:s a :Thing ;
+   :p :123 ;
+   rdfs:label "Thing 1"@en ;
+   rdfs:comment "It's a complicated comment"@en ;
+   rdfs:comment "this is another comment"@en ;
+   rdfs:comment "this is en GB comment"@en-GB ;
+   :id 123 .
+
+:s1 a :Thing ;
+   :p :123 ;
+   rdfs:label "Thing 2"@en ;
+   :id 123 .
+
+:s2 a :Thing ;
+   :p :123 ;
+   rdfs:label "Whatever"@en ;
+   :id 123 .
+
+   
+:x1 rdfs:label "X2 word"@en .
+:x1 rdfs:label "X1 another word" .
+:x2 rdfs:label "X2 word" .
+:x3 rdfs:label "X3 word" .
+:x3 rdfs:label "X4 word" .
+:x1 rdfs:label "X9 word" .
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/main/resources/text-config-es.ttl
----------------------------------------------------------------------
diff --git a/jena-text/src/main/resources/text-config-es.ttl 
b/jena-text/src/main/resources/text-config-es.ttl
new file mode 100644
index 0000000..7a03384
--- /dev/null
+++ b/jena-text/src/main/resources/text-config-es.ttl
@@ -0,0 +1,64 @@
+    # Licensed to the Apache Software Foundation (ASF) under one
+    # or more contributor license agreements.  See the NOTICE file
+    # distributed with this work for additional information
+    # regarding copyright ownership.  The ASF licenses this file
+    # to you under the Apache License, Version 2.0 (the
+    # "License"); you may not use this file except in compliance
+    # with the License.  You may obtain a copy of the License at
+    #
+    #     http://www.apache.org/licenses/LICENSE-2.0
+    #
+    # Unless required by applicable law or agreed to in writing, software
+    # distributed under the License is distributed on an "AS IS" BASIS,
+    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    # See the License for the specific language governing permissions and
+    # limitations under the License.
+
+ ## Example of a TDB dataset and text index for ElasticSearch
+
+@prefix :        <http://localhost/jena_example/#> .
+@prefix rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs:    <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix tdb:     <http://jena.hpl.hp.com/2008/tdb#> .
+@prefix ja:      <http://jena.hpl.hp.com/2005/11/Assembler#> .
+@prefix text:    <http://jena.apache.org/text#> .
+
+# TDB
+[] ja:loadClass "org.apache.jena.tdb.TDB" .
+tdb:DatasetTDB  rdfs:subClassOf  ja:RDFDataset .
+tdb:GraphTDB    rdfs:subClassOf  ja:Model .
+
+# Text
+[] ja:loadClass "org.apache.jena.query.text.TextQuery" .
+text:TextDataset      rdfs:subClassOf   ja:RDFDataset .
+text:TextIndexES      rdfs:subClassOf   text:TextIndex .
+
+## ---------------------------------------------------------------
+## This URI must be fixed - it's used to assemble the text dataset.
+
+:text_dataset rdf:type     text:TextDataset ;
+    text:dataset   <#dataset> ;
+    text:index     <#indexES> ;
+    .
+
+<#dataset> rdf:type      tdb:DatasetTDB ;
+    tdb:location "--mem--" ;
+    .
+
+<#indexES> a text:TextIndexES ;
+    text:serverList "127.0.0.1:9300" ; # A comma-separated list of Host:Port 
values of the ElasticSearch Cluster nodes.
+    text:clusterName "elasticsearch" ; # Name of the ElasticSearch Cluster. If 
not specified defaults to 'elasticsearch'
+    text:shards "1" ;                  # The number of shards for the index. 
Defaults to 1
+    text:replicas "1" ;                # The number of replicas for the index. 
Defaults to 1
+    text:indexName "jena-text" ;       # Name of the Index. defaults to 
jena-text
+    text:entityMap <#entMap> ;
+    .
+
+<#entMap> a text:EntityMap ;
+    text:entityField      "uri" ; # Defines the Document Type in the ES Index
+    text:defaultField     "text" ; ## Must be defined in the text:maps
+    text:map (
+         # rdfs:label            
+         [ text:field "text" ; text:predicate rdfs:label ]
+         [ text:field "comment" ; text:predicate rdfs:comment ]
+         ) .

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java 
b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 4cb6c21..7259b11 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -18,12 +18,12 @@
 
 package org.apache.jena.query.text;
 
-import org.apache.jena.query.text.assembler.TestEntityMapAssembler ;
-import org.apache.jena.query.text.assembler.TestTextDatasetAssembler ;
-import org.apache.jena.query.text.assembler.TestTextIndexLuceneAssembler ;
-import org.junit.runner.RunWith ;
-import org.junit.runners.Suite ;
-import org.junit.runners.Suite.SuiteClasses ;
+import org.apache.jena.query.text.assembler.TestEntityMapAssembler;
+import org.apache.jena.query.text.assembler.TestTextDatasetAssembler;
+import org.apache.jena.query.text.assembler.TestTextIndexLuceneAssembler;
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+import org.junit.runners.Suite.SuiteClasses;
 
 @RunWith(Suite.class)
 @SuiteClasses({
@@ -50,4 +50,4 @@ import org.junit.runners.Suite.SuiteClasses ;
 })
 
 public class TS_Text
-{ }
+{}

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/test/java/org/apache/jena/query/text/it/BaseESTest.java
----------------------------------------------------------------------
diff --git 
a/jena-text/src/test/java/org/apache/jena/query/text/it/BaseESTest.java 
b/jena-text/src/test/java/org/apache/jena/query/text/it/BaseESTest.java
new file mode 100644
index 0000000..195b4b3
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/it/BaseESTest.java
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jena.query.text.it;
+
+import org.apache.jena.query.text.EntityDefinition;
+import org.apache.jena.query.text.TextIndexConfig;
+import org.apache.jena.query.text.TextIndexES;
+import org.apache.jena.vocabulary.RDFS;
+import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
+import 
org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
+import org.elasticsearch.client.transport.TransportClient;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.transport.InetSocketTransportAddress;
+import org.elasticsearch.transport.client.PreBuiltTransportClient;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+/**
+ * Base Class for ElasticSearch based Integration tests.
+ * <p>
+ * Sets up one shared transport client and one shared {@link TextIndexES} for
+ * the whole test class, and creates/deletes the index around every test.
+ */
+public abstract class BaseESTest {
+
+    protected static TransportClient transportClient;
+
+    private final static String ADDRESS = "127.0.0.1";
+    private final static int PORT = 9500;
+    private final static String CLUSTER_NAME = "elasticsearch";
+    protected final static String INDEX_NAME = "jena-text";
+
+    protected static TextIndexES classToTest;
+
+    static final String DOC_TYPE = "text";
+
+    /**
+     * Make sure that we have connectivity to the locally running ES node.
+     * The ES is started during the pre-integration-test phase
+     */
+    @BeforeClass
+    public static void setupTransportClient() {
+
+        Settings settings = Settings.builder().put("cluster.name", CLUSTER_NAME).build();
+        transportClient = new PreBuiltTransportClient(settings);
+        // Check the client before it is used, not afterwards
+        Assert.assertNotNull("Transport client was not created successfully", transportClient);
+        try {
+            transportClient.addTransportAddress(
+                    new InetSocketTransportAddress(InetAddress.getByName(ADDRESS), PORT)
+            );
+        } catch (UnknownHostException ex) {
+            Assert.fail("Failed to create transport client: " + ex.getMessage());
+        }
+        classToTest = new TextIndexES(config(), transportClient, INDEX_NAME);
+
+    }
+
+    /**
+     * Make sure that we always start with a clean index.
+     * This will help keep the tests isolated
+     * @throws Exception if the index cannot be created or checked
+     */
+    @Before
+    public void beforeTest() throws Exception{
+        //Create Index
+        transportClient.admin().indices().prepareCreate(INDEX_NAME).get();
+        Assert.assertTrue(transportClient.admin().indices().exists(new IndicesExistsRequest(INDEX_NAME)).get().isExists());
+
+    }
+
+    /**
+     * Make sure that we always delete the index when completed with the test
+     * This will help keep the tests isolated
+     * @throws Exception if the index cannot be deleted
+     */
+    @After
+    public void afterTest() throws Exception{
+        //Delete Index
+        transportClient.admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).get();
+    }
+
+    /**
+     * Simple Config for text index
+     * @return a config whose entity definition uses {@code DOC_TYPE} as entity field,
+     *         maps "label" to rdfs:label and "comment" to rdfs:comment, and
+     *         uses "lang" as the language field
+     */
+    private static TextIndexConfig config() {
+        EntityDefinition ed = new EntityDefinition(DOC_TYPE, "label", RDFS.label);
+        ed.set("comment", RDFS.comment.asNode());
+        ed.setLangField("lang");
+        TextIndexConfig config = new TextIndexConfig(ed);
+        return config;
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/1c1325c5/jena-text/src/test/java/org/apache/jena/query/text/it/TextIndexESIT.java
----------------------------------------------------------------------
diff --git 
a/jena-text/src/test/java/org/apache/jena/query/text/it/TextIndexESIT.java 
b/jena-text/src/test/java/org/apache/jena/query/text/it/TextIndexESIT.java
new file mode 100644
index 0000000..c806d5b
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/it/TextIndexESIT.java
@@ -0,0 +1,306 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jena.query.text.it;
+
+import org.apache.jena.graph.Node;
+import org.apache.jena.query.text.Entity;
+import org.apache.jena.query.text.TextHit;
+import org.apache.jena.vocabulary.RDFS;
+import org.elasticsearch.action.get.GetResponse;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Integration test class for {@link org.apache.jena.query.text.TextIndexES}
+ */
+public class TextIndexESIT extends BaseESTest {
+
+    @Test
+    public void testAddEntity() {
+        String labelKey = "label";
+        String labelValue = "this is a sample Label";
+        Assert.assertNotNull(classToTest);
+        Entity entityToAdd = entity("http://example/x3", labelKey, labelValue);
+        GetResponse response = addEntity(entityToAdd);
+        Assert.assertTrue(response.getSource().containsKey(labelKey));
+        Assert.assertEquals(labelValue, ((List)response.getSource().get(labelKey)).get(0));
+    }
+
+    @Test
+    public void testDeleteEntity() {
+        testAddEntity();
+        String labelKey = "label";
+        String labelValue = "this is a sample Label";
+        //Now Delete the entity
+        classToTest.deleteEntity(entity("http://example/x3", labelKey, labelValue));
+
+        //Try to find it
+        GetResponse response = transportClient.prepareGet(INDEX_NAME, DOC_TYPE, "http://example/x3").get();
+        //It Should Exist
+        Assert.assertTrue(response.isExists());
+        //But the field value should now be empty
+        Assert.assertEquals("http://example/x3", response.getId());
+        Assert.assertTrue(response.getSource().containsKey(labelKey));
+        Assert.assertEquals(0, ((List)response.getSource().get(labelKey)).size());
+    }
+
+    @Test
+    public void testDeleteWhenNoneExists() {
+
+        GetResponse response = transportClient.prepareGet(INDEX_NAME, DOC_TYPE, "http://example/x3").get();
+        Assert.assertFalse(response.isExists());
+        Assert.assertNotNull(classToTest);
+        classToTest.deleteEntity(entity("http://example/x3", "label", "doesnt matter"));
+        response = transportClient.prepareGet(INDEX_NAME, DOC_TYPE, "http://example/x3").get();
+        Assert.assertFalse(response.isExists());
+
+    }
+
+    @Test
+    public void testQuery() {
+        testAddEntity();
+        // This will search for value "this" only in the label field
+        List<TextHit> result =  classToTest.query(RDFS.label.asNode(), "this", null, null, 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+
+        //This will search for value "this" across all the fields
+        result =  classToTest.query(null, "this", null, null, 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+
+        //This will search for value "this" in the label_en field, if it exists. In this case it doesnt so we should get zero results
+        result =  classToTest.query(RDFS.label.asNode(), "this", null, "en", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(0, result.size());
+
+    }
+
+    @Test
+    public void testQueryWhenNoneExists() {
+        List<TextHit> result =  classToTest.query(RDFS.label.asNode(), "this",null, null, 1);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(0, result.size());
+    }
+
+    @Test
+    public void testGet() {
+        testAddEntity();
+        //Now Get the same entity
+        Map<String, Node> response = classToTest.get("http://example/x3");
+        Assert.assertNotNull(response);
+        Assert.assertEquals(2, response.size());
+    }
+
+    @Test
+    public void testGetWhenNoneExists() {
+        Map<String, Node> response = classToTest.get("http://example/x3");
+        Assert.assertNotNull(response);
+        Assert.assertEquals(0, response.size());
+    }
+
+    /**
+     * This is an elaborate test that does the following:
+     * 1. Create a Document with ID: "http://example/x3" , label: Germany and lang:en
+     * 2. Makes sure the document is created successfully and is searchable based on the label
+     * 3. Next add another label to the same Entity with ID: "http://example/x3", label:Deutschland and lang:de
+     * 4. Makes sure that the document is searchable both with old (Germany) and new (Deutschland) values.
+     * 5. Next, it deletes the value: Germany created in step 1.
+     * 6. Makes sure that document is searchable with value: Deutschland but NOT with value: Germany
+     * 7. Finally, delete the value: Deutschland
+     * 8. The document should not be searchable with value: Deutschland
+     * 9. The document should still exist
+     */
+    @Test
+    public void testMultipleValuesinMultipleLanguages() throws InterruptedException{
+        addEntity(entity("http://example/x3", "label", "Germany", "en"));
+        List<TextHit> result =  classToTest.query(RDFS.label.asNode(), "Germany",null, "en", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+        //Next add another label to the same entity
+        addEntity(entity("http://example/x3", "label", "Deutschland", "de"));
+        //Query with old value
+        result =  classToTest.query(RDFS.label.asNode(), "Germany", null, "en", 10);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        //Query with new value
+        result =  classToTest.query(RDFS.label.asNode(), "Deutschland", null, "de", 10);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        //Query without lang value
+        result =  classToTest.query(RDFS.label.asNode(), "Deutschland", null, null, 10);
+        Assert.assertEquals(0, result.size());
+
+        //Query without lang value as *
+        result =  classToTest.query(RDFS.label.asNode(), "Deutschland", null, "*", 10);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        //Now lets delete the Germany label
+        classToTest.deleteEntity(entity("http://example/x3", "label", "Germany", "en"));
+
+        TimeUnit.SECONDS.sleep(1);
+
+        //We should NOT be able to find the entity using Germany label anymore
+        result =  classToTest.query(RDFS.label.asNode(), "Germany", null, null, 10);
+        Assert.assertEquals(0, result.size());
+
+        result =  classToTest.query(RDFS.label.asNode(), "Germany", null, "en", 10);
+        Assert.assertEquals(0, result.size());
+
+        //But we should be able to find it with the Deutschland label value
+        result =  classToTest.query(RDFS.label.asNode(), "Deutschland", null, "de", 10);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        //Now lets delete the Deutschland label
+        classToTest.deleteEntity(entity("http://example/x3", "label", "Deutschland", "de"));
+
+        //if the Delete and query happens almost instantly, then there are chances to still get false positives
+        //Thus sleeping for couple of seconds to give ES time to clean up.
+        TimeUnit.SECONDS.sleep(1);
+        //We should NOT be able to find the entity using Deutschland label anymore
+        result =  classToTest.query(RDFS.label.asNode(), "Deutschland", null, null, 10);
+        Assert.assertEquals(0, result.size());
+
+        result =  classToTest.query(RDFS.label.asNode(), "Deutschland", null, "de", 10);
+        Assert.assertEquals(0, result.size());
+
+
+    }
+
+    /**
+     * This test tries to save the same label values in different languages and makes sure that they are saved properly
+     */
+    @Test
+    public void testSameLabelInDifferentLanguages() throws InterruptedException{
+        addEntity(entity("http://example/x3", "label", "Berlin", "en"));
+        List<TextHit> result =  classToTest.query(RDFS.label.asNode(), "Berlin", null, "en", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        //Next add Berlin with 'de' language
+        addEntity(entity("http://example/x3", "label", "Berlin", "de"));
+        result =  classToTest.query(RDFS.label.asNode(), "Berlin", null, "de", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        //Now let's remove Berlin for language 'en'
+        classToTest.deleteEntity(entity("http://example/x3", "label", "Berlin", "en"));
+        //We should still be able to find the Document
+        result =  classToTest.query(RDFS.label.asNode(), "Berlin", null, "de", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        //Now Lets remove Berlin for language 'de'
+        classToTest.deleteEntity(entity("http://example/x3", "label", "Berlin", "de"));
+
+        //if the Delete and query happens almost instantly, then there are chances to still get false positives
+        //Thus sleeping for couple of seconds to give ES time to clean up
+        TimeUnit.SECONDS.sleep(1);
+        //Now we should NOT be able to find the document
+        result =  classToTest.query(RDFS.label.asNode(), "Berlin", null, "de", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(0, result.size());
+    }
+
+    @Test
+    public void testLanguageTagSubCodes() {
+        addEntity(entity("http://example/x3", "label", "color", "en-US"));
+        addEntity(entity("http://example/x3", "label", "colour", "en-GB"));
+
+        //Let's find it using color
+        List<TextHit> result =  classToTest.query(RDFS.label.asNode(), "color", null, "en-US", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        result =  classToTest.query(RDFS.label.asNode(), "color", null, "none", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(0, result.size());
+
+        //Next Lets find it using colour
+        result =  classToTest.query(RDFS.label.asNode(), "colour", null, "en-GB", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        result =  classToTest.query(RDFS.label.asNode(), "colour", null, "none", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(0, result.size());
+
+        //Next lets find it after specifying the lang parameter
+        result =  classToTest.query(RDFS.label.asNode(), "colour",null, "en*", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        result =  classToTest.query(RDFS.label.asNode(), "color",null, "en*", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        //Now lets find it by specifying exact lang values
+        result =  classToTest.query(RDFS.label.asNode(), "colour",null, "en-GB", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        result =  classToTest.query(RDFS.label.asNode(), "color",null, "en-US", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(1, result.size());
+        Assert.assertEquals("http://example/x3", result.get(0).getNode().getURI());
+
+        //We should NOT be able to find anything for wrong language
+        result =  classToTest.query(RDFS.label.asNode(), "color",null, "en-GB", 10);
+        Assert.assertNotNull(result);
+        Assert.assertEquals(0, result.size());
+
+
+    }
+    private Entity entity(String id, String fieldName, String fieldValue) {
+        return entity(id, fieldName, fieldValue, null);
+    }
+
+    private Entity entity(String id, String fieldName, String fieldValue, String lang) {
+        Entity entity = new Entity(id, null, lang, null);
+        entity.put(fieldName, fieldValue);
+        return entity;
+    }
+
+    private GetResponse addEntity(Entity entityToAdd) {
+        classToTest.addEntity(entityToAdd);
+        GetResponse response = transportClient.prepareGet(INDEX_NAME, DOC_TYPE, entityToAdd.getId()).get();
+
+        Assert.assertNotNull(response);
+        Assert.assertEquals(entityToAdd.getId(), response.getId());
+        return response;
+
+    }
+
+}

jena git commit: JENA-1305: Added support for ElasticSearch V 5.2.1

Reply via email to