ATLAS-1961: Basic search improvement in use of Solr index for attribute filtering (# 4)


Project: http://git-wip-us.apache.org/repos/asf/atlas/repo
Commit: http://git-wip-us.apache.org/repos/asf/atlas/commit/e0fb7dc1
Tree: http://git-wip-us.apache.org/repos/asf/atlas/tree/e0fb7dc1
Diff: http://git-wip-us.apache.org/repos/asf/atlas/diff/e0fb7dc1

Branch: refs/heads/feature-odf
Commit: e0fb7dc17000724fada89cb573db9b4fa1654a2e
Parents: 3d3be40
Author: Madhan Neethiraj <mad...@apache.org>
Authored: Thu Jul 20 01:19:57 2017 -0700
Committer: Madhan Neethiraj <mad...@apache.org>
Committed: Thu Jul 20 14:23:18 2017 -0700

----------------------------------------------------------------------
 .../ClassificationSearchProcessor.java          | 35 +++++++++++++-------
 .../atlas/discovery/EntitySearchProcessor.java  | 26 ++++++++-------
 .../discovery/FullTextSearchProcessor.java      | 27 ++++++++++++---
 .../apache/atlas/discovery/SearchProcessor.java | 18 +++++-----
 4 files changed, 68 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/atlas/blob/e0fb7dc1/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java
index b6e0de5..745f9d7 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java
@@ -58,7 +58,7 @@ public class ClassificationSearchProcessor extends SearchProcessor {
         if (useSolrSearch) {
             StringBuilder solrQuery = new StringBuilder();
 
-            constructTypeTestQuery(solrQuery, classificationType, 
typeAndSubTypes);
+            constructTypeTestQuery(solrQuery, typeAndSubTypes);
             constructFilterQuery(solrQuery, classificationType, 
filterCriteria, solrAttributes);
 
             String solrQueryString = 
STRAY_AND_PATTERN.matcher(solrQuery).replaceAll(")");
@@ -95,16 +95,22 @@ public class ClassificationSearchProcessor extends 
SearchProcessor {
         }
 
         try {
-            final int startIdx  = context.getSearchParameters().getOffset();
-            final int limit     = context.getSearchParameters().getLimit();
-            int       qryOffset = nextProcessor == null ? startIdx : 0;
-            int       resultIdx = qryOffset;
+            final int     startIdx   = 
context.getSearchParameters().getOffset();
+            final int     limit      = 
context.getSearchParameters().getLimit();
+            final boolean activeOnly = 
context.getSearchParameters().getExcludeDeletedEntities();
+
+            // query to start at 0, even though startIdx can be higher - because few results in earlier retrieval could
+            // have been dropped: like non-active-entities or duplicate-entities (same entity pointed to by multiple
+            // classifications in the result)
+            //
+            // first 'startIdx' number of entries will be ignored
+            int qryOffset = 0;
+            int resultIdx = qryOffset;
 
             final Set<String>       processedGuids         = new HashSet<>();
             final List<AtlasVertex> entityVertices         = new ArrayList<>();
             final List<AtlasVertex> classificationVertices = new ArrayList<>();
 
-
             for (; ret.size() < limit; qryOffset += limit) {
                 entityVertices.clear();
                 classificationVertices.clear();
@@ -138,15 +144,20 @@ public class ClassificationSearchProcessor extends 
SearchProcessor {
 
                     for (AtlasEdge edge : edges) {
                         AtlasVertex entityVertex = edge.getOutVertex();
-                        String      guid         = 
AtlasGraphUtilsV1.getIdFromVertex(entityVertex);
 
-                        if (!processedGuids.contains(guid)) {
-                            if 
(!context.getSearchParameters().getExcludeDeletedEntities() || 
AtlasGraphUtilsV1.getState(entityVertex) == AtlasEntity.Status.ACTIVE) {
-                                entityVertices.add(entityVertex);
-                            }
+                        if (activeOnly && 
AtlasGraphUtilsV1.getState(entityVertex) != AtlasEntity.Status.ACTIVE) {
+                            continue;
+                        }
+
+                        String guid = 
AtlasGraphUtilsV1.getIdFromVertex(entityVertex);
 
-                            processedGuids.add(guid);
+                        if (processedGuids.contains(guid)) {
+                            continue;
                         }
+
+                        entityVertices.add(entityVertex);
+
+                        processedGuids.add(guid);
                     }
                 }
 

http://git-wip-us.apache.org/repos/asf/atlas/blob/e0fb7dc1/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java
index 6f629eb..a3525c9 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java
@@ -18,6 +18,7 @@
 package org.apache.atlas.discovery;
 
 import org.apache.atlas.model.discovery.SearchParameters.FilterCriteria;
+import org.apache.atlas.model.instance.AtlasEntity;
 import org.apache.atlas.repository.Constants;
 import org.apache.atlas.repository.graphdb.*;
 import org.apache.atlas.repository.store.graph.v1.AtlasGraphUtilsV1;
@@ -60,7 +61,7 @@ public class EntitySearchProcessor extends SearchProcessor {
         StringBuilder solrQuery = new StringBuilder();
 
         if (typeSearchBySolr) {
-            constructTypeTestQuery(solrQuery, entityType, typeAndSubTypes);
+            constructTypeTestQuery(solrQuery, typeAndSubTypes);
         }
 
         if (attrSearchBySolr) {
@@ -70,6 +71,10 @@ public class EntitySearchProcessor extends SearchProcessor {
         }
 
         if (solrQuery.length() > 0) {
+            if (context.getSearchParameters().getExcludeDeletedEntities()) {
+                constructStateTestQuery(solrQuery);
+            }
+
             String solrQueryString = 
STRAY_AND_PATTERN.matcher(solrQuery).replaceAll(")");
 
             solrQueryString = 
STRAY_OR_PATTERN.matcher(solrQueryString).replaceAll(")");
@@ -128,10 +133,14 @@ public class EntitySearchProcessor extends 
SearchProcessor {
         }
 
         try {
-            final int startIdx  = context.getSearchParameters().getOffset();
-            final int limit     = context.getSearchParameters().getLimit();
-            int       qryOffset = (nextProcessor == null && (graphQuery == 
null || indexQuery == null)) ? startIdx : 0;
-            int       resultIdx = qryOffset;
+            final int startIdx = context.getSearchParameters().getOffset();
+            final int limit    = context.getSearchParameters().getLimit();
+
+            // when subsequent filtering stages are involved, query should start at 0 even though startIdx can be higher
+            //
+            // first 'startIdx' number of entries will be ignored
+            int qryOffset = (nextProcessor != null || (graphQuery != null && 
indexQuery != null)) ? 0 : startIdx;
+            int resultIdx = qryOffset;
 
             final List<AtlasVertex> entityVertices = new ArrayList<>();
 
@@ -154,13 +163,6 @@ public class EntitySearchProcessor extends SearchProcessor 
{
                     while (idxQueryResult.hasNext()) {
                         AtlasVertex vertex = idxQueryResult.next().getVertex();
 
-                        // skip non-entity vertices
-                        if (!AtlasGraphUtilsV1.isEntityVertex(vertex)) {
-                            LOG.warn("EntitySearchProcessor.execute(): 
ignoring non-entity vertex (id={})", vertex.getId()); // might cause duplicate 
entries in result
-
-                            continue;
-                        }
-
                         entityVertices.add(vertex);
                     }
 

http://git-wip-us.apache.org/repos/asf/atlas/blob/e0fb7dc1/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java
index 22d91e0..1b19a0e 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java
@@ -18,6 +18,7 @@
 package org.apache.atlas.discovery;
 
 import org.apache.atlas.model.discovery.SearchParameters;
+import org.apache.atlas.model.instance.AtlasEntity;
 import org.apache.atlas.repository.Constants;
 import org.apache.atlas.repository.graph.GraphHelper;
 import org.apache.atlas.repository.graphdb.AtlasIndexQuery;
@@ -74,6 +75,10 @@ public class FullTextSearchProcessor extends SearchProcessor 
{
             }
         }
 
+        if (context.getSearchParameters().getExcludeDeletedEntities()) {
+            queryString.append(AND_STR).append("(ACTIVE)");
+        }
+
         queryString.append(")");
 
         indexQuery = context.getGraph().indexQuery(Constants.FULLTEXT_INDEX, 
queryString.toString());
@@ -94,10 +99,16 @@ public class FullTextSearchProcessor extends 
SearchProcessor {
         }
 
         try {
-            final int startIdx  = context.getSearchParameters().getOffset();
-            final int limit     = context.getSearchParameters().getLimit();
-            int       qryOffset = nextProcessor == null ? startIdx : 0;
-            int       resultIdx = qryOffset;
+            final int     startIdx   = 
context.getSearchParameters().getOffset();
+            final int     limit      = 
context.getSearchParameters().getLimit();
+            final boolean activeOnly = 
context.getSearchParameters().getExcludeDeletedEntities();
+
+            // query to start at 0, even though startIdx can be higher - because few results in earlier retrieval could
+            // have been dropped: like vertices of non-entity or non-active-entity
+            //
+            // first 'startIdx' number of entries will be ignored
+            int qryOffset = 0;
+            int resultIdx = qryOffset;
 
             final List<AtlasVertex> entityVertices = new ArrayList<>();
 
@@ -121,8 +132,14 @@ public class FullTextSearchProcessor extends 
SearchProcessor {
 
                     // skip non-entity vertices
                     if (!AtlasGraphUtilsV1.isEntityVertex(vertex)) {
-                        LOG.warn("FullTextSearchProcessor.execute(): ignoring 
non-entity vertex (id={})", vertex.getId()); // might cause duplicate entries 
in result
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("FullTextSearchProcessor.execute(): 
ignoring non-entity vertex (id={})", vertex.getId());
+                        }
+
+                        continue;
+                    }
 
+                    if (activeOnly && AtlasGraphUtilsV1.getState(vertex) != 
AtlasEntity.Status.ACTIVE) {
                         continue;
                     }
 

http://git-wip-us.apache.org/repos/asf/atlas/blob/e0fb7dc1/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
index 2e75dfe..7950127 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
@@ -181,7 +181,7 @@ public abstract class SearchProcessor {
         return ret;
     }
 
-    protected void constructTypeTestQuery(StringBuilder solrQuery, AtlasStructType type, Set<String> typeAndAllSubTypes) {
+    protected void constructTypeTestQuery(StringBuilder solrQuery, Set<String> typeAndAllSubTypes) {
         String typeAndSubtypesString = StringUtils.join(typeAndAllSubTypes, 
SPACE_STRING);
 
         if (CollectionUtils.isNotEmpty(typeAndAllSubTypes)) {
@@ -193,14 +193,6 @@ public abstract class SearchProcessor {
                     .append(typeAndSubtypesString)
                     .append(")");
         }
-
-        if (type instanceof AtlasEntityType && 
context.getSearchParameters().getExcludeDeletedEntities()) {
-            if (solrQuery.length() > 0) {
-                solrQuery.append(AND_STR);
-            }
-
-            
solrQuery.append("v.\"").append(Constants.STATE_PROPERTY_KEY).append("\":ACTIVE");
-        }
     }
 
     protected void constructFilterQuery(StringBuilder solrQuery, 
AtlasStructType type, FilterCriteria filterCriteria, Set<String> 
solrAttributes) {
@@ -219,6 +211,14 @@ public abstract class SearchProcessor {
         }
     }
 
+    protected void constructStateTestQuery(StringBuilder solrQuery) {
+        if (solrQuery.length() > 0) {
+            solrQuery.append(AND_STR);
+        }
+
+        solrQuery.append("v.\"").append(Constants.STATE_PROPERTY_KEY).append("\":ACTIVE");
+    }
+
     private String toSolrQuery(AtlasStructType type, FilterCriteria criteria, 
Set<String> solrAttributes, int level) {
         return toSolrQuery(type, criteria, solrAttributes, new 
StringBuilder(), level);
     }

Reply via email to