ATLAS-1961: Basic search improvement in use of Solr index for attribute filtering (# 4)
Project: http://git-wip-us.apache.org/repos/asf/atlas/repo Commit: http://git-wip-us.apache.org/repos/asf/atlas/commit/e0fb7dc1 Tree: http://git-wip-us.apache.org/repos/asf/atlas/tree/e0fb7dc1 Diff: http://git-wip-us.apache.org/repos/asf/atlas/diff/e0fb7dc1 Branch: refs/heads/feature-odf Commit: e0fb7dc17000724fada89cb573db9b4fa1654a2e Parents: 3d3be40 Author: Madhan Neethiraj <mad...@apache.org> Authored: Thu Jul 20 01:19:57 2017 -0700 Committer: Madhan Neethiraj <mad...@apache.org> Committed: Thu Jul 20 14:23:18 2017 -0700 ---------------------------------------------------------------------- .../ClassificationSearchProcessor.java | 35 +++++++++++++------- .../atlas/discovery/EntitySearchProcessor.java | 26 ++++++++------- .../discovery/FullTextSearchProcessor.java | 27 ++++++++++++--- .../apache/atlas/discovery/SearchProcessor.java | 18 +++++----- 4 files changed, 68 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/atlas/blob/e0fb7dc1/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java index b6e0de5..745f9d7 100644 --- a/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java +++ b/repository/src/main/java/org/apache/atlas/discovery/ClassificationSearchProcessor.java @@ -58,7 +58,7 @@ public class ClassificationSearchProcessor extends SearchProcessor { if (useSolrSearch) { StringBuilder solrQuery = new StringBuilder(); - constructTypeTestQuery(solrQuery, classificationType, typeAndSubTypes); + constructTypeTestQuery(solrQuery, typeAndSubTypes); constructFilterQuery(solrQuery, classificationType, filterCriteria, solrAttributes); String solrQueryString = STRAY_AND_PATTERN.matcher(solrQuery).replaceAll(")"); @@ -95,16 +95,22 @@ public class ClassificationSearchProcessor extends SearchProcessor { } try { - final int startIdx = context.getSearchParameters().getOffset(); - final int limit = context.getSearchParameters().getLimit(); - int qryOffset = nextProcessor == null ? startIdx : 0; - int resultIdx = qryOffset; + final int startIdx = context.getSearchParameters().getOffset(); + final int limit = context.getSearchParameters().getLimit(); + final boolean activeOnly = context.getSearchParameters().getExcludeDeletedEntities(); + + // query to start at 0, even though startIdx can be higher - because few results in earlier retrieval could + // have been dropped: like non-active-entities or duplicate-entities (same entity pointed to by multiple + // classifications in the result) + // + // first 'startIdx' number of entries will be ignored + int qryOffset = 0; + int resultIdx = qryOffset; final Set<String> processedGuids = new HashSet<>(); final List<AtlasVertex> entityVertices = new ArrayList<>(); final List<AtlasVertex> classificationVertices = new ArrayList<>(); - for (; ret.size() < limit; qryOffset += limit) { entityVertices.clear(); classificationVertices.clear(); @@ -138,15 +144,20 @@ public class ClassificationSearchProcessor extends SearchProcessor { for (AtlasEdge edge : edges) { AtlasVertex entityVertex = edge.getOutVertex(); - String guid = AtlasGraphUtilsV1.getIdFromVertex(entityVertex); - if (!processedGuids.contains(guid)) { - if (!context.getSearchParameters().getExcludeDeletedEntities() || AtlasGraphUtilsV1.getState(entityVertex) == AtlasEntity.Status.ACTIVE) { - entityVertices.add(entityVertex); - } + if (activeOnly && AtlasGraphUtilsV1.getState(entityVertex) != AtlasEntity.Status.ACTIVE) { + continue; + } + + String guid = AtlasGraphUtilsV1.getIdFromVertex(entityVertex); - processedGuids.add(guid); + if (processedGuids.contains(guid)) { + continue; } + + entityVertices.add(entityVertex); + + processedGuids.add(guid); } } http://git-wip-us.apache.org/repos/asf/atlas/blob/e0fb7dc1/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java index 6f629eb..a3525c9 100644 --- a/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java +++ b/repository/src/main/java/org/apache/atlas/discovery/EntitySearchProcessor.java @@ -18,6 +18,7 @@ package org.apache.atlas.discovery; import org.apache.atlas.model.discovery.SearchParameters.FilterCriteria; +import org.apache.atlas.model.instance.AtlasEntity; import org.apache.atlas.repository.Constants; import org.apache.atlas.repository.graphdb.*; import org.apache.atlas.repository.store.graph.v1.AtlasGraphUtilsV1; @@ -60,7 +61,7 @@ public class EntitySearchProcessor extends SearchProcessor { StringBuilder solrQuery = new StringBuilder(); if (typeSearchBySolr) { - constructTypeTestQuery(solrQuery, entityType, typeAndSubTypes); + constructTypeTestQuery(solrQuery, typeAndSubTypes); } if (attrSearchBySolr) { @@ -70,6 +71,10 @@ public class EntitySearchProcessor extends SearchProcessor { } if (solrQuery.length() > 0) { + if (context.getSearchParameters().getExcludeDeletedEntities()) { + constructStateTestQuery(solrQuery); + } + String solrQueryString = STRAY_AND_PATTERN.matcher(solrQuery).replaceAll(")"); solrQueryString = STRAY_OR_PATTERN.matcher(solrQueryString).replaceAll(")"); @@ -128,10 +133,14 @@ public class EntitySearchProcessor extends SearchProcessor { } try { - final int startIdx = context.getSearchParameters().getOffset(); - final int limit = context.getSearchParameters().getLimit(); - int qryOffset = (nextProcessor == null && (graphQuery == null || indexQuery == null)) ? startIdx : 0; - int resultIdx = qryOffset; + final int startIdx = context.getSearchParameters().getOffset(); + final int limit = context.getSearchParameters().getLimit(); + + // when subsequent filtering stages are involved, query should start at 0 even though startIdx can be higher + // + // first 'startIdx' number of entries will be ignored + int qryOffset = (nextProcessor != null || (graphQuery != null && indexQuery != null)) ? 0 : startIdx; + int resultIdx = qryOffset; final List<AtlasVertex> entityVertices = new ArrayList<>(); @@ -154,13 +163,6 @@ public class EntitySearchProcessor extends SearchProcessor { while (idxQueryResult.hasNext()) { AtlasVertex vertex = idxQueryResult.next().getVertex(); - // skip non-entity vertices - if (!AtlasGraphUtilsV1.isEntityVertex(vertex)) { - LOG.warn("EntitySearchProcessor.execute(): ignoring non-entity vertex (id={})", vertex.getId()); // might cause duplicate entries in result - - continue; - } - entityVertices.add(vertex); } http://git-wip-us.apache.org/repos/asf/atlas/blob/e0fb7dc1/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java index 22d91e0..1b19a0e 100644 --- a/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java +++ b/repository/src/main/java/org/apache/atlas/discovery/FullTextSearchProcessor.java @@ -18,6 +18,7 @@ package org.apache.atlas.discovery; import org.apache.atlas.model.discovery.SearchParameters; +import org.apache.atlas.model.instance.AtlasEntity; import org.apache.atlas.repository.Constants; import org.apache.atlas.repository.graph.GraphHelper; import org.apache.atlas.repository.graphdb.AtlasIndexQuery; @@ -74,6 +75,10 @@ public class FullTextSearchProcessor extends SearchProcessor { } } + if (context.getSearchParameters().getExcludeDeletedEntities()) { + queryString.append(AND_STR).append("(ACTIVE)"); + } + queryString.append(")"); indexQuery = context.getGraph().indexQuery(Constants.FULLTEXT_INDEX, queryString.toString()); @@ -94,10 +99,16 @@ public class FullTextSearchProcessor extends SearchProcessor { } try { - final int startIdx = context.getSearchParameters().getOffset(); - final int limit = context.getSearchParameters().getLimit(); - int qryOffset = nextProcessor == null ? startIdx : 0; - int resultIdx = qryOffset; + final int startIdx = context.getSearchParameters().getOffset(); + final int limit = context.getSearchParameters().getLimit(); + final boolean activeOnly = context.getSearchParameters().getExcludeDeletedEntities(); + + // query to start at 0, even though startIdx can be higher - because few results in earlier retrieval could + // have been dropped: like vertices of non-entity or non-active-entity + // + // first 'startIdx' number of entries will be ignored + int qryOffset = 0; + int resultIdx = qryOffset; final List<AtlasVertex> entityVertices = new ArrayList<>(); @@ -121,8 +132,14 @@ public class FullTextSearchProcessor extends SearchProcessor { // skip non-entity vertices if (!AtlasGraphUtilsV1.isEntityVertex(vertex)) { - LOG.warn("FullTextSearchProcessor.execute(): ignoring non-entity vertex (id={})", vertex.getId()); // might cause duplicate entries in result + if (LOG.isDebugEnabled()) { + LOG.debug("FullTextSearchProcessor.execute(): ignoring non-entity vertex (id={})", vertex.getId()); + } + + continue; + } + if (activeOnly && AtlasGraphUtilsV1.getState(vertex) != AtlasEntity.Status.ACTIVE) { continue; } http://git-wip-us.apache.org/repos/asf/atlas/blob/e0fb7dc1/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java index 2e75dfe..7950127 100644 --- a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java +++ b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java @@ -181,7 +181,7 @@ public abstract class SearchProcessor { return ret; } - protected void constructTypeTestQuery(StringBuilder solrQuery, AtlasStructType type, Set<String> typeAndAllSubTypes) { + protected void constructTypeTestQuery(StringBuilder solrQuery, Set<String> typeAndAllSubTypes) { String typeAndSubtypesString = StringUtils.join(typeAndAllSubTypes, SPACE_STRING); if (CollectionUtils.isNotEmpty(typeAndAllSubTypes)) { @@ -193,14 +193,6 @@ public abstract class SearchProcessor { .append(typeAndSubtypesString) .append(")"); } - - if (type instanceof AtlasEntityType && context.getSearchParameters().getExcludeDeletedEntities()) { - if (solrQuery.length() > 0) { - solrQuery.append(AND_STR); - } - - solrQuery.append("v.\"").append(Constants.STATE_PROPERTY_KEY).append("\":ACTIVE"); - } } protected void constructFilterQuery(StringBuilder solrQuery, AtlasStructType type, FilterCriteria filterCriteria, Set<String> solrAttributes) { @@ -219,6 +211,14 @@ public abstract class SearchProcessor { } } + protected void constructStateTestQuery(StringBuilder solrQuery) { + if (solrQuery.length() > 0) { + solrQuery.append(AND_STR); + } + + solrQuery.append("v.\"").append(Constants.STATE_PROPERTY_KEY).append("\":ACTIVE"); + } + private String toSolrQuery(AtlasStructType type, FilterCriteria criteria, Set<String> solrAttributes, int level) { return toSolrQuery(type, criteria, solrAttributes, new StringBuilder(), level); }