Repository: incubator-atlas Updated Branches: refs/heads/master c572e5412 -> 40e2e37d4
ATLAS-1630: fix incorrect pagination of results in basic search (#3) Project: http://git-wip-us.apache.org/repos/asf/incubator-atlas/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-atlas/commit/40e2e37d Tree: http://git-wip-us.apache.org/repos/asf/incubator-atlas/tree/40e2e37d Diff: http://git-wip-us.apache.org/repos/asf/incubator-atlas/diff/40e2e37d Branch: refs/heads/master Commit: 40e2e37d4fafe34ea58149a549f2106b6bf617a3 Parents: c572e54 Author: Madhan Neethiraj <[email protected]> Authored: Sat Mar 4 15:30:45 2017 -0800 Committer: Madhan Neethiraj <[email protected]> Committed: Sun Mar 5 12:10:27 2017 -0800 ---------------------------------------------------------------------- .../atlas/discovery/EntityDiscoveryService.java | 117 ++++++++++++++----- .../atlas/util/AtlasGremlin2QueryProvider.java | 4 +- .../atlas/util/AtlasGremlinQueryProvider.java | 1 - 3 files changed, 86 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/40e2e37d/repository/src/main/java/org/apache/atlas/discovery/EntityDiscoveryService.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/discovery/EntityDiscoveryService.java b/repository/src/main/java/org/apache/atlas/discovery/EntityDiscoveryService.java index e1f0acd..881c5a3 100644 --- a/repository/src/main/java/org/apache/atlas/discovery/EntityDiscoveryService.java +++ b/repository/src/main/java/org/apache/atlas/discovery/EntityDiscoveryService.java @@ -37,6 +37,7 @@ import org.apache.atlas.query.SelectExpressionHelper; import org.apache.atlas.repository.Constants; import org.apache.atlas.repository.MetadataRepository; import org.apache.atlas.repository.graph.AtlasGraphProvider; +import org.apache.atlas.repository.graph.GraphHelper; import org.apache.atlas.repository.graphdb.AtlasGraph; import org.apache.atlas.repository.graphdb.AtlasIndexQuery; import org.apache.atlas.repository.graphdb.AtlasIndexQuery.Result; @@ -64,6 +65,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import static org.apache.atlas.AtlasErrorCode.DISCOVERY_QUERY_FAILED; import static org.apache.atlas.AtlasErrorCode.UNKNOWN_TYPENAME; @@ -166,9 +168,9 @@ public class EntityDiscoveryService implements AtlasDiscoveryService { LOG.debug("Executing basic search query: {} with type: {} and classification: {}", query, typeName, classification); } - Map<String, Object> bindings = new HashMap<>(); - QueryParams params = validateSearchParams(limit, offset); - String basicQuery = "g.V()"; + final QueryParams params = validateSearchParams(limit, offset); + Set<String> typeNames = null; + Set<String> classificationNames = null; if (StringUtils.isNotEmpty(typeName)) { AtlasEntityType entityType = typeRegistry.getEntityTypeByName(typeName); @@ -177,9 +179,7 @@ public class EntityDiscoveryService implements AtlasDiscoveryService { throw new AtlasBaseException(UNKNOWN_TYPENAME, typeName); } - bindings.put("typeNames", entityType.getTypeAndAllSubTypes()); - - basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_TYPE_FILTER); + typeNames = entityType.getTypeAndAllSubTypes(); ret.setType(typeName); } @@ -191,50 +191,103 @@ public class EntityDiscoveryService implements AtlasDiscoveryService { throw new AtlasBaseException(CLASSIFICATION_NOT_FOUND, classification); } - bindings.put("traitNames", classificationType.getTypeAndAllSubTypes()); - - basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_CLASSIFICATION_FILTER); + classificationNames = classificationType.getTypeAndAllSubTypes(); ret.setClassification(classification); } + // if query was provided, perform indexQuery and filter for typeName & classification in memory; this approach + // results in a faster and accurate results than using CONTAINS/CONTAINS_PREFIX filter on entityText property if (StringUtils.isNotEmpty(query)) { - bindings.put("queryStr", query); + final String idxQuery = String.format("v.\"%s\":(%s)", Constants.ENTITY_TEXT_PROPERTY_KEY, query); + final Iterator<Result<?,?>> qryResult = graph.indexQuery(Constants.FULLTEXT_INDEX, idxQuery).vertices(); + final int startIdx = params.offset(); + final int resultSize = params.limit(); - basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_QUERY_FILTER); + int resultIdx = 0; - ret.setQueryText(query); - } + while (qryResult.hasNext()) { + AtlasVertex<?,?> vertex = qryResult.next().getVertex(); - bindings.put("offset", params.offset()); - bindings.put("limit", params.limit()); + String vertexTypeName = GraphHelper.getTypeName(vertex); - basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.TO_RANGE_LIST); + // skip non-entity vertices + if (StringUtils.isEmpty(vertexTypeName) || StringUtils.isEmpty(GraphHelper.getGuid(vertex))) { + continue; + } - ScriptEngine scriptEngine = graph.getGremlinScriptEngine(); + if (typeNames != null && !typeNames.contains(vertexTypeName)) { + continue; + } - try { - Object result = graph.executeGremlinScript(scriptEngine, bindings, basicQuery, false); + if (classificationNames != null) { + List<String> traitNames = GraphHelper.getTraitNames(vertex); - if (result instanceof List && CollectionUtils.isNotEmpty((List) result)) { - List queryResult = (List) result; - Object firstElement = queryResult.get(0); + if (CollectionUtils.isEmpty(traitNames) || + !CollectionUtils.containsAny(classificationNames, traitNames)) { + continue; + } + } - if (firstElement instanceof AtlasVertex) { - for (Object element : queryResult) { - if (element instanceof AtlasVertex) { - ret.addEntity(entityRetriever.toAtlasEntityHeader((AtlasVertex) element)); + resultIdx++; - } else { - LOG.warn("searchUsingBasicQuery({}): expected an AtlasVertex; found unexpected entry in result {}", basicQuery, element); + if (resultIdx <= startIdx) { + continue; + } + + AtlasEntityHeader header = entityRetriever.toAtlasEntityHeader(vertex); + + ret.addEntity(header); + + if (ret.getEntities().size() == resultSize) { + break; + } + } + } else { + final Map<String, Object> bindings = new HashMap<>(); + String basicQuery = "g.V()"; + + if (typeNames != null) { + bindings.put("typeNames", typeNames); + + basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_TYPE_FILTER); + } + + if (classificationNames != null) { + bindings.put("traitNames", classificationNames); + + basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_CLASSIFICATION_FILTER); + } + + bindings.put("startIdx", params.offset()); + bindings.put("endIdx", params.offset() + params.limit()); + + basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.TO_RANGE_LIST); + + ScriptEngine scriptEngine = graph.getGremlinScriptEngine(); + + try { + Object result = graph.executeGremlinScript(scriptEngine, bindings, basicQuery, false); + + if (result instanceof List && CollectionUtils.isNotEmpty((List) result)) { + List queryResult = (List) result; + Object firstElement = queryResult.get(0); + + if (firstElement instanceof AtlasVertex) { + for (Object element : queryResult) { + if (element instanceof AtlasVertex) { + ret.addEntity(entityRetriever.toAtlasEntityHeader((AtlasVertex) element)); + } else { + LOG.warn("searchUsingBasicQuery({}): expected an AtlasVertex; found unexpected entry in result {}", basicQuery, element); + } } } } + } catch (ScriptException e) { + throw new AtlasBaseException(DISCOVERY_QUERY_FAILED, basicQuery); + } finally { + graph.releaseGremlinScriptEngine(scriptEngine); } - } catch (ScriptException e) { - throw new AtlasBaseException(DISCOVERY_QUERY_FAILED, basicQuery); - } finally { - graph.releaseGremlinScriptEngine(scriptEngine); } return ret; http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/40e2e37d/repository/src/main/java/org/apache/atlas/util/AtlasGremlin2QueryProvider.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/util/AtlasGremlin2QueryProvider.java b/repository/src/main/java/org/apache/atlas/util/AtlasGremlin2QueryProvider.java index 8855246..9acc1a9 100644 --- a/repository/src/main/java/org/apache/atlas/util/AtlasGremlin2QueryProvider.java +++ b/repository/src/main/java/org/apache/atlas/util/AtlasGremlin2QueryProvider.java @@ -65,14 +65,12 @@ public class AtlasGremlin2QueryProvider extends AtlasGremlinQueryProvider { "(it.object.'__superTypeNames'.contains('DataSet')) : false)})." + "path().toList()"; - case BASIC_SEARCH_QUERY_FILTER: - return ".has('entityText', com.thinkaurelius.titan.core.attribute.Text.CONTAINS, queryStr)"; case BASIC_SEARCH_TYPE_FILTER: return ".has('__typeName', T.in, typeNames)"; case BASIC_SEARCH_CLASSIFICATION_FILTER: return ".has('__traitNames', T.in, traitNames)"; case TO_RANGE_LIST: - return " [offset..<limit].toList()"; + return " [startIdx..<endIdx].toList()"; } // Should never reach this point return null; http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/40e2e37d/repository/src/main/java/org/apache/atlas/util/AtlasGremlinQueryProvider.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/util/AtlasGremlinQueryProvider.java b/repository/src/main/java/org/apache/atlas/util/AtlasGremlinQueryProvider.java index ad8f073..633fad0 100644 --- a/repository/src/main/java/org/apache/atlas/util/AtlasGremlinQueryProvider.java +++ b/repository/src/main/java/org/apache/atlas/util/AtlasGremlinQueryProvider.java @@ -56,7 +56,6 @@ public abstract class AtlasGremlinQueryProvider { PARTIAL_LINEAGE, // Discovery Queries - BASIC_SEARCH_QUERY_FILTER, BASIC_SEARCH_TYPE_FILTER, BASIC_SEARCH_CLASSIFICATION_FILTER, TO_RANGE_LIST
