This is an automated email from the ASF dual-hosted git repository. sarath pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/atlas.git
commit 4a4d0e486fd96ca81bdeec2d6be5be3975f16aac Author: Bolke de Bruin <[email protected]> AuthorDate: Thu Aug 22 11:58:24 2019 +0200 Use fulltext indices for dsl search Per the JanusGraph documentation (https://docs.janusgraph.org/latest/index-parameters.html), strings are indexed as text by default. Atlas uses string search, which is suboptimal and leads to significant performance loss. This switches to fulltext predicates when available, which gives a significant speedup. Signed-off-by: Ashutosh Mestry <[email protected]> (cherry picked from commit 8792f162dfea3d471c7a0f5672984462ee434fba) --- .../java/org/apache/atlas/query/GremlinClause.java | 7 ++++--- .../apache/atlas/query/GremlinQueryComposer.java | 10 +++++++++- .../atlas/query/GremlinQueryComposerTest.java | 23 ++++++++++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/repository/src/main/java/org/apache/atlas/query/GremlinClause.java b/repository/src/main/java/org/apache/atlas/query/GremlinClause.java index ca8419a..55ccabd 100644 --- a/repository/src/main/java/org/apache/atlas/query/GremlinClause.java +++ b/repository/src/main/java/org/apache/atlas/query/GremlinClause.java @@ -43,9 +43,10 @@ enum GremlinClause { RANGE("range(%s, %s + %s)"), SELECT("select('%s')"), TO_LIST("toList()"), - TEXT_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textRegex(%s))"), - TEXT_PREFIX("has('%s', org.janusgraph.core.attribute.Text.textPrefix(%s))"), - TEXT_SUFFIX("has('%s', org.janusgraph.core.attribute.Text.textRegex(\".*\" + %s))"), + STRING_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textRegex(%s))"), + TEXT_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textContainsRegex(%s))"), + TEXT_PREFIX("has('%s', org.janusgraph.core.attribute.Text.textContainsPrefix(%s))"), + TEXT_SUFFIX("has('%s', org.janusgraph.core.attribute.Text.textContainsRegex(\".*\" + %s))"), TRAIT("outE('classifiedAs').has('__name', within('%s')).outV()"), ANY_TRAIT("or(has('__traitNames'), 
has('__propagatedTraitNames'))"), NO_TRAIT("and(hasNot('__traitNames'), hasNot('__propagatedTraitNames'))"), diff --git a/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java b/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java index e64a894..294dc00 100644 --- a/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java +++ b/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java @@ -171,6 +171,7 @@ public class GremlinQueryComposer { } String currentType = context.getActiveTypeName(); + IdentifierHelper.Info org = null; IdentifierHelper.Info lhsI = createInfo(lhs); if (!lhsI.isPrimitive()) { @@ -193,7 +194,14 @@ public class GremlinQueryComposer { rhs = addQuotesIfNecessary(lhsI, rhs); SearchParameters.Operator op = SearchParameters.Operator.fromString(operator); if (op == SearchParameters.Operator.LIKE) { - add(GremlinClause.TEXT_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs)); + final AtlasStructType.AtlasAttribute attribute = context.getActiveEntityType().getAttribute(lhsI.getAttributeName()); + final AtlasStructDef.AtlasAttributeDef.IndexType indexType = attribute.getAttributeDef().getIndexType(); + + if (indexType == AtlasStructDef.AtlasAttributeDef.IndexType.STRING) { + add(GremlinClause.STRING_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs)); + } else { + add(GremlinClause.TEXT_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs)); + } } else if (op == SearchParameters.Operator.IN) { add(GremlinClause.HAS_OPERATOR, getPropertyForClause(lhsI), "within", rhs); } else { diff --git a/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java b/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java index b73d427..ca32ffc 100644 --- a/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java +++ 
b/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java @@ -17,17 +17,23 @@ */ package org.apache.atlas.query; +import afu.org.checkerframework.checker.igj.qual.I; +import jnr.ffi.annotations.In; import org.apache.atlas.AtlasErrorCode; import org.apache.atlas.exception.AtlasBaseException; import org.apache.atlas.model.TypeCategory; +import org.apache.atlas.model.typedef.AtlasStructDef; import org.apache.atlas.query.antlr4.AtlasDSLParser; import org.apache.atlas.type.AtlasEntityType; +import org.apache.atlas.type.AtlasStructType; import org.apache.atlas.type.AtlasType; import org.apache.atlas.type.AtlasTypeRegistry; import org.apache.commons.lang.StringUtils; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import static org.mockito.Matchers.anyString; +import static org.mockito.Matchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; @@ -163,6 +169,8 @@ public class GremlinQueryComposerTest { verify("from DB where (name = \"Reporting\") select name, owner", getExpected(exSel, exMain)); verify("Table where Asset.name like \"Tab*\"", "g.V().has('__typeName', 'Table').has('Asset.__s_name', org.janusgraph.core.attribute.Text.textRegex(\"Tab.*\")).dedup().limit(25).toList()"); + verify("Table where owner like \"Tab*\"", + "g.V().has('__typeName', 'Table').has('Table.owner', org.janusgraph.core.attribute.Text.textContainsRegex(\"Tab.*\")).dedup().limit(25).toList()"); verify("from Table where (db.name = \"Reporting\")", "g.V().has('__typeName', 'Table').out('__Table.db').has('DB.name', eq(\"Reporting\")).dedup().in('__Table.db').dedup().limit(25).toList()"); } @@ -409,6 +417,21 @@ public class GremlinQueryComposerTest { } else { type = mock(AtlasEntityType.class); when(type.getTypeCategory()).thenReturn(TypeCategory.ENTITY); + + AtlasStructType.AtlasAttribute attr = mock(AtlasStructType.AtlasAttribute.class); + 
AtlasStructDef.AtlasAttributeDef def = mock(AtlasStructDef.AtlasAttributeDef.class); + when(def.getIndexType()).thenReturn(AtlasStructDef.AtlasAttributeDef.IndexType.DEFAULT); + when(attr.getAttributeDef()).thenReturn(def); + + AtlasStructType.AtlasAttribute attr_s = mock(AtlasStructType.AtlasAttribute.class); + AtlasStructDef.AtlasAttributeDef def_s = mock(AtlasStructDef.AtlasAttributeDef.class); + when(def_s.getIndexType()).thenReturn(AtlasStructDef.AtlasAttributeDef.IndexType.STRING); + + when(attr_s.getAttributeDef()).thenReturn(def_s); + + when(((AtlasEntityType) type).getAttribute(anyString())).thenReturn(attr); + when(((AtlasEntityType) type).getAttribute(eq("name"))).thenReturn(attr_s); + } if(typeName.equals("PIII")) {
