Repository: atlas Updated Branches: refs/heads/master c746a0505 -> 8253653bc
ATLAS-2353: Fix for ordering of elements when using select with groupBy Signed-off-by: Ashutosh Mestry <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/atlas/repo Commit: http://git-wip-us.apache.org/repos/asf/atlas/commit/8253653b Tree: http://git-wip-us.apache.org/repos/asf/atlas/tree/8253653b Diff: http://git-wip-us.apache.org/repos/asf/atlas/diff/8253653b Branch: refs/heads/master Commit: 8253653bcadd52e2658749df362cf78616a546e1 Parents: c746a05 Author: Ashutosh Mestry <[email protected]> Authored: Thu Jan 11 14:01:07 2018 -0800 Committer: Ashutosh Mestry <[email protected]> Committed: Thu Jan 11 14:03:30 2018 -0800 ---------------------------------------------------------------------- .../java/org/apache/atlas/query/AtlasDSL.java | 2 +- .../java/org/apache/atlas/query/DSLVisitor.java | 1 + .../org/apache/atlas/query/GremlinClause.java | 7 +- .../atlas/query/GremlinQueryComposer.java | 46 +++++--- .../atlas/query/SelectClauseComposer.java | 113 ++++++++++++------- .../org/apache/atlas/query/DSLQueriesTest.java | 19 ++-- .../atlas/query/GremlinQueryComposerTest.java | 8 +- 7 files changed, 128 insertions(+), 68 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/atlas/blob/8253653b/repository/src/main/java/org/apache/atlas/query/AtlasDSL.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/query/AtlasDSL.java b/repository/src/main/java/org/apache/atlas/query/AtlasDSL.java index 60c6606..b771447 100644 --- a/repository/src/main/java/org/apache/atlas/query/AtlasDSL.java +++ b/repository/src/main/java/org/apache/atlas/query/AtlasDSL.java @@ -158,7 +158,7 @@ public class AtlasDSL { } public boolean needTransformation() { - return (hasGroupBy && hasSelect && hasOrderBy) || (hasGroupBy && hasOrderBy) || hasSelect; + return (hasGroupBy && hasSelect && hasOrderBy) || hasSelect; } } } http://git-wip-us.apache.org/repos/asf/atlas/blob/8253653b/repository/src/main/java/org/apache/atlas/query/DSLVisitor.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/query/DSLVisitor.java b/repository/src/main/java/org/apache/atlas/query/DSLVisitor.java index 4085b8a..75be85f 100644 --- a/repository/src/main/java/org/apache/atlas/query/DSLVisitor.java +++ b/repository/src/main/java/org/apache/atlas/query/DSLVisitor.java @@ -115,6 +115,7 @@ public class DSLVisitor extends AtlasDSLParserBaseVisitor<Void> { } selectClauseComposer.setItems(items); + selectClauseComposer.setAttributes(items); selectClauseComposer.setLabels(labels); gremlinQueryComposer.addSelect(selectClauseComposer); } http://git-wip-us.apache.org/repos/asf/atlas/blob/8253653b/repository/src/main/java/org/apache/atlas/query/GremlinClause.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/query/GremlinClause.java b/repository/src/main/java/org/apache/atlas/query/GremlinClause.java index 4923563..5a4ab4c 100644 --- a/repository/src/main/java/org/apache/atlas/query/GremlinClause.java +++ b/repository/src/main/java/org/apache/atlas/query/GremlinClause.java @@ -51,7 +51,8 @@ enum GremlinClause { SELECT_FN("def f(r){ t=[[%s]]; %s r.each({t.add([%s])}); t.unique(); }; "), SELECT_ONLY_AGG_FN("def f(r){ t=[[%s]]; %s t.add([%s]); t;}; "), SELECT_ONLY_AGG_GRP_FN("def f(l){ t=[[%s]]; l.get(0).each({k,r -> L:{ %s t.add([%s]); } }); t; }; "), - SELECT_MULTI_ATTR_GRP_FN("def f(l){ t=[[%s]]; l.get(0).each({k,r -> L:{ %s r.each({t.add([%s])}) } }); t.unique(); }; "), + // Optional sorting required here + SELECT_MULTI_ATTR_GRP_FN("def f(l){ h=[[%s]]; t=[]; l.get(0).each({k,r -> L:{ %s r.each({t.add([%s])}) } }); h.plus(t.unique()%s); }; "), INLINE_ASSIGNMENT("def %s=%s;"), INLINE_LIST_RANGE("[%s..<%s]"), INLINE_COUNT("r.size()"), @@ -60,6 +61,10 @@ enum GremlinClause { INLINE_MIN("r.min({it.value('%s')}).value('%s')"), INLINE_GET_PROPERTY("it.value('%s')"), INLINE_TRANSFORM_CALL("f(%s)"), + INLINE_DEFAULT_SORT(".sort{a,b -> a[0] <=> b[0]}"), + // idx of the tuple field to be sorted on + INLINE_SORT_ASC(".sort{a,b -> a[%s] <=> b[%s]}"), + INLINE_SORT_DESC(".sort{a,b -> b[%s] <=> a[%s]}"), V("V()"), VALUE_MAP("valueMap(%s)"); http://git-wip-us.apache.org/repos/asf/atlas/blob/8253653b/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java b/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java index 6ccd44c..76f31a3 100644 --- a/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java +++ b/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java @@ -23,21 +23,26 @@ import org.apache.atlas.type.AtlasEntityType; import org.apache.atlas.type.AtlasType; import org.apache.atlas.type.AtlasTypeRegistry; import org.apache.commons.lang.StringUtils; -import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.inject.Inject; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.TimeZone; import java.util.stream.Collectors; import java.util.stream.Stream; public class GremlinQueryComposer { private static final Logger LOG = LoggerFactory.getLogger(GremlinQueryComposer.class); + private final String EMPTY_STRING = ""; private static final String ISO8601_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; private final int DEFAULT_QUERY_RESULT_LIMIT = 25; private final int DEFAULT_QUERY_RESULT_OFFSET = 0; @@ -179,7 +184,7 @@ public class GremlinQueryComposer { public void addSelect(SelectClauseComposer selectClauseComposer) { process(selectClauseComposer); if (!(queryMetadata.hasOrderBy() && queryMetadata.hasGroupBy())) { - addSelectTransformation(selectClauseComposer); + addSelectTransformation(selectClauseComposer, null, false); } this.context.setSelectClauseComposer(selectClauseComposer); } @@ -200,6 +205,7 @@ public class GremlinQueryComposer { context.addAlias(scc.getLabel(i), ia.getQualifiedName()); } + // Update the qualifiedNames and the assignment expressions if (scc.updateAsApplicable(i, ia.getQualifiedName())) { continue; } @@ -309,10 +315,7 @@ public class GremlinQueryComposer { IdentifierHelper.Advice ia = getAdvice(name); if (queryMetadata.hasSelect() && queryMetadata.hasGroupBy()) { - addOrderByClause(ia.getQualifiedName(), isDesc); - moveToLast(GremlinClause.GROUP_BY); - - addSelectTransformation(this.context.selectClauseComposer); + addSelectTransformation(this.context.selectClauseComposer, ia.getQualifiedName(), isDesc); } else if (queryMetadata.hasGroupBy()) { addOrderByClause(ia.getQualifiedName(), isDesc); moveToLast(GremlinClause.GROUP_BY); @@ -321,7 +324,9 @@ public class GremlinQueryComposer { } } - private void addSelectTransformation(final SelectClauseComposer selectClauseComposer) { + private void addSelectTransformation(final SelectClauseComposer selectClauseComposer, + final String orderByQualifiedAttrName, + final boolean isDesc) { GremlinClause fn; if (selectClauseComposer.isSelectNoop) { fn = GremlinClause.SELECT_NOOP_FN; @@ -335,10 +340,25 @@ public class GremlinQueryComposer { GremlinClause.SELECT_ONLY_AGG_FN : GremlinClause.SELECT_FN; } - queryClauses.add(0, fn, - selectClauseComposer.getLabelHeader(), - selectClauseComposer.hasAssignmentExpr() ? selectClauseComposer.getAssignmentExprString(): "", - selectClauseComposer.getItemsString()); + if (StringUtils.isEmpty(orderByQualifiedAttrName)) { + queryClauses.add(0, fn, + selectClauseComposer.getLabelHeader(), + selectClauseComposer.hasAssignmentExpr() ? selectClauseComposer.getAssignmentExprString(): EMPTY_STRING, + selectClauseComposer.getItemsString(), EMPTY_STRING); + } else { + int itemIdx = selectClauseComposer.getAttrIndex(orderByQualifiedAttrName); + GremlinClause sortClause = GremlinClause.INLINE_DEFAULT_SORT; + if (itemIdx != -1) { + sortClause = isDesc ? GremlinClause.INLINE_SORT_DESC : GremlinClause.INLINE_SORT_ASC; + } + String idxStr = String.valueOf(itemIdx); + queryClauses.add(0, fn, + selectClauseComposer.getLabelHeader(), + selectClauseComposer.hasAssignmentExpr() ? selectClauseComposer.getAssignmentExprString(): EMPTY_STRING, + selectClauseComposer.getItemsString(), + sortClause.get(idxStr, idxStr) + ); + } queryClauses.add(GremlinClause.INLINE_TRANSFORM_CALL); } http://git-wip-us.apache.org/repos/asf/atlas/blob/8253653b/repository/src/main/java/org/apache/atlas/query/SelectClauseComposer.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/query/SelectClauseComposer.java b/repository/src/main/java/org/apache/atlas/query/SelectClauseComposer.java index f3f63dd..b93e223 100644 --- a/repository/src/main/java/org/apache/atlas/query/SelectClauseComposer.java +++ b/repository/src/main/java/org/apache/atlas/query/SelectClauseComposer.java @@ -24,7 +24,10 @@ import java.util.Map; import java.util.StringJoiner; class SelectClauseComposer { + public boolean isSelectNoop; + private String[] labels; + private String[] attributes; // Qualified names private String[] items; private Map<String, String> itemAssignmentExprs; @@ -33,7 +36,6 @@ class SelectClauseComposer { private int maxIdx = -1; private int minIdx = -1; private int aggCount = 0; - public boolean isSelectNoop; public SelectClauseComposer() {} @@ -42,7 +44,7 @@ class SelectClauseComposer { } public void setItems(final String[] items) { - this.items = items; + this.items = Arrays.copyOf(items, items.length); } public boolean updateAsApplicable(int currentIndex, String qualifiedName) { @@ -59,11 +61,77 @@ class SelectClauseComposer { } else if (currentIndex == getSumIdx()) { ret = assign(currentIndex, "sum", qualifiedName, GremlinClause.INLINE_ASSIGNMENT, GremlinClause.INLINE_SUM); + } else { + attributes[currentIndex] = qualifiedName; } return ret; } + public String[] getAttributes() { + return attributes; + } + + public void setAttributes(final String[] attributes) { + this.attributes = Arrays.copyOf(attributes, attributes.length); + } + + public boolean assign(int i, String qualifiedName, GremlinClause clause) { + items[i] = clause.get(qualifiedName); + return true; + } + + public String[] getLabels() { + return labels; + } + + public void setLabels(final String[] labels) { + this.labels = labels; + } + + public boolean hasAssignmentExpr() { + return itemAssignmentExprs != null && !itemAssignmentExprs.isEmpty(); + } + + public boolean onlyAggregators() { + return aggCount > 0 && aggCount == items.length; + } + + public String getLabelHeader() { + return getJoinedQuotedStr(getLabels()); + } + + public String getItemsString() { + return String.join(",", getItems()); + } + + public String getAssignmentExprString(){ + return String.join(" ", itemAssignmentExprs.values()); + } + + public String getItem(int i) { + return items[i]; + } + + public String getAttribute(int i) { + return attributes[i]; + } + + public String getLabel(int i) { + return labels[i]; + } + + public int getAttrIndex(String attr) { + int ret = -1; + for (int i = 0; i < attributes.length; i++) { + if (attributes[i].equals(attr)) { + ret = i; + break; + } + } + return ret; + } + private boolean assign(String item, String assignExpr) { if (itemAssignmentExprs == null) { itemAssignmentExprs = new LinkedHashMap<>(); @@ -73,11 +141,6 @@ class SelectClauseComposer { return true; } - public boolean assign(int i, String qualifiedName, GremlinClause clause) { - items[i] = clause.get(qualifiedName); - return true; - } - private boolean assign(int i, String s, String p1, GremlinClause clause) { items[i] = s; return assign(items[i], clause.get(s, p1)); @@ -125,34 +188,6 @@ class SelectClauseComposer { aggCount++; } - public String[] getLabels() { - return labels; - } - - public void setLabels(final String[] labels) { - this.labels = labels; - } - - public boolean hasAssignmentExpr() { - return itemAssignmentExprs != null && !itemAssignmentExprs.isEmpty(); - } - - public boolean onlyAggregators() { - return aggCount > 0 && aggCount == items.length; - } - - public String getLabelHeader() { - return getJoinedQuotedStr(getLabels()); - } - - public String getItemsString() { - return String.join(",", getItems()); - } - - public String getAssignmentExprString(){ - return String.join(" ", itemAssignmentExprs.values()); - } - private String getJoinedQuotedStr(String[] elements) { StringJoiner joiner = new StringJoiner(","); Arrays.stream(elements) @@ -160,12 +195,4 @@ class SelectClauseComposer { .forEach(joiner::add); return joiner.toString(); } - - public String getItem(int i) { - return items[i]; - } - - public String getLabel(int i) { - return labels[i]; - } } http://git-wip-us.apache.org/repos/asf/atlas/blob/8253653b/repository/src/test/java/org/apache/atlas/query/DSLQueriesTest.java ---------------------------------------------------------------------- diff --git a/repository/src/test/java/org/apache/atlas/query/DSLQueriesTest.java b/repository/src/test/java/org/apache/atlas/query/DSLQueriesTest.java index a8e8393..f3afa34 100644 --- a/repository/src/test/java/org/apache/atlas/query/DSLQueriesTest.java +++ b/repository/src/test/java/org/apache/atlas/query/DSLQueriesTest.java @@ -392,13 +392,18 @@ public class DSLQueriesTest extends BasicTestSetup { .withExpectedValues(1) .withExpectedValues(1) .withExpectedValues(1) }, - // FIXME -// { "from hive_db groupby (owner, name) select Asset.owner, Asset.name, count()", -// new FieldValueValidator() -// .withFieldNames("Asset.owner", "Asset.name", "count()") -// .withExpectedValues("Jane BI", "Reporting", 1) -// .withExpectedValues("Tim ETL", "Logging", 1) -// .withExpectedValues("John ETL", "Sales", 1) }, + { "from hive_db groupby (owner) select owner, name orderby owner", + new FieldValueValidator() + .withFieldNames("owner", "name") + .withExpectedValues("Jane BI", "Reporting") + .withExpectedValues("John ETL", "Sales") + .withExpectedValues("Tim ETL", "Logging") }, + { "from hive_db groupby (owner) select Asset.owner, Asset.name, count()", + new FieldValueValidator() + .withFieldNames("Asset.owner", "Asset.name", "count()") + .withExpectedValues("Jane BI", "Reporting", 1) + .withExpectedValues("Tim ETL", "Logging", 1) + .withExpectedValues("John ETL", "Sales", 1) }, { "from hive_db groupby (owner) select count() ", new FieldValueValidator() .withFieldNames("count()"). http://git-wip-us.apache.org/repos/asf/atlas/blob/8253653b/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java ---------------------------------------------------------------------- diff --git a/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java b/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java index 6faf510..295509b 100644 --- a/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java +++ b/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java @@ -108,14 +108,16 @@ public class GremlinQueryComposerTest { public void groupByMin() { verify("from DB groupby (owner) select min(name) orderby name limit 2", "def f(l){ t=[['min(name)']]; l.get(0).each({k,r -> L:{ def min=r.min({it.value('DB.name')}).value('DB.name'); t.add([min]); } }); t; }; " + - "f(g.V().has('__typeName', 'DB').order().by('DB.name').group().by('DB.owner').limit(2).toList())"); + "f(g.V().has('__typeName', 'DB').group().by('DB.owner').limit(2).toList())"); } @Test public void groupByOrderBy() { verify("Table groupby(owner) select name, owner, clusterName orderby name", - "def f(l){ t=[['name','owner','clusterName']]; l.get(0).each({k,r -> L:{ r.each({t.add([it.value('Table.name'),it.value('Table.owner'),it.value('Table.clusterName')])}) } }); t.unique(); }; " + - "f(g.V().has('__typeName', 'Table').order().by('Table.name').group().by('Table.owner').limit(25).toList())"); + "def f(l){ h=[['name','owner','clusterName']]; t=[]; " + + "l.get(0).each({k,r -> L:{ r.each({t.add([it.value('Table.name'),it.value('Table.owner'),it.value('Table.clusterName')])}) } }); " + + "h.plus(t.unique().sort{a,b -> a[0] <=> b[0]}); }; " + + "f(g.V().has('__typeName', 'Table').group().by('Table.owner').limit(25).toList())"); } @Test
