Repository: incubator-atlas

Updated Branches:
  refs/heads/master 53574720c -> 574da752d
ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)

Project: http://git-wip-us.apache.org/repos/asf/incubator-atlas/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-atlas/commit/574da752
Tree: http://git-wip-us.apache.org/repos/asf/incubator-atlas/tree/574da752
Diff: http://git-wip-us.apache.org/repos/asf/incubator-atlas/diff/574da752

Branch: refs/heads/master
Commit: 574da752d81a49c3200ef66e2b71c560f06b33c7
Parents: 5357472
Author: Suma Shivaprasad <[email protected]>
Authored: Thu Apr 28 21:30:11 2016 -0700
Committer: Suma Shivaprasad <[email protected]>
Committed: Thu Apr 28 21:30:11 2016 -0700

----------------------------------------------------------------------
 docs/src/site/twiki/Search.twiki                |  18 +-
 release-log.txt                                 |   1 +
 .../org/apache/atlas/query/Expressions.scala    |  30 ++
 .../apache/atlas/query/GremlinEvaluator.scala   |  23 +-
 .../org/apache/atlas/query/GremlinQuery.scala   |  70 +++-
 .../org/apache/atlas/query/QueryParser.scala    |  81 ++++-
 .../scala/org/apache/atlas/query/Resolver.scala |   8 +
 .../GraphBackedDiscoveryServiceTest.java        | 318 +++++++++++++++++++
 8 files changed, 513 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/docs/src/site/twiki/Search.twiki
----------------------------------------------------------------------
diff --git a/docs/src/site/twiki/Search.twiki b/docs/src/site/twiki/Search.twiki
index 3a1d00f..58c9238 100644
--- a/docs/src/site/twiki/Search.twiki
+++ b/docs/src/site/twiki/Search.twiki
@@ -13,7 +13,7 @@ queryWithPath: query ~ opt(WITHPATH)
 
 query: rep1sep(singleQuery, opt(COMMA))
 
-singleQuery: singleQrySrc ~ opt(loopExpression) ~ opt(selectClause)
+singleQuery: singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ~ opt(orderby) ~ opt(limitOffset)
 
 singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) |
     WHERE ~ (expr ^? notIdExpression) |
@@ -22,6 +22,14 @@ singleQrySrc = FROM ~ fromSrc ~ opt(WHERE) ~ opt(expr ^? notIdExpression) |
 
 fromSrc: identifier ~ AS ~ alias | identifier
 
+orderby: ORDERBY ~ order ~ opt (sortOrder)
+
+limitOffset: LIMIT ~ lmt ~ opt (offset)
+
+offset: OFFSET ~ offsetValue
+
+sortOrder = ASC | DESC
+
 loopExpression: LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias)
 
 selectClause: SELECT ~ rep1sep(selectExpression, COMMA)
@@ -79,6 +87,9 @@ Language Notes:
    * The _!WithPath_ clause can be used with transitive closure queries to retrieve the Path that connects the two related Entities. (We also provide a higher level interface for Closure Queries see scaladoc for 'org.apache.atlas.query.ClosureQuery')
+   * ORDERBY is optional. The order by attribute should be specified in single quotes ('). When an order by clause is specified, case-insensitive sorting is done in ascending order.
+     For sorting in descending order, specify 'DESC' after the order by attribute. If no order by clause is specified, no default sorting is applied.
+   * LIMIT is optional. It limits the maximum number of objects to be fetched, starting from the specified optional offset. If no offset is specified, the count starts from the beginning.
    * There are a couple of Predicate functions different from SQL:
       * _is_ or _isa_ can be used to filter Entities that have a particular Trait.
       * _has_ can be used to filter Entities that have a value for a particular Attribute.
@@ -89,6 +100,11 @@ Language Notes:
    * from DB
    * DB where name="Reporting" select name, owner
+   * DB where name="Reporting" select name, owner orderby 'name'
+   * DB where name="Reporting" select name limit 10
+   * DB where name="Reporting" select name, owner limit 10 offset 0
+   * DB where name="Reporting" select name, owner orderby 'name' limit 10 offset 5
+   * DB where name="Reporting" select name, owner orderby 'name' desc limit 10 offset 5
    * DB has name
    * DB is !JdbcAccess
    * Column where Column isa PII
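Before the code-level changes, a quick end-to-end illustration of the new clauses. This sketch is not part of the patch: it drives the same DiscoveryService.searchByDSL entry point the new tests below exercise, and the Jettison JSON handling mirrors those tests.

import org.apache.atlas.discovery.DiscoveryService
import org.codehaus.jettison.json.JSONObject

// Case-insensitive ascending sort on owner, then a window over the matches:
// offset 1 skips the first sorted match, limit 2 caps the number returned.
def printSortedWindow(discoveryService: DiscoveryService): Unit = {
    val dsl = "hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 1"
    val results = new JSONObject(discoveryService.searchByDSL(dsl))
    println(results.getJSONArray("rows")) // at most 2 rows
}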
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index 10cf434..3cf823f 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -17,6 +17,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
 ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)
 
 ALL CHANGES:
+ATLAS-435 Add ORDER BY and Limit to search DSL (neerugupta via sumasai)
 ATLAS-543 Entity Instance requests should not require ID element for new Entities (harishjp via shwethags)
 ATLAS-681 update committer/ppmc members in the pom.xml (sneethiraj via shwethags)
 ATLAS-616 Resolve OOM - Zookeeper throws exceptions when trying to fire DSL queries at Atlas at large scale. (yhemanth via sumasai)


http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/main/scala/org/apache/atlas/query/Expressions.scala
----------------------------------------------------------------------
diff --git a/repository/src/main/scala/org/apache/atlas/query/Expressions.scala b/repository/src/main/scala/org/apache/atlas/query/Expressions.scala
index a5dfa9f..2405750 100755
--- a/repository/src/main/scala/org/apache/atlas/query/Expressions.scala
+++ b/repository/src/main/scala/org/apache/atlas/query/Expressions.scala
@@ -330,6 +330,10 @@ object Expressions {
         def instance() = new InstanceExpression(this)
 
         def path() = new PathExpression(this)
+
+        def limit(lmt: Literal[Integer], offset : Literal[Integer]) = new LimitExpression(this, lmt, offset)
+
+        def order(odr: String, asc: Boolean) = new OrderExpression(this, odr, asc)
     }
 
     trait BinaryNode {
@@ -766,4 +770,30 @@ object Expressions {
 
         override def toString = s"$child withPath"
     }
+
+    case class LimitExpression(child: Expression, limit: Literal[Integer], offset: Literal[Integer]) extends Expression with UnaryNode {
+
+        override def toString = s"$child limit $limit offset $offset "
+
+        lazy val dataType = {
+            if (!resolved) {
+                throw new UnresolvedException(this,
+                    s"datatype. Can not resolve due to unresolved children")
+            }
+            child.dataType
+        }
+    }
+
+    case class OrderExpression(child: Expression, odr: String, asc: Boolean) extends Expression with UnaryNode {
+
+        override def toString = s"$child order $odr asc $asc"
+
+        lazy val dataType = {
+            if (!resolved) {
+                throw new UnresolvedException(this,
+                    s"datatype. Can not resolve due to unresolved children")
+            }
+            child.dataType
+        }
+    }
 }
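Both new nodes are plain UnaryNode wrappers, so a parsed query simply stacks them on top of the source expression. A sketch of what that looks like; the Either-returning shape of QueryParser.apply is an assumption based on its existing callers, which are not shown in this mail.

import org.apache.atlas.query.QueryParser

// With no select clause, "orderby ... limit ... offset ..." parses to
// LimitExpression(OrderExpression(<filtered DB>)): order is applied first,
// then limit/offset wraps it (see singleQuery in QueryParser.scala below).
QueryParser("DB where name=\"Reporting\" orderby 'name' limit 10 offset 5") match {
    case Right(expr) => println(expr) // toString renders "... order name asc true limit 10 offset 5 "
    case Left(err) => println(s"parse error: $err")
}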
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/main/scala/org/apache/atlas/query/GremlinEvaluator.scala
----------------------------------------------------------------------
diff --git a/repository/src/main/scala/org/apache/atlas/query/GremlinEvaluator.scala b/repository/src/main/scala/org/apache/atlas/query/GremlinEvaluator.scala
index edb190d..7de03c3 100755
--- a/repository/src/main/scala/org/apache/atlas/query/GremlinEvaluator.scala
+++ b/repository/src/main/scala/org/apache/atlas/query/GremlinEvaluator.scala
@@ -19,7 +19,7 @@ package org.apache.atlas.query
 
 import javax.script.{Bindings, ScriptEngine, ScriptEngineManager}
-
+import org.apache.atlas.query.Expressions._
 import com.thinkaurelius.titan.core.TitanGraph
 import com.tinkerpop.pipes.util.structures.Row
 import org.apache.atlas.query.TypeUtils.ResultWithPathStruct
@@ -27,8 +27,8 @@ import org.apache.atlas.typesystem.json._
 import org.apache.atlas.typesystem.types._
 import org.json4s._
 import org.json4s.native.Serialization._
-
 import scala.language.existentials
+import org.apache.atlas.query.Expressions._
 
 case class GremlinQueryResult(query: String,
                               resultDataType: IDataType[_],
@@ -82,7 +82,6 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
         val rType = qry.expr.dataType
         val oType = if (qry.isPathExpresion) qry.expr.children(0).dataType else rType
         val rawRes = engine.eval(qry.queryStr, bindings)
-
         if (!qry.hasSelectList) {
             val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { v =>
                 val iV = instanceObject(v)
@@ -95,14 +94,16 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
             val rows = rawRes.asInstanceOf[java.util.List[AnyRef]].map { r =>
                 val rV = instanceObject(r).asInstanceOf[Row[java.util.List[AnyRef]]]
                 val sInstance = sType.createInstance()
-                val selExpr =
-                    (if (qry.isPathExpresion) qry.expr.children(0) else qry.expr).
-                        asInstanceOf[Expressions.SelectExpression]
-                selExpr.selectListWithAlias.foreach { aE =>
-                    val cName = aE.alias
-                    val (src, idx) = qry.resultMaping(cName)
-                    val v = rV.getColumn(src).get(idx)
-                    sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v))
+                val selObj = SelectExpressionHelper.extractSelectExpression(qry.expr)
+                if (selObj.isDefined)
+                {
+                    val selExpr = selObj.get.asInstanceOf[Expressions.SelectExpression]
+                    selExpr.selectListWithAlias.foreach { aE =>
+                        val cName = aE.alias
+                        val (src, idx) = qry.resultMaping(cName)
+                        val v = rV.getColumn(src).get(idx)
+                        sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v))
+                    }
                 }
                 addPathStruct(r, sInstance)
             }
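The evaluator no longer hard-casts qry.expr at a fixed position (root, or first child of a PathExpression); with order and limit nodes possible in the tree, it asks a helper to locate the select list wherever it sits. A hypothetical usage sketch: the describeSelect name and the rendering are illustrative, and the exact import of the helper (defined in GremlinQuery.scala, next diff) is an assumption.

import org.apache.atlas.query.Expressions.{Expression, SelectExpression}
import org.apache.atlas.query.SelectExpressionHelper

// Walks Path/Order/Limit wrappers looking for a SelectExpression, if any.
def describeSelect(e: Expression): String =
    SelectExpressionHelper.extractSelectExpression(e) match {
        case Some(sel) => "select columns: " + sel.selectListWithAlias.map(_.alias).mkString(", ")
        case None => "no select clause; results are whole instances"
    }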
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala
----------------------------------------------------------------------
diff --git a/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala b/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala
index f1590a8..73981c0 100755
--- a/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala
+++ b/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala
@@ -35,6 +35,7 @@ case class GremlinQuery(expr: Expression, queryStr: String, resultMaping: Map[St
     def hasSelectList = resultMaping != null
 
     def isPathExpresion = expr.isInstanceOf[PathExpression]
+
 }
 
 trait SelectExpressionHandling {
@@ -122,7 +123,6 @@ trait SelectExpressionHandling {
         }
         m.toMap
     }
-
 }
 
 class GremlinTranslationException(expr: Expression, reason: String) extends
@@ -186,7 +186,8 @@ class GremlinTranslator(expr: Expression,
     }
 
     def traitClauseWithInstanceForTop(topE : Expression) : PartialFunction[Expression, Expression] = {
-        case te : TraitExpression if (te fastEquals topE) => {
+// The topE check prevented matching a trait expression when it appears as a child, e.g. 'trait as t limit 2'.
+        case te : TraitExpression => {
             val theTrait = te.as("theTrait")
             val theInstance = theTrait.traitInstance().as("theInstance")
             val outE =
@@ -202,6 +203,7 @@ class GremlinTranslator(expr: Expression,
         stats.last
     }
 
+
     private def genQuery(expr: Expression, inSelect: Boolean): String = expr match {
         case ClassExpression(clsName) =>
             typeTestExpression(clsName)
@@ -324,12 +326,26 @@ class GremlinTranslator(expr: Expression,
         case pe@PathExpression(child) => {
             s"${genQuery(child, inSelect)}.path"
         }
+        case order@OrderExpression(child, odr, asc) => {
+            var orderby = ""
+            asc match {
+                // Builds a closure comparison function from the order by clause in the DSL;
+                // the Gremlin order pipe uses it to sort the results. Ordering is case-insensitive.
+                case false => orderby = s"order{it.b.getProperty('$odr').toLowerCase() <=> it.a.getProperty('$odr').toLowerCase()}" // descending
+                case _ => orderby = s"order{it.a.getProperty('$odr').toLowerCase() <=> it.b.getProperty('$odr').toLowerCase()}"
+            }
+            s"""${genQuery(child, inSelect)}.$orderby"""
+        }
+        case limitOffset@LimitExpression(child, limit, offset) => {
+            val totalResultRows = limit.value + offset.value
+            s"""${genQuery(child, inSelect)} [$offset..<$totalResultRows]"""
+        }
         case x => throw new GremlinTranslationException(x, "expression not yet supported")
     }
 
     def genFullQuery(expr: Expression): String = {
         var q = genQuery(expr, false)
-
         if(gPersistenceBehavior.addGraphVertexPrefix(preStatements)) {
             q = s"g.V.$q"
         }
@@ -354,21 +370,48 @@ class GremlinTranslator(expr: Expression,
         e1 = e1.transformUp(addAliasToLoopInput())
         e1 = e1.transformUp(instanceClauseToTop(e1))
         e1 = e1.transformUp(traitClauseWithInstanceForTop(e1))
-
-        e1 match {
-            case e1: SelectExpression => {
-                val rMap = buildResultMapping(e1)
+
+        // The following code extracts the select expression from the expression tree.
+
+        val se = SelectExpressionHelper.extractSelectExpression(e1)
+        if (se.isDefined)
+        {
+            val rMap = buildResultMapping(se.get)
             GremlinQuery(e1, genFullQuery(e1), rMap)
-            }
-            case pe@PathExpression(se@SelectExpression(child, selectList)) => {
-                val rMap = buildResultMapping(se)
-                GremlinQuery(e1, genFullQuery(e1), rMap)
-            }
-            case e1 => GremlinQuery(e1, genFullQuery(e1), null)
+        }
+        else
+        {
+            GremlinQuery(e1, genFullQuery(e1), null)
+        }
     }
 }
 
+    object SelectExpressionHelper {
+        /**
+         * This method extracts the child select expression from the parent expression.
+         */
+        def extractSelectExpression(child: Expression): Option[SelectExpression] = {
+            child match {
+                case se@SelectExpression(child, selectList) => {
+                    Some(se)
+                }
+                case limit@LimitExpression(child, lmt, offset) => {
+                    extractSelectExpression(child)
+                }
+                case order@OrderExpression(child, odr, odrBy) => {
+                    extractSelectExpression(child)
+                }
+                case path@PathExpression(child) => {
+                    extractSelectExpression(child)
+                }
+                case _ => {
+                    None
+                }
+            }
+        }
+    }
 /*
  * TODO
  * Translation Issues:
@@ -379,4 +422,3 @@ class GremlinTranslator(expr: Expression,
  * The solution is to do predicate pushdown and apply the filter immediately on top of the referred Expression.
  */
-}
\ No newline at end of file
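To make the translation concrete: for a DSL query like "hive_db orderby 'name' limit 2 offset 1", the two new genQuery cases above compose into roughly the following Gremlin (a sketch; the hive_db type-test step is elided, and the g.V prefix comes from genFullQuery when the persistence behavior asks for it):

    g.V.<hive_db type test>.order{it.a.getProperty('name').toLowerCase() <=> it.b.getProperty('name').toLowerCase()} [1..<3]

The order closure sorts case-insensitively (it.a and it.b are swapped for 'desc'), and the Groovy half-open range [offset..<limit+offset] keeps at most 'limit' rows starting at index 'offset'; here, rows 1 and 2 of the sorted pipe.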
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/main/scala/org/apache/atlas/query/QueryParser.scala
----------------------------------------------------------------------
diff --git a/repository/src/main/scala/org/apache/atlas/query/QueryParser.scala b/repository/src/main/scala/org/apache/atlas/query/QueryParser.scala
index b6bbbd3..60b57d9 100755
--- a/repository/src/main/scala/org/apache/atlas/query/QueryParser.scala
+++ b/repository/src/main/scala/org/apache/atlas/query/QueryParser.scala
@@ -64,10 +64,14 @@ trait QueryKeywords {
     protected val AS = Keyword("as")
     protected val TIMES = Keyword("times")
     protected val WITHPATH = Keyword("withPath")
+    protected val LIMIT = Keyword("limit")
+    protected val OFFSET = Keyword("offset")
+    protected val ORDERBY = Keyword("orderby")
 }
 
 trait ExpressionUtils {
 
+    protected val DESC = "desc"
     def loop(input: Expression, l: (Expression, Option[Literal[Integer]], Option[String])) = l match {
         case (c, None, None) => input.loop(c)
         case (c, t, None) => input.loop(c, t.get)
@@ -85,6 +89,14 @@ trait ExpressionUtils {
         input.select(selList: _*)
     }
 
+    def limit(input: Expression, lmt: Literal[Integer], offset: Literal[Integer]) = {
+        input.limit(lmt, offset)
+    }
+
+    def order(input: Expression, odr: String, asc: Boolean) = {
+        input.order(odr, asc)
+    }
+
    def leftmostId(e: Expression) = {
        var le: IdExpression = null
        e.traverseUp { case i: IdExpression if le == null => le = i}
@@ -106,7 +118,10 @@ trait ExpressionUtils {
        sngQuery2.transformUp(replaceIdWithField(leftSrcId, snglQuery1.field(leftSrcId.name)))
    }
 }
-
+/**
+ * The query parser parses a DSL query. It uses Scala PackratParsers and pattern matching
+ * to extract the expressions, building up an expression tree.
+ */
 object QueryParser extends StandardTokenParsers with QueryKeywords with ExpressionUtils with PackratParsers {
 
     import scala.language.higherKinds
@@ -137,12 +152,33 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
         case h :: t => t.foldLeft(h)(merge(_, _))
         }
     }
-
-    def singleQuery = singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ^^ {
-        case s ~ None ~ None => s
-        case s ~ l ~ None => loop(s, l.get)
-        case s ~ l ~ sel if l.isDefined => select(loop(s, l.get), sel.get)
-        case s ~ None ~ sel => select(s, sel.get)
+    /**
+     * A singleQuery has the form:
+     *   srcQuery [select] [orderby ['desc']] [limit x [offset y]]
+     * i.e. a source query followed by an optional select clause, an optional order by clause
+     * and an optional limit/offset.
+     * e.g. "hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 1"
+     * @return
+     */
+    def singleQuery = singleQrySrc ~ opt(loopExpression) ~ opt(selectClause) ~ opt(orderby) ~ opt(limitOffset) ^^ {
+        case s ~ l ~ sel ~ odr ~ lmtoff => {
+            var expressiontree = s
+            if (l.isDefined) // Note: the order of these if statements is important.
+            {
+                expressiontree = loop(expressiontree, l.get);
+            }
+            if (odr.isDefined)
+            {
+                expressiontree = order(expressiontree, odr.get._1, odr.get._2)
+            }
+            if (lmtoff.isDefined)
+            {
+                expressiontree = limit(expressiontree, int (lmtoff.get._1), int (lmtoff.get._2))
+            }
+            if (sel.isDefined)
+            {
+                expressiontree = select(expressiontree, sel.get)
+            }
+            expressiontree
+        }
     }
 
     /**
@@ -182,7 +218,25 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
     def fromSrc = identifier ~ AS ~ alias ^^ { case s ~ a ~ al => s.as(al)} |
         identifier
-
+    def orderby = ORDERBY ~ order ~ opt (asce) ^^ {
+        case o ~ odr ~ None => (odr, true)
+        case o ~ odr ~ asc => (odr, asc.get)
+    }
+
+    def limitOffset = LIMIT ~ lmt ~ opt (offset) ^^ {
+        case l ~ lt ~ None => (lt, 0)
+        case l ~ lt ~ of => (lt, of.get)
+    }
+
+    def offset = OFFSET ~ ofset ^^ {
+        case offset ~ of => of
+    }
+
+    def asce = asc ^^ {
+        case DESC => false
+        case _ => true
+    }
+
     def loopExpression: Parser[(Expression, Option[Literal[Integer]], Option[String])] =
         LOOP ~ (LPAREN ~> query <~ RPAREN) ~ opt(intConstant <~ TIMES) ~ opt(AS ~> alias) ^^ {
             case l ~ e ~ None ~ a => (e, None, a)
@@ -192,7 +246,6 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
     def selectClause: Parser[List[(Expression, Option[String])]] = SELECT ~ rep1sep(selectExpression, COMMA) ^^ {
         case s ~ cs => cs
     }
-
     def selectExpression: Parser[(Expression, Option[String])] = expr ~ opt(AS ~> alias) ^^ {
         case e ~ a => (e, a)
     }
@@ -239,7 +292,15 @@ object QueryParser extends StandardTokenParsers with QueryKeywords with Expressi
     }
 
     def alias = ident | stringLit
-
+
+    def lmt = intConstant
+
+    def ofset = intConstant
+
+    def order = ident | stringLit
+
+    def asc = ident | stringLit
+
     def literal = booleanConstant ^^ {
         boolean(_)
     } |


http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/main/scala/org/apache/atlas/query/Resolver.scala
----------------------------------------------------------------------
diff --git a/repository/src/main/scala/org/apache/atlas/query/Resolver.scala b/repository/src/main/scala/org/apache/atlas/query/Resolver.scala
index c7e1e81..5fc9400 100755
--- a/repository/src/main/scala/org/apache/atlas/query/Resolver.scala
+++ b/repository/src/main/scala/org/apache/atlas/query/Resolver.scala
@@ -90,6 +90,14 @@ class Resolver(srcExpr: Option[Expression] = None, aliases: Map[String, Expressi
         case l@LoopExpression(inputExpr, loopExpr, t) if inputExpr.resolved => {
             val r = new Resolver(Some(inputExpr), inputExpr.namedExpressions, true)
             return new LoopExpression(inputExpr, loopExpr.transformUp(r), t)
+        }
+        case lmt@LimitExpression(child, limit, offset) => {
+            val r = new Resolver(Some(child), child.namedExpressions)
+            return new LimitExpression(child.transformUp(r), limit, offset)
+        }
+        case order@OrderExpression(child, odr, asc) => {
+            val r = new Resolver(Some(child), child.namedExpressions)
+            return new OrderExpression(child.transformUp(r), odr, asc)
         }
         case x => x
     }


http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/574da752/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java
----------------------------------------------------------------------
diff --git a/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java b/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java
index 06ac492..9cc501f 100755
--- a/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java
+++ b/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java
@@ -19,6 +19,7 @@ package org.apache.atlas.discovery;
 
 import com.google.common.collect.ImmutableSet;
+
 import org.apache.atlas.BaseHiveRepositoryTest;
 import org.apache.atlas.RepositoryMetadataModule;
 import org.apache.atlas.RequestContext;
@@ -45,8 +46,10 @@ import org.testng.annotations.Guice;
 import org.testng.annotations.Test;
 
 import javax.inject.Inject;
+
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
@@ -248,6 +251,297 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest {
         };
     }
 
+    @DataProvider(name = "dslLimitQueriesProvider")
+    private Object[][] createDSLQueriesWithLimit() {
+        return new Object[][]{
+                {"hive_column limit 10 ", 10},
+                {"hive_column select hive_column.name limit 10 ", 10},
+                {"hive_column select hive_column.name withPath", 29},
+                {"hive_column select hive_column.name limit 10 withPath", 10},
+                {"from hive_db", 3},
+                {"from hive_db limit 2", 2},
+                {"from hive_db limit 2 offset 0", 2},
+                {"from hive_db limit 2 offset 1", 2},
+                {"from hive_db limit 3 offset 1", 2},
+                {"hive_db", 3},
+                {"hive_db where hive_db.name=\"Reporting\"", 1},
+                {"hive_db where hive_db.name=\"Reporting\" limit 10 ", 1},
+                {"hive_db hive_db.name = \"Reporting\"", 1},
+                {"hive_db where hive_db.name=\"Reporting\" select name, owner", 1},
+                {"hive_db has name", 3},
+                {"hive_db has name limit 2 offset 0", 2},
+                {"hive_db has name limit 2 offset 1", 2},
+                {"hive_db has name limit 10 offset 1", 2},
+                {"hive_db has name limit 10 offset 0", 3},
+                {"hive_db, hive_table", 8},
+                {"hive_db, hive_table limit 5", 5},
+                {"hive_db, hive_table limit 5 offset 0", 5},
+                {"hive_db, hive_table limit 5 offset 5", 3},
+
+                {"View is JdbcAccess", 2},
+                {"View is JdbcAccess limit 1", 1},
+                {"View is JdbcAccess limit 2 offset 1", 1},
+                {"hive_db as db1, hive_table where db1.name = \"Reporting\"", 0}, // Not working - ATLAS-145
+
+                {"from hive_table", 8},
+                {"from hive_table limit 5", 5},
+                {"from hive_table limit 5 offset 5", 3},
+
+                {"hive_table", 8},
+                {"hive_table limit 5", 5},
+                {"hive_table limit 5 offset 5", 3},
+
+                {"hive_table isa Dimension", 3},
+                {"hive_table isa Dimension limit 2", 2},
+                {"hive_table isa Dimension limit 2 offset 0", 2},
+                {"hive_table isa Dimension limit 2 offset 1", 2},
+                {"hive_table isa Dimension limit 3 offset 1", 2},
+
+                {"hive_column where hive_column isa PII", 6},
+                {"hive_column where hive_column isa PII limit 5", 5},
+                {"hive_column where hive_column isa PII limit 5 offset 1", 5},
+                {"hive_column where hive_column isa PII limit 5 offset 5", 1},
+
+                {"View is Dimension" , 2},
+                {"View is Dimension limit 1" , 1},
+                {"View is Dimension limit 1 offset 1" , 1},
+                {"View is Dimension limit 10 offset 1" , 1},
+
+                {"hive_column select hive_column.name", 29},
+                {"hive_column select hive_column.name limit 5", 5},
+                {"hive_column select hive_column.name limit 5 offset 28", 1},
+
+                {"hive_column select name", 29},
+                {"hive_column select name limit 5", 5},
+                {"hive_column select name limit 5 offset 28 ", 1},
+
+                {"hive_column where hive_column.name=\"customer_id\"", 4},
+                {"hive_column where hive_column.name=\"customer_id\" limit 2", 2},
+                {"hive_column where hive_column.name=\"customer_id\" limit 2 offset 1", 2},
+                {"hive_column where hive_column.name=\"customer_id\" limit 10 offset 3", 1},
+
+                {"from hive_table select hive_table.name", 8},
+                {"from hive_table select hive_table.name limit 5", 5},
+                {"from hive_table select hive_table.name limit 5 offset 5", 3},
+
+                {"hive_db where (name = \"Reporting\")", 1},
+                {"hive_db where (name = \"Reporting\") limit 10", 1},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1", 1},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1 limit 10", 1},
+                {"hive_db where hive_db is JdbcAccess", 0}, // Not supposed to work
+                {"hive_db hive_table", 8},
+                {"hive_db hive_table limit 5", 5},
+                {"hive_db hive_table limit 5 offset 5", 3},
+                {"hive_db where hive_db has name", 3},
+                {"hive_db where hive_db has name limit 5", 3},
+                {"hive_db where hive_db has name limit 2 offset 0", 2},
+                {"hive_db where hive_db has name limit 2 offset 1", 2},
+
+                {"hive_db as db1 hive_table where (db1.name = \"Reporting\")", 0}, // Not working -> ATLAS-145
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 ", 1},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10", 1},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10 offset 1", 0},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10 offset 0", 1},
+
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 ", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 ", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 offset 0", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 offset 5", 0},
+
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 ", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10 offset 0", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10 offset 1", 0},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10", 1},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 0 offset 1", 0},
+
+                // trait searches
+                {"Dimension", 5},
+                {"Dimension limit 2", 2},
+                {"Dimension limit 2 offset 1", 2},
+                {"Dimension limit 5 offset 4", 1},
+
+                {"JdbcAccess", 2},
+                {"JdbcAccess limit 5 offset 0", 2},
+                {"JdbcAccess limit 2 offset 1", 1},
+                {"JdbcAccess limit 1", 1},
+
+                {"ETL", 3},
+                {"ETL limit 2", 2},
+                {"ETL limit 1", 1},
+                {"ETL limit 1 offset 0", 1},
+                {"ETL limit 2 offset 1", 2},
+
+                {"Metric", 5},
+                {"Metric limit 10", 5},
+                {"Metric limit 2", 2},
+                {"Metric limit 10 offset 1", 4},
+
+                {"PII", 6},
+                {"PII limit 10", 6},
+                {"PII limit 2", 2},
+                {"PII limit 10 offset 1", 5},
+
+                {"`Log Data`", 4},
+                {"`Log Data` limit 3", 3},
+                {"`Log Data` limit 10 offset 2", 2},
+
+                {"hive_table where name='sales_fact', db where name='Sales'", 1},
+                {"hive_table where name='sales_fact', db where name='Sales' limit 10", 1},
+                {"hive_table where name='sales_fact', db where name='Sales' limit 10 offset 1", 0},
+                {"hive_table where name='sales_fact', db where name='Reporting'", 0},
+                {"hive_table where name='sales_fact', db where name='Reporting' limit 10", 0},
+                {"hive_table where name='sales_fact', db where name='Reporting' limit 10 offset 1", 0},
+                {"hive_partition as p where values = ['2015-01-01']", 1},
+                {"hive_partition as p where values = ['2015-01-01'] limit 10", 1},
+                {"hive_partition as p where values = ['2015-01-01'] limit 10 offset 1", 0},
+
+        };
+    }
+
+    @DataProvider(name = "dslOrderByQueriesProvider")
+    private Object[][] createDSLQueriesWithOrderBy() {
+        Boolean isAscending = Boolean.TRUE;
+        return new Object[][]{
+                // Not working: the server code cannot figure out how to sort these; not sure if it is a valid use case.
+//                {"hive_db hive_table orderby 'hive_db.owner'", 8, "owner", isAscending},
+//                {"hive_db hive_table orderby 'hive_db.owner' limit 5", 5, "owner", isAscending},
+//                {"hive_db hive_table orderby 'hive_db.owner' limit 5 offset 5", 3, "owner", isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' limit 10 withPath", 10, "name", isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' asc limit 10 withPath", 10, "name", isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' desc limit 10 withPath", 10, "name", !isAscending},
+                {"from hive_db orderby 'hive_db.owner' limit 3", 3, "owner", isAscending},
+                {"hive_db where hive_db.name=\"Reporting\" orderby 'owner'", 1, "owner", isAscending},
+
+                {"hive_db where hive_db.name=\"Reporting\" orderby 'hive_db.owner' limit 10 ", 1, "owner", isAscending},
+                {"hive_db where hive_db.name=\"Reporting\" select name, owner orderby 'hive_db.name' ", 1, "name", isAscending},
+                {"hive_db has name orderby 'hive_db.owner' limit 10 offset 0", 3, "owner", isAscending},
+
+                {"from hive_table orderby 'hive_table.owner'", 8, "owner", isAscending},
+                {"from hive_table orderby 'hive_table.owner' limit 8", 8, "owner", isAscending},
+
+                {"hive_table orderby 'hive_table.owner'", 8, "owner", isAscending},
+                {"hive_table orderby 'hive_table.owner' limit 8", 8, "owner", isAscending},
+                {"hive_table orderby 'hive_table.owner' limit 8 offset 0", 8, "owner", isAscending},
+                {"hive_table orderby 'hive_table.owner' desc limit 8 offset 0", 8, "owner", !isAscending},
+
+                {"hive_table isa Dimension orderby 'hive_table.owner'", 3, "owner", isAscending}, // order not working
+                {"hive_table isa Dimension orderby 'hive_table.owner' limit 3", 3, "owner", isAscending},
+                {"hive_table isa Dimension orderby 'hive_table.owner' limit 3 offset 0", 3, "owner", isAscending},
+                {"hive_table isa Dimension orderby 'hive_table.owner' desc limit 3 offset 0", 3, "owner", !isAscending},
+
+                {"hive_column where hive_column isa PII orderby 'hive_column.name'", 6, "name", isAscending},
+                {"hive_column where hive_column isa PII orderby 'hive_column.name' limit 5", 5, "name", isAscending},
+                {"hive_column where hive_column isa PII orderby 'hive_column.name' limit 5 offset 1", 5, "name", isAscending},
+                {"hive_column where hive_column isa PII orderby 'hive_column.name' desc limit 5 offset 1", 5, "name", !isAscending},
+
+                {"hive_column select hive_column.name orderby 'hive_column.name' ", 29, "hive_column.name", isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' limit 5", 5, "hive_column.name", isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' desc limit 5", 5, "hive_column.name", !isAscending},
+                {"hive_column select hive_column.name orderby 'hive_column.name' limit 5 offset 28", 1, "hive_column.name", isAscending},
+
+                {"hive_column select name orderby 'hive_column.name'", 29, "name", isAscending},
+                {"hive_column select name orderby 'hive_column.name' limit 5", 5, "name", isAscending},
+                {"hive_column select name orderby 'hive_column.name' desc", 29, "name", !isAscending},
+
+                {"hive_column where hive_column.name=\"customer_id\" orderby 'hive_column.name'", 4, "name", isAscending},
+                {"hive_column where hive_column.name=\"customer_id\" orderby 'hive_column.name' limit 2", 2, "name", isAscending},
+                {"hive_column where hive_column.name=\"customer_id\" orderby 'hive_column.name' limit 2 offset 1", 2, "name", isAscending},
+
+                {"from hive_table select owner orderby 'hive_table.owner'", 8, "owner", isAscending},
+                {"from hive_table select owner orderby 'hive_table.owner' limit 5", 5, "owner", isAscending},
+                {"from hive_table select owner orderby 'hive_table.owner' desc limit 5", 5, "owner", !isAscending},
+                {"from hive_table select owner orderby 'hive_table.owner' limit 5 offset 5", 3, "owner", isAscending},
+
+                {"hive_db where (name = \"Reporting\") orderby 'hive_db.name'", 1, "name", isAscending},
+                {"hive_db where (name = \"Reporting\") orderby 'hive_db.name' limit 10", 1, "name", isAscending},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1 orderby '_col_1'", 1, "_col_1", isAscending}, // will it work?
+                {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1 orderby '_col_1' limit 10", 1, "_col_1", isAscending},
+                {"hive_db where hive_db has name orderby 'hive_db.owner'", 3, "owner", isAscending},
+                {"hive_db where hive_db has name orderby 'hive_db.owner' limit 5", 3, "owner", isAscending},
+                {"hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 0", 2, "owner", isAscending},
+                {"hive_db where hive_db has name orderby 'hive_db.owner' limit 2 offset 1", 2, "owner", isAscending},
+
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1'", 1, "_col_1", isAscending},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10", 1, "_col_1", isAscending},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10 offset 1", 0, "_col_1", isAscending},
+                {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10 offset 0", 1, "_col_1", isAscending},
+
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' ", 1, "_col_1", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 ", 1, "_col_1", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 offset 0", 1, "_col_1", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 offset 5", 0, "_col_1", isAscending},
+
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' ", 1, "_col_0", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10 offset 0", 1, "_col_0", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10 offset 1", 0, "_col_0", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10", 1, "_col_0", isAscending},
+                {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 0 offset 1", 0, "_col_0", isAscending},
+
+        };
+    }
+
+    @Test(dataProvider = "dslOrderByQueriesProvider")
+    public void testSearchByDSLQueriesWithOrderBy(String dslQuery, Integer expectedNumRows, String orderBy, boolean ascending) throws Exception {
+        System.out.println("Executing dslQuery = " + dslQuery);
+        String jsonResults = discoveryService.searchByDSL(dslQuery);
+        Assert.assertNotNull(jsonResults);
+
+        JSONObject results = new JSONObject(jsonResults);
+        Assert.assertEquals(results.length(), 3);
+
+        Object query = results.get("query");
+        Assert.assertNotNull(query);
+
+        JSONObject dataType = results.getJSONObject("dataType");
+        Assert.assertNotNull(dataType);
+        String typeName = dataType.getString("typeName");
+        Assert.assertNotNull(typeName);
+
+        JSONArray rows = results.getJSONArray("rows");
+
+        Assert.assertNotNull(rows);
+        Assert.assertEquals(rows.length(), expectedNumRows.intValue()); // some queries may not have any results
+        List<String> returnedList = new ArrayList<String>();
+        for (int i = 0; i < rows.length(); i++) {
+            JSONObject row = rows.getJSONObject(i);
+            try
+            {
+                returnedList.add(row.get(orderBy).toString());
+            }
+            catch (Exception ex)
+            {
+                System.out.println(" Exception occurred " + ex.getMessage());
+            }
+        }
+        Iterator<String> iter = returnedList.iterator();
+        String _current = null, _prev = null;
+        // The following code compares consecutive rows and makes sure the data is sorted as expected.
+        while (iter.hasNext())
+        {
+            _prev = _current;
+            _current = iter.next().toLowerCase();
+            if (_prev != null && _prev.compareTo(_current) != 0)
+            {
+                if (ascending)
+                {
+                    Assert.assertTrue(_prev.compareTo(_current) < 0);
+                }
+                else
+                {
+                    Assert.assertTrue(_prev.compareTo(_current) > 0);
+                }
+            }
+        }
+
+        System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
+    }
+
     @Test(dataProvider = "dslQueriesProvider")
     public void testSearchByDSLQueries(String dslQuery, Integer expectedNumRows) throws Exception {
         System.out.println("Executing dslQuery = " + dslQuery);
@@ -272,6 +566,30 @@ public class GraphBackedDiscoveryServiceTest extends BaseHiveRepositoryTest {
         System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
     }
 
+    @Test(dataProvider = "dslLimitQueriesProvider")
+    public void testSearchByDSLQueriesWithLimit(String dslQuery, Integer expectedNumRows) throws Exception {
+        System.out.println("Executing dslQuery = " + dslQuery);
+        String jsonResults = discoveryService.searchByDSL(dslQuery);
+        Assert.assertNotNull(jsonResults);
+
+        JSONObject results = new JSONObject(jsonResults);
+        Assert.assertEquals(results.length(), 3);
+        System.out.println("results = " + results);
+
+        Object query = results.get("query");
+        Assert.assertNotNull(query);
+
+        JSONObject dataType = results.getJSONObject("dataType");
+        Assert.assertNotNull(dataType);
+        String typeName = dataType.getString("typeName");
+        Assert.assertNotNull(typeName);
+
+        JSONArray rows = results.getJSONArray("rows");
+        Assert.assertNotNull(rows);
+        Assert.assertEquals(rows.length(), expectedNumRows.intValue()); // some queries may not have any results
+        System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
+    }
+
     @DataProvider(name = "invalidDslQueriesProvider")
     private Object[][] createInvalidDSLQueries() {
         return new String[][]{{"from Unknown"}, {"Unknown"}, {"Unknown is Blah"},};
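The expected row counts in the limit provider above all follow from the half-open window the translator emits: with N matches, "limit L offset O" returns min(N, O + L) - O rows, floored at zero. A tiny illustrative check of that arithmetic, not part of the patch:

// Window arithmetic behind the expected counts in createDSLQueriesWithLimit.
def expectedRows(total: Int, limit: Int, offset: Int): Int =
    math.max(0, math.min(total, offset + limit) - offset)

assert(expectedRows(29, 5, 28) == 1) // "hive_column select name limit 5 offset 28"
assert(expectedRows(8, 5, 5) == 3)   // "hive_table limit 5 offset 5"
assert(expectedRows(3, 10, 1) == 2)  // "hive_db has name limit 10 offset 1"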
