Repository: lens Updated Branches: refs/heads/master 79b95f0e0 -> dba885ca9
http://git-wip-us.apache.org/repos/asf/lens/blob/dba885ca/lens-examples/src/main/resources/interests.xml ---------------------------------------------------------------------- diff --git a/lens-examples/src/main/resources/interests.xml b/lens-examples/src/main/resources/interests.xml new file mode 100644 index 0000000..2548665 --- /dev/null +++ b/lens-examples/src/main/resources/interests.xml @@ -0,0 +1,31 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+ +--> +<x_dimension name="interests" xmlns="uri:lens:cube:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="uri:lens:cube:0.1 cube-0.1.xsd "> + <attributes> + <dim_attribute name="id" _type="INT"/> + <dim_attribute name="name" _type="STRING"/> + </attributes> + <properties> + <property name="dimension.interests.timed.dimension" value="dt"/> + </properties> +</x_dimension> http://git-wip-us.apache.org/repos/asf/lens/blob/dba885ca/lens-examples/src/main/resources/interests_table.xml ---------------------------------------------------------------------- diff --git a/lens-examples/src/main/resources/interests_table.xml b/lens-examples/src/main/resources/interests_table.xml new file mode 100644 index 0000000..314d77f --- /dev/null +++ b/lens-examples/src/main/resources/interests_table.xml @@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+ +--> +<x_dimension_table dimension_name="interests" table_name="interests_table" weight="100.0" xmlns="uri:lens:cube:0.1" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="uri:lens:cube:0.1 cube-0.1.xsd "> + <columns> + <column comment="ID" name="id" _type="INT"/> + <column comment="name" name="name" _type="STRING"/> + </columns> + <storage_tables> + <storage_table> + <update_periods> + <update_period>HOURLY</update_period> + </update_periods> + <storage_name>local</storage_name> + <table_desc external="true" field_delimiter="," table_location="/tmp/examples/interests"> + <part_cols> + <column comment="Time column" name="dt" _type="STRING"/> + </part_cols> + <time_part_cols>dt</time_part_cols> + </table_desc> + </storage_table> + <storage_table> + <storage_name>mydb</storage_name> + <table_desc external="true" table_location="/tmp/db-storage.db" + storage_handler_name="org.apache.lens.storage.db.DBStorageHandler"> + <table_parameters> + <property name="lens.metastore.native.db.name" value="default"/> + </table_parameters> + </table_desc> + </storage_table> + </storage_tables> +</x_dimension_table> http://git-wip-us.apache.org/repos/asf/lens/blob/dba885ca/lens-examples/src/main/resources/sales-cube.xml ---------------------------------------------------------------------- diff --git a/lens-examples/src/main/resources/sales-cube.xml b/lens-examples/src/main/resources/sales-cube.xml index e944821..f7e3df0 100644 --- a/lens-examples/src/main/resources/sales-cube.xml +++ b/lens-examples/src/main/resources/sales-cube.xml @@ -58,6 +58,10 @@ display_string="Customer City"> <chain_ref_column chain_name="customer_city" ref_col="name" /> </dim_attribute> + <dim_attribute name="customer_interest" _type="string" description="Customer's interest" + display_string="Customer Interest"> + <chain_ref_column chain_name="customer_interests_chain" ref_col="name" /> + </dim_attribute> <dim_attribute name="production_location"> <hierarchy> <dim_attribute 
name="production_city_name" _type="STRING" description="City name in which the product was produced" @@ -98,6 +102,26 @@ </path> </paths> </join_chain> + <join_chain name="customer_interests_chain"> + <paths> + <path> + <edges> + <edge> + <from table="sales" column="customer_id" /> + <to table="customer" column="id" /> + </edge> + <edge> + <from table="customer" column="id" /> + <to table="customer_interests" column="customer_id" maps_to_many="true" /> + </edge> + <edge> + <from table="customer_interests" column="interest_id" /> + <to table="interests" column="id" /> + </edge> + </edges> + </path> + </paths> + </join_chain> <join_chain name="product_details"> <paths> <path> http://git-wip-us.apache.org/repos/asf/lens/blob/dba885ca/src/site/apt/user/olap-cube.apt ---------------------------------------------------------------------- diff --git a/src/site/apt/user/olap-cube.apt b/src/site/apt/user/olap-cube.apt index 4bed623..3d12850 100644 --- a/src/site/apt/user/olap-cube.apt +++ b/src/site/apt/user/olap-cube.apt @@ -514,6 +514,14 @@ lens-shell> |Football, Cricket, BasketBall| 50| *--+--+ + When there is an expression around the bridge table fields, the user might be interested in doing field aggregations on + top of the defined expression. Also, simple filters on those fields should be applied to the generated array. The feature + provides the capability for both. + + For example: + "select user.sport, revenue from sales where user.sport in ('CRICKET')" would convert the filter user.sport in + ('CRICKET') to a contains check on the aggregated user sports. + See configuration params available at {{{./olap-query-conf.html} OLAP query configurations}} and look for config related to bridge tables, for turning this on. 
http://git-wip-us.apache.org/repos/asf/lens/blob/dba885ca/src/site/apt/user/olap-query-conf.apt ---------------------------------------------------------------------- diff --git a/src/site/apt/user/olap-query-conf.apt b/src/site/apt/user/olap-query-conf.apt index 6f84869..6372fb1 100644 --- a/src/site/apt/user/olap-query-conf.apt +++ b/src/site/apt/user/olap-query-conf.apt @@ -26,52 +26,56 @@ OLAP query configuration *--+--+---+--+ |1|lens.cube.query.bridge.table.field.aggregator|collect_set|The field aggregator function to be used for aggregating fields from bridge tables. Would be used only when flattening is enabled. The value can be passed for each query. During typical deployments it would be passed for each driver, as the function could be different for each driver.| *--+--+---+--+ -|2|lens.cube.query.disable.aggregate.resolver|true|Tells whether to disable automatic resolution of aggregations for measures in a cube. To enable automatic resolution, this value should be false.| +|2|lens.cube.query.bridge.table.field.array.filter|array_contains|The array filter function to be used for filter on fields from bridge tables. Would be used only when flattening is enabled and not flattening early. The value can be passed for each query. During typical deployments it would be passed for each driver, as the function could be different for each driver. Signature of the filter getting passed here should be very similar to array_contains. i.e. array_contains(array, value) to return true if array contains value.| *--+--+---+--+ -|3|lens.cube.query.disable.auto.join|true|Tells whether to disable automatic resolution of join conditions between tables involved. To enable automatic resolution, this value should be false.| +|3|lens.cube.query.disable.aggregate.resolver|true|Tells whether to disable automatic resolution of aggregations for measures in a cube. 
To enable automatic resolution, this value should be false.| *--+--+---+--+ -|4|lens.cube.query.driver.supported.storages| |List of comma separated storage names that supported by a driver. If no value is specified, all storages are valid| +|4|lens.cube.query.disable.auto.join|true|Tells whether to disable automatic resolution of join conditions between tables involved. To enable automatic resolution, this value should be false.| *--+--+---+--+ -|5|lens.cube.query.enable.attrfields.add.distinct|true|When the query has only attribute fields projected from cube and the flag is set to true, distinct clause will be added for the projection so that no duplicate values will be projected. If flag is set to false, projection wont be changed, result might include duplicate values.| +|5|lens.cube.query.driver.supported.storages| |List of comma separated storage names that supported by a driver. If no value is specified, all storages are valid| *--+--+---+--+ -|6|lens.cube.query.enable.flattening.bridge.tables|false|Flag specifies if fields selected have to be flattened or not, if they are coming from tables with many to many relationship in join. If false, field selection will be simple join and selecting the field. If true, the fields from bridge tables will be aggregated grouped by join key.| +|6|lens.cube.query.enable.attrfields.add.distinct|true|When the query has only attribute fields projected from cube and the flag is set to true, distinct clause will be added for the projection so that no duplicate values will be projected. If flag is set to false, projection wont be changed, result might include duplicate values.| *--+--+---+--+ -|7|lens.cube.query.enable.storages.union|false|Sometimes One storage table doesn't contain all required partitions, and the query needs to be answered from two storage tables. Enabling this (make value = <true>) allows rewrite of such queries. If it's <false>, then such queries will fail in rewrite phase. 
The feature should only be enabled when all the aggregate functions used in the query (explicitly or implicitly picked from default aggregates of used measures) are transitive. Transitive aggregate functions are those that follow the following property:\ | +|7|lens.cube.query.enable.flattening.bridge.tables|false|Flag specifies if fields selected have to be flattened or not, if they are coming from tables with many to many relationship in join. If false, field selection will be simple join and selecting the field. If true, the fields from bridge tables will be aggregated grouped by join key.| +*--+--+---+--+ +|8|lens.cube.query.enable.storages.union|false|Sometimes One storage table doesn't contain all required partitions, and the query needs to be answered from two storage tables. Enabling this (make value = <true>) allows rewrite of such queries. If it's <false>, then such queries will fail in rewrite phase. The feature should only be enabled when all the aggregate functions used in the query (explicitly or implicitly picked from default aggregates of used measures) are transitive. Transitive aggregate functions are those that follow the following property:\ | | | | | \ | | | | |<<<f(a, b, c, d) = f(f(a, b), f(c, d)) for all possible values of a,b,c,d.>>> \ | | | | | \ | | | | |e.g. SUM, MAX, MIN etc are transitive aggregate functions, while AVG, COUNT etc are not. | *--+--+---+--+ -|8|lens.cube.query.fail.if.data.partial|false|Whether to fail the query of data is partial| +|9|lens.cube.query.fail.if.data.partial|false|Whether to fail the query of data is partial| +*--+--+---+--+ +|10|lens.cube.query.flatten.bridge.tables.early|false|Flag specifies if fields selected have to be flattened before applying any expression on top of them. For example, if sub_str(bridge_field) is the expression and the property is set to true, sub_str will be applied on the aggregated field. If set to false, the aggregate will be applied on top of the expression i.e. 
sub_str.| *--+--+---+--+ -|9|lens.cube.query.join.type|INNER|Tells what the join type is, in-case of automatic resolution of joins is enabled| +|11|lens.cube.query.join.type|INNER|Tells what the join type is, in-case of automatic resolution of joins is enabled| *--+--+---+--+ -|10|lens.cube.query.lookahead.ptparts.forinterval.${interval}|1|The value of number of lookahead process time partitions for interval specified. Interval can be any Update period.| +|12|lens.cube.query.lookahead.ptparts.forinterval.${interval}|1|The value of number of lookahead process time partitions for interval specified. Interval can be any Update period.| *--+--+---+--+ -|11|lens.cube.query.max.interval| |Maximum value of the update period that the query timed dimensions can take values of. For example, if query involves month ranges, user can say query maximum interval is daily, then no monthly partitions will be picked.| +|13|lens.cube.query.max.interval| |Maximum value of the update period that the query timed dimensions can take values of. For example, if query involves month ranges, user can say query maximum interval is daily, then no monthly partitions will be picked.| *--+--+---+--+ -|12|lens.cube.query.nonexisting.partitions| |The list of comma separated non existing partitions, if query can run with partial data. The value will be set by the cube query rewriter| +|14|lens.cube.query.nonexisting.partitions| |The list of comma separated non existing partitions, if query can run with partial data. The value will be set by the cube query rewriter| *--+--+---+--+ -|13|lens.cube.query.partition.where.clause.format| |The simple date format of how the queried partition should be put in where clause. If nothing is specified, it will use the format from org.apache.lens.cube.metadata.UpdatePeriod for each type of partition| +|15|lens.cube.query.partition.where.clause.format| |The simple date format of how the queried partition should be put in where clause. 
If nothing is specified, it will use the format from org.apache.lens.cube.metadata.UpdatePeriod for each type of partition| *--+--+---+--+ -|14|lens.cube.query.pick.lightest.fact.first|false|If set to true, lightest fact will be resolved first than resolving storages. Otherwise, storages will be resolved to check all partitions exist and then pick lightest fact among candidates| +|16|lens.cube.query.pick.lightest.fact.first|false|If set to true, lightest fact will be resolved first than resolving storages. Otherwise, storages will be resolved to check all partitions exist and then pick lightest fact among candidates| *--+--+---+--+ -|15|lens.cube.query.process.time.partition.column| |The column name which is a process time column. If process time column is specified, query rewriter will look ahead the partitions of other timed dimensions inside this column.| +|17|lens.cube.query.process.time.partition.column| |The column name which is a process time column. If process time column is specified, query rewriter will look ahead the partitions of other timed dimensions inside this column.| *--+--+---+--+ -|16|lens.cube.query.promote.groupby.toselect|false|Tells whether to promote group by clauses to be promoted to select expressions if they are already not projected. To enable automatic promotion, this value should be true.| +|18|lens.cube.query.promote.groupby.toselect|false|Tells whether to promote group by clauses to be promoted to select expressions if they are already not projected. To enable automatic promotion, this value should be true.| *--+--+---+--+ -|17|lens.cube.query.promote.select.togroupby|false|Tells whether to promote select expressions which is not inside any aggregate, to be promoted to groupby clauses, if they are already not part of groupby clauses. 
To enable automatic promotion, this value should be true.| +|19|lens.cube.query.promote.select.togroupby|false|Tells whether to promote select expressions which is not inside any aggregate, to be promoted to groupby clauses, if they are already not part of groupby clauses. To enable automatic promotion, this value should be true.| *--+--+---+--+ -|18|lens.cube.query.replace.timedim|true|Tells whether timedim attribute queried in the time range should be replaced with its corresponding partition column name.| +|20|lens.cube.query.replace.timedim|true|Tells whether timedim attribute queried in the time range should be replaced with its corresponding partition column name.| *--+--+---+--+ -|19|lens.cube.query.time.range.writer.class|org.apache.lens.cube.parse.ORTimeRangeWriter|The timerange writer class which specifies how the resolved partitions in timeranges should be written in final query. Available writers are org.apache.lens.cube.parse.ORTimeRangeWriter and org.apache.lens.cube.parse.BetweenTimeRangeWriter| +|21|lens.cube.query.time.range.writer.class|org.apache.lens.cube.parse.ORTimeRangeWriter|The timerange writer class which specifies how the resolved partitions in timeranges should be written in final query. Available writers are org.apache.lens.cube.parse.ORTimeRangeWriter and org.apache.lens.cube.parse.BetweenTimeRangeWriter| *--+--+---+--+ -|20|lens.cube.query.valid.${cubename}.facttables| |List of comma separated fact tables that are valid for cube. If no value is specified, all fact tables are valid| +|22|lens.cube.query.valid.${cubename}.facttables| |List of comma separated fact tables that are valid for cube. If no value is specified, all fact tables are valid| *--+--+---+--+ -|21|lens.cube.query.valid.dim.storgaetables| |List of comma separated dimension storage tables that are valid. If no value is specified, all tables are valid| +|23|lens.cube.query.valid.dim.storgaetables| |List of comma separated dimension storage tables that are valid. 
If no value is specified, all tables are valid| *--+--+---+--+ -|22|lens.cube.query.valid.fact.${facttable}.storage.${storagename}.updateperiods| |List of comma separated update periods that are valid for a fact on a storage. If no value is specified, all update periods are valid| +|24|lens.cube.query.valid.fact.${facttable}.storage.${storagename}.updateperiods| |List of comma separated update periods that are valid for a fact on a storage. If no value is specified, all update periods are valid| *--+--+---+--+ -|23|lens.cube.query.valid.fact.${facttable}.storagetables| |List of comma separated storage tables that are valid for a fact. If no value is specified, all storage tables are valid| +|25|lens.cube.query.valid.fact.${facttable}.storagetables| |List of comma separated storage tables that are valid for a fact. If no value is specified, all storage tables are valid| *--+--+---+--+ The configuration parameters and their default values http://git-wip-us.apache.org/repos/asf/lens/blob/dba885ca/tools/conf/client/lens-client-site.xml ---------------------------------------------------------------------- diff --git a/tools/conf/client/lens-client-site.xml b/tools/conf/client/lens-client-site.xml index 706e356..8d4b429 100644 --- a/tools/conf/client/lens-client-site.xml +++ b/tools/conf/client/lens-client-site.xml @@ -22,6 +22,18 @@ <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> - - + <property> + <name>lens.client.query.poll.interval</name> + <value>4000</value> + <description>Interval at which query progress will be polled. Interval has to be given in milliseconds + </description> + </property> + <property> + <name>lens.cube.query.enable.flattening.bridge.tables</name> + <value>true</value> + <description>Flag specifies if fields selected have to be flattened or not, if they are coming from tables with many + to many relationship in join. If false, field selection will be simple join and selecting the field. 
If true, the + fields from bridge tables will be aggregated grouped by join key. + </description> + </property> </configuration>
