http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/metadata/JAXBUtils.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/JAXBUtils.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/JAXBUtils.java new file mode 100644 index 0000000..e1e3d16 --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/JAXBUtils.java @@ -0,0 +1,1114 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.lens.cube.metadata; + +import java.lang.reflect.Constructor; +import java.text.ParseException; +import java.util.*; + +import javax.ws.rs.WebApplicationException; +import javax.xml.datatype.DatatypeConfigurationException; +import javax.xml.datatype.DatatypeFactory; +import javax.xml.datatype.XMLGregorianCalendar; + +import org.apache.lens.api.metastore.*; +import org.apache.lens.cube.metadata.ExprColumn.ExprSpec; +import org.apache.lens.cube.metadata.ReferencedDimAttribute.ChainRefCol; +import org.apache.lens.server.api.error.LensException; + +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.mapred.InputFormat; + +import com.google.common.base.Optional; +import com.google.common.collect.Maps; +import lombok.extern.slf4j.Slf4j; + +/** + * Utilities for converting to and from JAXB types to hive.ql.metadata.cube types + */ +@Slf4j +public final class JAXBUtils { + private JAXBUtils() { + + } + + private static final ObjectFactory XCF = new ObjectFactory(); + + /** + * Create a hive ql cube object from corresponding JAXB object + * + * @param cube JAXB Cube + * @return {@link Cube} + * @throws LensException + */ + public static CubeInterface hiveCubeFromXCube(XCube cube, Cube parent) throws LensException { + if (cube instanceof XDerivedCube) { + XDerivedCube dcube = (XDerivedCube) cube; + Set<String> dims = new LinkedHashSet<String>(); + dims.addAll(dcube.getDimAttrNames().getAttrName()); + + Set<String> measures = new LinkedHashSet<String>(); + measures.addAll(dcube.getMeasureNames().getMeasureName()); + + Map<String, String> properties = 
mapFromXProperties(cube.getProperties()); + return new DerivedCube(cube.getName(), measures, dims, properties, 0L, parent); + } else { + XBaseCube bcube = (XBaseCube) cube; + Set<CubeDimAttribute> dims = new LinkedHashSet<CubeDimAttribute>(); + if (bcube.getDimAttributes() != null && !bcube.getDimAttributes().getDimAttribute().isEmpty()) { + for (XDimAttribute xd : bcube.getDimAttributes().getDimAttribute()) { + dims.add(hiveDimAttrFromXDimAttr(xd)); + } + } + + Set<CubeMeasure> measures = new LinkedHashSet<CubeMeasure>(); + for (XMeasure xm : bcube.getMeasures().getMeasure()) { + measures.add(hiveMeasureFromXMeasure(xm)); + } + + Set<ExprColumn> expressions = new LinkedHashSet<ExprColumn>(); + if (bcube.getExpressions() != null && !bcube.getExpressions().getExpression().isEmpty()) { + for (XExprColumn xe : bcube.getExpressions().getExpression()) { + expressions.add(hiveExprColumnFromXExprColumn(xe)); + } + } + + Set<JoinChain> joinchains = new LinkedHashSet<JoinChain>(); + if (bcube.getJoinChains() != null && !bcube.getJoinChains().getJoinChain().isEmpty()) { + for (XJoinChain xj : bcube.getJoinChains().getJoinChain()) { + joinchains.add(joinChainFromXJoinChain(xj)); + } + } + + Map<String, String> properties = mapFromXProperties(cube.getProperties()); + return new Cube(cube.getName(), measures, dims, expressions, joinchains, properties, 0L); + } + } + + /** + * Get XCube from hive.ql.metadata.Cube + * + * @param c + * @return {@link XCube} + */ + public static XCube xCubeFromHiveCube(CubeInterface c) { + XCube xc; + if (c.isDerivedCube()) { + XDerivedCube xdc = XCF.createXDerivedCube(); + xdc.setMeasureNames(new XMeasureNames()); + xdc.setDimAttrNames(new XDimAttrNames()); + xc = xdc; + xdc.getMeasureNames().getMeasureName().addAll(c.getMeasureNames()); + xdc.getDimAttrNames().getAttrName().addAll(c.getDimAttributeNames()); + xdc.setParent(((DerivedCube) c).getParent().getName()); + } else { + XBaseCube xbc = XCF.createXBaseCube(); + xbc.setMeasures(new 
XMeasures()); + xbc.setDimAttributes(new XDimAttributes()); + xbc.setExpressions(new XExpressions()); + xbc.setJoinChains(new XJoinChains()); + xc = xbc; + for (CubeMeasure cm : c.getMeasures()) { + xbc.getMeasures().getMeasure().add(xMeasureFromHiveMeasure(cm)); + } + + for (ExprColumn ec : c.getExpressions()) { + xbc.getExpressions().getExpression().add(xExprColumnFromHiveExprColumn(ec)); + } + for (CubeDimAttribute cd : c.getDimAttributes()) { + xbc.getDimAttributes().getDimAttribute().add(xDimAttrFromHiveDimAttr(cd, (Cube) c)); + } + for (JoinChain jc : c.getJoinChains()) { + xbc.getJoinChains().getJoinChain().add(getXJoinChainFromJoinChain(jc)); + } + } + xc.setName(c.getName()); + xc.setProperties(new XProperties()); + xc.getProperties().getProperty().addAll(xPropertiesFromMap(((AbstractCubeTable) c).getProperties())); + return xc; + } + + /** + * Create a hive ql CubeDimension from JAXB counterpart + * + * @param xd + * @return {@link org.apache.lens.cube.metadata.CubeDimAttribute} + */ + public static CubeDimAttribute hiveDimAttrFromXDimAttr(XDimAttribute xd) throws LensException { + Date startDate = getDateFromXML(xd.getStartTime()); + Date endDate = getDateFromXML(xd.getEndTime()); + + CubeDimAttribute hiveDim; + + if (xd.getHierarchy() != null) { + List<CubeDimAttribute> hierarchy = new ArrayList<>(); + for (XDimAttribute hd : xd.getHierarchy().getDimAttribute()) { + hierarchy.add(hiveDimAttrFromXDimAttr(hd)); + } + hiveDim = new HierarchicalDimAttribute(xd.getName(), xd.getDescription(), hierarchy); + } else if (xd.getChainRefColumn() != null + && !xd.getChainRefColumn().isEmpty()) { + hiveDim = new ReferencedDimAttribute(new FieldSchema(xd.getName(), xd.getType().toLowerCase(), + xd.getDescription()), + xd.getDisplayString(), + getChainRefColumns(xd.getChainRefColumn()), + startDate, + endDate, + null, + xd.getNumDistinctValues(), + xd.getValues(), + mapFromXProperties(xd.getTags()) + ); + } else { + hiveDim = new BaseDimAttribute(new 
FieldSchema(xd.getName(), xd.getType().toLowerCase(), + xd.getDescription()), + xd.getDisplayString(), + startDate, + endDate, + null, + xd.getNumDistinctValues(), + xd.getValues(), + mapFromXProperties(xd.getTags()) + ); + } + return hiveDim; + } + + private static List<ChainRefCol> getChainRefColumns(List<XChainColumn> chainCols) { + List<ChainRefCol> chainRefCols = new ArrayList<>(); + for (XChainColumn chainCol : chainCols) { + chainRefCols.add(new ChainRefCol(chainCol.getChainName(), chainCol.getRefCol())); + } + return chainRefCols; + } + + /** + * Get XMLGregorianCalendar from Date. + * + * Useful for converting from java code to XML spec. + * + * @param d Date value + * @return XML value + */ + public static XMLGregorianCalendar getXMLGregorianCalendar(Date d) { + if (d == null) { + return null; + } + + GregorianCalendar c1 = new GregorianCalendar(); + c1.setTime(d); + try { + return DatatypeFactory.newInstance().newXMLGregorianCalendar(c1); + } catch (DatatypeConfigurationException e) { + log.warn("Error converting date " + d, e); + return null; + } + } + + /** + * Get Date from XMLGregorianCalendar + * + * Useful for converting from XML spec to java code. 
+ * + * @param cal XML value + * @return Date value + */ + public static Date getDateFromXML(XMLGregorianCalendar cal) { + if (cal == null) { + return null; + } + return cal.toGregorianCalendar().getTime(); + } + + /** + * Create XMeasure from hive ql cube measure + */ + public static XMeasure xMeasureFromHiveMeasure(CubeMeasure cm) { + if (cm == null) { + return null; + } + + XMeasure xm = XCF.createXMeasure(); + xm.setName(cm.getName()); + xm.setDescription(cm.getDescription()); + xm.setDisplayString(cm.getDisplayString()); + xm.setDefaultAggr(cm.getAggregate()); + xm.setFormatString(cm.getFormatString()); + xm.setType(XMeasureType.valueOf(cm.getType().toUpperCase())); + xm.setUnit(cm.getUnit()); + xm.setStartTime(getXMLGregorianCalendar(cm.getStartTime())); + xm.setEndTime(getXMLGregorianCalendar(cm.getEndTime())); + xm.setMin(cm.getMin()); + xm.setMax(cm.getMax()); + xm.setTags(getXProperties(xPropertiesFromMap(cm.getTags()))); + return xm; + } + + public static XProperties getXProperties(List<XProperty> prop) { + XProperties properties = XCF.createXProperties(); + properties.getProperty().addAll(prop); + return properties; + } + + /** + * Create XExprColumn from hive ExprColum + */ + public static XExprColumn xExprColumnFromHiveExprColumn(ExprColumn ec) { + if (ec == null) { + return null; + } + + XExprColumn xe = XCF.createXExprColumn(); + xe.setName(ec.getName()); + xe.setType(ec.getType()); + xe.setDescription(ec.getDescription()); + xe.setDisplayString(ec.getDisplayString()); + xe.getExprSpec().addAll(xExprSpecFromExprColumn(ec.getExpressionSpecs())); + xe.setTags(getXProperties(xPropertiesFromMap(ec.getTags()))); + return xe; + } + + private static Collection<XExprSpec> xExprSpecFromExprColumn(Collection<ExprSpec> esSet) { + List<XExprSpec> xes = new ArrayList<XExprSpec>(); + for (ExprSpec es : esSet) { + XExprSpec e = new XExprSpec(); + e.setExpr(es.getExpr()); + e.setStartTime(getXMLGregorianCalendar(es.getStartTime())); + 
e.setEndTime(getXMLGregorianCalendar(es.getEndTime())); + xes.add(e); + } + return xes; + } + + private static ExprSpec[] exprSpecFromXExprColumn(Collection<XExprSpec> xesList) throws LensException { + List<ExprSpec> esArray = new ArrayList<ExprSpec>(xesList.size()); + for (XExprSpec xes : xesList) { + esArray.add(new ExprSpec(xes.getExpr(), getDateFromXML(xes.getStartTime()), getDateFromXML(xes.getEndTime()))); + } + return esArray.toArray(new ExprSpec[0]); + } + + /** + * Create XDimAttribute from CubeDimAttribute + */ + public static XDimAttribute xDimAttrFromHiveDimAttr(CubeDimAttribute cd, AbstractBaseTable baseTable) { + XDimAttribute xd = XCF.createXDimAttribute(); + xd.setName(cd.getName()); + xd.setDescription(cd.getDescription()); + xd.setDisplayString(cd.getDisplayString()); + xd.setStartTime(getXMLGregorianCalendar(cd.getStartTime())); + xd.setEndTime(getXMLGregorianCalendar(cd.getEndTime())); + xd.setTags(getXProperties(xPropertiesFromMap(cd.getTags()))); + if (cd instanceof ReferencedDimAttribute) { + ReferencedDimAttribute rd = (ReferencedDimAttribute) cd; + if (!rd.getChainRefColumns().isEmpty()) { + for (ChainRefCol crCol : rd.getChainRefColumns()) { + XChainColumn xcc = new XChainColumn(); + xcc.setChainName(crCol.getChainName()); + xcc.setRefCol(crCol.getRefColumn()); + if (baseTable.getChainByName(crCol.getChainName()) == null) { + log.error("Missing chain definition for " + crCol.getChainName()); + } else { + xcc.setDestTable(baseTable.getChainByName(crCol.getChainName()).getDestTable()); + } + xd.getChainRefColumn().add(xcc); + } + } + xd.setType(rd.getType()); + Optional<Long> numOfDistinctValues = rd.getNumOfDistinctValues(); + if (numOfDistinctValues.isPresent()) { + xd.setNumDistinctValues(numOfDistinctValues.get()); + } + if (rd.getValues().isPresent()) { + xd.getValues().addAll(rd.getValues().get()); + } + } else if (cd instanceof BaseDimAttribute) { + BaseDimAttribute bd = (BaseDimAttribute) cd; + xd.setType(bd.getType()); + 
Optional<Long> numOfDistinctValues = bd.getNumOfDistinctValues(); + if (numOfDistinctValues.isPresent()) { + xd.setNumDistinctValues(numOfDistinctValues.get()); + } + if (bd.getValues().isPresent()) { + xd.getValues().addAll(bd.getValues().get()); + } + } else if (cd instanceof HierarchicalDimAttribute) { + HierarchicalDimAttribute hd = (HierarchicalDimAttribute) cd; + XDimAttributes hierarchy = new XDimAttributes(); + for (CubeDimAttribute hdDim : hd.getHierarchy()) { + hierarchy.getDimAttribute().add(xDimAttrFromHiveDimAttr(hdDim, baseTable)); + } + xd.setHierarchy(hierarchy); + } + return xd; + } + + /** + * Create XJoinChain from cube join chain + */ + public static XJoinChain getXJoinChainFromJoinChain(JoinChain jc) { + XJoinChain xjc = XCF.createXJoinChain(); + xjc.setName(jc.getName()); + xjc.setDescription(jc.getDescription()); + xjc.setDisplayString(jc.getDisplayString()); + xjc.setDestTable(jc.getDestTable()); + xjc.setPaths(new XJoinPaths()); + + for (JoinChain.Path path : jc.getPaths()) { + xjc.getPaths().getPath().add(xJoinPathFromJoinPath(path)); + } + return xjc; + } + + public static XJoinPath xJoinPathFromJoinPath(JoinChain.Path path) { + XJoinPath xjp = XCF.createXJoinPath(); + xjp.setEdges(new XJoinEdges()); + for (JoinChain.Edge edge : path.getLinks()) { + XJoinEdge xje = XCF.createXJoinEdge(); + xje.setFrom(xTabReferenceFromTabReference(edge.getFrom())); + xje.setTo(xTabReferenceFromTabReference(edge.getTo())); + xjp.getEdges().getEdge().add(xje); + } + return xjp; + } + + public static List<XTableReference> xTabReferencesFromHiveTabReferences(List<TableReference> hiveRefs) { + List<XTableReference> xrefList = new ArrayList<XTableReference>(); + + for (TableReference hRef : hiveRefs) { + xrefList.add(xTabReferenceFromTabReference(hRef)); + } + return xrefList; + } + + private static XTableReference xTabReferenceFromTabReference(TableReference ref) { + XTableReference xref = XCF.createXTableReference(); + xref.setTable(ref.getDestTable()); + 
xref.setColumn(ref.getDestColumn()); + xref.setMapsToMany(ref.isMapsToMany()); + return xref; + } + + /** + * Create hive ql CubeMeasure from JAXB counterpart + * + * @param xm + * @return {@link CubeMeasure} + */ + public static CubeMeasure hiveMeasureFromXMeasure(XMeasure xm) { + Date startDate = xm.getStartTime() == null ? null : xm.getStartTime().toGregorianCalendar().getTime(); + Date endDate = xm.getEndTime() == null ? null : xm.getEndTime().toGregorianCalendar().getTime(); + CubeMeasure cm = new ColumnMeasure(new FieldSchema(xm.getName(), xm.getType().name().toLowerCase(), + xm.getDescription()), + xm.getDisplayString(), + xm.getFormatString(), + xm.getDefaultAggr(), + xm.getUnit(), + startDate, + endDate, + null, + xm.getMin(), + xm.getMax(), + mapFromXProperties(xm.getTags()) + ); + return cm; + } + + /** + * Create cube's JoinChain from JAXB counterpart + * + * @param xj + * @return {@link JoinChain} + */ + public static JoinChain joinChainFromXJoinChain(XJoinChain xj) { + JoinChain jc = new JoinChain(xj.getName(), xj.getDisplayString(), xj.getDescription()); + for (int i = 0; i < xj.getPaths().getPath().size(); i++) { + XJoinPath xchain = xj.getPaths().getPath().get(i); + List<TableReference> chain = new ArrayList<TableReference>(xchain.getEdges().getEdge().size() * 2); + + for (XJoinEdge xRef : xchain.getEdges().getEdge()) { + chain.add(new TableReference(xRef.getFrom().getTable(), xRef.getFrom().getColumn(), + xRef.getFrom().isMapsToMany())); + chain.add(new TableReference(xRef.getTo().getTable(), xRef.getTo().getColumn(), xRef.getTo().isMapsToMany())); + } + jc.addPath(chain); + } + return jc; + } + + public static ExprColumn hiveExprColumnFromXExprColumn(XExprColumn xe) throws LensException { + ExprColumn ec = new ExprColumn(new FieldSchema(xe.getName(), xe.getType().toLowerCase(), + xe.getDescription()), + xe.getDisplayString(), + mapFromXProperties(xe.getTags()), + exprSpecFromXExprColumn(xe.getExprSpec())); + return ec; + } + + /** + * Convert 
JAXB properties to Map<String, String> + * + * @param xProperties + * @return {@link Map} + */ + public static Map<String, String> mapFromXProperties(XProperties xProperties) { + Map<String, String> properties = new HashMap<String, String>(); + if (xProperties != null && !xProperties.getProperty().isEmpty()) { + for (XProperty xp : xProperties.getProperty()) { + properties.put(xp.getName(), xp.getValue()); + } + } + return properties; + } + + /** + * Convert string map to XProperties + */ + public static List<XProperty> xPropertiesFromMap(Map<String, String> map) { + List<XProperty> xpList = new ArrayList<XProperty>(); + if (map != null && !map.isEmpty()) { + for (Map.Entry<String, String> e : map.entrySet()) { + XProperty property = XCF.createXProperty(); + property.setName(e.getKey()); + property.setValue(e.getValue()); + xpList.add(property); + } + } + return xpList; + } + + public static Set<XSegment> xSegmentsFromSegments(Set<Segment> segs) { + Set<XSegment> xsegs = new HashSet<XSegment>(); + if (segs != null && !segs.isEmpty()) { + for (Segment seg : segs) { + XSegment xcubeSeg = XCF.createXSegment(); + xcubeSeg.setCubeName(seg.getName()); + xcubeSeg.setSegmentParameters(getXpropertiesFromSegment(seg)); + xsegs.add(xcubeSeg); + } + } + return xsegs; + } + + public static XProperties getXpropertiesFromSegment(Segment cseg) { + XProperties xproperties = XCF.createXProperties(); + for (String prop : cseg.getProperties().keySet()) { + String segPrefix = MetastoreUtil.getSegmentPropertyKey(cseg.getName()); + if (prop.startsWith(segPrefix)){ + XProperty xprop = XCF.createXProperty(); + xprop.setName(prop.replace(segPrefix, "")); + xprop.setValue(cseg.getProperties().get(prop)); + xproperties.getProperty().add(xprop); + } + } + return xproperties; + } + + + public static FieldSchema fieldSchemaFromColumn(XColumn c) { + if (c == null) { + return null; + } + + return new FieldSchema(c.getName(), c.getType().toLowerCase(), c.getComment()); + } + + public static XColumn 
columnFromFieldSchema(FieldSchema fs) { + if (fs == null) { + return null; + } + XColumn c = XCF.createXColumn(); + c.setName(fs.getName()); + c.setType(fs.getType()); + c.setComment(fs.getComment()); + return c; + } + + public static ArrayList<FieldSchema> fieldSchemaListFromColumns(XColumns columns) { + if (columns != null && !columns.getColumn().isEmpty()) { + ArrayList<FieldSchema> fsList = new ArrayList<FieldSchema>(columns.getColumn().size()); + for (XColumn c : columns.getColumn()) { + fsList.add(fieldSchemaFromColumn(c)); + } + return fsList; + } + return null; + } + + public static Map<String, String> columnStartAndEndTimeFromXColumns(XColumns columns) { + if (columns != null && !columns.getColumn().isEmpty()) { + Map<String, String> colStartTimeMap = new HashMap<String, String>(); + for (XColumn c : columns.getColumn()) { + if (!(c.getStartTime() == null)) { + colStartTimeMap.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat(c.getName()), c.getStartTime()); + } + if (!(c.getEndTime() == null)) { + colStartTimeMap.put(MetastoreConstants.FACT_COL_END_TIME_PFX.concat(c.getName()), c.getEndTime()); + } + } + return colStartTimeMap; + } + return null; + } + + public static List<XColumn> columnsFromFieldSchemaList(List<FieldSchema> fslist) { + List<XColumn> cols = new ArrayList<XColumn>(); + if (fslist == null || fslist.isEmpty()) { + return cols; + } + + for (FieldSchema fs : fslist) { + cols.add(columnFromFieldSchema(fs)); + } + return cols; + } + + public static Map<String, Set<UpdatePeriod>> getFactUpdatePeriodsFromStorageTables(XStorageTables storageTables) { + if (storageTables != null && !storageTables.getStorageTable().isEmpty()) { + Map<String, Set<UpdatePeriod>> factUpdatePeriods = new LinkedHashMap<String, Set<UpdatePeriod>>(); + + for (XStorageTableElement ste : storageTables.getStorageTable()) { + Set<UpdatePeriod> updatePeriods = new TreeSet<>(); + // Check if the update period array is empty. 
+ List<XUpdatePeriod> updatePeriodList = ste.getUpdatePeriods().getUpdatePeriod(); + if (updatePeriodList.isEmpty()) { + List<XUpdatePeriodTableDescriptor> tableDescriptorList = ste.getUpdatePeriods() + .getUpdatePeriodTableDescriptor(); + for (XUpdatePeriodTableDescriptor tableDescriptor : tableDescriptorList) { + updatePeriodList.add(tableDescriptor.getUpdatePeriod()); + } + } + for (XUpdatePeriod upd : updatePeriodList) { + updatePeriods.add(UpdatePeriod.valueOf(upd.name())); + } + factUpdatePeriods.put(ste.getStorageName(), updatePeriods); + } + return factUpdatePeriods; + } + return null; + } + + public static Map<String, UpdatePeriod> dumpPeriodsFromStorageTables(XStorageTables storageTables) { + if (storageTables != null && !storageTables.getStorageTable().isEmpty()) { + Map<String, UpdatePeriod> dumpPeriods = new LinkedHashMap<String, UpdatePeriod>(); + + for (XStorageTableElement ste : storageTables.getStorageTable()) { + UpdatePeriod dumpPeriod = null; + if (ste.getUpdatePeriods() != null && !ste.getUpdatePeriods().getUpdatePeriod().isEmpty()) { + dumpPeriod = UpdatePeriod.valueOf(ste.getUpdatePeriods().getUpdatePeriod().get(0).name()); + } + dumpPeriods.put(ste.getStorageName(), dumpPeriod); + } + return dumpPeriods; + } + return null; + } + + public static Storage storageFromXStorage(XStorage xs) { + if (xs == null) { + return null; + } + + Storage storage; + try { + Class<?> clazz = Class.forName(xs.getClassname()); + Constructor<?> constructor = clazz.getConstructor(String.class); + storage = (Storage) constructor.newInstance(xs.getName()); + storage.addProperties(mapFromXProperties(xs.getProperties())); + return storage; + } catch (Exception e) { + log.error("Could not create storage class" + xs.getClassname() + "with name:" + xs.getName(), e); + throw new WebApplicationException(e); + } + } + + public static XStorage xstorageFromStorage(Storage storage) { + if (storage == null) { + return null; + } + + XStorage xstorage = null; + xstorage = 
XCF.createXStorage(); + xstorage.setProperties(new XProperties()); + xstorage.setName(storage.getName()); + xstorage.setClassname(storage.getClass().getCanonicalName()); + xstorage.getProperties().getProperty().addAll(xPropertiesFromMap(storage.getProperties())); + return xstorage; + } + + public static XDimensionTable dimTableFromCubeDimTable(CubeDimensionTable cubeDimTable) { + if (cubeDimTable == null) { + return null; + } + + XDimensionTable dimTab = XCF.createXDimensionTable(); + dimTab.setDimensionName(cubeDimTable.getDimName()); + dimTab.setTableName(cubeDimTable.getName()); + dimTab.setWeight(cubeDimTable.weight()); + dimTab.setColumns(new XColumns()); + dimTab.setProperties(new XProperties()); + dimTab.setStorageTables(new XStorageTables()); + + for (FieldSchema column : cubeDimTable.getColumns()) { + dimTab.getColumns().getColumn().add(columnFromFieldSchema(column)); + } + dimTab.getProperties().getProperty().addAll(xPropertiesFromMap(cubeDimTable.getProperties())); + + return dimTab; + } + + public static List<? 
extends XTableReference> dimRefListFromTabRefList( + List<TableReference> tabRefs) { + if (tabRefs != null && !tabRefs.isEmpty()) { + List<XTableReference> xTabRefs = new ArrayList<XTableReference>(tabRefs.size()); + for (TableReference ref : tabRefs) { + XTableReference xRef = XCF.createXTableReference(); + xRef.setColumn(ref.getDestColumn()); + xRef.setTable(ref.getDestTable()); + xRef.setMapsToMany(ref.isMapsToMany()); + xTabRefs.add(xRef); + } + return xTabRefs; + } + + return null; + } + + public static CubeDimensionTable cubeDimTableFromDimTable(XDimensionTable dimensionTable) throws LensException { + + return new CubeDimensionTable(dimensionTable.getDimensionName(), + dimensionTable.getTableName(), + fieldSchemaListFromColumns(dimensionTable.getColumns()), + dimensionTable.getWeight(), + dumpPeriodsFromStorageTables(dimensionTable.getStorageTables()), + mapFromXProperties(dimensionTable.getProperties())); + } + + public static CubeFactTable cubeFactFromFactTable(XFactTable fact) throws LensException { + List<FieldSchema> columns = fieldSchemaListFromColumns(fact.getColumns()); + + Map<String, Set<UpdatePeriod>> storageUpdatePeriods = getFactUpdatePeriodsFromStorageTables( + fact.getStorageTables()); + Map<String, Map<UpdatePeriod, String>> storageTablePrefixMap = storageTablePrefixMapOfStorage( + fact.getStorageTables()); + return new CubeFactTable(fact.getCubeName(), fact.getName(), columns, storageUpdatePeriods, fact.getWeight(), + mapFromXProperties(fact.getProperties()), storageTablePrefixMap); + } + + public static Segmentation segmentationFromXSegmentation(XSegmentation seg) throws LensException { + + Map<String, String> props = new HashMap<>(); + // Skip properties with keyword internal. These properties are internal to lens + // and users are not supposed to see them. 
+ for(String prop : mapFromXProperties(seg.getProperties()).keySet()) { + if (!(prop.toLowerCase().startsWith(MetastoreConstants.SEGMENTATION_KEY_PFX))) { + props.put(prop, mapFromXProperties(seg.getProperties()).get(prop)); + } + } + return new Segmentation(seg.getCubeName(), + seg.getName(), + segmentsFromXSegments(seg.getSegements()), + seg.getWeight(), + props); + } + + + public static XFactTable factTableFromCubeFactTable(CubeFactTable cFact) { + XFactTable fact = XCF.createXFactTable(); + fact.setName(cFact.getName()); + fact.setColumns(new XColumns()); + fact.setProperties(new XProperties()); + fact.setStorageTables(new XStorageTables()); + fact.getProperties().getProperty().addAll(xPropertiesFromMap(cFact.getProperties())); + fact.getColumns().getColumn().addAll(columnsFromFieldSchemaList(cFact.getColumns())); + fact.setWeight(cFact.weight()); + fact.setCubeName(cFact.getCubeName()); + return fact; + } + + public static XSegmentation xsegmentationFromSegmentation(Segmentation cSeg) { + XSegmentation seg = XCF.createXSegmentation(); + seg.setName(cSeg.getName()); + seg.setProperties(new XProperties()); + seg.setSegements(new XSegments()); + seg.setWeight(cSeg.weight()); + seg.setCubeName(cSeg.getBaseCube()); + if (xPropertiesFromMap(cSeg.getProperties()) != null) { + seg.getProperties().getProperty().addAll(xPropertiesFromMap(cSeg.getProperties())); + } + seg.getSegements().getSegment(). 
+ addAll(xSegmentsFromSegments(cSeg.getSegments())); + return seg; + } + + public static StorageTableDesc storageTableDescFromXStorageTableDesc( + XStorageTableDesc xtableDesc) { + StorageTableDesc tblDesc = new StorageTableDesc(); + tblDesc.setTblProps(mapFromXProperties(xtableDesc.getTableParameters())); + tblDesc.setSerdeProps(mapFromXProperties(xtableDesc.getSerdeParameters())); + tblDesc.setPartCols(fieldSchemaListFromColumns(xtableDesc.getPartCols())); + tblDesc.setTimePartCols(xtableDesc.getTimePartCols()); + tblDesc.setExternal(xtableDesc.isExternal()); + tblDesc.setLocation(xtableDesc.getTableLocation()); + tblDesc.setInputFormat(xtableDesc.getInputFormat()); + tblDesc.setOutputFormat(xtableDesc.getOutputFormat()); + tblDesc.setFieldDelim(xtableDesc.getFieldDelimiter()); + tblDesc.setFieldEscape(xtableDesc.getEscapeChar()); + tblDesc.setCollItemDelim(xtableDesc.getCollectionDelimiter()); + tblDesc.setLineDelim(xtableDesc.getLineDelimiter()); + tblDesc.setMapKeyDelim(xtableDesc.getMapKeyDelimiter()); + tblDesc.setSerName(xtableDesc.getSerdeClassName()); + tblDesc.setStorageHandler(xtableDesc.getStorageHandlerName()); + return tblDesc; + } + + public static StorageTableDesc storageTableDescFromXStorageTableElement( + XStorageTableElement storageTableElement) { + return storageTableDescFromXStorageTableDesc(storageTableElement.getTableDesc()); + } + + public static XStorageTableElement getXStorageTableFromHiveTable(Table tbl) { + XStorageTableElement tblElement = new XStorageTableElement(); + tblElement.setUpdatePeriods(new XUpdatePeriods()); + tblElement.setTableDesc(getStorageTableDescFromHiveTable(tbl)); + return tblElement; + } + + public static XStorageTableDesc getStorageTableDescFromHiveTable(Table tbl) { + XStorageTableDesc tblDesc = new XStorageTableDesc(); + tblDesc.setPartCols(new XColumns()); + tblDesc.setTableParameters(new XProperties()); + tblDesc.setSerdeParameters(new XProperties()); + 
tblDesc.getPartCols().getColumn().addAll(columnsFromFieldSchemaList(tbl.getPartCols())); + String timePartCols = tbl.getParameters().get(MetastoreConstants.TIME_PART_COLUMNS); + if (timePartCols != null) { + tblDesc.getTimePartCols().addAll(Arrays.asList(org.apache.commons.lang.StringUtils.split(timePartCols, ","))); + } + tblDesc.setNumBuckets(tbl.getNumBuckets()); + tblDesc.getBucketCols().addAll(tbl.getBucketCols()); + List<String> sortCols = new ArrayList<String>(); + List<Integer> sortOrders = new ArrayList<Integer>(); + for (Order order : tbl.getSortCols()) { + sortCols.add(order.getCol()); + sortOrders.add(order.getOrder()); + } + tblDesc.getSortCols().addAll(sortCols); + tblDesc.getSortColOrder().addAll(sortOrders); + + XSkewedInfo xskewinfo = new XSkewedInfo(); + xskewinfo.getColNames().addAll(tbl.getSkewedColNames()); + for (List<String> value : tbl.getSkewedColValues()) { + XSkewColList colVallist = new XSkewColList(); + colVallist.getElements().addAll(value); + xskewinfo.getColValues().add(colVallist); + XSkewedValueLocation valueLocation = new XSkewedValueLocation(); + if (tbl.getSkewedColValueLocationMaps().get(value) != null) { + valueLocation.setValue(colVallist); + valueLocation.setLocation(tbl.getSkewedColValueLocationMaps().get(value)); + xskewinfo.getValueLocationMap().add(valueLocation); + } + } + + tblDesc.getTableParameters().getProperty().addAll(xPropertiesFromMap(tbl.getParameters())); + tblDesc.getSerdeParameters().getProperty().addAll(xPropertiesFromMap( + tbl.getTTable().getSd().getSerdeInfo().getParameters())); + tblDesc.setExternal(tbl.getTableType().equals(TableType.EXTERNAL_TABLE)); + tblDesc.setCompressed(tbl.getTTable().getSd().isCompressed()); + tblDesc.setTableLocation(tbl.getDataLocation().toString()); + tblDesc.setInputFormat(tbl.getInputFormatClass().getCanonicalName()); + tblDesc.setOutputFormat(tbl.getOutputFormatClass().getCanonicalName()); + tblDesc.setFieldDelimiter(tbl.getSerdeParam(serdeConstants.FIELD_DELIM)); + 
tblDesc.setLineDelimiter(tbl.getSerdeParam(serdeConstants.LINE_DELIM)); + tblDesc.setCollectionDelimiter(tbl.getSerdeParam(serdeConstants.COLLECTION_DELIM)); + tblDesc.setMapKeyDelimiter(tbl.getSerdeParam(serdeConstants.MAPKEY_DELIM)); + tblDesc.setEscapeChar(tbl.getSerdeParam(serdeConstants.ESCAPE_CHAR)); + tblDesc.setSerdeClassName(tbl.getSerializationLib()); + tblDesc.setStorageHandlerName(tbl.getStorageHandler() != null + ? tbl.getStorageHandler().getClass().getCanonicalName() : ""); + return tblDesc; + } + + public static Map<String, StorageTableDesc> tableDescPrefixMapFromXStorageTables(XStorageTables storageTables) { + Map<String, StorageTableDesc> storageTablePrefixToDescMap = new HashMap<>(); + if (storageTables != null && !storageTables.getStorageTable().isEmpty()) { + for (XStorageTableElement sTbl : storageTables.getStorageTable()) { + if (sTbl.getUpdatePeriods() != null && sTbl.getUpdatePeriods().getUpdatePeriodTableDescriptor() != null && !sTbl + .getUpdatePeriods().getUpdatePeriodTableDescriptor().isEmpty()) { + for (XUpdatePeriodTableDescriptor updatePeriodTable : sTbl.getUpdatePeriods() + .getUpdatePeriodTableDescriptor()) { + // Get table name with update period as the prefix. 
+ storageTablePrefixToDescMap.put(updatePeriodTable.getUpdatePeriod() + "_" + sTbl.getStorageName(), + storageTableDescFromXStorageTableDesc(updatePeriodTable.getTableDesc())); + } + } else { + storageTablePrefixToDescMap.put(sTbl.getStorageName(), storageTableDescFromXStorageTableElement(sTbl)); + } + } + } + return storageTablePrefixToDescMap; + } + + public static Map<String, Map<UpdatePeriod, String>> storageTablePrefixMapOfStorage(XStorageTables storageTables) { + Map<String, Map<UpdatePeriod, String>> storageTableMap = new HashMap<>(); + if (storageTables != null && !storageTables.getStorageTable().isEmpty()) { + for (XStorageTableElement sTbl : storageTables.getStorageTable()) { + Map<UpdatePeriod, String> storageNameMap = new HashMap<>(); + if (sTbl.getUpdatePeriods() != null && sTbl.getUpdatePeriods().getUpdatePeriodTableDescriptor() != null && !sTbl + .getUpdatePeriods().getUpdatePeriodTableDescriptor().isEmpty()) { + for (XUpdatePeriodTableDescriptor updatePeriodTable : sTbl.getUpdatePeriods() + .getUpdatePeriodTableDescriptor()) { + // Get table name with update period as the prefix. 
+ storageNameMap.put(UpdatePeriod.valueOf(updatePeriodTable.getUpdatePeriod().value()), + updatePeriodTable.getUpdatePeriod() + "_" + sTbl.getStorageName()); + } + } else { + for (XUpdatePeriod updatePeriod : sTbl.getUpdatePeriods().getUpdatePeriod()) { + storageNameMap.put(UpdatePeriod.valueOf(updatePeriod.value()), sTbl.getStorageName()); + } + } + storageTableMap.put(sTbl.getStorageName(), storageNameMap); + } + } + return storageTableMap; + } + + public static Set<Segment> segmentsFromXSegments(XSegments segs) { + Set<Segment> cubeSegs = new HashSet<>(); + for (XSegment xcube : segs.getSegment()){ + Map<String, String> segProp = new HashMap<>(); + if (xcube.getSegmentParameters() != null) { + for (XProperty prop : xcube.getSegmentParameters().getProperty()) { + segProp.put(prop.getName(), prop.getValue()); + } + } + cubeSegs.add(new Segment(xcube.getCubeName(), segProp)); + } + return cubeSegs; + } + + public static Map<String, Date> timePartSpecfromXTimePartSpec( + XTimePartSpec xtimePartSpec) { + Map<String, Date> timePartSpec = new HashMap<String, Date>(); + if (xtimePartSpec != null && !xtimePartSpec.getPartSpecElement().isEmpty()) { + for (XTimePartSpecElement xtimePart : xtimePartSpec.getPartSpecElement()) { + timePartSpec.put(xtimePart.getKey(), getDateFromXML(xtimePart.getValue())); + } + } + return timePartSpec; + } + + public static Map<String, String> nonTimePartSpecfromXNonTimePartSpec( + XPartSpec xnonTimePartSpec) { + Map<String, String> nonTimePartSpec = new HashMap<String, String>(); + if (xnonTimePartSpec != null && !xnonTimePartSpec.getPartSpecElement().isEmpty()) { + for (XPartSpecElement xPart : xnonTimePartSpec.getPartSpecElement()) { + nonTimePartSpec.put(xPart.getKey(), xPart.getValue()); + } + } + return nonTimePartSpec; + } + + public static XPartitionList xpartitionListFromPartitionList(String cubeTableName, List<Partition> partitions, + List<String> timePartCols) throws HiveException { + XPartitionList xPartitionList = new 
XPartitionList(); + xPartitionList.getPartition(); + if (partitions != null) { + for (Partition partition : partitions) { + xPartitionList.getPartition().add(xpartitionFromPartition(cubeTableName, partition, timePartCols)); + } + } + return xPartitionList; + } + + public static XPartition xpartitionFromPartition(String cubeTableName, Partition p, List<String> timePartCols) + throws HiveException { + XPartition xp = new XPartition(); + xp.setFactOrDimensionTableName(cubeTableName); + xp.setPartitionParameters(new XProperties()); + xp.setSerdeParameters(new XProperties()); + xp.setName(p.getCompleteName()); + xp.setLocation(p.getLocation()); + xp.setInputFormat(p.getInputFormatClass().getCanonicalName()); + xp.setOutputFormat(p.getOutputFormatClass().getCanonicalName()); + xp.getPartitionParameters().getProperty().addAll(xPropertiesFromMap(p.getParameters())); + String upParam = p.getParameters().get(MetastoreConstants.PARTITION_UPDATE_PERIOD); + xp.setUpdatePeriod(XUpdatePeriod.valueOf(upParam)); + LinkedHashMap<String, String> partSpec = p.getSpec(); + xp.setFullPartitionSpec(new XPartSpec()); + for (Map.Entry<String, String> entry : partSpec.entrySet()) { + XPartSpecElement e = new XPartSpecElement(); + e.setKey(entry.getKey()); + e.setValue(entry.getValue()); + xp.getFullPartitionSpec().getPartSpecElement().add(e); + } + try { + xp.setTimePartitionSpec(new XTimePartSpec()); + xp.setNonTimePartitionSpec(new XPartSpec()); + for (Map.Entry<String, String> entry : partSpec.entrySet()) { + if (timePartCols.contains(entry.getKey())) { + XTimePartSpecElement timePartSpecElement = new XTimePartSpecElement(); + timePartSpecElement.setKey(entry.getKey()); + timePartSpecElement + .setValue(getXMLGregorianCalendar(UpdatePeriod.valueOf(xp.getUpdatePeriod().name()).parse( + entry.getValue()))); + xp.getTimePartitionSpec().getPartSpecElement().add(timePartSpecElement); + } else { + XPartSpecElement partSpecElement = new XPartSpecElement(); + 
partSpecElement.setKey(entry.getKey()); + partSpecElement.setValue(entry.getValue()); + xp.getNonTimePartitionSpec().getPartSpecElement().add(partSpecElement); + } + } + } catch (ParseException exc) { + log.debug("can't form time part spec from " + partSpec, exc); + xp.setTimePartitionSpec(null); + xp.setNonTimePartitionSpec(null); + } + xp.setSerdeClassname(p.getTPartition().getSd().getSerdeInfo().getSerializationLib()); + xp.getSerdeParameters().getProperty().addAll(xPropertiesFromMap( + p.getTPartition().getSd().getSerdeInfo().getParameters())); + return xp; + } + + public static void updatePartitionFromXPartition(Partition partition, XPartition xp) throws ClassNotFoundException { + partition.getParameters().putAll(mapFromXProperties(xp.getPartitionParameters())); + partition.getTPartition().getSd().getSerdeInfo().setParameters(mapFromXProperties(xp.getSerdeParameters())); + partition.setLocation(xp.getLocation()); + if (xp.getInputFormat() != null) { + partition.setInputFormatClass(Class.forName(xp.getInputFormat()).asSubclass(InputFormat.class)); + } + if (xp.getOutputFormat() != null) { + Class<? 
extends HiveOutputFormat> outputFormatClass = + Class.forName(xp.getOutputFormat()).asSubclass(HiveOutputFormat.class); + partition.setOutputFormatClass(outputFormatClass); + } + partition.getParameters().put(MetastoreConstants.PARTITION_UPDATE_PERIOD, xp.getUpdatePeriod().name()); + partition.getTPartition().getSd().getSerdeInfo().setSerializationLib(xp.getSerdeClassname()); + } + + public static StoragePartitionDesc storagePartSpecFromXPartition( + XPartition xpart) { + StoragePartitionDesc partDesc = new StoragePartitionDesc( + xpart.getFactOrDimensionTableName(), + timePartSpecfromXTimePartSpec(xpart.getTimePartitionSpec()), + nonTimePartSpecfromXNonTimePartSpec(xpart.getNonTimePartitionSpec()), + UpdatePeriod.valueOf(xpart.getUpdatePeriod().name())); + partDesc.setPartParams(mapFromXProperties(xpart.getPartitionParameters())); + partDesc.setSerdeParams(mapFromXProperties(xpart.getSerdeParameters())); + partDesc.setLocation(xpart.getLocation()); + partDesc.setInputFormat(xpart.getInputFormat()); + partDesc.setOutputFormat(xpart.getOutputFormat()); + partDesc.setSerializationLib(xpart.getSerdeClassname()); + return partDesc; + } + + public static List<StoragePartitionDesc> storagePartSpecListFromXPartitionList( + final XPartitionList xpartList) { + ArrayList<StoragePartitionDesc> ret = new ArrayList<StoragePartitionDesc>(); + for (XPartition xpart : xpartList.getPartition()) { + ret.add(storagePartSpecFromXPartition(xpart)); + } + return ret; + } + + public static Dimension dimensionFromXDimension(XDimension dimension) throws LensException { + Set<CubeDimAttribute> dims = new LinkedHashSet<CubeDimAttribute>(); + for (XDimAttribute xd : dimension.getAttributes().getDimAttribute()) { + dims.add(hiveDimAttrFromXDimAttr(xd)); + } + + Set<ExprColumn> expressions = new LinkedHashSet<ExprColumn>(); + if (dimension.getExpressions() != null && !dimension.getExpressions().getExpression().isEmpty()) { + for (XExprColumn xe : dimension.getExpressions().getExpression()) { + 
expressions.add(hiveExprColumnFromXExprColumn(xe)); + } + } + + Set<JoinChain> joinchains = new LinkedHashSet<JoinChain>(); + if (dimension.getJoinChains() != null && !dimension.getJoinChains().getJoinChain().isEmpty()) { + for (XJoinChain xj : dimension.getJoinChains().getJoinChain()) { + joinchains.add(joinChainFromXJoinChain(xj)); + } + } + + Map<String, String> properties = mapFromXProperties(dimension.getProperties()); + return new Dimension(dimension.getName(), dims, expressions, joinchains, properties, 0L); + } + + public static XDimension xdimensionFromDimension(Dimension dimension) { + XDimension xd = XCF.createXDimension(); + xd.setName(dimension.getName()); + xd.setAttributes(new XDimAttributes()); + xd.setExpressions(new XExpressions()); + xd.setJoinChains(new XJoinChains()); + xd.setProperties(new XProperties()); + + xd.getProperties().getProperty().addAll(xPropertiesFromMap(((AbstractCubeTable) dimension).getProperties())); + for (CubeDimAttribute cd : dimension.getAttributes()) { + xd.getAttributes().getDimAttribute().add(xDimAttrFromHiveDimAttr(cd, dimension)); + } + + for (ExprColumn ec : dimension.getExpressions()) { + xd.getExpressions().getExpression().add(xExprColumnFromHiveExprColumn(ec)); + } + + for (JoinChain jc : dimension.getJoinChains()) { + xd.getJoinChains().getJoinChain().add(getXJoinChainFromJoinChain(jc)); + } + + return xd; + } + + public static XNativeTable nativeTableFromMetaTable(Table table) { + XNativeTable xtable = XCF.createXNativeTable(); + xtable.setColumns(new XColumns()); + xtable.setName(table.getTableName()); + xtable.setDbname(table.getDbName()); + xtable.setOwner(table.getOwner()); + xtable.setCreatetime(table.getTTable().getCreateTime()); + xtable.setLastAccessTime(table.getTTable().getLastAccessTime()); + xtable.getColumns().getColumn().addAll(columnsFromFieldSchemaList(table.getCols())); + xtable.setStorageDescriptor(getStorageTableDescFromHiveTable(table)); + xtable.setTableType(table.getTableType().name()); + 
return xtable; + } + + public static Map<String, String> getFullPartSpecAsMap(XPartition partition) { + Map<String, String> spec = Maps.newHashMap(); + if (partition.getTimePartitionSpec() != null) { + for (XTimePartSpecElement timePartSpecElement : partition.getTimePartitionSpec().getPartSpecElement()) { + spec.put(timePartSpecElement.getKey(), UpdatePeriod.valueOf(partition.getUpdatePeriod().name()).format() + .format(getDateFromXML(timePartSpecElement.getValue()))); + } + } + if (partition.getNonTimePartitionSpec() != null) { + for (XPartSpecElement partSpecElement : partition.getNonTimePartitionSpec().getPartSpecElement()) { + spec.put(partSpecElement.getKey(), partSpecElement.getValue()); + } + } + return spec; + } +}
http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java index 4e350c8..599027f 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreUtil.java @@ -595,5 +595,4 @@ public class MetastoreUtil { return MetastoreUtil.getFactKeyPrefix(factTableName) + "." + storageName + "." + updatePeriod; } - } http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/metadata/TimeRange.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/TimeRange.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/TimeRange.java index bf6cc5c..8286894 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/TimeRange.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/TimeRange.java @@ -22,7 +22,7 @@ import static org.apache.lens.cube.metadata.DateUtil.ABSDATE_PARSER; import java.util.Calendar; import java.util.Date; -import java.util.TreeSet; +import java.util.Set; import org.apache.lens.cube.error.LensCubeErrorCode; import org.apache.lens.server.api.error.LensException; @@ -48,10 +48,28 @@ public class TimeRange { private ASTNode parent; private int childIndex; - public boolean isCoverableBy(TreeSet<UpdatePeriod> updatePeriods) { + public boolean isCoverableBy(Set<UpdatePeriod> updatePeriods) { return DateUtil.isCoverableBy(fromDate, toDate, updatePeriods); } + /** + * Truncate time range using the update period. 
+ * The lower value of the truncated time range is the smallest date value equal to or larger than original + * time range's lower value which lies at the update period's boundary. Similarly for higher value. + * @param updatePeriod Update period to truncate time range with + * @return truncated time range + * @throws LensException If the truncated time range is invalid. + */ + public TimeRange truncate(UpdatePeriod updatePeriod) throws LensException { + TimeRange timeRange = new TimeRangeBuilder().partitionColumn(partitionColumn) + .fromDate(updatePeriod.getCeilDate(fromDate)).toDate(updatePeriod.getFloorDate(toDate)).build(); + timeRange.validate(); + return timeRange; + } + + public long milliseconds() { + return toDate.getTime() - fromDate.getTime(); + } public static class TimeRangeBuilder { private final TimeRange range; http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/AbridgedTimeRangeWriter.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/AbridgedTimeRangeWriter.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/AbridgedTimeRangeWriter.java index 8681e90..10f98a1 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/AbridgedTimeRangeWriter.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/AbridgedTimeRangeWriter.java @@ -19,6 +19,8 @@ package org.apache.lens.cube.parse; +import static java.util.stream.Collectors.toMap; + import java.util.*; import org.apache.lens.cube.metadata.FactPartition; @@ -33,14 +35,13 @@ import com.google.common.collect.Sets; * Collapses the time range filters using IN operators */ public class AbridgedTimeRangeWriter implements TimeRangeWriter { - //TODO: minimize use of String, use StringBuilders /** * Return IN clause for the partitions selected in the cube query * - * @param cubeQueryContext - * @param tableName - * @param parts + * @param 
cubeQueryContext cube query context + * @param tableName table name + * @param parts partitions * @return * @throws LensException */ @@ -80,7 +81,7 @@ public class AbridgedTimeRangeWriter implements TimeRangeWriter { for (FactPartition factPartition : parts) { String filter = TimeRangeUtils.getTimeRangePartitionFilter(factPartition, cubeQueryContext, tableName); if (filter.contains("AND")) { - allTimeRangeFilters.add(new StringBuilder("(").append(filter).append(")").toString()); + allTimeRangeFilters.add("(" + filter + ")"); } else { extractColumnAndCondition(filter, partFilterMap); } @@ -89,7 +90,7 @@ public class AbridgedTimeRangeWriter implements TimeRangeWriter { List<String> inClauses = new ArrayList<String>(partFilterMap.size()); for (String column : partFilterMap.keySet()) { String clause = - new StringBuilder("(").append(StringUtils.join(partFilterMap.get(column), ",")).append(")").toString(); + "(" + StringUtils.join(partFilterMap.get(column), ",") + ")"; inClauses.add(column + " IN " + clause); } @@ -120,29 +121,17 @@ public class AbridgedTimeRangeWriter implements TimeRangeWriter { private Map<Set<FactPartition>, Set<FactPartition>> groupPartitions(Collection<FactPartition> parts) { Map<FactPartition, Set<FactPartition>> partitionSetMap = new HashMap<FactPartition, Set<FactPartition>>(); for (FactPartition part : parts) { - FactPartition key = part.getContainingPart(); - FactPartition part2 = new FactPartition(part.getPartCol(), part.getPartSpec(), part.getPeriod(), null, part - .getPartFormat(), part.getStorageTables()); - if (partitionSetMap.get(key) == null) { - partitionSetMap.put(key, Sets.<FactPartition>newTreeSet()); - } - partitionSetMap.get(key).add(part2); + partitionSetMap.computeIfAbsent(part.getContainingPart(), k -> Sets.newTreeSet()).add(part.withoutContaining()); } Map<Set<FactPartition>, Set<FactPartition>> setSetOppositeMap = Maps.newHashMap(); for (Map.Entry<FactPartition, Set<FactPartition>> entry : partitionSetMap.entrySet()) { - if 
(setSetOppositeMap.get(entry.getValue()) == null) { - setSetOppositeMap.put(entry.getValue(), Sets.<FactPartition>newTreeSet()); - } + setSetOppositeMap.computeIfAbsent(entry.getValue(), k -> Sets.newTreeSet()); if (entry.getKey() != null) { setSetOppositeMap.get(entry.getValue()).add(entry.getKey()); } } - - Map<Set<FactPartition>, Set<FactPartition>> setSetMap = Maps.newHashMap(); - for (Map.Entry<Set<FactPartition>, Set<FactPartition>> entry : setSetOppositeMap.entrySet()) { - setSetMap.put(entry.getValue(), entry.getKey()); - } - return setSetMap; + // inverse again + return setSetOppositeMap.entrySet().stream().collect(toMap(Map.Entry::getValue, Map.Entry::getKey)); } // This takes the output of filter generated by TimeRangeUtils.getTimeRangePartitionFilter @@ -156,13 +145,6 @@ public class AbridgedTimeRangeWriter implements TimeRangeWriter { String column = subTokens[0].trim(); String filterValue = subTokens[1].trim(); - List<String> filterValues = partFilterMap.get(column); - - if (filterValues == null) { - filterValues = new ArrayList<String>(); - partFilterMap.put(column, filterValues); - } - - filterValues.add(filterValue); + partFilterMap.computeIfAbsent(column, k -> new ArrayList<>()).add(filterValue); } } http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java index 9658100..30b1a90 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/AggregateResolver.java @@ -27,7 +27,6 @@ import java.util.Iterator; import org.apache.lens.cube.error.LensCubeErrorCode; import org.apache.lens.cube.metadata.CubeMeasure; import org.apache.lens.cube.metadata.ExprColumn; -import 
org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode; import org.apache.lens.cube.parse.ExpressionResolver.ExprSpecContext; import org.apache.lens.server.api.error.LensException; @@ -71,21 +70,23 @@ class AggregateResolver implements ContextRewriter { || hasMeasuresNotInDefaultAggregates(cubeql, cubeql.getHavingAST(), null, aggregateResolverDisabled) || hasMeasures(cubeql, cubeql.getWhereAST()) || hasMeasures(cubeql, cubeql.getGroupByAST()) || hasMeasures(cubeql, cubeql.getOrderByAST())) { - Iterator<CandidateFact> factItr = cubeql.getCandidateFacts().iterator(); - while (factItr.hasNext()) { - CandidateFact candidate = factItr.next(); - if (candidate.fact.isAggregated()) { - cubeql.addFactPruningMsgs(candidate.fact, - CandidateTablePruneCause.missingDefaultAggregate()); - factItr.remove(); + Iterator<Candidate> candItr = cubeql.getCandidates().iterator(); + while (candItr.hasNext()) { + Candidate candidate = candItr.next(); + if (candidate instanceof StorageCandidate) { + StorageCandidate sc = (StorageCandidate) candidate; + if (sc.getFact().isAggregated()) { + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.missingDefaultAggregate()); + candItr.remove(); + } + } else { + throw new LensException("Not a storage candidate!!"); } } nonDefaultAggregates = true; log.info("Query has non default aggregates, no aggregate resolution will be done"); } - cubeql.pruneCandidateFactSet(CandidateTablePruneCode.MISSING_DEFAULT_AGGREGATE); - if (nonDefaultAggregates || aggregateResolverDisabled) { return; } http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/BetweenTimeRangeWriter.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/BetweenTimeRangeWriter.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/BetweenTimeRangeWriter.java index c8b8129..bd77498 100644 --- 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/BetweenTimeRangeWriter.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/BetweenTimeRangeWriter.java @@ -92,7 +92,7 @@ public class BetweenTimeRangeWriter implements TimeRangeWriter { } String partCol = start.getPartCol(); - if (cubeQueryContext != null && !cubeQueryContext.shouldReplaceTimeDimWithPart()) { + if (!cubeQueryContext.shouldReplaceTimeDimWithPart()) { partCol = cubeQueryContext.getTimeDimOfPartitionColumn(partCol); } http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java new file mode 100644 index 0000000..f241cb3 --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
 */
package org.apache.lens.cube.parse;

import java.util.Collection;
import java.util.Date;
import java.util.Set;

import org.apache.lens.cube.metadata.FactPartition;
import org.apache.lens.cube.metadata.TimeRange;
import org.apache.lens.server.api.error.LensException;

/**
 * This interface represents candidates that are involved in different phases of query rewriting.
 * At the lowest level, Candidate is represented by a StorageCandidate that has a fact on a storage
 * and other joined dimensions (if any) that are required to answer the query or part of the query.
 * At a higher level Candidate can also be a Join or a Union Candidate representing join or union
 * between other candidates
 *
 * Different Re-writers will work on applicable candidates to produce a final candidate which will be used
 * for generating the re-written query.
 */
public interface Candidate {

  /**
   * Returns all the fact columns
   *
   * @return collection of column names covered by this candidate
   */
  Collection<String> getColumns();

  /**
   * Start Time for this candidate (calculated based on schema)
   *
   * @return start date of this candidate's validity
   */
  Date getStartTime();

  /**
   * End Time for this candidate (calculated based on schema)
   *
   * @return end date of this candidate's validity
   */
  Date getEndTime();

  /**
   * Returns the cost of this candidate
   *
   * @return cost value used to compare candidates
   */
  double getCost();

  /**
   * Returns true if this candidate contains the given candidate
   *
   * @param candidate candidate to test for containment
   * @return true when the given candidate is this candidate or one of its descendants
   */
  boolean contains(Candidate candidate);

  /**
   * Returns child candidates of this candidate if any.
   * Note: StorageCandidate will return null
   *
   * @return child candidates, or null for a leaf (StorageCandidate)
   */
  Collection<Candidate> getChildren();

  /**
   * Is time range coverable based on start and end times configured in schema for the composing storage candidates
   * and valid update periods.
   *
   * Note: This method is different from {@link #evaluateCompleteness(TimeRange, TimeRange, boolean)} .
   * isTimeRangeCoverable checks the possibility of covering time range from schema perspective by using valid
   * storages/update periods while evaluateCompleteness checks if a time range can be covered based on
   * registered partitions. So isTimeRangeCoverable = false implies evaluateCompleteness = false but vice versa is
   * not true.
   *
   * @param timeRange time range to check coverability for
   * @return true when the range is coverable per schema
   * @throws LensException on errors while evaluating coverability
   */
  boolean isTimeRangeCoverable(TimeRange timeRange) throws LensException;

  /**
   * Calculates if this candidate can answer the query for given time range based on actual data registered with
   * the underlying candidate storages. This method will also update any internal candidate data structures that are
   * required for writing the re-written query and to answer {@link #getParticipatingPartitions()}.
   *
   * @param timeRange : TimeRange to check completeness for. TimeRange consists of start time, end time and the
   *                  partition column
   * @param queriedTimeRange : user queried time range
   * @param failOnPartialData : fail fast if the candidate can answer the query only partially
   * @return true if this Candidate can answer query for the given time range.
   */
  boolean evaluateCompleteness(TimeRange timeRange, TimeRange queriedTimeRange, boolean failOnPartialData)
    throws LensException;

  /**
   * Returns the set of fact partitions that will participate in this candidate.
   * Note: This method can be called only after call to
   * {@link #evaluateCompleteness(TimeRange, TimeRange, boolean)}
   *
   * @return participating fact partitions
   */
  Set<FactPartition> getParticipatingPartitions();

  /**
   * Checks whether an expression is evaluable by a candidate
   * 1. For a JoinCandidate, at least one of the child candidates should be able to answer the expression
   * 2. For a UnionCandidate, all child candidates should answer the expression
   *
   * @param expr :Expression need to be evaluated for Candidate
   * @return true when this candidate can evaluate the expression
   */
  boolean isExpressionEvaluable(ExpressionResolver.ExpressionContext expr);

  /**
   * Gets the index positions of answerable measure phrases in CubeQueryContext#selectPhrases
   * @return indices into the select phrases that this candidate can answer
   */
  Set<Integer> getAnswerableMeasurePhraseIndices();
}
http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateCoveringSetsResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateCoveringSetsResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateCoveringSetsResolver.java new file mode 100644 index 0000000..b22d972 --- /dev/null +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateCoveringSetsResolver.java @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.lens.cube.parse; + +import static org.apache.lens.cube.parse.CandidateUtil.getColumns; + +import java.util.*; + +import org.apache.lens.cube.error.LensCubeErrorCode; +import org.apache.lens.cube.metadata.TimeRange; +import org.apache.lens.server.api.error.LensException; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class CandidateCoveringSetsResolver implements ContextRewriter { + + @Override + public void rewriteContext(CubeQueryContext cubeql) throws LensException { + + if (!cubeql.hasCubeInQuery()) { + return; //Dimension query + } + + if (cubeql.getCandidates().size() == 0){ + cubeql.throwNoCandidateFactException(); + } + + List<QueriedPhraseContext> qpcList = cubeql.getQueriedPhrases(); + Set<QueriedPhraseContext> queriedMsrs = new HashSet<>(); + for (QueriedPhraseContext qpc : qpcList) { + if (qpc.hasMeasures(cubeql)) { + queriedMsrs.add(qpc); + } + } + + List<Candidate> timeRangeCoveringSet = resolveTimeRangeCoveringFactSet(cubeql, queriedMsrs, qpcList); + if (timeRangeCoveringSet.isEmpty()) { + throw new LensException(LensCubeErrorCode.NO_UNION_CANDIDATE_AVAILABLE.getLensErrorInfo(), + cubeql.getCube().getName(), cubeql.getTimeRanges().toString(), getColumns(queriedMsrs).toString()); + } + log.info("Time covering candidates :{}", timeRangeCoveringSet); + + if (queriedMsrs.isEmpty()) { + cubeql.getCandidates().clear(); + cubeql.getCandidates().addAll(timeRangeCoveringSet); + } else if (!timeRangeCoveringSet.isEmpty()) { + List<List<Candidate>> measureCoveringSets = resolveJoinCandidates(timeRangeCoveringSet, queriedMsrs, cubeql); + if (measureCoveringSets.isEmpty()) { + throw new LensException(LensCubeErrorCode.NO_JOIN_CANDIDATE_AVAILABLE.getLensErrorInfo(), + cubeql.getCube().getName(), getColumns(queriedMsrs).toString()); + } + updateFinalCandidates(measureCoveringSets, cubeql); + } + + log.info("Final Time and Measure covering candidates :{}", cubeql.getCandidates()); + } + + private Candidate 
createJoinCandidate(List<Candidate> childCandidates, CubeQueryContext cubeql) { + Candidate cand; + Candidate first = childCandidates.get(0); + Candidate second = childCandidates.get(1); + cand = new JoinCandidate(first, second, cubeql); + for (int i = 2; i < childCandidates.size(); i++) { + cand = new JoinCandidate(cand, childCandidates.get(i), cubeql); + } + return cand; + } + + private void updateFinalCandidates(List<List<Candidate>> joinCandidates, CubeQueryContext cubeql) { + List<Candidate> finalCandidates = new ArrayList<>(); + + for (List<Candidate> joinCandidate : joinCandidates) { + if (joinCandidate.size() == 1) { + finalCandidates.add(joinCandidate.iterator().next()); + } else { + finalCandidates.add(createJoinCandidate(joinCandidate, cubeql)); + } + } + cubeql.getCandidates().clear(); + cubeql.getCandidates().addAll(finalCandidates); + } + + private boolean isCandidateCoveringTimeRanges(UnionCandidate uc, List<TimeRange> ranges) { + for (TimeRange range : ranges) { + if (!CandidateUtil.isTimeRangeCovered(uc.getChildren(), range.getFromDate(), range.getToDate())) { + return false; + } + } + return true; + } + + private void pruneUnionCandidatesNotCoveringAllRanges(List<UnionCandidate> ucs, CubeQueryContext cubeql) { + for (Iterator<UnionCandidate> itr = ucs.iterator(); itr.hasNext();) { + UnionCandidate uc = itr.next(); + if (!isCandidateCoveringTimeRanges(uc, cubeql.getTimeRanges())) { + itr.remove(); + cubeql.addCandidatePruningMsg(uc, CandidateTablePruneCause.storageNotAvailableInRange(cubeql.getTimeRanges())); + } + } + } + + private List<Candidate> resolveTimeRangeCoveringFactSet(CubeQueryContext cubeql, + Set<QueriedPhraseContext> queriedMsrs, List<QueriedPhraseContext> qpcList) throws LensException { + List<Candidate> candidateSet = new ArrayList<>(); + if (!cubeql.getCandidates().isEmpty()) { + // All Candidates + List<Candidate> allCandidates = new ArrayList<>(cubeql.getCandidates()); + // Partially valid candidates + List<Candidate> 
allCandidatesPartiallyValid = new ArrayList<>(); + for (Candidate cand : allCandidates) { + // Assuming initial list of candidates populated are StorageCandidate + assert (cand instanceof StorageCandidate); + StorageCandidate sc = (StorageCandidate) cand; + if (CandidateUtil.isValidForTimeRanges(sc, cubeql.getTimeRanges())) { + candidateSet.add(CandidateUtil.cloneStorageCandidate(sc)); + } else if (CandidateUtil.isPartiallyValidForTimeRanges(sc, cubeql.getTimeRanges())) { + allCandidatesPartiallyValid.add(CandidateUtil.cloneStorageCandidate(sc)); + } else { + cubeql.addCandidatePruningMsg(sc, CandidateTablePruneCause.storageNotAvailableInRange( + cubeql.getTimeRanges())); + } + + } + // Get all covering fact sets + List<UnionCandidate> unionCoveringSet = + getCombinations(new ArrayList<>(allCandidatesPartiallyValid), cubeql); + // Sort the Collection based on no of elements + unionCoveringSet.sort(new CandidateUtil.ChildrenSizeBasedCandidateComparator<UnionCandidate>()); + // prune non covering sets + pruneUnionCandidatesNotCoveringAllRanges(unionCoveringSet, cubeql); + // prune candidate set which doesn't contain any common measure i + pruneUnionCoveringSetWithoutAnyCommonMeasure(unionCoveringSet, queriedMsrs, cubeql); + // prune redundant covering sets + pruneRedundantUnionCoveringSets(unionCoveringSet); + // pruing done in the previous steps, now create union candidates + candidateSet.addAll(unionCoveringSet); + updateQueriableMeasures(candidateSet, qpcList, cubeql); + } + return candidateSet; + } + + private boolean isMeasureAnswerablebyUnionCandidate(QueriedPhraseContext msr, Candidate uc, + CubeQueryContext cubeql) throws LensException { + // Candidate is a single StorageCandidate + if ((uc instanceof StorageCandidate) && !msr.isEvaluable(cubeql, (StorageCandidate) uc)) { + return false; + } else if ((uc instanceof UnionCandidate)){ + for (Candidate cand : uc.getChildren()) { + if (!msr.isEvaluable(cubeql, (StorageCandidate) cand)) { + return false; + } + } + 
} + return true; + } + + private void pruneUnionCoveringSetWithoutAnyCommonMeasure(List<UnionCandidate> ucs, + Set<QueriedPhraseContext> queriedMsrs, + CubeQueryContext cubeql) throws LensException { + for (ListIterator<UnionCandidate> itr = ucs.listIterator(); itr.hasNext();) { + boolean toRemove = true; + UnionCandidate uc = itr.next(); + for (QueriedPhraseContext msr : queriedMsrs) { + if (isMeasureAnswerablebyUnionCandidate(msr, uc, cubeql)) { + toRemove = false; + break; + } + } + if (toRemove) { + itr.remove(); + } + } + } + + private void pruneRedundantUnionCoveringSets(List<UnionCandidate> candidates) { + for (int i = 0; i < candidates.size(); i++) { + UnionCandidate current = candidates.get(i); + int j = i + 1; + for (ListIterator<UnionCandidate> itr = candidates.listIterator(j); itr.hasNext();) { + UnionCandidate next = itr.next(); + if (next.getChildren().containsAll(current.getChildren())) { + itr.remove(); + } + } + } + } + + private List<UnionCandidate> getCombinations(final List<Candidate> candidates, CubeQueryContext cubeql) { + List<UnionCandidate> combinations = new LinkedList<>(); + int size = candidates.size(); + int threshold = Double.valueOf(Math.pow(2, size)).intValue() - 1; + + for (int i = 1; i <= threshold; ++i) { + LinkedList<Candidate> individualCombinationList = new LinkedList<>(); + int count = size - 1; + int clonedI = i; + while (count >= 0) { + if ((clonedI & 1) != 0) { + individualCombinationList.addFirst(candidates.get(count)); + } + clonedI = clonedI >>> 1; + --count; + } + combinations.add(new UnionCandidate(individualCombinationList, cubeql)); + } + return combinations; + } + + private List<List<Candidate>> resolveJoinCandidates(List<Candidate> unionCandidates, + Set<QueriedPhraseContext> msrs, CubeQueryContext cubeql) throws LensException { + List<List<Candidate>> msrCoveringSets = new ArrayList<>(); + List<Candidate> ucSet = new ArrayList<>(unionCandidates); + // Check if a single set can answer all the measures and 
exprsWithMeasures + for (Iterator<Candidate> i = ucSet.iterator(); i.hasNext();) { + boolean evaluable = false; + Candidate uc = i.next(); + for (QueriedPhraseContext msr : msrs) { + evaluable = isMeasureAnswerablebyUnionCandidate(msr, uc, cubeql); + if (!evaluable) { + break; + } + } + if (evaluable) { + // single set can answer all the measures as an UnionCandidate + List<Candidate> one = new ArrayList<>(); + one.add(uc); + msrCoveringSets.add(one); + i.remove(); + } + } + // Sets that contain all measures or no measures are removed from iteration. + // find other facts + for (Iterator<Candidate> i = ucSet.iterator(); i.hasNext();) { + Candidate candidate = i.next(); + i.remove(); + // find the remaining measures in other facts + if (i.hasNext()) { + Set<QueriedPhraseContext> remainingMsrs = new HashSet<>(msrs); + Set<QueriedPhraseContext> coveredMsrs = CandidateUtil.coveredMeasures(candidate, msrs, cubeql); + remainingMsrs.removeAll(coveredMsrs); + + List<List<Candidate>> coveringSets = resolveJoinCandidates(ucSet, remainingMsrs, cubeql); + if (!coveringSets.isEmpty()) { + for (List<Candidate> candSet : coveringSets) { + candSet.add(candidate); + msrCoveringSets.add(candSet); + } + } else { + log.info("Couldnt find any set containing remaining measures:{} {} in {}", remainingMsrs, + ucSet); + } + } + } + log.info("Covering set {} for measures {} with factsPassed {}", msrCoveringSets, msrs, ucSet); + return msrCoveringSets; + } + + private void updateQueriableMeasures(List<Candidate> cands, + List<QueriedPhraseContext> qpcList, CubeQueryContext cubeql) throws LensException { + for (Candidate cand : cands) { + updateStorageCandidateQueriableMeasures(cand, qpcList, cubeql); + } + } + + + private void updateStorageCandidateQueriableMeasures(Candidate unionCandidate, + List<QueriedPhraseContext> qpcList, CubeQueryContext cubeql) throws LensException { + QueriedPhraseContext msrPhrase; + boolean isEvaluable; + for (int index = 0; index < qpcList.size(); index++) { + + 
if (!qpcList.get(index).hasMeasures(cubeql)) { + //Not a measure phrase. Skip it + continue; + } + + msrPhrase = qpcList.get(index); + if (unionCandidate instanceof StorageCandidate && msrPhrase.isEvaluable(cubeql, + (StorageCandidate) unionCandidate)) { + ((StorageCandidate) unionCandidate).setAnswerableMeasurePhraseIndices(index); + } else if (unionCandidate instanceof UnionCandidate) { + isEvaluable = true; + for (Candidate childCandidate : unionCandidate.getChildren()) { + if (!msrPhrase.isEvaluable(cubeql, (StorageCandidate) childCandidate)) { + isEvaluable = false; + break; + } + } + if (isEvaluable) { + //Set the index for all the children in this case + for (Candidate childCandidate : unionCandidate.getChildren()) { + ((StorageCandidate) childCandidate).setAnswerableMeasurePhraseIndices(index); + } + } + } + } + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java index 4dcdbcf..ce734cf 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateDim.java @@ -124,12 +124,7 @@ public class CandidateDim implements CandidateTable { } @Override - public Set<String> getStorageTables() { - return Collections.singleton(storageTable); - } - - @Override - public Set<String> getPartsQueried() { + public Set<String> getParticipatingPartitions() { if (StringUtils.isBlank(whereClause)) { return Collections.emptySet(); } http://git-wip-us.apache.org/repos/asf/lens/blob/ae83caae/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java ---------------------------------------------------------------------- diff --git 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java deleted file mode 100644 index b42262d..0000000 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java +++ /dev/null @@ -1,367 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.lens.cube.parse; - -import static org.apache.hadoop.hive.ql.parse.HiveParser.*; - -import java.util.*; - -import org.apache.lens.cube.metadata.*; -import org.apache.lens.server.api.error.LensException; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.session.SessionState; - -import org.antlr.runtime.CommonToken; - -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import lombok.Getter; -import lombok.Setter; - -/** - * Holds context of a candidate fact table. 
 */
public class CandidateFact implements CandidateTable, QueryAST {
  // Underlying fact table this candidate wraps.
  final CubeFactTable fact;
  // Fully qualified storage table names; db-prefixed by setStorageTables() for non-default dbs.
  @Getter
  private Set<String> storageTables;
  // Running count of partitions queried, incremented via incrementPartsQueried().
  @Getter
  private int numQueriedParts = 0;
  @Getter
  private final Set<FactPartition> partsQueried = Sets.newHashSet();

  // Cube (or derived cube) this fact belongs to.
  private CubeInterface baseTable;
  @Getter
  @Setter
  private ASTNode selectAST;
  @Getter
  @Setter
  private ASTNode whereAST;
  @Getter
  @Setter
  private ASTNode groupByAST;
  @Getter
  @Setter
  private ASTNode havingAST;
  @Getter
  @Setter
  private ASTNode joinAST;
  @Getter
  @Setter
  private ASTNode orderByAST;
  @Getter
  @Setter
  private Integer limitValue;
  @Getter
  private String fromString;
  // Indices (into the query's select phrases) answered by this fact; filled by updateASTs().
  private final List<Integer> selectIndices = Lists.newArrayList();
  // Subset of selectIndices holding non-measure (dimension) expressions.
  private final List<Integer> dimFieldIndices = Lists.newArrayList();
  // Lazily resolved in getColumns(); valid columns if configured, else all field names.
  private Collection<String> columns;
  // NOTE(review): "storge" is a long-standing typo in these field names; kept for compatibility.
  @Getter
  private final Map<String, ASTNode> storgeWhereClauseMap = new HashMap<>();
  @Getter
  private final Map<String, String> storgeWhereStringMap = new HashMap<>();
  @Getter
  private final Map<TimeRange, Map<String, LinkedHashSet<FactPartition>>> rangeToStoragePartMap = new HashMap<>();
  @Getter
  private final Map<TimeRange, Map<String, String>> rangeToStorageWhereMap = new HashMap<>();
  @Getter
  @Setter
  private Map<String, Map<String, Float>> dataCompletenessMap;

  CandidateFact(CubeFactTable fact, CubeInterface cube) {
    this.fact = fact;
    this.baseTable = cube;
  }

  @Override
  public String toString() {
    return fact.toString();
  }

  /** Columns usable from this fact: configured valid columns, else all field names (cached). */
  public Collection<String> getColumns() {
    if (columns == null) {
      columns = fact.getValidColumns();
      if (columns == null) {
        columns = fact.getAllFieldNames();
      }
    }
    return columns;
  }

  /** True when the fact's [startTime, endTime] window fully contains the given range. */
  public boolean isValidForTimeRange(TimeRange timeRange) {
    return (!timeRange.getFromDate().before(fact.getStartTime())) && (!timeRange.getToDate().after(fact.getEndTime()));
  }

  /**
   * ANDs the given expression into the HAVING clause, creating a TOK_HAVING node
   * when none exists yet.
   */
  public void addToHaving(ASTNode ast) {
    if (getHavingAST() == null) {
      setHavingAST(new ASTNode(new CommonToken(TOK_HAVING, "TOK_HAVING")));
      getHavingAST().addChild(ast);
      return;
    }
    // Wrap the existing condition and the new one under a single AND node.
    ASTNode existingHavingAST = (ASTNode) getHavingAST().getChild(0);
    ASTNode newHavingAST = new ASTNode(new CommonToken(KW_AND, "AND"));
    newHavingAST.addChild(existingHavingAST);
    newHavingAST.addChild(ast);
    getHavingAST().setChild(0, newHavingAST);
  }

  /**
   * Returns the alias of the select expression matching {@code ast}, assigning and
   * attaching a new alias if none exists; appends a new select expression when the
   * AST is not present in the select list at all.
   */
  public String addAndGetAliasFromSelect(ASTNode ast, AliasDecider aliasDecider) {
    for (Node n : getSelectAST().getChildren()) {
      ASTNode astNode = (ASTNode) n;
      if (HQLParser.equalsAST(ast, (ASTNode) astNode.getChild(0))) {
        if (astNode.getChildCount() > 1) {
          // Child 1 is the existing alias identifier.
          return astNode.getChild(1).getText();
        }
        String alias = aliasDecider.decideAlias(astNode);
        astNode.addChild(new ASTNode(new CommonToken(Identifier, alias)));
        return alias;
      }
    }
    // Not found, have to add to select
    String alias = aliasDecider.decideAlias(ast);
    ASTNode selectExprNode = new ASTNode(new CommonToken(TOK_SELEXPR));
    selectExprNode.addChild(ast);
    selectExprNode.addChild(new ASTNode(new CommonToken(Identifier, alias)));
    getSelectAST().addChild(selectExprNode);
    return alias;
  }

  void incrementPartsQueried(int incr) {
    numQueriedParts += incr;
  }

  // copy ASTs from CubeQueryContext
  public void copyASTs(CubeQueryContext cubeql) throws LensException {
    setSelectAST(MetastoreUtil.copyAST(cubeql.getSelectAST()));
    setWhereAST(MetastoreUtil.copyAST(cubeql.getWhereAST()));
    if (cubeql.getJoinAST() != null) {
      setJoinAST(MetastoreUtil.copyAST(cubeql.getJoinAST()));
    }
    if (cubeql.getGroupByAST() != null) {
      setGroupByAST(MetastoreUtil.copyAST(cubeql.getGroupByAST()));
    }
  }

  public ASTNode getStorageWhereClause(String storageTable) {
    return storgeWhereClauseMap.get(storageTable);
  }

  public String getStorageWhereString(String storageTable) {
    return storgeWhereStringMap.get(storageTable);
  }

  /** True when every column the expression references (via the cube alias) exists on this fact. */
  public boolean isExpressionAnswerable(ASTNode node, CubeQueryContext context) throws LensException {
    return getColumns().containsAll(HQLParser.getColsInExpr(context.getAliasForTableName(context.getCube()), node));
  }

  /**
   * Update the ASTs to include only the fields queried from this fact, in all the expressions
   *
   * @param cubeql
   * @throws LensException
   */
  public void updateASTs(CubeQueryContext cubeql) throws LensException {
    // update select AST with selected fields
    // currentChild tracks the position in this fact's (shrinking) select AST while i
    // walks the original query's select list; they diverge when children are deleted.
    int currentChild = 0;
    for (int i = 0; i < cubeql.getSelectAST().getChildCount(); i++) {
      ASTNode selectExpr = (ASTNode) this.selectAST.getChild(currentChild);
      Set<String> exprCols = HQLParser.getColsInExpr(cubeql.getAliasForTableName(cubeql.getCube()), selectExpr);
      if (getColumns().containsAll(exprCols)) {
        selectIndices.add(i);
        if (exprCols.isEmpty() // no direct fact columns
          // does not have measure names
          || (!containsAny(cubeql.getCube().getMeasureNames(), exprCols))) {
          dimFieldIndices.add(i);
        }
        ASTNode aliasNode = HQLParser.findNodeByPath(selectExpr, Identifier);
        String alias = cubeql.getSelectPhrases().get(i).getSelectAlias();
        if (aliasNode != null) {
          String queryAlias = aliasNode.getText();
          if (!queryAlias.equals(alias)) {
            // replace the alias node
            ASTNode newAliasNode = new ASTNode(new CommonToken(HiveParser.Identifier, alias));
            this.selectAST.getChild(currentChild).replaceChildren(selectExpr.getChildCount() - 1,
              selectExpr.getChildCount() - 1, newAliasNode);
          }
        } else {
          // add column alias
          ASTNode newAliasNode = new ASTNode(new CommonToken(HiveParser.Identifier, alias));
          this.selectAST.getChild(currentChild).addChild(newAliasNode);
        }
      } else {
        // Expression not answerable by this fact: drop it and reuse this slot index.
        this.selectAST.deleteChild(currentChild);
        currentChild--;
      }
      currentChild++;
    }

    // don't need to update where ast, since where is only on dim attributes and dim attributes
    // are assumed to be common in multi fact queries.

    // push down of having clauses happens just after this call in cubequerycontext
  }

  // The source set contains atleast one column in the colSet
  static boolean containsAny(Collection<String> srcSet, Collection<String> colSet) {
    if (colSet == null || colSet.isEmpty()) {
      return true;
    }
    for (String column : colSet) {
      if (srcSet.contains(column)) {
        return true;
      }
    }
    return false;
  }

  @Override
  public String getStorageString(String alias) {
    return StringUtils.join(storageTables, ",") + " " + alias;
  }

  /** Stores storage tables, prefixing the current database name unless it is "default". */
  public void setStorageTables(Set<String> storageTables) {
    String database = SessionState.get().getCurrentDatabase();
    // Add database name prefix for non default database
    if (StringUtils.isNotBlank(database) && !"default".equalsIgnoreCase(database)) {
      Set<String> storageTbls = new TreeSet<>();
      Iterator<String> names = storageTables.iterator();
      while (names.hasNext()) {
        storageTbls.add(database + "." + names.next());
      }
      this.storageTables = storageTbls;
    } else {
      this.storageTables = storageTables;
    }
  }

  @Override
  public AbstractCubeTable getBaseTable() {
    return (AbstractCubeTable) baseTable;
  }

  @Override
  public CubeFactTable getTable() {
    return fact;
  }

  @Override
  public String getName() {
    return fact.getName();
  }

  // NOTE(review): this class extends Object, so super.equals() is reference equality —
  // equals() can only return true for the same instance, making the field check below moot.
  @Override
  public boolean equals(Object obj) {
    if (!super.equals(obj)) {
      return false;
    }
    CandidateFact other = (CandidateFact) obj;

    if (this.getTable() == null) {
      if (other.getTable() != null) {
        return false;
      }
    }
    return true;
  }

  // NOTE(review): mixes identity hash (super.hashCode()) with the table-name hash;
  // consistent with the identity-based equals() above, but unusual.
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
    result = prime * result + ((getTable() == null) ? 0 : getTable().getName().toLowerCase().hashCode());
    return result;
  }

  public String getSelectString() {
    return HQLParser.getString(selectAST);
  }

  public String getWhereString() {
    if (whereAST != null) {
      return HQLParser.getString(whereAST);
    }
    return null;
  }

  public String getHavingString() {
    if (havingAST != null) {
      return HQLParser.getString(havingAST);
    }
    return null;
  }

  @Override
  public String getOrderByString() {
    if (orderByAST != null) {
      return HQLParser.getString(orderByAST);
    }
    return null;
  }

  /**
   * @return the selectIndices
   */
  public List<Integer> getSelectIndices() {
    return selectIndices;
  }

  /**
   * @return the groupbyIndices
   */
  public List<Integer> getDimFieldIndices() {
    return dimFieldIndices;
  }

  public String getGroupByString() {
    if (groupByAST != null) {
      return HQLParser.getString(groupByAST);
    }
    return null;
  }

  /**
   * Returns the partition-key columns of the first storage table that correspond to
   * the cube's timed dimensions.
   * NOTE(review): assumes storageTables is non-null and non-empty — iterator().next()
   * throws otherwise; confirm callers invoke setStorageTables() first.
   */
  public Set<String> getTimePartCols(CubeQueryContext query) throws LensException {
    Set<String> cubeTimeDimensions = baseTable.getTimedDimensions();
    Set<String> timePartDimensions = new HashSet<String>();
    String singleStorageTable = storageTables.iterator().next();
    List<FieldSchema> partitionKeys = null;
    partitionKeys = query.getMetastoreClient().getTable(singleStorageTable).getPartitionKeys();
    for (FieldSchema fs : partitionKeys) {
      if (cubeTimeDimensions.contains(CubeQueryContext.getTimeDimOfPartitionColumn(baseTable, fs.getName()))) {
        timePartDimensions.add(fs.getName());
      }
    }
    return timePartDimensions;
  }

  /** Builds the FROM template; "%s" is replaced with the storage alias later. */
  public void updateFromString(CubeQueryContext query, Set<Dimension> queryDims,
    Map<Dimension, CandidateDim> dimsToQuery) throws LensException {
    fromString = "%s"; // to update the storage alias later
    if (query.isAutoJoinResolved()) {
      fromString =
        query.getAutoJoinCtx().getFromString(fromString, this, queryDims, dimsToQuery,
          query, this);
    }
  }
}
