Repository: incubator-apex-malhar Updated Branches: refs/heads/devel-3 a029c5f48 -> eaf74392c
http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/java/com/datatorrent/lib/appdata/schemas/DimensionalConfigurationSchemaTest.java ---------------------------------------------------------------------- diff --git a/library/src/test/java/com/datatorrent/lib/appdata/schemas/DimensionalConfigurationSchemaTest.java b/library/src/test/java/com/datatorrent/lib/appdata/schemas/DimensionalConfigurationSchemaTest.java new file mode 100644 index 0000000..26a06bd --- /dev/null +++ b/library/src/test/java/com/datatorrent/lib/appdata/schemas/DimensionalConfigurationSchemaTest.java @@ -0,0 +1,521 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package com.datatorrent.lib.appdata.schemas; + +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +import org.codehaus.jettison.json.JSONArray; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.datatorrent.lib.appdata.schemas.DimensionalConfigurationSchema.DimensionsCombination; +import com.datatorrent.lib.appdata.schemas.DimensionalConfigurationSchema.Key; +import com.datatorrent.lib.appdata.schemas.DimensionalConfigurationSchema.Value; +import com.datatorrent.lib.dimensions.DimensionsDescriptor; +import com.datatorrent.lib.dimensions.aggregator.AggregatorIncrementalType; +import com.datatorrent.lib.dimensions.aggregator.AggregatorRegistry; + +public class DimensionalConfigurationSchemaTest +{ + private static final Logger logger = LoggerFactory.getLogger(DimensionalConfigurationSchemaTest.class); + + @Before + public void initialize() + { + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY.setup(); + } + + @Test + public void noEnumTest() + { + //Test if loading of no enums works + DimensionalConfigurationSchema des = + new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaNoEnums.json"), + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + DimensionalSchema dimensionalSchema = new DimensionalSchema(des); + dimensionalSchema.getSchemaJSON(); + } + + @Test + public void simpleTest() + { + final String keyName1 = "keyName1"; + final String keyName2 = "keyName2"; + + final String keyName1Type = "string"; + final String keyName2Type = "string"; + + final String valueName1 = "valueName1"; + final String valueName2 = "valueName2"; + + final String valueName1Type = "double"; + final String valueName2Type = "integer"; + + final 
String jsonSchema = + "{\"keys\":\n" + + "[{\"name\":\"" + keyName1 + "\",\"type\":\"" + keyName1Type + "\"},\n" + + "{\"name\":\"" + keyName2 + "\",\"type\":\"" + keyName2Type + "\"}],\n" + + "\"values\":\n" + + "[{\"name\":\"" + valueName1 + "\",\"type\":\"" + valueName1Type + "\"},\n" + + "{\"name\":\"" + valueName2 + "\",\"type\":\"" + valueName2Type + "\"}],\n" + + "\"timeBuckets\":[all]," + + "\"dimensions\":\n" + + "[{\"combination\":[\"" + keyName1 + "\",\"" + keyName2 + "\"],\"additionalValues\":[\"" + valueName1 + ":MIN\"," + "\"" + valueName1 + ":MAX\"]},\n" + + "{\"combination\":[\"" + keyName1 + "\"],\"additionalValues\":[\"" + valueName2 + ":SUM\"," + "\"" + valueName2 + ":COUNT\"]}]\n" + + "}"; + + DimensionalConfigurationSchema des = new DimensionalConfigurationSchema(jsonSchema, + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + FieldsDescriptor allKeysDescriptor = des.getKeyDescriptor(); + + Assert.assertEquals("Incorrect number of keys.", 2, allKeysDescriptor.getFields().getFields().size()); + Assert.assertTrue("Doesn't contain required key.", allKeysDescriptor.getFields().getFields().contains(keyName1)); + Assert.assertTrue("Doesn't contain required key.", allKeysDescriptor.getFields().getFields().contains(keyName2)); + Assert.assertEquals("Key doesn't have correct type.", Type.STRING, allKeysDescriptor.getType(keyName1)); + Assert.assertEquals("Key doesn't have correct type.", Type.STRING, allKeysDescriptor.getType(keyName2)); + + Assert.assertTrue("First descriptor must contain this key", des.getDimensionsDescriptorIDToKeyDescriptor().get(0).getFields().getFields().contains(keyName1)); + Assert.assertTrue("First descriptor must contain this key", des.getDimensionsDescriptorIDToKeyDescriptor().get(0).getFields().getFields().contains(keyName2)); + + Assert.assertEquals("First descriptor must contain this key", Type.STRING, des.getDimensionsDescriptorIDToKeyDescriptor().get(0).getType(keyName1)); + Assert.assertEquals("First descriptor must 
contain this key", Type.STRING, des.getDimensionsDescriptorIDToKeyDescriptor().get(0).getType(keyName2)); + + Assert.assertTrue("First descriptor must contain this key", des.getDimensionsDescriptorIDToKeyDescriptor().get(1).getFields().getFields().contains(keyName1)); + Assert.assertFalse("First descriptor must contain this key", des.getDimensionsDescriptorIDToKeyDescriptor().get(1).getFields().getFields().contains(keyName2)); + + Assert.assertEquals("First descriptor must contain this key", Type.STRING, des.getDimensionsDescriptorIDToKeyDescriptor().get(1).getType(keyName1)); + + //Aggregate to dimensions descriptor + + Set<String> ddKeys1 = Sets.newHashSet(keyName1, keyName2); + Set<String> ddKeys2 = Sets.newHashSet(keyName1); + + Set<String> minAggFields = Sets.newHashSet(valueName1); + Set<String> maxAggFields = Sets.newHashSet(valueName1); + Set<String> sumAggFields = Sets.newHashSet(valueName2); + Set<String> countAggFields = Sets.newHashSet(valueName2); + + logger.debug("map: {}", des.getDimensionsDescriptorIDToAggregatorToAggregateDescriptor().get(0)); + + Assert.assertTrue("Incorrect aggregate fields.", + des.getDimensionsDescriptorIDToAggregatorToAggregateDescriptor().get(0).get("MIN").getFields().getFields().equals(minAggFields)); + Assert.assertTrue("Incorrect aggregate fields.", + des.getDimensionsDescriptorIDToAggregatorToAggregateDescriptor().get(0).get("MAX").getFields().getFields().equals(maxAggFields)); + Assert.assertTrue("Incorrect aggregate fields.", + des.getDimensionsDescriptorIDToAggregatorToAggregateDescriptor().get(1).get("SUM").getFields().getFields().equals(sumAggFields)); + Assert.assertTrue("Incorrect aggregate fields.", + des.getDimensionsDescriptorIDToAggregatorToAggregateDescriptor().get(1).get("COUNT").getFields().getFields().equals(countAggFields)); + + final Map<String, Integer> aggToId = Maps.newHashMap(); + aggToId.put("min", 0); + aggToId.put("max", 1); + aggToId.put("sum", 2); + aggToId.put("count", 3); + } + + @Test + public 
void countDescriptorTest() + { + final String keyName1 = "keyName1"; + final String keyName2 = "keyName2"; + + final String keyName1Type = "string"; + final String keyName2Type = "string"; + + final String valueName1 = "valueName1"; + final String valueName2 = "valueName2"; + + final String valueName1Type = "double"; + final String valueName2Type = "integer"; + + final String jsonSchema = + "{\"keys\":\n" + + "[{\"name\":\"" + keyName1 + "\",\"type\":\"" + keyName1Type + "\"},\n" + + "{\"name\":\"" + keyName2 + "\",\"type\":\"" + keyName2Type + "\"}],\n" + + "\"values\":\n" + + "[{\"name\":\"" + valueName1 + "\",\"type\":\"" + valueName1Type + "\"},\n" + + "{\"name\":\"" + valueName2 + "\",\"type\":\"" + valueName2Type + "\"}],\n" + + "\"timeBuckets\":[\"1m\"]," + + "\"dimensions\":\n" + + "[{\"combination\":[\"" + keyName1 + "\",\"" + keyName2 + "\"],\"additionalValues\":[\"" + valueName1 + ":COUNT\"" + "]},\n" + "]\n" + + "}"; + + DimensionalConfigurationSchema des = new DimensionalConfigurationSchema(jsonSchema, + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + FieldsDescriptor fd = des.getDimensionsDescriptorIDToAggregatorIDToOutputAggregatorDescriptor().get(0).get(AggregatorIncrementalType.NAME_TO_ORDINAL.get("COUNT")); + + Assert.assertEquals("Indexes for type compress fields should be 0", 0, (int) fd.getTypeToFieldToIndex().get(Type.LONG).get("valueName1")); + } + + @Test + public void otfAggregatorDefinitionTest() + { + final String keyName1 = "keyName1"; + final String keyName1Type = "string"; + + final String valueName1 = "valueName1"; + final String valueName1Type = "double"; + + final String jsonSchema = "{\"keys\":\n" + + "[{\"name\":\"" + keyName1 + "\",\"type\":\"" + keyName1Type + "\"}],\n" + + "\"values\":\n" + + "[{\"name\":\"" + valueName1 + "\",\"type\":\"" + valueName1Type + "\",\"aggregators\":[\"AVG\"]}],\n" + + "\"timeBuckets\":[\"1m\"]," + + "\"dimensions\":\n" + + "[{\"combination\":[\"" + keyName1 + "\"]}]}"; + + logger.debug("test 
schema:\n{}", jsonSchema); + + DimensionalConfigurationSchema des = new DimensionalConfigurationSchema(jsonSchema, + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + Assert.assertEquals(1, des.getDimensionsDescriptorIDToDimensionsDescriptor().size()); + + Map<String, Type> keyFieldToType = Maps.newHashMap(); + keyFieldToType.put(keyName1, Type.STRING); + FieldsDescriptor keyFD = new FieldsDescriptor(keyFieldToType); + + Map<String, Type> valueFieldToTypeSum = Maps.newHashMap(); + valueFieldToTypeSum.put(valueName1, Type.DOUBLE); + FieldsDescriptor valueFDSum = new FieldsDescriptor(valueFieldToTypeSum); + + Map<String, Type> valueFieldToTypeCount = Maps.newHashMap(); + valueFieldToTypeCount.put(valueName1, Type.DOUBLE); + FieldsDescriptor valueFDCount = new FieldsDescriptor(valueFieldToTypeCount); + + Assert.assertEquals(keyFD, des.getKeyDescriptor()); + Assert.assertEquals(valueFDSum, des.getDimensionsDescriptorIDToAggregatorToAggregateDescriptor().get(0).get("SUM")); + Assert.assertEquals(valueFDCount, des.getDimensionsDescriptorIDToAggregatorToAggregateDescriptor().get(0).get("COUNT")); + } + + @Test + public void getAllKeysDescriptorTest() + { + DimensionalConfigurationSchema des = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchema.json"), + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + Set<String> keys = Sets.newHashSet("publisher", "advertiser", "location"); + + Assert.assertEquals(keys, des.getKeyDescriptor().getFields().getFields()); + } + + @Test + public void aggregationSchemaTest() + { + DimensionalConfigurationSchema des = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaAggregations.json"), + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + Set<String> keys = Sets.newHashSet(); + + Assert.assertEquals(keys, des.getKeyDescriptor().getFields().getFields()); + + Assert.assertEquals(3, 
des.getDimensionsDescriptorIDToAggregatorIDToInputAggregatorDescriptor().size()); + /* Check each per-descriptor structure once; all are expected to have the same size. */ + Assert.assertEquals(3, des.getDimensionsDescriptorIDToAggregatorIDToOutputAggregatorDescriptor().size()); + Assert.assertEquals(3, des.getDimensionsDescriptorIDToAggregatorIDs().size()); + Assert.assertEquals(3, des.getDimensionsDescriptorIDToAggregatorToAggregateDescriptor().size()); + Assert.assertEquals(3, des.getDimensionsDescriptorIDToDimensionsDescriptor().size()); + Assert.assertEquals(3, des.getDimensionsDescriptorIDToKeyDescriptor().size()); + Assert.assertEquals(3, des.getDimensionsDescriptorIDToOTFAggregatorToAggregateDescriptor().size()); + Assert.assertEquals(3, des.getDimensionsDescriptorIDToValueToAggregator().size()); + Assert.assertEquals(3, des.getDimensionsDescriptorIDToValueToOTFAggregator().size()); + /* These two structures are asserted to hold a single entry for this schema. */ + Assert.assertEquals(1, des.getDimensionsDescriptorIDToFieldToAggregatorAdditionalValues().size()); + Assert.assertEquals(1, des.getDimensionsDescriptorIDToKeys().size()); + } + + @Test + public void simpleOTFTest() + { + DimensionalConfigurationSchema des = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaOTF.json"), + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + Assert.assertEquals(4, des.getDimensionsDescriptorIDToAggregatorIDs().get(0).size()); + } + + @Test + public void testConstructorAgreement() + { + DimensionalConfigurationSchema expectedEventSchema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaAdditional.json"), + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + @SuppressWarnings("unchecked") + List<Object> publisherEnumVals = (List<Object>) ((List) Lists.newArrayList("twitter","facebook","yahoo","google","bing","amazon")); + @SuppressWarnings("unchecked") + List<Object> advertiserEnumVals = (List<Object>) ((List) Lists.newArrayList("starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the 
box","tomatina","ron swanson")); + @SuppressWarnings("unchecked") + List<Object> locationEnumVals = (List<Object>) ((List) Lists.newArrayList("N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID")); + + List<Key> keys = Lists.newArrayList(new Key("publisher", Type.STRING, publisherEnumVals), + new Key("advertiser", Type.STRING, advertiserEnumVals), + new Key("location", Type.STRING, locationEnumVals)); + List<TimeBucket> timeBuckets = Lists.newArrayList(TimeBucket.MINUTE, TimeBucket.HOUR, TimeBucket.DAY); + List<Value> values = Lists.newArrayList(new Value("impressions", + Type.LONG, + Sets.newHashSet("SUM", "COUNT")), + new Value("clicks", + Type.LONG, + Sets.newHashSet("SUM", "COUNT")), + new Value("cost", + Type.DOUBLE, + Sets.newHashSet("SUM", "COUNT")), + new Value("revenue", + Type.DOUBLE, + Sets.newHashSet("SUM", "COUNT"))); + + Map<String, Set<String>> valueToAggregators = Maps.newHashMap(); + valueToAggregators.put("impressions", Sets.newHashSet("MIN", "MAX")); + valueToAggregators.put("clicks", Sets.newHashSet("MIN", "MAX")); + valueToAggregators.put("cost", Sets.newHashSet("MIN", "MAX")); + valueToAggregators.put("revenue", Sets.newHashSet("MIN", "MAX")); + + Set<String> emptySet = Sets.newHashSet(); + Map<String, Set<String>> emptyMap = Maps.newHashMap(); + + List<DimensionsCombination> dimensionsCombinations = + Lists.newArrayList(new DimensionsCombination(new Fields(emptySet), + emptyMap), + new DimensionsCombination(new Fields(Sets.newHashSet("location")), + emptyMap), + new DimensionsCombination(new Fields(Sets.newHashSet("advertiser")), + valueToAggregators), + new DimensionsCombination(new Fields(Sets.newHashSet("publisher")), + valueToAggregators), + new DimensionsCombination(new Fields(Sets.newHashSet("advertiser", "location")), + emptyMap), + new DimensionsCombination(new Fields(Sets.newHashSet("publisher", "location")), + emptyMap), + new DimensionsCombination(new Fields(Sets.newHashSet("publisher", "advertiser")), + 
emptyMap), + new DimensionsCombination(new Fields(Sets.newHashSet("publisher", "advertiser", "location")), + emptyMap)); + + DimensionalConfigurationSchema eventSchema = new DimensionalConfigurationSchema(keys, + values, + timeBuckets, + dimensionsCombinations, + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + logger.debug("expected {}", expectedEventSchema.getDimensionsDescriptorIDToValueToOTFAggregator()); + logger.debug("actual {}", eventSchema.getDimensionsDescriptorIDToValueToOTFAggregator()); + + Assert.assertEquals(expectedEventSchema, eventSchema); + } + + @Test + public void testOTFAggregatorMap() + { + DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaOTF.json"), + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + Set<String> otfAggregator = Sets.newHashSet("AVG"); + Set<String> valueSet = Sets.newHashSet("impressions", "clicks", "cost", "revenue"); + + List<Map<String, FieldsDescriptor>> aggregatorToDescriptor = schema.getDimensionsDescriptorIDToOTFAggregatorToAggregateDescriptor(); + List<Map<String, Set<String>>> valueToAggregator = schema.getDimensionsDescriptorIDToValueToOTFAggregator(); + + for(int ddId = 0; + ddId < aggregatorToDescriptor.size(); + ddId++) { + Assert.assertEquals(otfAggregator, aggregatorToDescriptor.get(ddId).keySet()); + Assert.assertNotNull(aggregatorToDescriptor.get(ddId).get("AVG")); + + Assert.assertEquals(valueSet, valueToAggregator.get(ddId).keySet()); + Map<String, Set<String>> tempValueToAgg = valueToAggregator.get(ddId); + + for(Map.Entry<String, Set<String>> entry: tempValueToAgg.entrySet()) { + Assert.assertEquals(otfAggregator, entry.getValue()); + } + } + } + + @Test + public void testCustomTimeBuckets() + { + DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaCustomTimeBuckets.json"), AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + 
Assert.assertEquals(3, schema.getTimeBuckets().size()); + + Assert.assertEquals(5, schema.getCustomTimeBuckets().size()); + List<CustomTimeBucket> customTimeBuckets = Lists.newArrayList(new CustomTimeBucket(TimeBucket.MINUTE), + new CustomTimeBucket(TimeBucket.HOUR), + new CustomTimeBucket(TimeBucket.DAY), + new CustomTimeBucket(TimeBucket.MINUTE, 5), + new CustomTimeBucket(TimeBucket.HOUR, 3)); + Assert.assertEquals(customTimeBuckets, + schema.getCustomTimeBuckets()); + + Assert.assertEquals(40, schema.getDimensionsDescriptorIDToKeyDescriptor().size()); + + JSONArray timeBucketsArray = new JSONArray(); + timeBucketsArray.put("1m").put("1h").put("1d").put("5m").put("3h"); + Assert.assertEquals(timeBucketsArray.toString(), schema.getBucketsString()); + + CustomTimeBucket customTimeBucket = schema.getCustomTimeBucketRegistry().getTimeBucket(TimeBucket.MINUTE.ordinal()); + Assert.assertTrue(customTimeBucket.isUnit()); + Assert.assertEquals(TimeBucket.MINUTE, customTimeBucket.getTimeBucket()); + + customTimeBucket = schema.getCustomTimeBucketRegistry().getTimeBucket(TimeBucket.HOUR.ordinal()); + Assert.assertTrue(customTimeBucket.isUnit()); + Assert.assertEquals(TimeBucket.HOUR, customTimeBucket.getTimeBucket()); + + customTimeBucket = schema.getCustomTimeBucketRegistry().getTimeBucket(TimeBucket.DAY.ordinal()); + Assert.assertTrue(customTimeBucket.isUnit()); + Assert.assertEquals(TimeBucket.DAY, customTimeBucket.getTimeBucket()); + + int id5m = schema.getCustomTimeBucketRegistry().getTimeBucketId(new CustomTimeBucket(TimeBucket.MINUTE, 5)); + int id3h = schema.getCustomTimeBucketRegistry().getTimeBucketId(new CustomTimeBucket(TimeBucket.HOUR, 3)); + + Assert.assertEquals(256, id5m); + Assert.assertEquals(257, id3h); + + for (int ddID = 0; ddID < schema.getDimensionsDescriptorIDToDimensionsDescriptor().size(); ddID++) { + DimensionsDescriptor dd = schema.getDimensionsDescriptorIDToDimensionsDescriptor().get(ddID); + + Assert.assertEquals(customTimeBuckets.get(ddID % 
5), dd.getCustomTimeBucket()); + } + } + + @Test + public void testAllCombinationsGeneration() + { + DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSimpleAllCombinations.json"), AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + Set<DimensionsDescriptor> dimensionsDescriptors = Sets.newHashSet(); + dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(new HashSet<String>()))); + dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher")))); + dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("advertiser")))); + dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("location")))); + dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher", "advertiser")))); + dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher", "location")))); + dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("advertiser", "location")))); + dimensionsDescriptors.add(new DimensionsDescriptor(TimeBucket.MINUTE, new Fields(Sets.newHashSet("publisher", "advertiser", "location")))); + + Set<DimensionsDescriptor> actualDimensionsDescriptors = Sets.newHashSet(); + + for (DimensionsDescriptor dimensionsDescriptor : schema.getDimensionsDescriptorToID().keySet()) { + actualDimensionsDescriptors.add(dimensionsDescriptor); + } + + List<DimensionsDescriptor> ddList = Lists.newArrayList(dimensionsDescriptors); + List<DimensionsDescriptor> ddActualList = Lists.newArrayList(actualDimensionsDescriptors); + + Collections.sort(ddList); + Collections.sort(ddActualList); + + Assert.assertEquals(dimensionsDescriptors, actualDimensionsDescriptors); + } + + /** + * Loads adsGenericEventSchemaNoTime.json and checks that the resulting schema reports exactly one time bucket, + * {@link TimeBucket#ALL}. + */ + @Test + public void testLoadingSchemaWithNoTimeBucket() + { + 
DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaNoTime.json"), + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + Assert.assertEquals(1, schema.getTimeBuckets().size()); + Assert.assertEquals(TimeBucket.ALL, schema.getTimeBuckets().get(0)); + } + + /** + * A schema illustrating this corner case is as follows: + * <br/> + * <pre> + * {@code + * {"keys":[{"name":"publisher","type":"string"}, + * {"name":"advertiser","type":"string"}, + * {"name":"location","type":"string"}], + * "timeBuckets":["1m"], + * "values": + * [{"name":"impressions","type":"long","aggregators":["SUM"]}, + * {"name":"clicks","type":"long","aggregators":["SUM"]}, + * {"name":"cost","type":"double","aggregators":["SUM"]}, + * {"name":"revenue","type":"double"}], + * "dimensions": + * [{"combination":[]}, + * {"combination":["location"],"additionalValues":["revenue:SUM"]}] + * } + * } + * </pre> + */ + @Test + public void testAdditionalValuesOneCornerCase() + { + DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaAdditionalOne.json"), AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + int sumID = AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY.getIncrementalAggregatorNameToID().get("SUM"); + + Assert.assertEquals(3, schema.getDimensionsDescriptorIDToAggregatorIDToOutputAggregatorDescriptor().get(0).get(sumID).getFieldList().size()); + Assert.assertEquals(4, schema.getDimensionsDescriptorIDToAggregatorIDToOutputAggregatorDescriptor().get(1).get(sumID).getFieldList().size()); + } + + /** + * Tests to make sure that the correct number of dimension descriptors are produced when different time buckets are + * specified for each dimension combination. 
+ */ + @Test + public void testTimeBucketsDimensionCombination() + { + final int numDDIds = 11; + final int numCombinations = 3; + + DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaDimensionTimeBuckets.json"), AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + List<DimensionsDescriptor> dimensionsDescriptors = schema.getDimensionsDescriptorIDToDimensionsDescriptor(); + + Assert.assertEquals(numDDIds, dimensionsDescriptors.size()); + Assert.assertEquals(numDDIds, schema.getDimensionsDescriptorIDToAggregatorIDToInputAggregatorDescriptor().size()); + Assert.assertEquals(numDDIds, schema.getDimensionsDescriptorIDToAggregatorIDToOutputAggregatorDescriptor().size()); + Assert.assertEquals(numDDIds, schema.getDimensionsDescriptorIDToAggregatorIDs().size()); + Assert.assertEquals(numDDIds, schema.getDimensionsDescriptorIDToAggregatorToAggregateDescriptor().size()); + Assert.assertEquals(numDDIds, schema.getDimensionsDescriptorIDToDimensionsDescriptor().size()); + Assert.assertEquals(numDDIds, schema.getDimensionsDescriptorIDToKeyDescriptor().size()); + Assert.assertEquals(numDDIds, schema.getDimensionsDescriptorIDToOTFAggregatorToAggregateDescriptor().size()); + Assert.assertEquals(numDDIds, schema.getDimensionsDescriptorIDToValueToAggregator().size()); + Assert.assertEquals(numDDIds, schema.getDimensionsDescriptorIDToValueToOTFAggregator().size()); + + Assert.assertEquals(numCombinations, schema.getDimensionsDescriptorIDToFieldToAggregatorAdditionalValues().size()); + Assert.assertEquals(numCombinations, schema.getDimensionsDescriptorIDToKeys().size()); + + String[] buckets = new String[]{"1m", "3d", "1h", "5s", "1m", "3d", "1m", "3d", "30s", "2h", "1d"}; + + for (int ddId = 0; ddId < numDDIds; ddId++) { + CustomTimeBucket customTimeBucket = dimensionsDescriptors.get(ddId).getCustomTimeBucket(); + Assert.assertEquals(new CustomTimeBucket(buckets[ddId]), customTimeBucket); + } + } + + 
@Test + public void testTimeBucketsBackwardCompatibility() + { + DimensionalConfigurationSchema schema = new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaDimensionTimeBuckets.json"), AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + + Assert.assertEquals(2, schema.getCustomTimeBuckets().size()); + } + + private static final Logger LOG = LoggerFactory.getLogger(DimensionalConfigurationSchemaTest.class); +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/java/com/datatorrent/lib/appdata/schemas/DimensionalSchemaTest.java ---------------------------------------------------------------------- diff --git a/library/src/test/java/com/datatorrent/lib/appdata/schemas/DimensionalSchemaTest.java b/library/src/test/java/com/datatorrent/lib/appdata/schemas/DimensionalSchemaTest.java new file mode 100644 index 0000000..a98d346 --- /dev/null +++ b/library/src/test/java/com/datatorrent/lib/appdata/schemas/DimensionalSchemaTest.java @@ -0,0 +1,502 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package com.datatorrent.lib.appdata.schemas; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +import org.codehaus.jettison.json.JSONArray; +import org.codehaus.jettison.json.JSONObject; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.datatorrent.lib.appdata.query.serde.MessageSerializerFactory; +import com.datatorrent.lib.dimensions.aggregator.AggregatorRegistry; + +public class DimensionalSchemaTest +{ + private static final String FIELD_TAGS = "tags"; + + public DimensionalSchemaTest() + { + } + + @Before + public void initialize() + { + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY.setup(); + } + + @Test + public void noEnumsTest() + { + //Test if creating schema with no enums works + DimensionalConfigurationSchema des = + new DimensionalConfigurationSchema(SchemaUtils.jarResourceFileToString("adsGenericEventSchemaNoEnums.json"), + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY); + } + + @Test + public void noTimeTest() throws Exception + { + String resultSchema = produceSchema("adsGenericEventSchemaNoTime.json"); + + Map<String, String> valueToType = Maps.newHashMap(); + valueToType.put("impressions:SUM", "long"); + valueToType.put("clicks:SUM", "long"); + valueToType.put("cost:SUM", "double"); + valueToType.put("revenue:SUM", "double"); + + @SuppressWarnings("unchecked") + List<Set<String>> dimensionCombinationsList = Lists.newArrayList((Set<String>)new HashSet<String>(), + Sets.newHashSet("location"), + Sets.newHashSet("advertiser"), + Sets.newHashSet("publisher"), + Sets.newHashSet("location", "advertiser"), + Sets.newHashSet("location", "publisher"), + Sets.newHashSet("advertiser", "publisher"), + Sets.newHashSet("location", "advertiser", 
"publisher")); + + basicSchemaChecker(resultSchema, + Lists.newArrayList(TimeBucket.ALL.getText()), + Lists.newArrayList("publisher", "advertiser", "location"), + Lists.newArrayList("string", "string", "string"), + valueToType, + dimensionCombinationsList); + } + + @Test + public void globalValueTest() throws Exception + { + String resultSchema = produceSchema("adsGenericEventSchema.json"); + + List<String> timeBuckets = Lists.newArrayList("1m", "1h", "1d"); + List<String> keyNames = Lists.newArrayList("publisher", "advertiser", "location"); + List<String> keyTypes = Lists.newArrayList("string", "string", "string"); + + Map<String, String> valueToType = Maps.newHashMap(); + valueToType.put("impressions:SUM", "long"); + valueToType.put("clicks:SUM", "long"); + valueToType.put("cost:SUM", "double"); + valueToType.put("revenue:SUM", "double"); + + @SuppressWarnings("unchecked") + List<Set<String>> dimensionCombinationsList = Lists.newArrayList((Set<String>) new HashSet<String>(), + Sets.newHashSet("location"), + Sets.newHashSet("advertiser"), + Sets.newHashSet("publisher"), + Sets.newHashSet("location", "advertiser"), + Sets.newHashSet("location", "publisher"), + Sets.newHashSet("advertiser", "publisher"), + Sets.newHashSet("location", "advertiser", "publisher")); + + basicSchemaChecker(resultSchema, + timeBuckets, + keyNames, + keyTypes, + valueToType, + dimensionCombinationsList); + } + + @Test + public void additionalValueTest() throws Exception + { + String resultSchema = produceSchema("adsGenericEventSchemaAdditional.json"); + + List<String> timeBuckets = Lists.newArrayList("1m", "1h", "1d"); + List<String> keyNames = Lists.newArrayList("publisher", "advertiser", "location"); + List<String> keyTypes = Lists.newArrayList("string", "string", "string"); + + Map<String, String> valueToType = Maps.newHashMap(); + valueToType.put("impressions:SUM", "long"); + valueToType.put("impressions:COUNT", "long"); + valueToType.put("clicks:SUM", "long"); + 
valueToType.put("clicks:COUNT", "long"); + valueToType.put("cost:SUM", "double"); + valueToType.put("cost:COUNT", "long"); + valueToType.put("revenue:SUM", "double"); + valueToType.put("revenue:COUNT", "long"); + + @SuppressWarnings("unchecked") + List<Set<String>> dimensionCombinationsList = Lists.newArrayList((Set<String>) new HashSet<String>(), + Sets.newHashSet("location"), + Sets.newHashSet("advertiser"), + Sets.newHashSet("publisher"), + Sets.newHashSet("location", "advertiser"), + Sets.newHashSet("location", "publisher"), + Sets.newHashSet("advertiser", "publisher"), + Sets.newHashSet("location", "advertiser", "publisher")); + + basicSchemaChecker(resultSchema, + timeBuckets, + keyNames, + keyTypes, + valueToType, + dimensionCombinationsList); + + Map<String, String> additionalValueMap = Maps.newHashMap(); + additionalValueMap.put("impressions:MAX", "long"); + additionalValueMap.put("clicks:MAX", "long"); + additionalValueMap.put("cost:MAX", "double"); + additionalValueMap.put("revenue:MAX", "double"); + additionalValueMap.put("impressions:MIN", "long"); + additionalValueMap.put("clicks:MIN", "long"); + additionalValueMap.put("cost:MIN", "double"); + additionalValueMap.put("revenue:MIN", "double"); + + @SuppressWarnings("unchecked") + List<Map<String, String>> additionalValuesList = Lists.newArrayList((Map<String, String>) new HashMap<String, String>(), + (Map<String, String>) new HashMap<String, String>(), + additionalValueMap, + additionalValueMap, + (Map<String, String>) new HashMap<String, String>(), + (Map<String, String>) new HashMap<String, String>(), + (Map<String, String>) new HashMap<String, String>(), + (Map<String, String>) new HashMap<String, String>()); + + JSONObject data = new JSONObject(resultSchema).getJSONArray("data").getJSONObject(0); + JSONArray dimensions = data.getJSONArray("dimensions"); + + for(int index = 0; + index < dimensions.length(); + index++) { + JSONObject combination = dimensions.getJSONObject(index); + + Map<String, 
String> tempAdditionalValueMap = additionalValuesList.get(index); + /* "additionalValues" must be absent exactly when no values are expected for this combination. */ Assert.assertEquals(tempAdditionalValueMap.isEmpty(), !combination.has("additionalValues")); + + Set<String> additionalValueSet = Sets.newHashSet(); + + if(tempAdditionalValueMap.isEmpty()) { + continue; + } + + JSONArray additionalValues = combination.getJSONArray("additionalValues"); + + LOG.debug("additionalValues {}", additionalValues); + + for(int aIndex = 0; + aIndex < additionalValues.length(); + aIndex++) { + JSONObject additionalValue = additionalValues.getJSONObject(aIndex); + + String valueName = additionalValue.getString("name"); + String valueType = additionalValue.getString("type"); + + String expectedValueType = tempAdditionalValueMap.get(valueName); + + /* add() returns false when the name was already seen, which flags a duplicate entry. */ Assert.assertTrue("Duplicate value " + valueName, additionalValueSet.add(valueName)); + Assert.assertTrue("Invalid value " + valueName, expectedValueType != null); + Assert.assertEquals(expectedValueType, valueType); + } + } + } + + /** Replaces the enum values of all three keys through setEnumsList and checks that the enum values read back from getSchemaJSON() equal the replacement map. */ @Test + public void enumValUpdateTest() throws Exception + { + String eventSchemaJSON = SchemaUtils.jarResourceFileToString("adsGenericEventSchema.json"); + DimensionalSchema dimensional = new DimensionalSchema( + new DimensionalConfigurationSchema(eventSchemaJSON, + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY)); + + Map<String, List<Object>> replacementEnums = Maps.newHashMap(); + @SuppressWarnings("unchecked") + List<Object> publisherEnumList = ((List<Object>) ((List) Lists.newArrayList("google", "twitter"))); + @SuppressWarnings("unchecked") + List<Object> advertiserEnumList = ((List<Object>) ((List) Lists.newArrayList("google", "twitter"))); + @SuppressWarnings("unchecked") + List<Object> locationEnumList = ((List<Object>) ((List) Lists.newArrayList("google", "twitter"))); + + replacementEnums.put("publisher", publisherEnumList); + replacementEnums.put("advertiser", advertiserEnumList); + replacementEnums.put("location", locationEnumList); + + dimensional.setEnumsList(replacementEnums); + + String 
schemaJSON = dimensional.getSchemaJSON(); + + JSONObject schema = new JSONObject(schemaJSON); + JSONArray keys = schema.getJSONArray(DimensionalConfigurationSchema.FIELD_KEYS); + + /* Rebuild key name -> enum values from the emitted schema JSON so it can be compared with the replacement map. */ Map<String, List<Object>> newEnums = Maps.newHashMap(); + + for(int keyIndex = 0; + keyIndex < keys.length(); + keyIndex++) { + JSONObject keyData = keys.getJSONObject(keyIndex); + String name = keyData.getString(DimensionalConfigurationSchema.FIELD_KEYS_NAME); + JSONArray enumValues = keyData.getJSONArray(DimensionalConfigurationSchema.FIELD_KEYS_ENUMVALUES); + List<Object> enumList = Lists.newArrayList(); + + for(int enumIndex = 0; + enumIndex < enumValues.length(); + enumIndex++) { + enumList.add(enumValues.get(enumIndex)); + } + + newEnums.put(name, enumList); + } + + Assert.assertEquals(replacementEnums, newEnums); + } + + /** Replaces the enum values of all three keys through setEnumsSetComparable, passing hash sets of "b", "c", "a", and expects getSchemaJSON() to list each key's enum values as "a", "b", "c". */ @Test + @SuppressWarnings("rawtypes") + public void enumValUpdateTestComparable() throws Exception + { + String eventSchemaJSON = SchemaUtils.jarResourceFileToString("adsGenericEventSchema.json"); + DimensionalSchema dimensional = new DimensionalSchema( + new DimensionalConfigurationSchema(eventSchemaJSON, + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY)); + + Map<String, Set<Comparable>> replacementEnums = Maps.newHashMap(); + @SuppressWarnings("unchecked") + Set<Comparable> publisherEnumList = ((Set<Comparable>) ((Set) Sets.newHashSet("b", "c", "a"))); + @SuppressWarnings("unchecked") + Set<Comparable> advertiserEnumList = ((Set<Comparable>) ((Set) Sets.newHashSet("b", "c", "a"))); + @SuppressWarnings("unchecked") + Set<Comparable> locationEnumList = ((Set<Comparable>) ((Set) Sets.newHashSet("b", "c", "a"))); + + replacementEnums.put("publisher", publisherEnumList); + replacementEnums.put("advertiser", advertiserEnumList); + replacementEnums.put("location", locationEnumList); + + /* Expected result per key: the same three values as a list in the order "a", "b", "c". */ Map<String, List<Comparable>> expectedOutput = Maps.newHashMap(); + @SuppressWarnings("unchecked") + List<Comparable> publisherEnumSortedList = (List<Comparable>) ((List) 
Lists.newArrayList("a", "b", "c")); + @SuppressWarnings("unchecked") + List<Comparable> advertiserEnumSortedList = (List<Comparable>) ((List) Lists.newArrayList("a", "b", "c")); + @SuppressWarnings("unchecked") + List<Comparable> locationEnumSortedList = (List<Comparable>) ((List) Lists.newArrayList("a", "b", "c")); + + expectedOutput.put("publisher", publisherEnumSortedList); + expectedOutput.put("advertiser", advertiserEnumSortedList); + expectedOutput.put("location", locationEnumSortedList); + + dimensional.setEnumsSetComparable(replacementEnums); + + String schemaJSON = dimensional.getSchemaJSON(); + + JSONObject schema = new JSONObject(schemaJSON); + JSONArray keys = schema.getJSONArray(DimensionalConfigurationSchema.FIELD_KEYS); + + /* Rebuild key name -> enum values, in emitted order, from the schema JSON. */ Map<String, List<Comparable>> newEnums = Maps.newHashMap(); + + for(int keyIndex = 0; + keyIndex < keys.length(); + keyIndex++) { + JSONObject keyData = keys.getJSONObject(keyIndex); + String name = keyData.getString(DimensionalConfigurationSchema.FIELD_KEYS_NAME); + JSONArray enumValues = keyData.getJSONArray(DimensionalConfigurationSchema.FIELD_KEYS_ENUMVALUES); + List<Comparable> enumList = Lists.newArrayList(); + + for(int enumIndex = 0; + enumIndex < enumValues.length(); + enumIndex++) { + enumList.add((Comparable) enumValues.get(enumIndex)); + } + + newEnums.put(name, enumList); + } + + Assert.assertEquals(expectedOutput, newEnums); + } + + /** Loads adsGenericEventSchemaTags.json and checks the tags emitted by getSchemaJSON(): the schema-level tags, the tags of the "location" key, and the tags of the values whose names start with "impressions" and "clicks". Tags on any other key or value fail the test. */ @Test + public void testSchemaTags() throws Exception + { + List<String> expectedTags = Lists.newArrayList("geo", "bullet"); + List<String> expectedKeyTags = Lists.newArrayList("geo.location"); + List<String> expectedValueTagsLat = Lists.newArrayList("geo.lattitude"); + List<String> expectedValueTagsLong = Lists.newArrayList("geo.longitude"); + + String eventSchemaJSON = SchemaUtils.jarResourceFileToString("adsGenericEventSchemaTags.json"); + DimensionalSchema dimensional = new DimensionalSchema( + new DimensionalConfigurationSchema(eventSchemaJSON, + 
AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY)); + + String schemaJSON = dimensional.getSchemaJSON(); + + JSONObject jo = new JSONObject(schemaJSON); + List<String> tags = getStringList(jo.getJSONArray(FIELD_TAGS)); + Assert.assertEquals(expectedTags, tags); + + JSONArray keys = jo.getJSONArray(DimensionalConfigurationSchema.FIELD_KEYS); + + List<String> keyTags = null; + + for (int keyIndex = 0; keyIndex < keys.length(); keyIndex++) { + JSONObject key = keys.getJSONObject(keyIndex); + + if (!key.has(FIELD_TAGS)) { + continue; + } + + /* Any key that carries tags must be "location". */ Assert.assertEquals("location", key.get(DimensionalConfigurationSchema.FIELD_KEYS_NAME)); + keyTags = getStringList(key.getJSONArray(FIELD_TAGS)); + } + + Assert.assertTrue("No tags found for any key", keyTags != null); + Assert.assertEquals(expectedKeyTags, keyTags); + + JSONArray values = jo.getJSONArray(DimensionalConfigurationSchema.FIELD_VALUES); + + /* Set once a tagged value with the matching name prefix has been seen and verified. */ boolean valueTagsLat = false; + boolean valueTagsLong = false; + + for (int valueIndex = 0; valueIndex < values.length(); valueIndex++) { + JSONObject value = values.getJSONObject(valueIndex); + + if (!value.has(FIELD_TAGS)) { + continue; + } + + String valueName = value.getString(DimensionalConfigurationSchema.FIELD_VALUES_NAME); + List<String> valueTags = getStringList(value.getJSONArray(FIELD_TAGS)); + + LOG.debug("value name: {}", valueName); + + /* Matched by prefix, not by exact name. */ if (valueName.startsWith("impressions")) { + Assert.assertEquals(expectedValueTagsLat, valueTags); + valueTagsLat = true; + } else if (valueName.startsWith("clicks")) { + Assert.assertEquals(expectedValueTagsLong, valueTags); + valueTagsLong = true; + } else { + Assert.fail("There should be no tags for " + valueName); + } + } + + Assert.assertTrue("No tags found for impressions", valueTagsLat); + Assert.assertTrue("No tags found for clicks", valueTagsLong); + } + + /** Builds a DimensionalSchema from the configuration returned by SchemaUtils.jarResourceFileToString(resourceName), wraps it in a SchemaResult for new SchemaQuery("1"), and returns that result as serialized by MessageSerializerFactory. @param resourceName name passed to SchemaUtils.jarResourceFileToString @return the serialized SchemaResult */ private String produceSchema(String resourceName) throws Exception + { + String eventSchemaJSON = SchemaUtils.jarResourceFileToString(resourceName); + + 
MessageSerializerFactory dsf = new MessageSerializerFactory(new ResultFormatter()); + DimensionalSchema schemaDimensional = new DimensionalSchema(new DimensionalConfigurationSchema(eventSchemaJSON, + AggregatorRegistry.DEFAULT_AGGREGATOR_REGISTRY)); + + SchemaQuery schemaQuery = new SchemaQuery("1"); + + SchemaResult result = new SchemaResult(schemaQuery, schemaDimensional); + return dsf.serialize(result); + } + + /** Returns the elements of the given JSON array, each read with getString, as a list in array order. @param ja the array to copy @return a new list holding the array's elements as strings */ private List<String> getStringList(JSONArray ja) throws Exception + { + List<String> stringsArray = Lists.newArrayList(); + + for (int index = 0; index < ja.length(); index++) { + stringsArray.add(ja.getString(index)); + } + + return stringsArray; + } + + /** Parses a serialized schema result and asserts its contents. Checked: the "buckets" array matches timeBuckets in size and order; every entry of "keys" matches keyNames and keyTypes at the same index and has "enumValues"; "values" has exactly valueToType.size() entries with unique names whose types match valueToType; every entry of "dimensions" has a "combination" that equals, as a set, the entry of dimensionCombinationsList at the same index. @param resultSchema the serialized schema result JSON @param timeBuckets expected bucket names, in order @param keyNames expected key names, in order @param keyTypes expected key types, in order @param valueToType expected value name to type name @param dimensionCombinationsList expected key set of each dimension combination, in order */ private void basicSchemaChecker(String resultSchema, + List<String> timeBuckets, + List<String> keyNames, + List<String> keyTypes, + Map<String, String> valueToType, + List<Set<String>> dimensionCombinationsList) throws Exception + { + LOG.debug("Schema to check {}", resultSchema); + JSONObject schemaJO = new JSONObject(resultSchema); + JSONObject data = schemaJO.getJSONArray("data").getJSONObject(0); + + JSONArray jaBuckets = SchemaUtils.findFirstKeyJSONArray(schemaJO, "buckets"); + + Assert.assertEquals(timeBuckets.size(), jaBuckets.length()); + + for(int index = 0; + index < jaBuckets.length(); + index++) { + Assert.assertEquals(timeBuckets.get(index), jaBuckets.get(index)); + } + + JSONArray keys = data.getJSONArray("keys"); + + for(int index = 0; + index < keys.length(); + index++) { + JSONObject keyJO = keys.getJSONObject(index); + + Assert.assertEquals(keyNames.get(index), keyJO.get("name")); + Assert.assertEquals(keyTypes.get(index), keyJO.get("type")); + Assert.assertTrue(keyJO.has("enumValues")); + } + + JSONArray valuesArray = data.getJSONArray("values"); + + Assert.assertEquals("Incorrect number of values.", valueToType.size(), valuesArray.length()); + + Set<String> valueNames = Sets.newHashSet(); + + for(int index = 0; + index < valuesArray.length(); + index++) { + JSONObject valueJO = 
valuesArray.getJSONObject(index); + + String valueName = valueJO.getString("name"); + String typeName = valueJO.getString("type"); + + String expectedType = valueToType.get(valueName); + + /* add() returns false when the name was already seen, which flags a duplicate value. */ Assert.assertTrue("Duplicate value name " + valueName, valueNames.add(valueName)); + Assert.assertTrue("Invalid value name " + valueName, expectedType != null); + + Assert.assertEquals(expectedType, typeName); + } + + JSONArray dimensions = data.getJSONArray("dimensions"); + + for(int index = 0; + index < dimensions.length(); + index++) { + JSONObject combination = dimensions.getJSONObject(index); + JSONArray dimensionsCombinationArray = combination.getJSONArray("combination"); + + /* Collected into a set, so the order of keys inside one combination is not compared; the position of the combination within "dimensions" is. */ Set<String> dimensionCombination = Sets.newHashSet(); + + for(int dimensionIndex = 0; + dimensionIndex < dimensionsCombinationArray.length(); + dimensionIndex++) { + dimensionCombination.add(dimensionsCombinationArray.getString(dimensionIndex)); + } + + Assert.assertEquals(dimensionCombinationsList.get(index), dimensionCombination); + } + } + + /** Logger used for the debug output of this test class. */ private static final Logger LOG = LoggerFactory.getLogger(DimensionalSchemaTest.class); +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSchema.json ---------------------------------------------------------------------- diff --git a/library/src/test/resources/adsGenericEventSchema.json b/library/src/test/resources/adsGenericEventSchema.json new file mode 100644 index 0000000..49d65a9 --- /dev/null +++ b/library/src/test/resources/adsGenericEventSchema.json @@ -0,0 +1,19 @@ +{"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, + {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, + 
{"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], + "timeBuckets":["1m","1h","1d"], + "values": + [{"name":"impressions","type":"long","aggregators":["SUM"]}, + {"name":"clicks","type":"long","aggregators":["SUM"]}, + {"name":"cost","type":"double","aggregators":["SUM"]}, + {"name":"revenue","type":"double","aggregators":["SUM"]}], + "dimensions": + [{"combination":[]}, + {"combination":["location"]}, + {"combination":["advertiser"]}, + {"combination":["publisher"]}, + {"combination":["advertiser","location"]}, + {"combination":["publisher","location"]}, + {"combination":["publisher","advertiser"]}, + {"combination":["publisher","advertiser","location"]}] +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSchemaAdditional.json ---------------------------------------------------------------------- diff --git a/library/src/test/resources/adsGenericEventSchemaAdditional.json b/library/src/test/resources/adsGenericEventSchemaAdditional.json new file mode 100644 index 0000000..eb8cf0b --- /dev/null +++ b/library/src/test/resources/adsGenericEventSchemaAdditional.json @@ -0,0 +1,19 @@ +{"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, + {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, + {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], + "timeBuckets":["1m","1h","1d"], + "values": + [{"name":"impressions","type":"long","aggregators":["SUM","COUNT"]}, + {"name":"clicks","type":"long","aggregators":["SUM","COUNT"]}, + {"name":"cost","type":"double","aggregators":["SUM","COUNT"]}, + 
{"name":"revenue","type":"double","aggregators":["SUM","COUNT"]}], + "dimensions": + [{"combination":[]}, + {"combination":["location"]}, + {"combination":["advertiser"], "additionalValues":["impressions:MIN", "clicks:MIN", "cost:MIN", "revenue:MIN", "impressions:MAX", "clicks:MAX", "cost:MAX", "revenue:MAX"]}, + {"combination":["publisher"], "additionalValues":["impressions:MIN", "clicks:MIN", "cost:MIN", "revenue:MIN", "impressions:MAX", "clicks:MAX", "cost:MAX", "revenue:MAX"]}, + {"combination":["advertiser","location"]}, + {"combination":["publisher","location"]}, + {"combination":["publisher","advertiser"]}, + {"combination":["publisher","advertiser","location"]}] +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSchemaAdditionalOne.json ---------------------------------------------------------------------- diff --git a/library/src/test/resources/adsGenericEventSchemaAdditionalOne.json b/library/src/test/resources/adsGenericEventSchemaAdditionalOne.json new file mode 100644 index 0000000..b75ec8a --- /dev/null +++ b/library/src/test/resources/adsGenericEventSchemaAdditionalOne.json @@ -0,0 +1,13 @@ +{"keys":[{"name":"publisher","type":"string"}, + {"name":"advertiser","type":"string"}, + {"name":"location","type":"string"}], + "timeBuckets":["1m"], + "values": + [{"name":"impressions","type":"long","aggregators":["SUM"]}, + {"name":"clicks","type":"long","aggregators":["SUM"]}, + {"name":"cost","type":"double","aggregators":["SUM"]}, + {"name":"revenue","type":"double"}], + "dimensions": + [{"combination":[]}, + {"combination":["location"],"additionalValues":["revenue:SUM"]}] +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSchemaAggregations.json ---------------------------------------------------------------------- diff --git a/library/src/test/resources/adsGenericEventSchemaAggregations.json 
b/library/src/test/resources/adsGenericEventSchemaAggregations.json new file mode 100644 index 0000000..0cba36c --- /dev/null +++ b/library/src/test/resources/adsGenericEventSchemaAggregations.json @@ -0,0 +1,7 @@ +{"timeBuckets":["1m","1h","1d"], + "values": + [{"name":"impressions","type":"long","aggregators":["SUM","AVG"]}, + {"name":"clicks","type":"long","aggregators":["SUM","AVG"]}, + {"name":"cost","type":"double","aggregators":["SUM","AVG"]}, + {"name":"revenue","type":"double","aggregators":["SUM","AVG"]}] +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSchemaCustomTimeBuckets.json ---------------------------------------------------------------------- diff --git a/library/src/test/resources/adsGenericEventSchemaCustomTimeBuckets.json b/library/src/test/resources/adsGenericEventSchemaCustomTimeBuckets.json new file mode 100644 index 0000000..8e41211 --- /dev/null +++ b/library/src/test/resources/adsGenericEventSchemaCustomTimeBuckets.json @@ -0,0 +1,19 @@ +{"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, + {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, + {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], + "timeBuckets":["1m","1h","1d","5m","3h"], + "values": + [{"name":"impressions","type":"long","aggregators":["SUM"]}, + {"name":"clicks","type":"long","aggregators":["SUM"]}, + {"name":"cost","type":"double","aggregators":["SUM"]}, + {"name":"revenue","type":"double","aggregators":["SUM"]}], + "dimensions": + [{"combination":[]}, + {"combination":["location"]}, + {"combination":["advertiser"]}, + {"combination":["publisher"]}, + {"combination":["advertiser","location"]}, + 
{"combination":["publisher","location"]}, + {"combination":["publisher","advertiser"]}, + {"combination":["publisher","advertiser","location"]}] +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSchemaDimensionTimeBuckets.json ---------------------------------------------------------------------- diff --git a/library/src/test/resources/adsGenericEventSchemaDimensionTimeBuckets.json b/library/src/test/resources/adsGenericEventSchemaDimensionTimeBuckets.json new file mode 100644 index 0000000..a625d59 --- /dev/null +++ b/library/src/test/resources/adsGenericEventSchemaDimensionTimeBuckets.json @@ -0,0 +1,14 @@ +{"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, + {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, + {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], + "timeBuckets":["1m", "3d"], + "values": + [{"name":"impressions","type":"long","aggregators":["SUM"]}, + {"name":"clicks","type":"long","aggregators":["SUM"]}, + {"name":"cost","type":"double","aggregators":["SUM"]}, + {"name":"revenue","type":"double","aggregators":["SUM"]}], + "dimensions": + [{"combination":[], "timeBuckets":["1h", "5s"]}, + {"combination":["location"]}, + {"combination":["advertiser","location"], "timeBuckets":["30s", "2h", "1d"]}] +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSchemaNoEnums.json ---------------------------------------------------------------------- diff --git a/library/src/test/resources/adsGenericEventSchemaNoEnums.json b/library/src/test/resources/adsGenericEventSchemaNoEnums.json new file mode 100644 index 0000000..95ab937 --- /dev/null +++ 
b/library/src/test/resources/adsGenericEventSchemaNoEnums.json @@ -0,0 +1,19 @@ +{"keys":[{"name":"publisher","type":"string"}, + {"name":"advertiser","type":"string"}, + {"name":"location","type":"string"}], + "timeBuckets":["1m","1h","1d"], + "values": + [{"name":"impressions","type":"long","aggregators":["SUM"]}, + {"name":"clicks","type":"long","aggregators":["SUM"]}, + {"name":"cost","type":"double","aggregators":["SUM"]}, + {"name":"revenue","type":"double","aggregators":["SUM"]}], + "dimensions": + [{"combination":[]}, + {"combination":["location"]}, + {"combination":["advertiser"]}, + {"combination":["publisher"]}, + {"combination":["advertiser","location"]}, + {"combination":["publisher","location"]}, + {"combination":["publisher","advertiser"]}, + {"combination":["publisher","advertiser","location"]}] +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSchemaNoTime.json ---------------------------------------------------------------------- diff --git a/library/src/test/resources/adsGenericEventSchemaNoTime.json b/library/src/test/resources/adsGenericEventSchemaNoTime.json new file mode 100644 index 0000000..9fb8577 --- /dev/null +++ b/library/src/test/resources/adsGenericEventSchemaNoTime.json @@ -0,0 +1,18 @@ +{"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, + {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, + {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], + "values": + [{"name":"impressions","type":"long","aggregators":["SUM"]}, + {"name":"clicks","type":"long","aggregators":["SUM"]}, + {"name":"cost","type":"double","aggregators":["SUM"]}, + 
{"name":"revenue","type":"double","aggregators":["SUM"]}], + "dimensions": + [{"combination":[]}, + {"combination":["location"]}, + {"combination":["advertiser"]}, + {"combination":["publisher"]}, + {"combination":["advertiser","location"]}, + {"combination":["publisher","location"]}, + {"combination":["publisher","advertiser"]}, + {"combination":["publisher","advertiser","location"]}] +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSchemaOTF.json ---------------------------------------------------------------------- diff --git a/library/src/test/resources/adsGenericEventSchemaOTF.json b/library/src/test/resources/adsGenericEventSchemaOTF.json new file mode 100644 index 0000000..f2c29dd --- /dev/null +++ b/library/src/test/resources/adsGenericEventSchemaOTF.json @@ -0,0 +1,19 @@ +{"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, + {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, + {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], + "timeBuckets":["1m","1h","1d"], + "values": + [{"name":"impressions","type":"long","aggregators":["MIN", "MAX", "SUM", "AVG"]}, + {"name":"clicks","type":"long","aggregators":["MIN", "MAX", "SUM", "AVG"]}, + {"name":"cost","type":"double","aggregators":["MIN", "MAX", "SUM", "AVG"]}, + {"name":"revenue","type":"double","aggregators":["MIN", "MAX", "SUM", "AVG"]}], + "dimensions": + [{"combination":[]}, + {"combination":["location"]}, + {"combination":["advertiser"]}, + {"combination":["publisher"]}, + {"combination":["advertiser","location"]}, + {"combination":["publisher","location"]}, + {"combination":["publisher","advertiser"]}, + 
{"combination":["publisher","advertiser","location"]}] +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSchemaTags.json ---------------------------------------------------------------------- diff --git a/library/src/test/resources/adsGenericEventSchemaTags.json b/library/src/test/resources/adsGenericEventSchemaTags.json new file mode 100644 index 0000000..ddda988 --- /dev/null +++ b/library/src/test/resources/adsGenericEventSchemaTags.json @@ -0,0 +1,21 @@ +{ + "tags":["geo", "bullet"], + "keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, + {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, + {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"],"tags":["geo.location"]}], + "timeBuckets":["1m","1h","1d"], + "values": + [{"name":"impressions","type":"long","aggregators":["SUM", "COUNT"],"tags":["geo.lattitude"]}, + {"name":"clicks","type":"long","aggregators":["SUM", "COUNT"],"tags":["geo.longitude"]}, + {"name":"cost","type":"double","aggregators":["SUM", "COUNT"]}, + {"name":"revenue","type":"double","aggregators":["SUM", "COUNT"]}], + "dimensions": + [{"combination":[]}, + {"combination":["location"]}, + {"combination":["advertiser"]}, + {"combination":["publisher"]}, + {"combination":["advertiser","location"]}, + {"combination":["publisher","location"]}, + {"combination":["publisher","advertiser"]}, + {"combination":["publisher","advertiser","location"]}] +} http://git-wip-us.apache.org/repos/asf/incubator-apex-malhar/blob/fcdd74de/library/src/test/resources/adsGenericEventSimpleAllCombinations.json ---------------------------------------------------------------------- diff --git 
a/library/src/test/resources/adsGenericEventSimpleAllCombinations.json b/library/src/test/resources/adsGenericEventSimpleAllCombinations.json new file mode 100644 index 0000000..aa7f8cb --- /dev/null +++ b/library/src/test/resources/adsGenericEventSimpleAllCombinations.json @@ -0,0 +1,11 @@ +{"keys":[{"name":"publisher","type":"string","enumValues":["twitter","facebook","yahoo","google","bing","amazon"]}, + {"name":"advertiser","type":"string","enumValues":["starbucks","safeway","mcdonalds","macys","taco bell","walmart","khol's","san diego zoo","pandas","jack in the box","tomatina","ron swanson"]}, + {"name":"location","type":"string","enumValues":["N","LREC","SKY","AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID"]}], + "timeBuckets":["1m"], + "values": + [{"name":"impressions","type":"long","aggregators":["SUM"]}, + {"name":"clicks","type":"long","aggregators":["SUM"]}, + {"name":"cost","type":"double","aggregators":["SUM"]}, + {"name":"revenue","type":"double","aggregators":["SUM"]}], + "dimensions":"ALL_COMBINATIONS" +}
