Repository: incubator-carbondata Updated Branches: refs/heads/master 9e35dc280 -> 70c1015e4
add dictionary interface remove size() add PreCreatedDictionary.java add size() fix comment Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/e44f7a9c Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/e44f7a9c Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/e44f7a9c Branch: refs/heads/master Commit: e44f7a9cd0441e46437418b71d83fbdb1ebda7cf Parents: 9e35dc2 Author: jackylk <jacky.li...@huawei.com> Authored: Fri Oct 14 01:38:43 2016 +0800 Committer: jackylk <jacky.li...@huawei.com> Committed: Fri Oct 14 22:38:47 2016 +0800 ---------------------------------------------------------------------- .../carbondata/core/devapi/BiDictionary.java | 53 +++++++++++++ .../core/devapi/DictionaryGenerator.java | 31 ++++++++ .../core/devapi/GeneratingBiDictionary.java | 50 ++++++++++++ .../newflow/dictionary/InMemBiDictionary.java | 80 ++++++++++++++++++++ .../dictionary/PreCreatedDictionary.java | 56 ++++++++++++++ .../dictionary/InMemBiDictionaryTest.java | 73 ++++++++++++++++++ 6 files changed, 343 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e44f7a9c/core/src/main/java/org/apache/carbondata/core/devapi/BiDictionary.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/devapi/BiDictionary.java b/core/src/main/java/org/apache/carbondata/core/devapi/BiDictionary.java new file mode 100644 index 0000000..ffaba52 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/devapi/BiDictionary.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.carbondata.core.devapi; + +public interface BiDictionary<K, V> { + + /** + * Get the dictionary key corresponding to the input value; generate a new key if the value + * does not exist yet. The new key-value pair will be added to this dictionary. + * @param value dictionary value + * @return dictionary key + */ + K getOrGenerateKey(V value) throws Exception; + + /** + * Get the dictionary key corresponding to the input value; return null if the value does not + * exist in the dictionary. + * @param value dictionary value + * @return dictionary key + */ + K getKey(V value); + + /** + * Get the dictionary value corresponding to the input key; return null if the key does not + * exist in the dictionary. 
+ * @param key dictionary key + * @return dictionary value + */ + V getValue(K key); + + /** + * Return the size of the dictionary + * @return size + */ + int size(); +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e44f7a9c/core/src/main/java/org/apache/carbondata/core/devapi/DictionaryGenerator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/devapi/DictionaryGenerator.java b/core/src/main/java/org/apache/carbondata/core/devapi/DictionaryGenerator.java new file mode 100644 index 0000000..99f430e --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/devapi/DictionaryGenerator.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.carbondata.core.devapi; + +public interface DictionaryGenerator<K, V> { + + /** + * Generate the dictionary key for the given dictionary value. + * @param value dictionary value to generate a key for + * @return dictionary key generated for the value + * @throws Exception any exception raised by the implementation while generating the key + */ + K generateKey(V value) throws Exception; +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e44f7a9c/core/src/main/java/org/apache/carbondata/core/devapi/GeneratingBiDictionary.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/devapi/GeneratingBiDictionary.java b/core/src/main/java/org/apache/carbondata/core/devapi/GeneratingBiDictionary.java new file mode 100644 index 0000000..47424b4 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/devapi/GeneratingBiDictionary.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.carbondata.core.devapi; + +public abstract class GeneratingBiDictionary<K, V> implements BiDictionary<K, V> { + + private DictionaryGenerator<K, V> generator; /* creates keys for values not yet present */ + + public GeneratingBiDictionary(DictionaryGenerator<K, V> generator) { + this.generator = generator; + } + + @Override + public K getOrGenerateKey(V value) throws Exception { + K key = getKey(value); + if (key != null) { + return key; + } else { + K newKey = generator.generateKey(value); + assert(newKey != null); /* only checked when assertions are enabled (-ea) */ + put(newKey, value); + return newKey; + } + } + + /** + * Put the input key-value pair into the dictionary; invoked by getOrGenerateKey for new keys. + * @param key dictionary key + * @param value dictionary value + */ + protected abstract void put(K key, V value); + +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e44f7a9c/processing/src/main/java/org/apache/carbondata/processing/newflow/dictionary/InMemBiDictionary.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/dictionary/InMemBiDictionary.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/dictionary/InMemBiDictionary.java new file mode 100644 index 0000000..dcf5d5e --- /dev/null +++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/dictionary/InMemBiDictionary.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.carbondata.processing.newflow.dictionary; + +import java.util.Map; + +import org.apache.carbondata.core.devapi.DictionaryGenerator; +import org.apache.carbondata.core.devapi.GeneratingBiDictionary; + +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; + +public class InMemBiDictionary<K, V> extends GeneratingBiDictionary<K, V> { + + private BiMap<K, V> biMap; /* key -> value map; inverse() gives value -> key */ + + /** + * Constructor to create a new, empty dictionary whose keys are generated by the given generator. + * @param generator generator for new keys. NOTE(review): declared with a raw type - confirm + */ + public InMemBiDictionary(DictionaryGenerator generator) { + super(generator); + biMap = HashBiMap.create(); + } + + /** + * Constructor to create a dictionary from existing entries; new values are rejected. + * @param preCreatedDictionary key to value entries copied into this dictionary + */ + public InMemBiDictionary(Map<K, V> preCreatedDictionary) { + super(new DictionaryGenerator<K, V>() { + @Override + public K generateKey(V value) throws Exception { + // Since dictionary is provided by preCreated, normally it should not come here + throw new Exception("encounter new dictionary value in pre-created dictionary:" + value); + } + }); + biMap = HashBiMap.create(preCreatedDictionary); + } + + @Override + public K getKey(V value) { + return biMap.inverse().get(value); + } + + @Override + public V getValue(K key) { + return biMap.get(key); + } + + @Override + protected void put(K key, V value) { + // dictionary is immutable, it is append only + assert(!biMap.containsKey(key)); + assert(!biMap.containsValue(value)); + biMap.put(key, value); + } + + @Override + public int size() { + 
return biMap.size(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e44f7a9c/processing/src/main/java/org/apache/carbondata/processing/newflow/dictionary/PreCreatedDictionary.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/dictionary/PreCreatedDictionary.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/dictionary/PreCreatedDictionary.java new file mode 100644 index 0000000..900d3e5 --- /dev/null +++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/dictionary/PreCreatedDictionary.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.carbondata.processing.newflow.dictionary; + +import org.apache.carbondata.core.cache.dictionary.Dictionary; +import org.apache.carbondata.core.devapi.BiDictionary; + +public class PreCreatedDictionary implements BiDictionary<Integer, String> { + + private Dictionary dictionary; /* every lookup below is delegated to this dictionary */ + + public PreCreatedDictionary(Dictionary dictionary) { + this.dictionary = dictionary; + } + + @Override + public Integer getOrGenerateKey(String value) throws Exception { + Integer key = getKey(value); + if (key == null) { /* NOTE(review): confirm getKey can return null here */ + throw new UnsupportedOperationException("trying to add new entry in PreCreatedDictionary"); + } + return key; + } + + @Override + public Integer getKey(String value) { + return dictionary.getSurrogateKey(value); + } + + @Override + public String getValue(Integer key) { + return dictionary.getDictionaryValueForKey(key); + } + + @Override + public int size() { + return dictionary.getDictionaryChunks().getSize(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e44f7a9c/processing/src/test/java/org/apache/carbondata/processing/newflow/dictionary/InMemBiDictionaryTest.java ---------------------------------------------------------------------- diff --git a/processing/src/test/java/org/apache/carbondata/processing/newflow/dictionary/InMemBiDictionaryTest.java b/processing/src/test/java/org/apache/carbondata/processing/newflow/dictionary/InMemBiDictionaryTest.java new file mode 100644 index 0000000..6ceea7e --- /dev/null +++ b/processing/src/test/java/org/apache/carbondata/processing/newflow/dictionary/InMemBiDictionaryTest.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.carbondata.processing.newflow.dictionary; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.carbondata.core.devapi.BiDictionary; +import org.apache.carbondata.core.devapi.DictionaryGenerator; +import org.junit.Assert; +import org.junit.Test; + +public class InMemBiDictionaryTest { + + /** + * test a dictionary built from a pre-created map: lookups work both ways, new values must fail + */ + @Test public void testPreCreated() throws Exception { + Map<Integer, String> map = new HashMap<>(); + map.put(1, "amy"); + map.put(2, "bob"); + BiDictionary<Integer, String> dict = new InMemBiDictionary<>(map); + Assert.assertEquals(1, dict.getKey("amy").intValue()); + Assert.assertEquals(2, dict.getKey("bob").intValue()); + Assert.assertEquals("amy", dict.getValue(1)); + Assert.assertEquals("bob", dict.getValue(2)); + Assert.assertEquals(2, dict.size()); + try { + dict.getOrGenerateKey("cat"); + Assert.fail("add dictionary successfully"); + } catch (Exception e) { + // test pass + } + } + + /** + * test generating dictionary keys on the fly from a generator that hands out 1, 2, ... + */ + @Test public void testGenerateDict() throws Exception { + BiDictionary<Integer, String> dict = new InMemBiDictionary<>( + new DictionaryGenerator<Integer, String>() { + int sequence = 1; + @Override + public Integer generateKey(String value) throws Exception { + return sequence++; + } + }); + Assert.assertEquals(1, dict.getOrGenerateKey("amy").intValue()); + 
Assert.assertEquals(2, dict.getOrGenerateKey("bob").intValue()); + Assert.assertEquals(1, dict.getKey("amy").intValue()); + Assert.assertEquals(2, dict.getKey("bob").intValue()); + Assert.assertEquals("amy", dict.getValue(1)); + Assert.assertEquals("bob", dict.getValue(2)); + Assert.assertEquals(2, dict.size()); + } +}