Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2300#discussion_r187762828
--- Diff:
datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapCache.java
---
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.datamap.bloom;
+
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.carbondata.common.logging.LogService;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.util.CarbonUtil;
+
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.CacheStats;
+import com.google.common.cache.LoadingCache;
+import com.google.common.cache.RemovalListener;
+import com.google.common.cache.RemovalNotification;
+
+public class BloomDataMapCache implements Serializable {
+ private static final LogService LOGGER = LogServiceFactory.getLogService(
+ BloomDataMapCache.class.getName());
+ private static final long serialVersionUID = 20160822L;
+ private static final long DEFAULT_CACHE_SIZE = 512 * 1024 * 1024;
+ private static final int DEFAULT_CACHE_EXPIRED_HOURS = 2;
+ private LoadingCache<CacheKey, List<BloomDMModel>> bloomDMCache = null;
+
+ private BloomDataMapCache() {
+ RemovalListener<CacheKey, List<BloomDMModel>> listener =
+ new RemovalListener<CacheKey, List<BloomDMModel>>() {
+ @Override
+ public void onRemoval(RemovalNotification<CacheKey,
List<BloomDMModel>> notification) {
+ LOGGER.error(
+ String.format("Remove bloom datamap entry %s from cache due to
%s",
+ notification.getKey(), notification.getCause()));
+ }
+ };
+ CacheLoader<CacheKey, List<BloomDMModel>> cacheLoader =
+ new CacheLoader<CacheKey, List<BloomDMModel>>() {
+ @Override
+ public List<BloomDMModel> load(CacheKey key) throws Exception {
+ LOGGER.error(String.format("Load bloom datamap entry %s to cache",
key));
+ return loadBloomDataMapModel(key);
+ }
+ };
+
+ this.bloomDMCache = CacheBuilder.newBuilder()
+ .recordStats()
+ .maximumSize(DEFAULT_CACHE_SIZE)
+ .expireAfterAccess(DEFAULT_CACHE_EXPIRED_HOURS, TimeUnit.HOURS)
+ .removalListener(listener)
+ .build(cacheLoader);
+ }
+
+ private static class SingletonHolder {
+ public static final BloomDataMapCache INSTANCE = new
BloomDataMapCache();
+ }
+
+ /**
+ * get instance
+ */
+ public static BloomDataMapCache getInstance() {
+ return SingletonHolder.INSTANCE;
+ }
+
+ /**
+ * for resolve from serialized
+ */
+ protected Object readResolve() {
+ return getInstance();
+ }
+
+ /**
+ * load datamap from bloomindex file
+ */
+ private List<BloomDMModel> loadBloomDataMapModel(CacheKey cacheKey) {
+ DataInputStream dataInStream = null;
+ ObjectInputStream objectInStream = null;
+ List<BloomDMModel> bloomDMModels = new ArrayList<BloomDMModel>();
+ try {
+ String indexFile = getIndexFileFromCacheKey(cacheKey);
+ dataInStream = FileFactory.getDataInputStream(indexFile,
FileFactory.getFileType(indexFile));
+ objectInStream = new ObjectInputStream(dataInStream);
+ try {
+ BloomDMModel model = null;
+ while ((model = (BloomDMModel) objectInStream.readObject()) !=
null) {
+ bloomDMModels.add(model);
+ }
+ } catch (EOFException e) {
+ LOGGER.info(String.format("Read %d bloom indices from %s",
+ bloomDMModels.size(), indexFile));
+ }
+ this.bloomDMCache.put(cacheKey, bloomDMModels);
+ return bloomDMModels;
+ } catch (ClassNotFoundException | IOException e) {
+ LOGGER.error(e, "Error occurs while reading bloom index");
+ throw new RuntimeException("Error occurs while reading bloom index",
e);
+ } finally {
+ clear();
+ CarbonUtil.closeStreams(objectInStream, dataInStream);
+ }
+ }
+
+ /**
+ * get bloom index file name from cachekey
+ */
+ private String getIndexFileFromCacheKey(CacheKey cacheKey) {
+ return
cacheKey.shardPath.concat(File.separator).concat(cacheKey.indexColumn)
--- End diff --
Can you make a utility function to get the file path? I think this utility
should be used both here and in BloomDataMapWriter.initDataMapFile.
---