Github user jackylk commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2300#discussion_r187762828
  
    --- Diff: 
datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapCache.java
 ---
    @@ -0,0 +1,197 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.carbondata.datamap.bloom;
    +
    +import java.io.DataInputStream;
    +import java.io.EOFException;
    +import java.io.File;
    +import java.io.IOException;
    +import java.io.ObjectInputStream;
    +import java.io.Serializable;
    +import java.util.ArrayList;
    +import java.util.List;
    +import java.util.Objects;
    +import java.util.concurrent.TimeUnit;
    +
    +import org.apache.carbondata.common.logging.LogService;
    +import org.apache.carbondata.common.logging.LogServiceFactory;
    +import org.apache.carbondata.core.datastore.impl.FileFactory;
    +import org.apache.carbondata.core.util.CarbonUtil;
    +
    +import com.google.common.cache.CacheBuilder;
    +import com.google.common.cache.CacheLoader;
    +import com.google.common.cache.CacheStats;
    +import com.google.common.cache.LoadingCache;
    +import com.google.common.cache.RemovalListener;
    +import com.google.common.cache.RemovalNotification;
    +
    +public class BloomDataMapCache implements Serializable {
    +  private static final LogService LOGGER = LogServiceFactory.getLogService(
    +      BloomDataMapCache.class.getName());
    +  private static final long serialVersionUID = 20160822L;
    +  private static final long DEFAULT_CACHE_SIZE = 512 * 1024 * 1024;
    +  private static final int DEFAULT_CACHE_EXPIRED_HOURS = 2;
    +  private LoadingCache<CacheKey, List<BloomDMModel>> bloomDMCache = null;
    +
    +  private BloomDataMapCache() {
    +    RemovalListener<CacheKey, List<BloomDMModel>> listener =
    +        new RemovalListener<CacheKey, List<BloomDMModel>>() {
    +      @Override
    +      public void onRemoval(RemovalNotification<CacheKey, 
List<BloomDMModel>> notification) {
    +        LOGGER.error(
    +            String.format("Remove bloom datamap entry %s from cache due to 
%s",
    +                notification.getKey(), notification.getCause()));
    +      }
    +    };
    +    CacheLoader<CacheKey, List<BloomDMModel>> cacheLoader =
    +        new CacheLoader<CacheKey, List<BloomDMModel>>() {
    +      @Override
    +      public List<BloomDMModel> load(CacheKey key) throws Exception {
    +        LOGGER.error(String.format("Load bloom datamap entry %s to cache", 
key));
    +        return loadBloomDataMapModel(key);
    +      }
    +    };
    +
    +    this.bloomDMCache = CacheBuilder.newBuilder()
    +        .recordStats()
    +        .maximumSize(DEFAULT_CACHE_SIZE)
    +        .expireAfterAccess(DEFAULT_CACHE_EXPIRED_HOURS, TimeUnit.HOURS)
    +        .removalListener(listener)
    +        .build(cacheLoader);
    +  }
    +
    +  private static class SingletonHolder {
    +    public static final BloomDataMapCache INSTANCE = new 
BloomDataMapCache();
    +  }
    +
    +  /**
    +   * get instance
    +   */
    +  public static BloomDataMapCache getInstance() {
    +    return SingletonHolder.INSTANCE;
    +  }
    +
    +  /**
    +   * for resolve from serialized
    +   */
    +  protected Object readResolve() {
    +    return getInstance();
    +  }
    +
    +  /**
    +   * load datamap from bloomindex file
    +   */
    +  private List<BloomDMModel> loadBloomDataMapModel(CacheKey cacheKey) {
    +    DataInputStream dataInStream = null;
    +    ObjectInputStream objectInStream = null;
    +    List<BloomDMModel> bloomDMModels = new ArrayList<BloomDMModel>();
    +    try {
    +      String indexFile = getIndexFileFromCacheKey(cacheKey);
    +      dataInStream = FileFactory.getDataInputStream(indexFile, 
FileFactory.getFileType(indexFile));
    +      objectInStream = new ObjectInputStream(dataInStream);
    +      try {
    +        BloomDMModel model = null;
    +        while ((model = (BloomDMModel) objectInStream.readObject()) != 
null) {
    +          bloomDMModels.add(model);
    +        }
    +      } catch (EOFException e) {
    +        LOGGER.info(String.format("Read %d bloom indices from %s",
    +            bloomDMModels.size(), indexFile));
    +      }
    +      this.bloomDMCache.put(cacheKey, bloomDMModels);
    +      return bloomDMModels;
    +    } catch (ClassNotFoundException | IOException e) {
    +      LOGGER.error(e, "Error occurs while reading bloom index");
    +      throw new RuntimeException("Error occurs while reading bloom index", 
e);
    +    } finally {
    +      clear();
    +      CarbonUtil.closeStreams(objectInStream, dataInStream);
    +    }
    +  }
    +
    +  /**
    +   * get bloom index file name from cachekey
    +   */
    +  private String getIndexFileFromCacheKey(CacheKey cacheKey) {
    +    return 
cacheKey.shardPath.concat(File.separator).concat(cacheKey.indexColumn)
    --- End diff --
    
    Can you make a utility function to get the file path? I think this utility 
should be used here and in BloomDataMapWriter.initDataMapFile.


---

Reply via email to