[ 
https://issues.apache.org/jira/browse/APEXMALHAR-1897?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15202398#comment-15202398
 ] 

ASF GitHub Bot commented on APEXMALHAR-1897:
--------------------------------------------

Github user amberarrow commented on a diff in the pull request:

    
https://github.com/apache/incubator-apex-malhar/pull/145#discussion_r56738629
  
    --- Diff: 
library/src/main/java/com/datatorrent/lib/state/managed/ManagedTimeUnifiedStateImpl.java
 ---
    @@ -0,0 +1,238 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *   http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package com.datatorrent.lib.state.managed;
    +
    +import java.io.DataInputStream;
    +import java.io.DataOutputStream;
    +import java.io.IOException;
    +import java.util.concurrent.Future;
    +import java.util.concurrent.LinkedBlockingQueue;
    +
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import org.apache.hadoop.fs.LocatedFileStatus;
    +import org.apache.hadoop.fs.RemoteIterator;
    +
    +import com.google.common.base.Preconditions;
    +import com.google.common.collect.Queues;
    +import com.google.common.util.concurrent.Futures;
    +
    +import com.datatorrent.lib.fileaccess.FileAccess;
    +import com.datatorrent.lib.state.BucketedState;
    +import com.datatorrent.netlet.util.Slice;
    +
    +/**
    + * In this implementation of {@link ManagedState} the buckets in memory 
are time-buckets.
    + * <p/>
    + *
    + * <b>Difference from {@link ManagedTimeStateImpl}</b>: <br/>
    + * <ol>
    + * <li>The main buckets in {@link ManagedTimeStateImpl} are unique adhoc 
long ids which the user provides with the
    + * key. In this implementation the main buckets are time buckets. The user 
provides just the time and the time bucket is
    + * derived from it.
    + * </li>
    + * <br/>
    + *
    + * <li>In regards to the bucket data on disk, in {@link 
ManagedTimeStateImpl} the buckets are persisted on disk
    + * with each bucket data further grouped into time-buckets: 
{base_path}/{bucketId}/{time-bucket id}. <br/>
    + * In this implementation operator id is used as bucketId (on disk) and 
there is just one time-bucket under a
    + * particular operator id:
    + * {base_path}/{operator id}/{time bucket id}.
    + * </li>
    + * <br/>
    + *
    + * <li>In {@link ManagedTimeStateImpl} a bucket belongs to just one 
partition. Multiple partitions cannot write to
    + * the same bucket. <br/>
    + * In this implementation multiple partitions can be working with the same 
time-bucket (since time-bucket is derived
    + * from time). This works because on the disk the time-bucket data is 
segregated under each operator id.
    + * </li>
    + * <br/>
    + *
    + * <li>While {@link ManagedTimeStateImpl} can support dynamic partitioning 
by pre-allocating buckets this will not
    + * be able to support dynamic partitioning efficiently.
    + * </li>
    +
    + * </ol>
    + */
    +public class ManagedTimeUnifiedStateImpl extends AbstractManagedStateImpl 
implements BucketedState
    +{
    +  private final transient LinkedBlockingQueue<Long> purgedTimeBuckets = 
Queues.newLinkedBlockingQueue();
    +
    +  public ManagedTimeUnifiedStateImpl()
    +  {
    +    bucketsFileSystem = new TimeUnifiedBucketsFileSystem();
    +  }
    +
    +  @Override
    +  public int getNumBuckets()
    +  {
    +    return timeBucketAssigner.getNumBuckets();
    +  }
    +
    +  @Override
    +  public void put(long time, Slice key, Slice value)
    +  {
    +    long timeBucket = timeBucketAssigner.getTimeBucketFor(time);
    +    if (timeBucket == -1) {
    +      //time is expired so return null.
    +      return;
    +    }
    +    int bucketIdx = prepareBucket(timeBucket);
    +
    +    buckets[bucketIdx].put(key, timeBucket, value);
    +
    +  }
    +
    +  @Override
    +  public Slice getSync(long time, Slice key)
    +  {
    +    long timeBucket = timeBucketAssigner.getTimeBucketFor(time);
    +    if (timeBucket == -1) {
    +      //time is expired so return null.
    +      return null;
    +    }
    +    int bucketIdx = prepareBucket(timeBucket);
    +    return buckets[bucketIdx].get(key, timeBucket, Bucket.ReadSource.ALL);
    +  }
    +
    +  @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
    +  @Override
    +  public Future<Slice> getAsync(long time, Slice key)
    +  {
    +    long timeBucket = timeBucketAssigner.getTimeBucketFor(time);
    +    if (timeBucket == -1) {
    +      //time is expired so return null.
    +      return null;
    +    }
    +    int bucketIdx = prepareBucket(timeBucket);
    +    Bucket bucket = buckets[bucketIdx];
    +    synchronized (bucket) {
    +      Slice cachedVal = buckets[bucketIdx].get(key, timeBucket, 
Bucket.ReadSource.MEMORY);
    +      if (cachedVal != null) {
    +        return Futures.immediateFuture(cachedVal);
    +      }
    +      return readerService.submit(new KeyFetchTask(bucket, key, 
timeBucket, throwable));
    +    }
    +  }
    +
    +  @Override
    +  public void endWindow()
    +  {
    +    super.endWindow();
    +    Long purgedTimeBucket;
    +
    +    //tear down all the purged time buckets
    +    while (null != (purgedTimeBucket = purgedTimeBuckets.poll())) {
    +      int purgedTimeBucketIdx = getBucketIdx(purgedTimeBucket);
    +      if (buckets[purgedTimeBucketIdx] != null && 
buckets[purgedTimeBucketIdx].getBucketId() == purgedTimeBucket) {
    +        buckets[purgedTimeBucketIdx].teardown();
    +        buckets[purgedTimeBucketIdx] = null;
    +      }
    +    }
    +  }
    +
    +  @Override
    +  protected void handleBucketConflict(int bucketIdx, long newBucketId)
    +  {
    +    Preconditions.checkArgument(buckets[bucketIdx].getBucketId() < 
newBucketId, "new time bucket should have a value"
    +        + " greater than the old time bucket");
    +    //Time buckets are purged periodically so here a bucket conflict is 
expected and so we just ignore conflicts.
    +    buckets[bucketIdx].teardown();
    +    buckets[bucketIdx] = newBucket(newBucketId);
    +    buckets[bucketIdx].setup(this);
    +  }
    +
    +  @Override
    +  public void purgeTimeBucketsLessThanEqualTo(long timeBucket)
    +  {
    +    purgedTimeBuckets.add(timeBucket);
    +    super.purgeTimeBucketsLessThanEqualTo(timeBucket);
    +  }
    +
    +  /**
    +   * This uses operator id instead of bucket id as the name of parent 
folder of time-buckets. This is because
    +   * multiple partitions may work on same time-buckets.
    +   */
    +  public static class TimeUnifiedBucketsFileSystem extends 
BucketsFileSystem
    --- End diff --
    
    Can this be private?


> Create ManagedState
> -------------------
>
>                 Key: APEXMALHAR-1897
>                 URL: https://issues.apache.org/jira/browse/APEXMALHAR-1897
>             Project: Apache Apex Malhar
>          Issue Type: Sub-task
>            Reporter: Chandni Singh
>            Assignee: Chandni Singh
>             Fix For: 3.4.0
>
>
> ManagedState is described in the document below:
> https://docs.google.com/document/d/1gRWN9ufKSZSZD0N-pthlhpC9TZ8KwJ6hJlAX6nxl5f8/edit#heading=h.z87ti1fwyt0t



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to