Repository: samza
Updated Branches:
  refs/heads/master 1eb4c2663 -> cc490ea89


SAMZA-1891: Updated javadoc to include config behavior

Instead of referring users to the Samza Configuration Guide, TableDescriptors 
should carry a copy of that documentation in their javadoc. If the goal is to 
let users configure their job primarily in code, it's simpler for them to see 
javadocs in their IDE than to refer to a separate web page.

Author: Wei Song <[email protected]>

Reviewers: Peng Du <[email protected]>

Closes #709 from weisong44/SAMZA-1891 and squashes the following commits:

9f98e1aa [Wei Song] Merge branch 'master' into SAMZA-1891
2c679c39 [Wei Song] Merge remote-tracking branch 'upstream/master'
85081fb1 [Wei Song] SAMZA-1891: Updated javadoc to include config behavior
a56c28dc [Wei Song] Merge remote-tracking branch 'upstream/master'
097958c8 [Wei Song] Merge remote-tracking branch 'upstream/master'
05822f0a [Wei Song] Merge remote-tracking branch 'upstream/master'
f7480505 [Wei Song] Merge remote-tracking branch 'upstream/master'
7706ab1f [Wei Song] Merge remote-tracking branch 'upstream/master'
f5731b10 [Wei Song] Merge remote-tracking branch 'upstream/master'
1e5de45a [Wei Song] Merge remote-tracking branch 'upstream/master'
c85604e0 [Wei Song] Merge remote-tracking branch 'upstream/master'
242d8442 [Wei Song] Merge remote-tracking branch 'upstream/master'
ec7d8409 [Wei Song] Merge remote-tracking branch 'upstream/master'
e19b4dc9 [Wei Song] Merge remote-tracking branch 'upstream/master'
8ee78441 [Wei Song] Merge remote-tracking branch 'upstream/master'
1c6a2eae [Wei Song] Merge remote-tracking branch 'upstream/master'
a6c94add [Wei Song] Merge remote-tracking branch 'upstream/master'
41299b5b [Wei Song] Merge remote-tracking branch 'upstream/master'
239a0950 [Wei Song] Merge remote-tracking branch 'upstream/master'
eca00204 [Wei Song] Merge remote-tracking branch 'upstream/master'
51562391 [Wei Song] Merge remote-tracking branch 'upstream/master'
de708f5e [Wei Song] Merge remote-tracking branch 'upstream/master'
df2f8d7b [Wei Song] Merge remote-tracking branch 'upstream/master'
f28b491d [Wei Song] Merge remote-tracking branch 'upstream/master'
4782c61d [Wei Song] Merge remote-tracking branch 'upstream/master'
0440f75f [Wei Song] Merge remote-tracking branch 'upstream/master'
aae0f380 [Wei Song] Merge remote-tracking branch 'upstream/master'
a15a7c9a [Wei Song] Merge remote-tracking branch 'upstream/master'
5cbf9af9 [Wei Song] Merge remote-tracking branch 'upstream/master'
3f7ed71f [Wei Song] Added self to committer list


Project: http://git-wip-us.apache.org/repos/asf/samza/repo
Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/cc490ea8
Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/cc490ea8
Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/cc490ea8

Branch: refs/heads/master
Commit: cc490ea89d571809501785834f81ffdcb7b508e7
Parents: 1eb4c26
Author: Wei Song <[email protected]>
Authored: Thu Oct 11 15:28:30 2018 -0700
Committer: Wei Song <[email protected]>
Committed: Thu Oct 11 15:28:30 2018 -0700

----------------------------------------------------------------------
 .../samza/operators/BaseTableDescriptor.java    | 10 +-
 .../table/remote/RemoteTableDescriptor.java     |  4 +-
 .../kv/inmemory/InMemoryTableDescriptor.java    |  4 +-
 .../storage/kv/RocksDbTableDescriptor.java      | 98 +++++++++++++++++++-
 .../kv/BaseLocalStoreBackedTableDescriptor.java | 20 +++-
 5 files changed, 124 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/samza/blob/cc490ea8/samza-core/src/main/java/org/apache/samza/operators/BaseTableDescriptor.java
----------------------------------------------------------------------
diff --git 
a/samza-core/src/main/java/org/apache/samza/operators/BaseTableDescriptor.java 
b/samza-core/src/main/java/org/apache/samza/operators/BaseTableDescriptor.java
index 1830d1c..dd47af2 100644
--- 
a/samza-core/src/main/java/org/apache/samza/operators/BaseTableDescriptor.java
+++ 
b/samza-core/src/main/java/org/apache/samza/operators/BaseTableDescriptor.java
@@ -45,7 +45,7 @@ abstract public class BaseTableDescriptor<K, V, D extends 
BaseTableDescriptor<K,
 
   /**
    * Constructs a table descriptor instance
-   * @param tableId Id of the table, it must confirm to pattern { @literal 
[\\d\\w-_]+ }
+   * @param tableId Id of the table, it must conform to pattern {@literal 
[\\d\\w-_]+}
    */
   protected BaseTableDescriptor(String tableId) {
     this.tableId = tableId;
@@ -53,7 +53,7 @@ abstract public class BaseTableDescriptor<K, V, D extends 
BaseTableDescriptor<K,
 
   /**
    * Constructs a table descriptor instance
-   * @param tableId Id of the table, it must confirm to pattern { @literal 
[\\d\\w-_]+ }
+   * @param tableId Id of the table, it must conform to pattern {@literal 
[\\d\\w-_]+}
    * @param serde the serde for key and value
    */
   protected BaseTableDescriptor(String tableId, KVSerde<K, V> serde) {
@@ -61,12 +61,18 @@ abstract public class BaseTableDescriptor<K, V, D extends 
BaseTableDescriptor<K,
     this.serde = serde;
   }
 
+  /**
+   * {@inheritDoc}
+   */
   @Override
   public D withConfig(String key, String value) {
     config.put(key, value);
     return (D) this;
   }
 
+  /**
+   * {@inheritDoc}
+   */
   @Override
   public String getTableId() {
     return tableId;

http://git-wip-us.apache.org/repos/asf/samza/blob/cc490ea8/samza-core/src/main/java/org/apache/samza/table/remote/RemoteTableDescriptor.java
----------------------------------------------------------------------
diff --git 
a/samza-core/src/main/java/org/apache/samza/table/remote/RemoteTableDescriptor.java
 
b/samza-core/src/main/java/org/apache/samza/table/remote/RemoteTableDescriptor.java
index c31348f..fe01e6b 100644
--- 
a/samza-core/src/main/java/org/apache/samza/table/remote/RemoteTableDescriptor.java
+++ 
b/samza-core/src/main/java/org/apache/samza/table/remote/RemoteTableDescriptor.java
@@ -81,7 +81,7 @@ public class RemoteTableDescriptor<K, V> extends 
BaseTableDescriptor<K, V, Remot
 
   /**
    * Constructs a table descriptor instance
-   * @param tableId Id of the table, it must confirm to pattern { @literal 
[\\d\\w-_]+ }
+   * @param tableId Id of the table, it must conform to pattern {@literal 
[\\d\\w-_]+}
    */
   public RemoteTableDescriptor(String tableId) {
     super(tableId);
@@ -89,7 +89,7 @@ public class RemoteTableDescriptor<K, V> extends 
BaseTableDescriptor<K, V, Remot
 
   /**
    * Constructs a table descriptor instance
-   * @param tableId Id of the table, it must confirm to pattern { @literal 
[\\d\\w-_]+ }
+   * @param tableId Id of the table, it must conform to pattern {@literal 
[\\d\\w-_]+}
    * @param serde the serde for key and value
    */
   public RemoteTableDescriptor(String tableId, KVSerde<K, V> serde) {

http://git-wip-us.apache.org/repos/asf/samza/blob/cc490ea8/samza-kv-inmemory/src/main/java/org/apache/samza/storage/kv/inmemory/InMemoryTableDescriptor.java
----------------------------------------------------------------------
diff --git 
a/samza-kv-inmemory/src/main/java/org/apache/samza/storage/kv/inmemory/InMemoryTableDescriptor.java
 
b/samza-kv-inmemory/src/main/java/org/apache/samza/storage/kv/inmemory/InMemoryTableDescriptor.java
index d364234..d052c99 100644
--- 
a/samza-kv-inmemory/src/main/java/org/apache/samza/storage/kv/inmemory/InMemoryTableDescriptor.java
+++ 
b/samza-kv-inmemory/src/main/java/org/apache/samza/storage/kv/inmemory/InMemoryTableDescriptor.java
@@ -36,7 +36,7 @@ public class InMemoryTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDesc
 
   /**
    * Constructs a table descriptor instance
-   * @param tableId Id of the table, it must confirm to pattern { @literal 
[\\d\\w-_]+ }
+   * @param tableId Id of the table, it must conform to pattern {@literal 
[\\d\\w-_]+}
    */
   public InMemoryTableDescriptor(String tableId) {
     super(tableId);
@@ -44,7 +44,7 @@ public class InMemoryTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDesc
 
   /**
    * Constructs a table descriptor instance
-   * @param tableId Id of the table, it must confirm to pattern { @literal 
[\\d\\w-_]+ }
+   * @param tableId Id of the table, it must conform to pattern {@literal 
[\\d\\w-_]+}
    * @param serde the serde for key and value
    */
   public InMemoryTableDescriptor(String tableId, KVSerde<K, V> serde) {

http://git-wip-us.apache.org/repos/asf/samza/blob/cc490ea8/samza-kv-rocksdb/src/main/java/org/apache/samza/storage/kv/RocksDbTableDescriptor.java
----------------------------------------------------------------------
diff --git 
a/samza-kv-rocksdb/src/main/java/org/apache/samza/storage/kv/RocksDbTableDescriptor.java
 
b/samza-kv-rocksdb/src/main/java/org/apache/samza/storage/kv/RocksDbTableDescriptor.java
index 325d023..50bc2c2 100644
--- 
a/samza-kv-rocksdb/src/main/java/org/apache/samza/storage/kv/RocksDbTableDescriptor.java
+++ 
b/samza-kv-rocksdb/src/main/java/org/apache/samza/storage/kv/RocksDbTableDescriptor.java
@@ -59,7 +59,7 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
 
   /**
    * Constructs a table descriptor instance
-   * @param tableId Id of the table, it must confirm to pattern { @literal 
[\\d\\w-_]+ }
+   * @param tableId Id of the table, it must conform to pattern {@literal 
[\\d\\w-_]+}
    */
   public RocksDbTableDescriptor(String tableId) {
     super(tableId);
@@ -67,7 +67,7 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
 
   /**
    * Constructs a table descriptor instance
-   * @param tableId Id of the table, it must confirm to pattern { @literal 
[\\d\\w-_]+ }
+   * @param tableId Id of the table, it must conform to pattern {@literal 
[\\d\\w-_]+}
    * @param serde the serde for key and value
    */
   public RocksDbTableDescriptor(String tableId, KVSerde<K, V> serde) {
@@ -75,7 +75,16 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
+   * For better write performance, the storage engine buffers writes and 
applies them to the
+   * underlying store in a batch. If the same key is written multiple times in 
quick succession,
+   * this buffer also deduplicates writes to the same key. This property is 
set to the number
+   * of key/value pairs that should be kept in this in-memory buffer, per task 
instance.
+   * The number cannot be greater than {@link #withObjectCacheSize}.
+   * <p>
+   * Default value is 500.
+   * <p>
    * Refer to <code>stores.store-name.write.batch.size</code> in Samza 
configuration guide
+   *
    * @param writeBatchSize write batch size
    * @return this table descriptor instance
    */
@@ -85,7 +94,17 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
+   * Samza maintains an additional cache in front of RocksDB for 
frequently-accessed objects.
+   * This cache contains deserialized objects (avoiding the deserialization 
overhead on cache
+   * hits), in contrast to the RocksDB block cache ({@link #withCacheSize}), 
which caches
+   * serialized objects. This property determines the number of objects to 
keep in Samza's
+   * cache, per task instance. This same cache is also used for write buffering
+   * (see {@link #withWriteBatchSize}). A value of 0 disables all caching and 
batching.
+   * <p>
+   * Default value is 1,000.
+   * <p>
    * Refer to <code>stores.store-name.object.cache.size</code> in Samza 
configuration guide
+   *
    * @param objectCacheSize the object cache size
    * @return this table descriptor instance
    */
@@ -95,7 +114,15 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
+   * The size of RocksDB's block cache in bytes, per container. If there are 
several task
+   * instances within one container, each is given a proportional share of 
this cache.
+   * Note that this is an off-heap memory allocation, so the container's total 
memory
+   * use is the maximum JVM heap size plus the size of this cache.
+   * <p>
+   * Default value is 104,857,600.
+   * <p>
    * Refer to <code>stores.store-name.container.cache.size.bytes</code> in 
Samza configuration guide
+   *
    * @param cacheSize the cache size in bytes
    * @return this table descriptor instance
    */
@@ -105,7 +132,15 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
+   * The amount of memory (in bytes) that RocksDB uses for buffering writes 
before they are
+   * written to disk, per container. If there are several task instances 
within one container,
+   * each is given a proportional share of this buffer. This setting also 
determines the
+   * size of RocksDB's segment files.
+   * <p>
+   * Default value is 33,554,432.
+   * <p>
    * Refer to <code>stores.store-name.container.write.buffer.size.bytes</code> 
in Samza configuration guide
+   *
    * @param writeBufferSize the write buffer size in bytes
    * @return this table descriptor instance
    */
@@ -115,7 +150,21 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
+   * Controls whether RocksDB should compress data on disk and in the block 
cache.
+   * The following values are valid:
+   * <ul>
+   *   <li><b>snappy</b> Compress data using the <a 
href="https://code.google.com/p/snappy/">Snappy</a> codec.
+   *   <li><b>bzip2</b> Compress data using the <a 
href="http://en.wikipedia.org/wiki/Bzip2">bzip2</a> codec.
+   *   <li><b>zlib</b> Compress data using the <a 
href="http://en.wikipedia.org/wiki/Zlib">zlib</a> codec.
+   *   <li><b>lz4</b> Compress data using the <a 
href="https://code.google.com/p/lz4/">lz4</a> codec.
+   *   <li><b>lz4hc</b> Compress data using the <a 
href="https://code.google.com/p/lz4/">lz4hc</a> (high compression) codec.
+   *   <li><b>none</b> Do not compress data.
+   * </ul>
+   * <p>
+   * Default value is snappy.
+   * <p>
    * Refer to <code>stores.store-name.rocksdb.compression</code> in Samza 
configuration guide
+   *
    * @param compressionType the compression type
    * @return this table descriptor instance
    */
@@ -125,7 +174,13 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
+   * If compression is enabled, RocksDB groups approximately this many 
uncompressed
+   * bytes into one compressed block. You probably don't need to change this 
property.
+   * <p>
+   * Default value is 4,096.
+   * <p>
    * Refer to <code>stores.store-name.rocksdb.block.size.bytes</code> in Samza 
configuration guide
+   *
    * @param blockSize the block size in bytes
    * @return this table descriptor instance
    */
@@ -135,7 +190,14 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
+   * The time-to-live of the store. Please note it's not a strict TTL limit 
(removed
+   * only after compaction). Please use caution opening a database with and 
without
+   * TTL, as it might corrupt the database. Please make sure to read the
+   * <a 
href="https://github.com/facebook/rocksdb/wiki/Time-to-Live">constraints</a>
+   * before using.
+   * <p>
    * Refer to <code>stores.store-name.rocksdb.ttl.ms</code> in Samza 
configuration guide
+   *
    * @param ttl the time to live in milliseconds
    * @return this table descriptor instance
    */
@@ -145,7 +207,18 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
+   * This property controls the compaction style that RocksDB will employ when 
compacting
+   * its levels. The following values are valid:
+   * <ul>
+   *   <li><b>universal</b> Use <a 
href="https://github.com/facebook/rocksdb/wiki/Universal-Compaction">universal</a>
 compaction.
+   *   <li><b>fifo</b> Use <a 
href="https://github.com/facebook/rocksdb/wiki/FIFO-compaction-style">FIFO</a> 
compaction.
+   *   <li><b>level</b> Use RocksDB's standard leveled compaction.
+   * </ul>
+   * <p>
+   * Default value is universal.
+   * <p>
    * Refer to <code>stores.store-name.rocksdb.compaction.style</code> in Samza 
configuration guide
+   *
    * @param compactionStyle the compaction style
    * @return this table descriptor instance
    */
@@ -155,7 +228,16 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
+   * Configures the
+   * <a 
href="https://github.com/facebook/rocksdb/wiki/Basic-Operations#write-buffer">
+   * number of write buffers</a> that a RocksDB store uses. This allows RocksDB
+   * to continue taking writes to other buffers even while a given write 
buffer is being
+   * flushed to disk.
+   * <p>
+   * Default value is 3.
+   * <p>
    * Refer to <code>stores.store-name.rocksdb.num.write.buffers</code> in 
Samza configuration guide
+   *
    * @param numWriteBuffers the number of write buffers
    * @return this table descriptor instance
    */
@@ -165,7 +247,12 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
+   * The maximum size in bytes of the RocksDB LOG file before it is rotated.
+   * <p>
+   * Default value is 67,108,864.
+   * <p>
    * Refer to <code>stores.store-name.rocksdb.max.log.file.size.bytes</code> 
in Samza configuration guide
+   *
    * @param maxLogFileSize the maximal log file size in bytes
    * @return this table descriptor instance
    */
@@ -175,7 +262,12 @@ public class RocksDbTableDescriptor<K, V> extends 
BaseLocalStoreBackedTableDescr
   }
 
   /**
-   * Refer to <code>stores.store-name.rocksdb.num.write.buffers</code> in 
Samza configuration guide
+   * The number of RocksDB LOG files (including rotated LOG.old.* files) to 
keep.
+   * <p>
+   * Default value is 2.
+   * <p>
+   * Refer to <code>stores.store-name.rocksdb.keep.log.file.num</code> in 
Samza configuration guide
+   *
    * @param numLogFilesToKeep the number of log files to keep
    * @return this table descriptor instance
    */

http://git-wip-us.apache.org/repos/asf/samza/blob/cc490ea8/samza-kv/src/main/java/org/apache/samza/storage/kv/BaseLocalStoreBackedTableDescriptor.java
----------------------------------------------------------------------
diff --git 
a/samza-kv/src/main/java/org/apache/samza/storage/kv/BaseLocalStoreBackedTableDescriptor.java
 
b/samza-kv/src/main/java/org/apache/samza/storage/kv/BaseLocalStoreBackedTableDescriptor.java
index 96057d6..84e5fbe 100644
--- 
a/samza-kv/src/main/java/org/apache/samza/storage/kv/BaseLocalStoreBackedTableDescriptor.java
+++ 
b/samza-kv/src/main/java/org/apache/samza/storage/kv/BaseLocalStoreBackedTableDescriptor.java
@@ -50,7 +50,7 @@ abstract public class BaseLocalStoreBackedTableDescriptor<K, 
V, D extends BaseLo
 
   /**
    * Constructs a table descriptor instance
-   * @param tableId Id of the table, it must confirm to pattern { @literal 
[\\d\\w-_]+ }
+   * @param tableId Id of the table, it must conform to pattern {@literal 
[\\d\\w-_]+}
    */
   public BaseLocalStoreBackedTableDescriptor(String tableId) {
     super(tableId);
@@ -58,7 +58,7 @@ abstract public class BaseLocalStoreBackedTableDescriptor<K, 
V, D extends BaseLo
 
   /**
    * Constructs a table descriptor instance
-   * @param tableId Id of the table, it must confirm to pattern { @literal 
[\\d\\w-_]+ }
+   * @param tableId Id of the table, it must conform to pattern {@literal 
[\\d\\w-_]+}
    * @param serde the serde for key and value
    */
   public BaseLocalStoreBackedTableDescriptor(String tableId, KVSerde<K, V> 
serde) {
@@ -82,7 +82,7 @@ abstract public class BaseLocalStoreBackedTableDescriptor<K, 
V, D extends BaseLo
   /**
    * Enable changelog for this table, by default changelog is disabled. When 
the
    * changelog stream name is not specified, it is automatically generated in
-   * the format { @literal [job-name]-[job-id]-table-[table-id] }.
+   * the format {@literal [job-name]-[job-id]-table-[table-id]}.
    * Refer to <code>stores.store-name.changelog</code> in Samza configuration 
guide
    *
    * @return this table descriptor instance
@@ -93,6 +93,16 @@ abstract public class BaseLocalStoreBackedTableDescriptor<K, 
V, D extends BaseLo
   }
 
   /**
+   * Samza stores are local to a container. If the container fails, the 
contents of
+   * the store are lost. To prevent loss of data, you need to set this 
property to
+   * configure a changelog stream: Samza then ensures that writes to the store 
are
+   * replicated to this stream, and the store is restored from this stream 
after a
+   * failure. The value of this property is given in the form 
system-name.stream-name.
+   * The "system-name" part is optional. If it is omitted you must specify the 
system
+   * in <code>job.changelog.system</code> config. Any output stream can be 
used as
+   * changelog, but you must ensure that only one job ever writes to a given 
changelog
+   * stream (each instance of a job and each store needs its own changelog 
stream).
+   * <p>
    * Refer to <code>stores.store-name.changelog</code> in Samza configuration 
guide
    *
    * @param changelogStream changelog stream name
@@ -105,6 +115,10 @@ abstract public class 
BaseLocalStoreBackedTableDescriptor<K, V, D extends BaseLo
   }
 
   /**
+   * The property defines the number of replicas to use for the change log 
stream.
+   * <p>
+   * Default value is <code>stores.default.changelog.replication.factor</code>.
+   * <p>
    * Refer to <code>stores.store-name.changelog.replication.factor</code> in 
Samza configuration guide
    *
    * @param replicationFactor replication factor

Reply via email to