Repository: samza Updated Branches: refs/heads/master 1eb4c2663 -> cc490ea89
SAMZA-1891: Updated javadoc to include config behavior Instead of referring users to the Samza Configuration Guide, TableDescriptors should copy the documentation instead. If the goal is to let users configure their job primarily in code, it's simpler for them to see javadocs in their IDE than referring to a separate web page. Author: Wei Song <[email protected]> Reviewers: Peng Du <[email protected]> Closes #709 from weisong44/SAMZA-1891 and squashes the following commits: 9f98e1aa [Wei Song] Merge branch 'master' into SAMZA-1891 2c679c39 [Wei Song] Merge remote-tracking branch 'upstream/master' 85081fb1 [Wei Song] SAMZA-1891: Updated javadoc to include config behavior a56c28dc [Wei Song] Merge remote-tracking branch 'upstream/master' 097958c8 [Wei Song] Merge remote-tracking branch 'upstream/master' 05822f0a [Wei Song] Merge remote-tracking branch 'upstream/master' f7480505 [Wei Song] Merge remote-tracking branch 'upstream/master' 7706ab1f [Wei Song] Merge remote-tracking branch 'upstream/master' f5731b10 [Wei Song] Merge remote-tracking branch 'upstream/master' 1e5de45a [Wei Song] Merge remote-tracking branch 'upstream/master' c85604e0 [Wei Song] Merge remote-tracking branch 'upstream/master' 242d8442 [Wei Song] Merge remote-tracking branch 'upstream/master' ec7d8409 [Wei Song] Merge remote-tracking branch 'upstream/master' e19b4dc9 [Wei Song] Merge remote-tracking branch 'upstream/master' 8ee78441 [Wei Song] Merge remote-tracking branch 'upstream/master' 1c6a2eae [Wei Song] Merge remote-tracking branch 'upstream/master' a6c94add [Wei Song] Merge remote-tracking branch 'upstream/master' 41299b5b [Wei Song] Merge remote-tracking branch 'upstream/master' 239a0950 [Wei Song] Merge remote-tracking branch 'upstream/master' eca00204 [Wei Song] Merge remote-tracking branch 'upstream/master' 51562391 [Wei Song] Merge remote-tracking branch 'upstream/master' de708f5e [Wei Song] Merge remote-tracking branch 'upstream/master' df2f8d7b [Wei Song] Merge remote-tracking branch 'upstream/master' f28b491d [Wei Song] Merge remote-tracking branch 'upstream/master' 4782c61d [Wei Song] Merge remote-tracking branch 'upstream/master' 0440f75f [Wei Song] Merge remote-tracking branch 'upstream/master' aae0f380 [Wei Song] Merge remote-tracking branch 'upstream/master' a15a7c9a [Wei Song] Merge remote-tracking branch 'upstream/master' 5cbf9af9 [Wei Song] Merge remote-tracking branch 'upstream/master' 3f7ed71f [Wei Song] Added self to committer list Project: http://git-wip-us.apache.org/repos/asf/samza/repo Commit: http://git-wip-us.apache.org/repos/asf/samza/commit/cc490ea8 Tree: http://git-wip-us.apache.org/repos/asf/samza/tree/cc490ea8 Diff: http://git-wip-us.apache.org/repos/asf/samza/diff/cc490ea8 Branch: refs/heads/master Commit: cc490ea89d571809501785834f81ffdcb7b508e7 Parents: 1eb4c26 Author: Wei Song <[email protected]> Authored: Thu Oct 11 15:28:30 2018 -0700 Committer: Wei Song <[email protected]> Committed: Thu Oct 11 15:28:30 2018 -0700 ---------------------------------------------------------------------- .../samza/operators/BaseTableDescriptor.java | 10 +- .../table/remote/RemoteTableDescriptor.java | 4 +- .../kv/inmemory/InMemoryTableDescriptor.java | 4 +- .../storage/kv/RocksDbTableDescriptor.java | 98 +++++++++++++++++++- .../kv/BaseLocalStoreBackedTableDescriptor.java | 20 +++- 5 files changed, 124 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/samza/blob/cc490ea8/samza-core/src/main/java/org/apache/samza/operators/BaseTableDescriptor.java ---------------------------------------------------------------------- diff --git a/samza-core/src/main/java/org/apache/samza/operators/BaseTableDescriptor.java b/samza-core/src/main/java/org/apache/samza/operators/BaseTableDescriptor.java index 1830d1c..dd47af2 100644 --- a/samza-core/src/main/java/org/apache/samza/operators/BaseTableDescriptor.java +++ b/samza-core/src/main/java/org/apache/samza/operators/BaseTableDescriptor.java @@ -45,7 +45,7 @@ abstract public class BaseTableDescriptor<K, V, D extends BaseTableDescriptor<K, /** * Constructs a table descriptor instance - * @param tableId Id of the table, it must confirm to pattern { @literal [\\d\\w-_]+ } + * @param tableId Id of the table, it must conform to pattern {@literal [\\d\\w-_]+} */ protected BaseTableDescriptor(String tableId) { this.tableId = tableId; @@ -53,7 +53,7 @@ abstract public class BaseTableDescriptor<K, V, D extends BaseTableDescriptor<K, /** * Constructs a table descriptor instance - * @param tableId Id of the table, it must confirm to pattern { @literal [\\d\\w-_]+ } + * @param tableId Id of the table, it must conform to pattern {@literal [\\d\\w-_]+} * @param serde the serde for key and value */ protected BaseTableDescriptor(String tableId, KVSerde<K, V> serde) { @@ -61,12 +61,18 @@ abstract public class BaseTableDescriptor<K, V, D extends BaseTableDescriptor<K, this.serde = serde; } + /** + * {@inheritDoc} + */ @Override public D withConfig(String key, String value) { config.put(key, value); return (D) this; } + /** + * {@inheritDoc} + */ @Override public String getTableId() { return tableId; http://git-wip-us.apache.org/repos/asf/samza/blob/cc490ea8/samza-core/src/main/java/org/apache/samza/table/remote/RemoteTableDescriptor.java ---------------------------------------------------------------------- diff --git a/samza-core/src/main/java/org/apache/samza/table/remote/RemoteTableDescriptor.java b/samza-core/src/main/java/org/apache/samza/table/remote/RemoteTableDescriptor.java index c31348f..fe01e6b 100644 --- a/samza-core/src/main/java/org/apache/samza/table/remote/RemoteTableDescriptor.java +++ b/samza-core/src/main/java/org/apache/samza/table/remote/RemoteTableDescriptor.java @@ -81,7 +81,7 @@ public class RemoteTableDescriptor<K, V> extends BaseTableDescriptor<K, V, Remot /** * Constructs a table descriptor instance - * @param tableId Id of the table, it must confirm to pattern { @literal [\\d\\w-_]+ } + * @param tableId Id of the table, it must conform to pattern {@literal [\\d\\w-_]+} */ public RemoteTableDescriptor(String tableId) { super(tableId); @@ -89,7 +89,7 @@ public class RemoteTableDescriptor<K, V> extends BaseTableDescriptor<K, V, Remot /** * Constructs a table descriptor instance - * @param tableId Id of the table, it must confirm to pattern { @literal [\\d\\w-_]+ } + * @param tableId Id of the table, it must conform to pattern {@literal [\\d\\w-_]+} * @param serde the serde for key and value */ public RemoteTableDescriptor(String tableId, KVSerde<K, V> serde) { http://git-wip-us.apache.org/repos/asf/samza/blob/cc490ea8/samza-kv-inmemory/src/main/java/org/apache/samza/storage/kv/inmemory/InMemoryTableDescriptor.java ---------------------------------------------------------------------- diff --git a/samza-kv-inmemory/src/main/java/org/apache/samza/storage/kv/inmemory/InMemoryTableDescriptor.java b/samza-kv-inmemory/src/main/java/org/apache/samza/storage/kv/inmemory/InMemoryTableDescriptor.java index d364234..d052c99 100644 --- a/samza-kv-inmemory/src/main/java/org/apache/samza/storage/kv/inmemory/InMemoryTableDescriptor.java +++ b/samza-kv-inmemory/src/main/java/org/apache/samza/storage/kv/inmemory/InMemoryTableDescriptor.java @@ -36,7 +36,7 @@ public class InMemoryTableDescriptor<K, V> extends BaseLocalStoreBackedTableDesc /** * Constructs a table descriptor instance - * @param tableId Id of the table, it must confirm to pattern { @literal [\\d\\w-_]+ } + * @param tableId Id of the table, it must conform to pattern {@literal [\\d\\w-_]+} */ public InMemoryTableDescriptor(String tableId) { super(tableId); @@ -44,7 +44,7 @@ public class InMemoryTableDescriptor<K, V> extends BaseLocalStoreBackedTableDesc /** * Constructs a table descriptor instance - * @param tableId Id of the table, it must confirm to pattern { @literal [\\d\\w-_]+ } + * @param tableId Id of the table, it must conform to pattern {@literal [\\d\\w-_]+} * @param serde the serde for key and value */ public InMemoryTableDescriptor(String tableId, KVSerde<K, V> serde) { http://git-wip-us.apache.org/repos/asf/samza/blob/cc490ea8/samza-kv-rocksdb/src/main/java/org/apache/samza/storage/kv/RocksDbTableDescriptor.java ---------------------------------------------------------------------- diff --git a/samza-kv-rocksdb/src/main/java/org/apache/samza/storage/kv/RocksDbTableDescriptor.java b/samza-kv-rocksdb/src/main/java/org/apache/samza/storage/kv/RocksDbTableDescriptor.java index 325d023..50bc2c2 100644 --- a/samza-kv-rocksdb/src/main/java/org/apache/samza/storage/kv/RocksDbTableDescriptor.java +++ b/samza-kv-rocksdb/src/main/java/org/apache/samza/storage/kv/RocksDbTableDescriptor.java @@ -59,7 +59,7 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr /** * Constructs a table descriptor instance - * @param tableId Id of the table, it must confirm to pattern { @literal [\\d\\w-_]+ } + * @param tableId Id of the table, it must conform to pattern {@literal [\\d\\w-_]+} */ public RocksDbTableDescriptor(String tableId) { super(tableId); @@ -67,7 +67,7 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr /** * Constructs a table descriptor instance - * @param tableId Id of the table, it must confirm to pattern { @literal [\\d\\w-_]+ } + * @param tableId Id of the table, it must conform to pattern {@literal [\\d\\w-_]+} * @param serde the serde for key and value */ public RocksDbTableDescriptor(String tableId, KVSerde<K, V> serde) { @@ -75,7 +75,16 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** + * For better write performance, the storage engine buffers writes and applies them to the + * underlying store in a batch. If the same key is written multiple times in quick succession, + * this buffer also deduplicates writes to the same key. This property is set to the number + * of key/value pairs that should be kept in this in-memory buffer, per task instance. + * The number cannot be greater than {@link #withObjectCacheSize}. + * <p> + * Default value is 500. + * <p> * Refer to <code>stores.store-name.write.batch.size</code> in Samza configuration guide + * * @param writeBatchSize write batch size * @return this table descriptor instance */ @@ -85,7 +94,17 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** + * Samza maintains an additional cache in front of RocksDB for frequently-accessed objects. + * This cache contains deserialized objects (avoiding the deserialization overhead on cache + * hits), in contrast to the RocksDB block cache ({@link #withCacheSize}), which caches + * serialized objects. This property determines the number of objects to keep in Samza's + * cache, per task instance. This same cache is also used for write buffering + * (see {@link #withWriteBatchSize}). A value of 0 disables all caching and batching. + * <p> + * Default value is 1,000. + * <p> * Refer to <code>stores.store-name.object.cache.size</code> in Samza configuration guide + * * @param objectCacheSize the object cache size * @return this table descriptor instance */ @@ -95,7 +114,15 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** + * The size of RocksDB's block cache in bytes, per container. If there are several task + * instances within one container, each is given a proportional share of this cache. + * Note that this is an off-heap memory allocation, so the container's total memory + * use is the maximum JVM heap size plus the size of this cache. + * <p> + * Default value is 104,857,600. + * <p> * Refer to <code>stores.store-name.container.cache.size.bytes</code> in Samza configuration guide + * * @param cacheSize the cache size in bytes * @return this table descriptor instance */ @@ -105,7 +132,15 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** + * The amount of memory (in bytes) that RocksDB uses for buffering writes before they are + * written to disk, per container. If there are several task instances within one container, + * each is given a proportional share of this buffer. This setting also determines the + * size of RocksDB's segment files. + * <p> + * Default value is 33,554,432. + * <p> * Refer to <code>stores.store-name.container.write.buffer.size.bytes</code> in Samza configuration guide + * * @param writeBufferSize the write buffer size in bytes * @return this table descriptor instance */ @@ -115,7 +150,21 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** + * Controls whether RocksDB should compress data on disk and in the block cache. + * The following values are valid: + * <ul> + * <li><b>snappy</b> Compress data using the <a href="https://code.google.com/p/snappy/">Snappy</a> codec. + * <li><b>bzip2</b> Compress data using the <a href="http://en.wikipedia.org/wiki/Bzip2">bzip2</a> codec. + * <li><b>zlib</b> Compress data using the <a href="http://en.wikipedia.org/wiki/Zlib">zlib</a> codec. + * <li><b>lz4</b> Compress data using the <a href="https://code.google.com/p/lz4/">lz4</a> codec. + * <li><b>lz4hc</b> Compress data using the <a href="https://code.google.com/p/lz4/">lz4hc</a> (high compression) codec. + * <li><b>none</b> Do not compress data. + * </ul> + * <p> + * Default value is snappy. + * <p> * Refer to <code>stores.store-name.rocksdb.compression</code> in Samza configuration guide + * * @param compressionType the compression type * @return this table descriptor instance */ @@ -125,7 +174,13 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** + * If compression is enabled, RocksDB groups approximately this many uncompressed + * bytes into one compressed block. You probably don't need to change this property. + * <p> + * Default value is 4,096. + * <p> * Refer to <code>stores.store-name.rocksdb.block.size.bytes</code> in Samza configuration guide + * * @param blockSize the block size in bytes * @return this table descriptor instance */ @@ -135,7 +190,14 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** + * The time-to-live of the store. Please note it's not a strict TTL limit (removed + * only after compaction). Please use caution opening a database with and without + * TTL, as it might corrupt the database. Please make sure to read the + * <a href="https://github.com/facebook/rocksdb/wiki/Time-to-Live">constraints</a> + * before using. + * <p> * Refer to <code>stores.store-name.rocksdb.ttl.ms</code> in Samza configuration guide + * * @param ttl the time to live in milliseconds * @return this table descriptor instance */ @@ -145,7 +207,18 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** + * This property controls the compaction style that RocksDB will employ when compacting + * its levels. The following values are valid: + * <ul> + * <li><b>universal</b> Use <a href="https://github.com/facebook/rocksdb/wiki/Universal-Compaction">universal</a> compaction. + * <li><b>fifo</b> Use <a href="https://github.com/facebook/rocksdb/wiki/FIFO-compaction-style">FIFO</a> compaction. + * <li><b>level</b> Use RocksDB's standard leveled compaction. + * </ul> + * <p> + * Default value is universal. + * <p> * Refer to <code>stores.store-name.rocksdb.compaction.style</code> in Samza configuration guide + * * @param compactionStyle the compaction style * @return this table descriptor instance */ @@ -155,7 +228,16 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** + * Configures the + * <a href="https://github.com/facebook/rocksdb/wiki/Basic-Operations#write-buffer"> + * number of write buffers</a> that a RocksDB store uses. This allows RocksDB + * to continue taking writes to other buffers even while a given write buffer is being + * flushed to disk. + * <p> + * Default value is 3. + * <p> * Refer to <code>stores.store-name.rocksdb.num.write.buffers</code> in Samza configuration guide + * * @param numWriteBuffers the number of write buffers * @return this table descriptor instance */ @@ -165,7 +247,12 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** + * The maximum size in bytes of the RocksDB LOG file before it is rotated. + * <p> + * Default value is 67,108,864. + * <p> * Refer to <code>stores.store-name.rocksdb.max.log.file.size.bytes</code> in Samza configuration guide + * * @param maxLogFileSize the maximal log file size in bytes * @return this table descriptor instance */ @@ -175,7 +262,12 @@ public class RocksDbTableDescriptor<K, V> extends BaseLocalStoreBackedTableDescr } /** - * Refer to <code>stores.store-name.rocksdb.num.write.buffers</code> in Samza configuration guide + * The number of RocksDB LOG files (including rotated LOG.old.* files) to keep. + * <p> + * Default value is 2. + * <p> + * Refer to <code>stores.store-name.rocksdb.keep.log.file.num</code> in Samza configuration guide + * * @param numLogFilesToKeep the number of log files to keep * @return this table descriptor instance */ http://git-wip-us.apache.org/repos/asf/samza/blob/cc490ea8/samza-kv/src/main/java/org/apache/samza/storage/kv/BaseLocalStoreBackedTableDescriptor.java ---------------------------------------------------------------------- diff --git a/samza-kv/src/main/java/org/apache/samza/storage/kv/BaseLocalStoreBackedTableDescriptor.java b/samza-kv/src/main/java/org/apache/samza/storage/kv/BaseLocalStoreBackedTableDescriptor.java index 96057d6..84e5fbe 100644 --- a/samza-kv/src/main/java/org/apache/samza/storage/kv/BaseLocalStoreBackedTableDescriptor.java +++ b/samza-kv/src/main/java/org/apache/samza/storage/kv/BaseLocalStoreBackedTableDescriptor.java @@ -50,7 +50,7 @@ abstract public class BaseLocalStoreBackedTableDescriptor<K, V, D extends BaseLo /** * Constructs a table descriptor instance - * @param tableId Id of the table, it must confirm to pattern { @literal [\\d\\w-_]+ } + * @param tableId Id of the table, it must conform to pattern {@literal [\\d\\w-_]+} */ public BaseLocalStoreBackedTableDescriptor(String tableId) { super(tableId); @@ -58,7 +58,7 @@ abstract public class BaseLocalStoreBackedTableDescriptor<K, V, D extends BaseLo /** * Constructs a table descriptor instance - * @param tableId Id of the table, it must confirm to pattern { @literal [\\d\\w-_]+ } + * @param tableId Id of the table, it must conform to pattern {@literal [\\d\\w-_]+} * @param serde the serde for key and value */ public BaseLocalStoreBackedTableDescriptor(String tableId, KVSerde<K, V> serde) { @@ -82,7 +82,7 @@ abstract public class BaseLocalStoreBackedTableDescriptor<K, V, D extends BaseLo /** * Enable changelog for this table, by default changelog is disabled. When the * changelog stream name is not specified, it is automatically generated in - * the format { @literal [job-name]-[job-id]-table-[table-id] }. + * the format {@literal [job-name]-[job-id]-table-[table-id]}. * Refer to <code>stores.store-name.changelog</code> in Samza configuration guide * * @return this table descriptor instance @@ -93,6 +93,16 @@ abstract public class BaseLocalStoreBackedTableDescriptor<K, V, D extends BaseLo } /** + * Samza stores are local to a container. If the container fails, the contents of + * the store are lost. To prevent loss of data, you need to set this property to + * configure a changelog stream: Samza then ensures that writes to the store are + * replicated to this stream, and the store is restored from this stream after a + * failure. The value of this property is given in the form system-name.stream-name. + * The "system-name" part is optional. If it is omitted you must specify the system + * in <code>job.changelog.system</code> config. Any output stream can be used as + * changelog, but you must ensure that only one job ever writes to a given changelog + * stream (each instance of a job and each store needs its own changelog stream). + * <p> * Refer to <code>stores.store-name.changelog</code> in Samza configuration guide * * @param changelogStream changelog stream name @@ -105,6 +115,10 @@ abstract public class BaseLocalStoreBackedTableDescriptor<K, V, D extends BaseLo } /** + * The property defines the number of replicas to use for the change log stream. + * <p> + * Default value is <code>stores.default.changelog.replication.factor</code>. + * <p> * Refer to <code>stores.store-name.changelog.replication.factor</code> in Samza configuration guide * * @param replicationFactor replication factor
