[beam] 01/01: Update python containers to beam-master-20190605
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit 7c2eb17f46c105cded5a0035a4d8f439998f563d Merge: 2b74933 8097972 Author: Lukasz Cwik AuthorDate: Thu Jun 6 09:55:42 2019 -0700 Update python containers to beam-master-20190605 sdks/python/apache_beam/runners/dataflow/internal/names.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
[beam] branch master updated (2b74933 -> 7c2eb17)
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 2b74933 Merge pull request #8748 from ttanay/it-streaming-inserts add 8097972 Update python containers to beam-master-20190605 new 7c2eb17 Update python containers to beam-master-20190605 The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: sdks/python/apache_beam/runners/dataflow/internal/names.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
[beam] branch master updated: [BEAM-6356] Add the option to use TFRecord to store cache results using PCollection's PCoder (#8687)
This is an automated email from the ASF dual-hosted git repository. altay pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new ae3a1f2 [BEAM-6356] Add the option to use TFRecord to store cache results using PCollection's PCoder (#8687) ae3a1f2 is described below commit ae3a1f23e2b59c0cad743de416aa3809516f9554 Author: Alexey Strokach AuthorDate: Thu Jun 6 12:16:20 2019 -0700 [BEAM-6356] Add the option to use TFRecord to store cache results using PCollection's PCoder (#8687) * Use TFRecord to store intermediate cache results using PCollection's PCoder. * Add optional support for TFRecord as a cache serialization format * Rename _Reader and _Writer to _reader_class and _writer_class * Clarify the return type of the CacheManager.read method --- .../runners/interactive/cache_manager.py | 88 ++ .../runners/interactive/cache_manager_test.py | 36 +++-- .../runners/interactive/interactive_runner.py | 9 ++- 3 files changed, 113 insertions(+), 20 deletions(-) diff --git a/sdks/python/apache_beam/runners/interactive/cache_manager.py b/sdks/python/apache_beam/runners/interactive/cache_manager.py index e8816fe..20d84e3 100644 --- a/sdks/python/apache_beam/runners/interactive/cache_manager.py +++ b/sdks/python/apache_beam/runners/interactive/cache_manager.py @@ -28,6 +28,8 @@ import urllib import apache_beam as beam from apache_beam import coders from apache_beam.io import filesystems +from apache_beam.io import textio +from apache_beam.io import tfrecordio from apache_beam.transforms import combiners try:# Python 3 @@ -62,9 +64,12 @@ class CacheManager(object): def read(self, *labels): """Return the PCollection as a list as well as the version number. +Args: + *labels: List of labels for PCollection instance. 
+ Returns: - (List[PCollection]) - (int) the version number + Tuple[List[Any], int]: A tuple containing a list of items in the +PCollection and the version number. It is possible that the version numbers from read() and_latest_version() are different. This usually means that the cache's been evicted (thus @@ -81,6 +86,25 @@ class CacheManager(object): """Returns a beam.io.Sink that writes the PCollection cache.""" raise NotImplementedError + def save_pcoder(self, pcoder, *labels): +"""Saves pcoder for given PCollection. + +Correct reading of PCollection from Cache requires PCoder to be known. +This method saves desired PCoder for PCollection that will subsequently +be used by sink(...), source(...), and, most importantly, read(...) method. +The latter must be able to read a PCollection written by Beam using +non-Beam IO. + +Args: + pcoder: A PCoder to be used for reading and writing a PCollection. + *labels: List of labels for PCollection instance. +""" +raise NotImplementedError + + def load_pcoder(self, *labels): +"""Returns previously saved PCoder for reading and writing PCollection.""" +raise NotImplementedError + def cleanup(self): """Cleans up all the PCollection caches.""" raise NotImplementedError @@ -89,7 +113,12 @@ class CacheManager(object): class FileBasedCacheManager(CacheManager): """Maps PCollections to local temp files for materialization.""" - def __init__(self, cache_dir=None): + _available_formats = { + 'text': (textio.ReadFromText, textio.WriteToText), + 'tfrecord': (tfrecordio.ReadFromTFRecord, tfrecordio.WriteToTFRecord) + } + + def __init__(self, cache_dir=None, cache_format='text'): if cache_dir: self._cache_dir = filesystems.FileSystems.join( cache_dir, @@ -99,6 +128,25 @@ class FileBasedCacheManager(CacheManager): prefix='interactive-temp-', dir=os.environ.get('TEST_TMPDIR', None)) self._versions = collections.defaultdict(lambda: self._CacheVersion()) +if cache_format not in self._available_formats: + raise ValueError("Unsupported cache 
format: '%s'." % cache_format) +self._reader_class, self._writer_class = self._available_formats[ +cache_format] +self._default_pcoder = ( +SafeFastPrimitivesCoder() if cache_format == 'text' else None) + +# List of saved pcoders keyed by PCollection path. It is OK to keep this +# list in memory because once FileBasedCacheManager object is +# destroyed/re-created it loses the access to previously written cache +# objects anyways even if cache_dir already exists. In other words, +# it is not possible to resume execution of Beam pipeline from the +# saved cache if FileBasedCacheManager has been reset. +# +# However, if we are to implement better cache persistence, one needs +# to take care of keeping consistency between the cached PCollection +# and its PCoder
[beam] 01/01: Merge pull request #8777: [BEAM-7493] Use artifactId as archivesBaseName for vendor modules
This is an automated email from the ASF dual-hosted git repository. kenn pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit 907d26941f469f470291914596389a6c4187b4b5 Merge: ae3a1f2 e8ecea0 Author: Kenn Knowles AuthorDate: Thu Jun 6 12:57:37 2019 -0700 Merge pull request #8777: [BEAM-7493] Use artifactId as archivesBaseName for vendor modules .../src/main/groovy/org/apache/beam/gradle/VendorJavaPlugin.groovy | 3 +++ 1 file changed, 3 insertions(+)
[beam] branch master updated (ae3a1f2 -> 907d269)
This is an automated email from the ASF dual-hosted git repository. kenn pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from ae3a1f2 [BEAM-6356] Add the option to use TFRecord to store cache results using PCollection's PCoder (#8687) add e8ecea0 Use artifactId as archivesBaseName for vendor modules new 907d269 Merge pull request #8777: [BEAM-7493] Use artifactId as archivesBaseName for vendor modules The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../src/main/groovy/org/apache/beam/gradle/VendorJavaPlugin.groovy | 3 +++ 1 file changed, 3 insertions(+)
[beam] 01/01: Merge pull request #8736: Perform some simple clean-up for the tornadoes demo
This is an automated email from the ASF dual-hosted git repository. chamikara pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit 3928a55ca71fe8b6d4cd8ae87639b965aaad51f0 Merge: 907d269 6315d6f Author: Chamikara Jayalath AuthorDate: Thu Jun 6 13:35:24 2019 -0700 Merge pull request #8736: Perform some simple clean-up for the tornadoes demo .../beam/examples/cookbook/BigQueryTornadoes.java | 44 -- 1 file changed, 24 insertions(+), 20 deletions(-)
[beam] branch master updated (907d269 -> 3928a55)
This is an automated email from the ASF dual-hosted git repository. chamikara pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 907d269 Merge pull request #8777: [BEAM-7493] Use artifactId as archivesBaseName for vendor modules add 6315d6f Performs some simple clean-up for the tornadoes demo. new 3928a55 Merge pull request #8736: Perform some simple clean-up for the tornadoes demo The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../beam/examples/cookbook/BigQueryTornadoes.java | 44 -- 1 file changed, 24 insertions(+), 20 deletions(-)
[beam] branch master updated (3928a55 -> 9f46698)
This is an automated email from the ASF dual-hosted git repository. kenn pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 3928a55 Merge pull request #8736: Perform some simple clean-up for the tornadoes demo add af9c5dc Add explicit archivesBaseName to applyJavaNature and applyPortabilityNature add 9f46698 Merge pull request #8767: [BEAM-7493] Use specified archivesBaseName for project dependencies No new revisions were added by this update. Summary of changes: .../org/apache/beam/gradle/BeamModulePlugin.groovy | 23 +++--- runners/flink/flink_runner.gradle | 5 +++-- runners/flink/job-server/flink_job_server.gradle | 2 +- .../google-cloud-dataflow-java/worker/build.gradle | 3 +-- .../worker/legacy-worker/build.gradle | 3 +-- .../worker/windmill/build.gradle | 6 -- runners/local-java/build.gradle| 6 +++--- sdks/java/extensions/jackson/build.gradle | 5 +++-- .../elasticsearch-tests-2/build.gradle | 5 +++-- .../elasticsearch-tests-5/build.gradle | 5 +++-- .../elasticsearch-tests-6/build.gradle | 5 +++-- .../elasticsearch-tests-common/build.gradle| 5 +++-- sdks/java/testing/load-tests/build.gradle | 6 -- sdks/java/testing/nexmark/build.gradle | 7 +-- sdks/java/testing/test-utils/build.gradle | 6 -- 15 files changed, 61 insertions(+), 31 deletions(-)
[beam] branch master updated (9f46698 -> d783082)
This is an automated email from the ASF dual-hosted git repository. kenn pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 9f46698 Merge pull request #8767: [BEAM-7493] Use specified archivesBaseName for project dependencies add 2305904 TfIdfTest should not be marked ValidatesRunner add d783082 : [BEAM-7490] TfIdfTest should not be marked ValidatesRunner from kennknowles/tfidf No new revisions were added by this update. Summary of changes: .../src/test/java/org/apache/beam/examples/complete/TfIdfTest.java | 3 --- 1 file changed, 3 deletions(-)
[beam] branch master updated: [BEAM-7499] Skip test since it doesn't work on DirectRunner.
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 6ffd60e [BEAM-7499] Skip test since it doesn't work on DirectRunner. new 67e3c34 Merge pull request #8779 from lukecwik/beam7499 6ffd60e is described below commit 6ffd60e2112d780d65aa6080f2a49572618d4e81 Author: Luke Cwik AuthorDate: Thu Jun 6 13:33:52 2019 -0700 [BEAM-7499] Skip test since it doesn't work on DirectRunner. --- sdks/python/apache_beam/transforms/util_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdks/python/apache_beam/transforms/util_test.py b/sdks/python/apache_beam/transforms/util_test.py index 3a18487..b655f66 100644 --- a/sdks/python/apache_beam/transforms/util_test.py +++ b/sdks/python/apache_beam/transforms/util_test.py @@ -485,6 +485,7 @@ class ReifyTest(unittest.TestCase): reified_pc = pc | util.Reify.Timestamp() assert_that(reified_pc, equal_to(expected), reify_windows=True) + @unittest.skip('BEAM-7499') def test_window(self): l = [GlobalWindows.windowed_value('a', 100), GlobalWindows.windowed_value('b', 200),
[beam] branch master updated (67e3c34 -> b26e622)
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from 67e3c34 Merge pull request #8779 from lukecwik/beam7499 add 1de0517 [BEAM-6620, BEAM-3606] Stop shading by default. add 44b042e fixup! Fix sdks/java/container Kafka dependency add 163dbe0 fixup! Fixup JDBC gradle build to use shaded configuration for shaded project. add 2c95ac1 Fix Spark ValidatesRunner since the shadowTest classpath before didn't include testCompile dependencies as they were expected to be shaded into the test jar which hadoop-format wasn't doing. add 8232336 fixup! Fix publishing to use the correct configuration property, also ensure that the jar classifier when not shading is unspecified. add e363d8c Merge branch 'master' into beam3606 new b26e622 [BEAM-6620, BEAM-3606] Stop shading by default. The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. 
Summary of changes: .../org/apache/beam/gradle/BeamModulePlugin.groovy | 272 - examples/java/build.gradle | 56 ++--- examples/kotlin/build.gradle | 56 ++--- runners/apex/build.gradle | 34 +-- runners/core-construction-java/build.gradle| 34 +-- runners/core-java/build.gradle | 30 +-- runners/direct-java/build.gradle | 12 +- runners/extensions-java/metrics/build.gradle | 12 +- runners/flink/flink_runner.gradle | 68 +++--- runners/flink/job-server/flink_job_server.gradle | 12 +- runners/gearpump/build.gradle | 34 +-- runners/google-cloud-dataflow-java/build.gradle| 76 +++--- .../examples-streaming/build.gradle| 6 +- .../examples/build.gradle | 6 +- .../google-cloud-dataflow-java/worker/build.gradle | 18 +- .../worker/legacy-worker/build.gradle | 31 ++- runners/java-fn-execution/build.gradle | 28 +-- runners/jet-experimental/build.gradle | 36 +-- runners/local-java/build.gradle| 10 +- runners/reference/java/build.gradle| 20 +- runners/samza/build.gradle | 56 ++--- runners/samza/job-server/build.gradle | 4 +- runners/spark/build.gradle | 53 ++-- runners/spark/job-server/build.gradle | 10 +- sdks/java/container/build.gradle | 2 +- sdks/java/extensions/euphoria/build.gradle | 8 +- .../google-cloud-platform-core/build.gradle| 30 +-- sdks/java/extensions/jackson/build.gradle | 8 +- sdks/java/extensions/join-library/build.gradle | 6 +- sdks/java/extensions/protobuf/build.gradle | 6 +- sdks/java/extensions/sketching/build.gradle| 24 +- sdks/java/extensions/sorter/build.gradle | 6 +- sdks/java/extensions/sql/build.gradle | 17 +- sdks/java/extensions/sql/datacatalog/build.gradle | 12 +- sdks/java/extensions/sql/hcatalog/build.gradle | 6 +- sdks/java/extensions/sql/jdbc/build.gradle | 2 +- sdks/java/fn-execution/build.gradle| 14 +- sdks/java/harness/build.gradle | 9 +- sdks/java/io/amazon-web-services/build.gradle | 38 +-- sdks/java/io/amqp/build.gradle | 10 +- sdks/java/io/cassandra/build.gradle| 32 +-- sdks/java/io/clickhouse/build.gradle | 8 +- 
sdks/java/io/common/build.gradle | 4 +- .../elasticsearch-tests-2/build.gradle | 10 +- .../elasticsearch-tests-5/build.gradle | 8 +- .../elasticsearch-tests-6/build.gradle | 8 +- .../elasticsearch-tests-common/build.gradle| 6 +- sdks/java/io/elasticsearch/build.gradle| 20 +- sdks/java/io/file-based-io-tests/build.gradle | 18 +- sdks/java/io/google-cloud-platform/build.gradle| 100 sdks/java/io/hadoop-common/build.gradle| 2 +- sdks/java/io/hadoop-file-system/build.gradle | 12 +- sdks/java/io/hadoop-format/build.gradle| 30 +-- sdks/java/io/hbase/build.gradle| 14 +- sdks/java/io/hcatalog/build.gradle | 10 +- sdks/java/io/jdbc/build.gradle | 18 +- sdks/java/io/jms/build.gradle | 12 +- sdks/java/io/kafka/build.gradle| 18 +- sdks/java/io/kinesis/build.gradle | 34 +-- sdks/java/io/kudu/build.gradle | 12 +- sdks/java/io/mongodb/build.gradle | 16 +- sdks/java/io/mqtt/
[beam] 01/01: [BEAM-6620, BEAM-3606] Stop shading by default.
This is an automated email from the ASF dual-hosted git repository. lcwik pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit b26e6227c3e41028ed5d6a671e51d2df38e43576 Merge: 67e3c34 e363d8c Author: Lukasz Cwik AuthorDate: Thu Jun 6 15:31:30 2019 -0700 [BEAM-6620, BEAM-3606] Stop shading by default. .../org/apache/beam/gradle/BeamModulePlugin.groovy | 272 - examples/java/build.gradle | 56 ++--- examples/kotlin/build.gradle | 56 ++--- runners/apex/build.gradle | 34 +-- runners/core-construction-java/build.gradle| 34 +-- runners/core-java/build.gradle | 30 +-- runners/direct-java/build.gradle | 12 +- runners/extensions-java/metrics/build.gradle | 12 +- runners/flink/flink_runner.gradle | 68 +++--- runners/flink/job-server/flink_job_server.gradle | 12 +- runners/gearpump/build.gradle | 34 +-- runners/google-cloud-dataflow-java/build.gradle| 76 +++--- .../examples-streaming/build.gradle| 6 +- .../examples/build.gradle | 6 +- .../google-cloud-dataflow-java/worker/build.gradle | 18 +- .../worker/legacy-worker/build.gradle | 31 ++- runners/java-fn-execution/build.gradle | 28 +-- runners/jet-experimental/build.gradle | 36 +-- runners/local-java/build.gradle| 10 +- runners/reference/java/build.gradle| 20 +- runners/samza/build.gradle | 56 ++--- runners/samza/job-server/build.gradle | 4 +- runners/spark/build.gradle | 53 ++-- runners/spark/job-server/build.gradle | 10 +- sdks/java/container/build.gradle | 2 +- sdks/java/extensions/euphoria/build.gradle | 8 +- .../google-cloud-platform-core/build.gradle| 30 +-- sdks/java/extensions/jackson/build.gradle | 8 +- sdks/java/extensions/join-library/build.gradle | 6 +- sdks/java/extensions/protobuf/build.gradle | 6 +- sdks/java/extensions/sketching/build.gradle| 24 +- sdks/java/extensions/sorter/build.gradle | 6 +- sdks/java/extensions/sql/build.gradle | 17 +- sdks/java/extensions/sql/datacatalog/build.gradle | 12 +- sdks/java/extensions/sql/hcatalog/build.gradle | 6 +- 
sdks/java/extensions/sql/jdbc/build.gradle | 2 +- sdks/java/fn-execution/build.gradle| 14 +- sdks/java/harness/build.gradle | 9 +- sdks/java/io/amazon-web-services/build.gradle | 38 +-- sdks/java/io/amqp/build.gradle | 10 +- sdks/java/io/cassandra/build.gradle| 32 +-- sdks/java/io/clickhouse/build.gradle | 8 +- sdks/java/io/common/build.gradle | 4 +- .../elasticsearch-tests-2/build.gradle | 10 +- .../elasticsearch-tests-5/build.gradle | 8 +- .../elasticsearch-tests-6/build.gradle | 8 +- .../elasticsearch-tests-common/build.gradle| 6 +- sdks/java/io/elasticsearch/build.gradle| 20 +- sdks/java/io/file-based-io-tests/build.gradle | 18 +- sdks/java/io/google-cloud-platform/build.gradle| 100 sdks/java/io/hadoop-common/build.gradle| 2 +- sdks/java/io/hadoop-file-system/build.gradle | 12 +- sdks/java/io/hadoop-format/build.gradle| 30 +-- sdks/java/io/hbase/build.gradle| 14 +- sdks/java/io/hcatalog/build.gradle | 10 +- sdks/java/io/jdbc/build.gradle | 18 +- sdks/java/io/jms/build.gradle | 12 +- sdks/java/io/kafka/build.gradle| 18 +- sdks/java/io/kinesis/build.gradle | 34 +-- sdks/java/io/kudu/build.gradle | 12 +- sdks/java/io/mongodb/build.gradle | 16 +- sdks/java/io/mqtt/build.gradle | 16 +- sdks/java/io/parquet/build.gradle | 20 +- sdks/java/io/rabbitmq/build.gradle | 12 +- sdks/java/io/redis/build.gradle| 10 +- sdks/java/io/solr/build.gradle | 12 +- sdks/java/io/synthetic/build.gradle| 16 +- sdks/java/io/tika/build.gradle | 8 +- sdks/java/io/xml/build.gradle | 12 +- sdks/java/maven-archetypes/examples/build.gradle | 2 +- sdks/java/maven-archetypes/starter/build.gradle| 4 +- sdks/java/testing/load-tests/build.gradle | 24 +- sdks/java/testing/nexmark/build.gradle | 46 ++-- sdks/java/testing/test-utils/build.gradle | 16 +- 74 files changed, 890 insertions(+), 872 deletions(-)
[beam] 01/01: Merge pull request #8782: Fix archivesBaseName method
This is an automated email from the ASF dual-hosted git repository. kenn pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit fc8dc154ea5678363c59c6e8342dbfb733280b2c Merge: b26e622 0f9d71a Author: Kenn Knowles AuthorDate: Thu Jun 6 15:49:27 2019 -0700 Merge pull request #8782: Fix archivesBaseName method buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[beam] branch master updated (b26e622 -> fc8dc15)
This is an automated email from the ASF dual-hosted git repository. kenn pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from b26e622 [BEAM-6620, BEAM-3606] Stop shading by default. add 0f9d71a Fix archivesBaseName method new fc8dc15 Merge pull request #8782: Fix archivesBaseName method The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[beam] branch master updated (fc8dc15 -> 5319373)
This is an automated email from the ASF dual-hosted git repository. chamikara pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from fc8dc15 Merge pull request #8782: Fix archivesBaseName method add 97eb7d5 Fix python io developer guide document errors new 5319373 Merge pull request #8781: Fix python io developer guide document errors The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: sdks/python/apache_beam/examples/snippets/snippets.py | 4 ++-- sdks/python/apache_beam/io/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-)
[beam] 01/01: Merge pull request #8781: Fix python io developer guide document errors
This is an automated email from the ASF dual-hosted git repository. chamikara pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit 5319373f9bc117ed0ddc586b5d44564c569577b5 Merge: fc8dc15 97eb7d5 Author: Chamikara Jayalath AuthorDate: Thu Jun 6 16:08:54 2019 -0700 Merge pull request #8781: Fix python io developer guide document errors sdks/python/apache_beam/examples/snippets/snippets.py | 4 ++-- sdks/python/apache_beam/io/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-)
[beam] branch asf-site updated: Publishing website 2019/06/06 23:10:13 at commit 5319373
This is an automated email from the ASF dual-hosted git repository. git-site-role pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/asf-site by this push: new 40883f2 Publishing website 2019/06/06 23:10:13 at commit 5319373 40883f2 is described below commit 40883f2853a842160212c6b7eb4cadf3ed45cf7d Author: jenkins AuthorDate: Thu Jun 6 23:10:14 2019 + Publishing website 2019/06/06 23:10:13 at commit 5319373 --- .../documentation/io/developing-io-python/index.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/generated-content/documentation/io/developing-io-python/index.html b/website/generated-content/documentation/io/developing-io-python/index.html index 0b5b4d4..48f95c5 100644 --- a/website/generated-content/documentation/io/developing-io-python/index.html +++ b/website/generated-content/documentation/io/developing-io-python/index.html @@ -610,8 +610,8 @@ a wrapper. stop_position = self._count bundle_start = start_position -while bundle_start < self._count: - bundle_stop = max(self._count, bundle_start + desired_bundle_size) +while bundle_start < self.stop_position: + bundle_stop = max(self.stop_position, bundle_start + desired_bundle_size) yield iobase.SourceBundle(weight=(bundle_stop - bundle_start), source=self, start_position=bundle_start,
[beam] branch master updated: Importing hamcrest dependency at matcher runtime
This is an automated email from the ASF dual-hosted git repository. pabloem pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new c8631cb Importing hamcrest dependency at matcher runtime new 37dc031 Merge pull request #8778 from pabloem/hamcrest-in-matcher c8631cb is described below commit c8631cb7cedeb3fd6313e8e92525740af1e930cf Author: pabloem AuthorDate: Thu Jun 6 13:12:26 2019 -0700 Importing hamcrest dependency at matcher runtime --- sdks/python/apache_beam/testing/util.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/testing/util.py b/sdks/python/apache_beam/testing/util.py index 2e38d37..32c16db 100644 --- a/sdks/python/apache_beam/testing/util.py +++ b/sdks/python/apache_beam/testing/util.py @@ -25,9 +25,6 @@ import io import tempfile from builtins import object -from hamcrest.core import assert_that as hamcrest_assert -from hamcrest.library.collection import contains_inanyorder - from apache_beam import pvalue from apache_beam.transforms import window from apache_beam.transforms.core import Create @@ -154,6 +151,8 @@ def matches_all(expected): the elements of a single PCollection. """ def _matches(actual): +from hamcrest.core import assert_that as hamcrest_assert +from hamcrest.library.collection import contains_inanyorder expected_list = list(expected) hamcrest_assert(actual, contains_inanyorder(*expected_list))
[beam] branch master updated: Fix code block formatting in docstring. (#8756)
This is an automated email from the ASF dual-hosted git repository. altay pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new 706fd9c Fix code block formatting in docstring. (#8756) 706fd9c is described below commit 706fd9c010db1249c266c37c87d21d923e264850 Author: rasmi AuthorDate: Thu Jun 6 19:46:29 2019 -0400 Fix code block formatting in docstring. (#8756) * Fix code block formatting in docstring. --- sdks/python/apache_beam/transforms/util.py | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/transforms/util.py b/sdks/python/apache_beam/transforms/util.py index 289d8bf..8b84d31 100644 --- a/sdks/python/apache_beam/transforms/util.py +++ b/sdks/python/apache_beam/transforms/util.py @@ -87,16 +87,17 @@ class CoGroupByKey(PTransform): 'tag2': ... , ... }) - For example, given: + For example, given:: {'tag1': pc1, 'tag2': pc2, 333: pc3} - where: + where:: + pc1 = [(k1, v1)] pc2 = [] pc3 = [(k1, v31), (k1, v32), (k2, v33)] - The output PCollection would be: + The output PCollection would be:: [(k1, {'tag1': [v1], 'tag2': [], 333: [v31, v32]}), (k2, {'tag1': [], 'tag2': [], 333: [v33]})]
[beam] branch master updated: [BEAM-6952] fixes concatenated compressed files bug with python sdk (#8187)
This is an automated email from the ASF dual-hosted git repository. chamikara pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/master by this push: new db82464 [BEAM-6952] fixes concatenated compressed files bug with python sdk (#8187) db82464 is described below commit db824645205c1480656b95ab74e4ea11cc9be344 Author: dlesco AuthorDate: Thu Jun 6 19:59:33 2019 -0400 [BEAM-6952] fixes concatenated compressed files bug with python sdk (#8187) --- sdks/python/apache_beam/io/filesystem.py | 29 --- sdks/python/apache_beam/io/filesystem_test.py | 73 +++ 2 files changed, 83 insertions(+), 19 deletions(-) diff --git a/sdks/python/apache_beam/io/filesystem.py b/sdks/python/apache_beam/io/filesystem.py index efc745a..4df3e01 100644 --- a/sdks/python/apache_beam/io/filesystem.py +++ b/sdks/python/apache_beam/io/filesystem.py @@ -207,34 +207,25 @@ class CompressedFile(object): ) < num_bytes: # Continue reading from the underlying file object until enough bytes are # available, or EOF is reached. - buf = self._file.read(self._read_size) + if not self._decompressor.unused_data: +buf = self._file.read(self._read_size) + else: +# Any uncompressed data at the end of the stream of a gzip or bzip2 +# file that is not corrupted points to a concatenated compressed +# file. We read concatenated files by recursively creating decompressor +# objects for the unused compressed data. +buf = self._decompressor.unused_data +self._initialize_decompressor() if buf: decompressed = self._decompressor.decompress(buf) del buf # Free up some possibly large and no-longer-needed memory. self._read_buffer.write(decompressed) else: # EOF of current stream reached. -# -# Any uncompressed data at the end of the stream of a gzip or bzip2 -# file that is not corrupted points to a concatenated compressed -# file. We read concatenated files by recursively creating decompressor -# objects for the unused compressed data. 
if (self._compression_type == CompressionTypes.BZIP2 or self._compression_type == CompressionTypes.DEFLATE or self._compression_type == CompressionTypes.GZIP): - if self._decompressor.unused_data != b'': -buf = self._decompressor.unused_data - -if self._compression_type == CompressionTypes.BZIP2: - self._decompressor = bz2.BZ2Decompressor() -elif self._compression_type == CompressionTypes.DEFLATE: - self._decompressor = zlib.decompressobj() -else: - self._decompressor = zlib.decompressobj(self._gzip_mask) - -decompressed = self._decompressor.decompress(buf) -self._read_buffer.write(decompressed) -continue + pass else: # Deflate, Gzip and bzip2 formats do not require flushing # remaining data in the decompressor into the read buffer when diff --git a/sdks/python/apache_beam/io/filesystem_test.py b/sdks/python/apache_beam/io/filesystem_test.py index b26d79d..cb48f37 100644 --- a/sdks/python/apache_beam/io/filesystem_test.py +++ b/sdks/python/apache_beam/io/filesystem_test.py @@ -461,6 +461,79 @@ atomized in instants hammered around the if not line: break + def test_concatenated_compressed_file(self): +# The test apache_beam.io.textio_test.test_read_gzip_concat +# does not encounter the problem in the Beam 2.11 and earlier +# code base because the test data is too small: the data is +# smaller than read_size, so it goes through logic in the code +# that avoids the problem in the code. So, this test sets +# read_size smaller and test data bigger, in order to +# encounter the problem. It would be difficult to test in the +# textio_test module, because you'd need very large test data +# because default read_size is 16MiB, and the ReadFromText +# interface does not allow you to modify the read_size. 
+import random +import signal +from six import int2byte +num_test_lines = 10 +timeout = 30 +read_size = (64<<10) # set much smaller than the line size +byte_table = tuple(int2byte(i) for i in range(32, 96)) + +def generate_random_line(): + byte_list = list(b + for i in range(4096) + for b in random.sample(byte_table, 64) + ) + byte_list.append(b'\n') + return b''.join(byte_list) + +def create_test_file(compression_type, lines): + filenames = list() + file_name = self._create_temp_file() + if compression_type == CompressionTypes.BZIP2: +compress_factory = bz2.BZ2File + elif compression_ty
[beam] 01/01: Merge pull request #8773: [BEAM-7456] Increase font and wording for click to expand link
This is an automated email from the ASF dual-hosted git repository. melap pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git commit cc5b4dae3193e259b6d59ef9ebed4753e470cf65 Merge: db82464 28c35be Author: Melissa Pashniak AuthorDate: Thu Jun 6 19:02:37 2019 -0700 Merge pull request #8773: [BEAM-7456] Increase font and wording for click to expand link website/src/_includes/capability-matrix.md | 9 - website/src/_sass/capability-matrix.scss | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-)
[beam] branch master updated (db82464 -> cc5b4da)
This is an automated email from the ASF dual-hosted git repository. melap pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/beam.git. from db82464 [BEAM-6952] fixes concatenated compressed files bug with python sdk (#8187) add 28c35be Increase font and wording for click to expand link. new cc5b4da Merge pull request #8773: [BEAM-7456] Increase font and wording for click to expand link The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: website/src/_includes/capability-matrix.md | 9 ++++++++- website/src/_sass/capability-matrix.scss | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-)
[beam] branch asf-site updated: Publishing website 2019/06/07 02:05:12 at commit cc5b4da
This is an automated email from the ASF dual-hosted git repository. git-site-role pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/beam.git The following commit(s) were added to refs/heads/asf-site by this push: new 54d4615 Publishing website 2019/06/07 02:05:12 at commit cc5b4da 54d4615 is described below commit 54d4615d16729414f61a5b07f9ddf04d68a51bc1 Author: jenkins AuthorDate: Fri Jun 7 02:05:12 2019 + Publishing website 2019/06/07 02:05:12 at commit cc5b4da --- .../capability/2016/03/17/capability-matrix.html | 20 -- website/generated-content/css/site.css | 2 +- .../runners/capability-matrix/index.html | 72 +++--- 3 files changed, 81 insertions(+), 13 deletions(-) diff --git a/website/generated-content/beam/capability/2016/03/17/capability-matrix.html b/website/generated-content/beam/capability/2016/03/17/capability-matrix.html index 1215f22..95e7194 100644 --- a/website/generated-content/beam/capability/2016/03/17/capability-matrix.html +++ b/website/generated-content/beam/capability/2016/03/17/capability-matrix.html @@ -224,7 +224,10 @@ limitations under the License. -What is being computed? + + +What is being computed? + @@ -937,7 +940,10 @@ limitations under the License. -Where in event time? + + +Where in event time? + @@ -1496,7 +1502,10 @@ limitations under the License. -When in processing time? + + +When in processing time? + @@ -2132,7 +2141,10 @@ limitations under the License. -How do refinements relate? + + +How do refinements relate? 
+ diff --git a/website/generated-content/css/site.css b/website/generated-content/css/site.css index 27ba7c3..643cccf 100644 --- a/website/generated-content/css/site.css +++ b/website/generated-content/css/site.css @@ -5931,7 +5931,7 @@ div.cap-toggle { text-align: center; cursor: pointer; position: absolute; - font-size: 12px; + font-size: 16px; font-weight: normal; } /** diff --git a/website/generated-content/documentation/runners/capability-matrix/index.html b/website/generated-content/documentation/runners/capability-matrix/index.html index 0fe1b27..8601de7 100644 --- a/website/generated-content/documentation/runners/capability-matrix/index.html +++ b/website/generated-content/documentation/runners/capability-matrix/index.html @@ -263,7 +263,14 @@ limitations under the License. -(expand details)What is being computed? + + + +(click to expand details) + + +What is being computed? + @@ -2509,7 +2516,14 @@ limitations under the License. -(expand details)Where in event time? + + + +(click to expand details) + + +Where in event time? + @@ -4092,7 +4106,14 @@ limitations under the License. -(expand details)When in processing time? + + + +(click to expand details) + + +When in processing time? + @@ -5896,7 +5917,14 @@ limitations under the License. -(expand details)How do refinements relate? + + + +(click to expand details) + + +How do refinements relate? + @@ -6622,7 +6650,14 @@ limitations under the License. -(collapse details)What is being computed? + + + +(click to collapse details) + + +What is being computed? + @@ -8868,7 +8903,14 @@ limitations under the License. -(collapse details)Where in event time? + + + +(click to collapse details) + + +Where in event time? + @@ -10451,7 +10493,14 @@ limitations under the License. -(collapse details)When in processing time? + + + +(click to collapse details) + + +When in processing time? + @@ -12255,7 +12304,14 @@ limitations under the License. -(collapse details)How do refinements relate? 
+ + + +(click to collapse details) + + +How do refinements relate? +