Merge branch 'master' into service_azkaban_orchestrator
Project: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/commit/f423d7da Tree: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/tree/f423d7da Diff: http://git-wip-us.apache.org/repos/asf/incubator-gobblin/diff/f423d7da Branch: refs/heads/master Commit: f423d7da42fe564ef67d4214bf2d31ef2bf1a32c Parents: 9864f69 90be15f Author: Abhishek Tiwari <[email protected]> Authored: Wed Aug 30 00:20:42 2017 -0700 Committer: Abhishek Tiwari <[email protected]> Committed: Wed Aug 30 00:20:42 2017 -0700 ---------------------------------------------------------------------- conf/log4j-compaction.xml | 2 +- conf/log4j-mapreduce.xml | 4 +- config/checkstyle/checkstyle.xml | 16 + config/checkstyle/suppressions.xml | 10 + .../static/js/collections/job-executions.js | 2 +- .../extractor/CheckpointableWatermark.java | 26 +- .../gobblin_scopes/GobblinScopeInstance.java | 3 +- .../gobblin_scopes/GobblinScopeTypes.java | 3 +- .../broker/gobblin_scopes/JobScopeInstance.java | 3 +- .../gobblin_scopes/TaskScopeInstance.java | 3 +- .../broker/iface/SubscopedBrokerBuilder.java | 3 +- .../configuration/ConfigurationKeys.java | 29 ++ .../org/apache/gobblin/dataset/Dataset.java | 12 +- .../apache/gobblin/dataset/DatasetsFinder.java | 1 + .../gobblin/dataset/IterableDatasetFinder.java | 24 + .../gobblin/dataset/PartitionableDataset.java | 61 +++ .../apache/gobblin/dataset/URNIdentified.java | 29 ++ .../URNLexicographicalComparator.java | 53 +++ .../dataset/comparators/package-info.java | 21 + .../dataset/test/SimpleDatasetForTesting.java | 38 ++ .../test/SimpleDatasetPartitionForTesting.java | 35 ++ .../SimplePartitionableDatasetForTesting.java | 56 +++ .../test/StaticDatasetsFinderForTesting.java | 61 +++ .../org/apache/gobblin/fork/CopyHelper.java | 27 +- .../extractor/CheckpointableWatermark.java | 29 +- .../source/extractor/StreamingExtractor.java | 27 +- .../source/workunit/BasicWorkUnitStream.java | 13 +- .../apache/gobblin/writer/WatermarkStorage.java | 27 +- .../java/com/linkedin/gobblin/TestAlias.java | 19 +- .../gobblin/ack/HierarchicalAckableTest.java | 1 - .../gobblin_scopes/GobblinScopesTest.java | 3 +- .../org/apache/gobblin/fork/CopyHelperTest.java | 27 +- .../gobblin/aws/AWSJobConfigurationManager.java | 3 +- .../gobblin/aws/GobblinAWSClusterManager.java | 6 +- .../gobblin/aws/GobblinAWSTaskRunner.java | 6 +- .../gobblin/cluster/GobblinClusterManager.java | 5 +- .../gobblin/cluster/GobblinHelixConstants.java | 26 +- .../apache/gobblin/cluster/GobblinHelixJob.java | 45 +- .../cluster/GobblinHelixJobLauncher.java | 17 +- .../cluster/GobblinHelixMessagingService.java | 16 + .../gobblin/cluster/GobblinHelixTaskDriver.java | 26 +- .../gobblin/cluster/GobblinTaskRunner.java | 12 +- .../apache/helix/task/GobblinJobRebalancer.java | 26 +- .../mapreduce/MRCompactionTaskFactory.java | 16 + .../compaction/verify/CompactionVerifier.java | 16 + .../verify/InputRecordCountHelper.java | 16 + .../mapreduce/RenameSourceDirectoryTest.java | 16 + .../suite/TestCompactionSuiteFactories.java | 16 + .../verify/PinotAuditCountVerifierTest.java | 16 + .../FineGrainedWatermarkTrackerBenchmark.java | 19 +- .../java/org/apache/gobblin/async/Callback.java | 16 + .../gobblin/converter/SamplingConverter.java | 27 +- ...blinTrackingEventFlattenFilterConverter.java | 1 - .../DefaultCheckpointableWatermark.java | 27 +- .../limiter/LimiterConfigurationKeys.java | 16 + .../apache/gobblin/test/AnyToJsonConverter.java | 27 +- .../gobblin/test/AnyToStringConverter.java | 27 +- .../org/apache/gobblin/test/TestRecord.java | 27 +- .../writer/AcknowledgableRecordEnvelope.java | 27 +- .../gobblin/writer/AcknowledgableWatermark.java | 27 +- .../gobblin/writer/AsyncWriterManager.java | 27 +- .../java/org/apache/gobblin/writer/Batch.java | 27 +- .../apache/gobblin/writer/BatchAccumulator.java | 16 + .../gobblin/writer/BatchAsyncDataWriter.java | 27 +- .../gobblin/writer/BufferedAsyncDataWriter.java | 27 +- .../gobblin/writer/BytesBoundedBatch.java | 27 +- .../writer/FineGrainedWatermarkTracker.java | 27 +- .../writer/FutureWrappedWriteCallback.java | 27 +- .../gobblin/writer/GenericWriteResponse.java | 27 +- .../writer/GenericWriteResponseWrapper.java | 27 +- .../gobblin/writer/LastWatermarkTracker.java | 27 +- .../writer/MultiWriterWatermarkManager.java | 27 +- .../writer/MultiWriterWatermarkTracker.java | 27 +- .../org/apache/gobblin/writer/RecordFuture.java | 27 +- .../apache/gobblin/writer/RecordMetadata.java | 27 +- .../writer/SequentialBasedBatchAccumulator.java | 27 +- .../apache/gobblin/writer/SyncDataWriter.java | 27 +- .../writer/TrackerBasedWatermarkManager.java | 27 +- .../gobblin/writer/WatermarkAwareWriter.java | 27 +- .../writer/WatermarkAwareWriterWrapper.java | 27 +- .../apache/gobblin/writer/WatermarkManager.java | 27 +- .../apache/gobblin/writer/WatermarkTracker.java | 27 +- .../gobblin/writer/WatermarkTrackerFactory.java | 27 +- .../apache/gobblin/writer/WriteResponse.java | 27 +- .../gobblin/writer/WriteResponseFuture.java | 27 +- .../gobblin/writer/WriteResponseMapper.java | 27 +- .../converter/SamplingConverterTest.java | 26 +- .../writer/FineGrainedWatermarkTrackerTest.java | 26 +- .../writer/MultiWriterWatermarkManagerTest.java | 26 +- .../gobblin/writer/WatermarkTrackerTest.java | 26 +- ...nElementConversionWithAvroSchemaFactory.java | 166 +++++++ .../JsonRecordAvroSchemaToAvroConverter.java | 128 ++++++ .../converter/csv/CsvToJsonConverter.java | 48 +- .../converter/json/BytesToJsonConverter.java | 51 +++ .../gobblin/lineage/LineageException.java | 39 ++ .../org/apache/gobblin/lineage/LineageInfo.java | 234 ++++++++++ .../gobblin/publisher/BaseDataPublisher.java | 18 +- .../publisher/HiveRegistrationPublisher.java | 36 +- .../source/DatePartitionedDailyAvroSource.java | 19 +- .../extractor/extract/QueryBasedSource.java | 6 + .../source/extractor/partition/Partition.java | 16 + .../writer/CloseOnFlushWriterWrapper.java | 15 +- .../gobblin/writer/PartitionedDataWriter.java | 8 +- .../gobblin/async/AsyncDataDispatcherTest.java | 16 + .../avro/FlattenNestedKeyConverterTest.java | 16 + ...JsonRecordAvroSchemaToAvroConverterTest.java | 81 ++++ .../json/BytesToJsonConverterTest.java | 43 ++ .../string/ObjectToStringConverterTest.java | 1 - .../apache/gobblin/lineage/LineageInfoTest.java | 160 +++++++ .../publisher/BaseDataPublisherTest.java | 32 ++ .../AvroGenericRecordAccessorTest.java | 26 +- .../RecordAccessorProviderFactoryTest.java | 26 +- .../security/ssl/SSLContextFactoryTest.java | 16 + .../DatePartitionedAvroFileExtractorTest.java | 19 +- .../extract/QueryBasedExtractorTest.java | 16 + .../extractor/partition/PartitionerTest.java | 16 + .../writer/CloseOnFlushWriterWrapperTest.java | 77 +++- .../writer/MetadataWriterWrapperTest.java | 16 + .../gobblin/writer/PartitionedWriterTest.java | 37 ++ .../resources/converter/jsonToAvroRecord.json | 13 + .../resources/converter/jsonToAvroSchema.avsc | 50 +++ .../management/copy/OwnerAndPermission.java | 29 ++ .../converter/AbstractAvroToOrcConverter.java | 2 + .../management/copy/CloseableFsCopySource.java | 2 +- .../data/management/copy/CopyConfiguration.java | 5 + .../data/management/copy/CopyEntity.java | 17 +- .../data/management/copy/CopySource.java | 14 +- .../copy/CopyableDatasetMetadata.java | 16 +- .../data/management/copy/CopyableFile.java | 12 +- .../copy/RecursiveCopyableDataset.java | 11 +- .../management/copy/RecursivePathFinder.java | 9 +- .../FileAwareInputStreamExtractor.java | 4 + .../copy/hive/HivePartitionFileSet.java | 3 +- .../copy/hive/UnpartitionedTableFileSet.java | 2 +- .../copy/publisher/CopyDataPublisher.java | 8 + .../replication/ConfigBasedMultiDatasets.java | 15 +- .../replication/CopyRouteGeneratorBase.java | 7 + .../copy/replication/DataFlowTopology.java | 10 + .../replication/ReplicationConfiguration.java | 2 + .../writer/FileAwareInputStreamDataWriter.java | 7 + .../data/management/dataset/DatasetUtils.java | 9 + .../dataset/DefaultFileSystemGlobFinder.java | 16 + .../management/source/DatasetFinderSource.java | 141 ++++++ .../source/LoopingDatasetFinderSource.java | 226 ++++++++++ .../hive/CopyPartitionParametersTest.java | 16 + .../copy/ConcurrentBoundedWorkUnitListTest.java | 2 +- .../copy/CopySourcePrioritizationTest.java | 2 +- .../data/management/copy/CopyableFileTest.java | 4 +- .../data/management/copy/CopyableFileUtils.java | 4 +- .../source/DatasetFinderSourceTest.java | 137 ++++++ .../source/LoopingDatasetFinderSourceTest.java | 218 ++++++++++ gobblin-docs/Powered-By.md | 15 +- .../case-studies/Kafka-HDFS-Ingestion.md | 4 +- .../hive/HiveMetaStoreClientFactoryTest.java | 16 + .../gobblin/metastore/DatasetStateStore.java | 64 +++ .../gobblin/metastore/DatasetStoreDataset.java | 56 +++ .../metastore/DatasetStoreDatasetFinder.java | 93 ++++ .../apache/gobblin/metastore/FsStateStore.java | 13 +- .../apache/gobblin/metastore/StateStore.java | 19 + .../metadata/DatasetStateStoreEntryManager.java | 60 +++ .../metadata/StateStoreEntryManager.java | 63 +++ .../metastore/predicates/DatasetPredicate.java | 55 +++ .../predicates/StateStorePredicate.java | 43 ++ .../predicates/StoreNamePredicate.java | 44 ++ .../config/checkstyle/suppressions.xml | 10 + .../gobblin/metrics/event/sla/SlaEventKeys.java | 1 + ...roToJsonRecordWithMetadataConverterTest.java | 16 + .../retention/Avro2OrcStaleDatasetCleaner.java | 16 + .../compliance/purger/HivePurgerWriterTest.java | 16 + .../converter/AnyToCouchbaseJsonConverter.java | 26 +- .../writer/CouchbaseEnvironmentFactory.java | 26 +- .../AnyToCouchbaseJsonConverterTest.java | 26 +- .../AvroStringFieldEncryptorConverter.java | 26 +- ...ordToEncryptedSerializedRecordConverter.java | 26 +- .../StringFieldEncryptorConverter.java | 26 +- .../AvroStringFieldEncryptorConverterTest.java | 16 + .../crypto/GobblinEncryptionProviderTest.java | 16 + ...oEncryptedSerializedRecordConverterBase.java | 26 +- .../apache/gobblin/crypto/GPGFileDecryptor.java | 133 +++++- .../gobblin/crypto/GPGFileDecryptorTest.java | 61 +++ .../crypto/gpg/KeyBasedEncryptionFile.txt.gpg | Bin 0 -> 383 bytes .../gpg/PasswordBasedEncryptionFile.txt.gpg | 2 + .../src/test/resources/crypto/gpg/private.key | 59 +++ .../gobblin/eventhub/EventhubMetricNames.java | 16 + .../writer/BatchedEventhubDataWriter.java | 16 + .../writer/EventhubBatchAccumulator.java | 16 + .../eventhub/writer/EventhubDataWriter.java | 27 +- .../writer/EventhubDataWriterBuilder.java | 26 +- .../eventhub/writer/EventhubRequest.java | 26 +- .../writer/BatchedEventhubDataWriterTest.java | 26 +- .../writer/EventhubAccumulatorTest.java | 16 + .../eventhub/writer/EventhubBatchTest.java | 16 + .../eventhub/writer/EventhubDataWriterTest.java | 26 +- .../converter/AsyncHttpJoinConverter.java | 16 + .../converter/AvroApacheHttpJoinConverter.java | 16 + .../converter/AvroHttpJoinConverter.java | 16 + .../gobblin/converter/AvroR2JoinConverter.java | 16 + .../gobblin/converter/HttpJoinConverter.java | 16 + .../gobblin/http/ApacheHttpAsyncClient.java | 16 + .../apache/gobblin/http/ApacheHttpClient.java | 16 + .../gobblin/http/ApacheHttpRequestBuilder.java | 16 + .../gobblin/http/ApacheHttpResponseHandler.java | 16 + .../gobblin/http/ApacheHttpResponseStatus.java | 16 + .../gobblin/http/ThrottledHttpClient.java | 16 + .../org/apache/gobblin/r2/R2ResponseStatus.java | 16 + .../apache/gobblin/r2/R2RestRequestBuilder.java | 16 + .../gobblin/r2/R2RestResponseHandler.java | 16 + .../org/apache/gobblin/utils/HttpConstants.java | 16 + .../org/apache/gobblin/utils/HttpUtils.java | 16 + .../gobblin/writer/AvroHttpWriterBuilder.java | 16 + .../java/org/apache/gobblin/HttpTestUtils.java | 16 + .../org/apache/gobblin/MockGenericRecord.java | 16 + .../http/ApacheHttpRequestBuilderTest.java | 16 + .../apache/gobblin/r2/R2ClientFactoryTest.java | 16 + .../gobblin/r2/R2RestRequestBuilderTest.java | 16 + .../org/apache/gobblin/util/HttpUtilsTest.java | 16 + .../gobblin/writer/AsyncHttpWriterTest.java | 16 + .../KafkaSchemaRegistryConfigurationKeys.java | 3 + .../kafka/schemareg/LiKafkaSchemaRegistry.java | 33 +- .../writer/KafkaWriterConfigurationKeys.java | 2 + .../gobblin/kafka/writer/KafkaWriterHelper.java | 1 + .../metrics/kafka/KafkaAvroSchemaRegistry.java | 11 + .../reporter/util/KafkaAvroReporterUtil.java | 75 ++++ ...thMetadataToEnvelopedRecordWithMetadata.java | 26 +- .../apache/gobblin/metadata/types/Metadata.java | 16 + .../apache/gobblin/type/ContentTypeUtils.java | 26 +- .../apache/gobblin/type/RecordWithMetadata.java | 26 +- .../type/SerializedRecordWithMetadata.java | 26 +- .../converter/MetadataConverterWrapperTest.java | 26 +- ...tadataToEnvelopedRecordWithMetadataTest.java | 26 +- .../metadata/GlobalMetadataCollectorTest.java | 16 + .../extractor/extract/jdbc/MysqlSource.java | 13 + .../gobblin/source/jdbc/JdbcExtractor.java | 34 +- .../gobblin/source/jdbc/JdbcExtractorTest.java | 12 + .../google/AsyncIteratorWithDataSink.java | 16 + .../GoggleIngestionConfigurationKeys.java | 16 + .../ingestion/google/util/SchemaUtil.java | 16 + .../local/gobblin-oozie-example-workflow.xml | 2 +- .../config/checkstyle/suppressions.xml | 10 + .../config/checkstyle/suppressions.xml | 10 + .../config/checkstyle/suppressions.xml | 10 + .../config/checkstyle/suppressions.xml | 10 + .../config/checkstyle/suppressions.xml | 10 + .../ThrottlingGuiceServletConfig.java | 19 +- ...adoopKerberosKeytabAuthenticationPlugin.java | 19 +- ...adoopKerberosKeytabAuthenticationPlugin.java | 14 + .../gobblin/runtime/fork/MockTaskContext.java | 19 +- .../main/java/gobblin/runtime/TaskState.java | 16 + .../mapreduce/GobblinWorkUnitsInputFormat.java | 33 ++ .../runtime/CheckpointableWatermarkState.java | 26 +- .../apache/gobblin/runtime/ExecutionModel.java | 26 +- .../gobblin/runtime/FsDatasetStateStore.java | 84 +++- ...RegTaskStateCollectorServiceHandlerImpl.java | 54 +++ .../org/apache/gobblin/runtime/JobContext.java | 4 +- .../gobblin/runtime/SafeDatasetCommit.java | 39 +- .../StateStoreBasedWatermarkStorage.java | 27 +- .../StateStoreBasedWatermarkStorageCli.java | 26 +- .../java/org/apache/gobblin/runtime/Task.java | 17 +- .../gobblin/runtime/TaskConfigurationKeys.java | 29 +- .../apache/gobblin/runtime/TaskExecutor.java | 233 +++++++++- .../runtime/TaskInstantiationException.java | 26 +- .../runtime/TaskStateCollectorService.java | 67 ++- .../TaskStateCollectorServiceHandler.java | 44 ++ ...teCollectorServiceHiveRegHandlerFactory.java | 33 ++ .../runtime/commit/DatasetStateCommitStep.java | 4 +- .../gobblin/runtime/crypto/DecryptCli.java | 16 + .../gobblin/runtime/fork/AsynchronousFork.java | 19 +- .../org/apache/gobblin/runtime/fork/Fork.java | 19 +- .../gobblin/runtime/fork/SynchronousFork.java | 19 +- .../FsDatasetStateStoreEntryManager.java | 51 +++ .../runtime/services/JMXReportingService.java | 13 + .../gobblin/runtime/spec_store/FSSpecStore.java | 4 +- .../apache/gobblin/runtime/task/FailedTask.java | 16 + .../apache/gobblin/runtime/task/NoopTask.java | 60 +++ .../runtime/util/JobStateToJsonConverter.java | 34 +- .../gobblin/runtime/util/StateStores.java | 34 +- .../gobblin/scheduler/BaseGobblinJob.java | 19 +- .../apache/gobblin/scheduler/JobScheduler.java | 80 +++- .../runtime/FsDatasetStateStoreTest.java | 92 ++++ .../gobblin/runtime/JobBrokerInjectionTest.java | 2 +- .../gobblin/runtime/JobLauncherTestHelper.java | 14 +- .../gobblin/runtime/LimiterStopEventTest.java | 16 + .../gobblin/runtime/TaskContinuousTest.java | 26 +- .../runtime/TaskStateCollectorServiceTest.java | 14 + .../runtime/commit/CommitSequenceTest.java | 2 +- .../instance/hadoop/TestHadoopConfigLoader.java | 19 +- .../FileBasedJobLockFactoryManagerTest.java | 15 +- .../util/JobStateToJsonConverterTest.java | 38 +- .../salesforce/SalesforceSourceTest.java | 16 + gobblin-service/build.gradle | 1 + .../gobblin/service/ServiceConfigKeys.java | 1 + .../service/modules/core/GitConfigMonitor.java | 434 +++++++++++++++++++ .../modules/core/GobblinServiceManager.java | 31 ++ .../scheduler/GobblinServiceJobScheduler.java | 10 + .../modules/core/GitConfigMonitorTest.java | 334 ++++++++++++++ .../modules/core/GobblinServiceManagerTest.java | 76 +++- .../gobblin/GobblinLocalJobLauncherUtils.java | 16 + .../TaskSkipErrRecordsIntegrationTest.java | 79 ++++ .../org/apache/gobblin/TestAvroConverter.java | 49 +++ .../org/apache/gobblin/TestAvroExtractor.java | 16 + .../java/org/apache/gobblin/TestAvroSource.java | 16 + .../WriterOutputFormatIntegrationTest.java | 16 + .../task_skip_err_records.properties | 42 ++ .../java/org/apache/gobblin/util/AvroUtils.java | 109 +++++ .../apache/gobblin/util/DatePartitionType.java | 20 +- .../org/apache/gobblin/util/FileListUtils.java | 50 ++- .../org/apache/gobblin/util/HadoopUtils.java | 20 + .../java/org/apache/gobblin/util/HostUtils.java | 16 + .../java/org/apache/gobblin/util/JvmUtils.java | 16 + .../java/org/apache/gobblin/util/PathUtils.java | 12 + .../java/org/apache/gobblin/util/PortUtils.java | 20 +- .../org/apache/gobblin/util/PullFileLoader.java | 29 +- .../util/executors/IteratorExecutor.java | 31 ++ .../util/executors/MDCPropagatingCallable.java | 20 +- .../MDCPropagatingExecutorService.java | 20 +- .../util/executors/MDCPropagatingRunnable.java | 20 +- .../MDCPropagatingScheduledExecutorService.java | 20 +- .../util/hadoop/GobblinSequenceFileReader.java | 49 +++ .../gobblin/util/io/EmptyInputStream.java | 36 ++ .../gobblin/util/limiter/LimiterFactory.java | 3 +- .../gobblin/util/limiter/MultiLimiter.java | 3 +- .../gobblin/util/limiter/NoopLimiter.java | 3 +- .../limiter/broker/SharedLimiterFactory.java | 3 +- .../util/limiter/broker/SharedLimiterKey.java | 3 +- .../org/apache/gobblin/util/AvroUtilsTest.java | 23 + .../apache/gobblin/util/FileListUtilsTest.java | 104 +++++ .../org/apache/gobblin/util/PortUtilsTest.java | 20 +- .../apache/gobblin/util/PullFileLoaderTest.java | 12 +- .../gobblin/util/limiter/MultiLimiterTest.java | 2 +- .../broker/SharedLimiterFactoryTest.java | 2 +- .../pullFileLoaderTest/dir1/dir1.configuration | 19 + .../pullFileLoaderTest/dir1/dir1.properties | 18 - gradle/scripts/dependencyDefinitions.gradle | 1 + gradle/scripts/testSetup.gradle | 5 + 334 files changed, 8363 insertions(+), 1487 deletions(-) ----------------------------------------------------------------------
