[ 
https://issues.apache.org/jira/browse/BEAM-3217?focusedWorklogId=79321&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-79321
 ]

ASF GitHub Bot logged work on BEAM-3217:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 12/Mar/18 02:57
            Start Date: 12/Mar/18 02:57
    Worklog Time Spent: 10m 
      Work Description: chamikaramj closed pull request #4758: [BEAM-3217] 
Jenkins job for HadoopInputFormatIOIT
URL: https://github.com/apache/beam/pull/4758
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/.test-infra/jenkins/common_job_properties.groovy 
b/.test-infra/jenkins/common_job_properties.groovy
index f882b842209..92c1426aa55 100644
--- a/.test-infra/jenkins/common_job_properties.groovy
+++ b/.test-infra/jenkins/common_job_properties.groovy
@@ -255,6 +255,31 @@ class common_job_properties {
     return mapToArgString(joinedArgs)
   }
 
+  static def setupKubernetes(def context, def namespace, def 
kubeconfigLocation) {
+    context.steps {
+      shell('gcloud container clusters get-credentials io-datastores 
--zone=us-central1-a --verbosity=debug')
+      shell("cp /home/jenkins/.kube/config ${kubeconfigLocation}")
+
+      shell("kubectl --kubeconfig=${kubeconfigLocation} create namespace 
${namespace}")
+      shell("kubectl --kubeconfig=${kubeconfigLocation} config set-context 
\$(kubectl config current-context) --namespace=${namespace}")
+    }
+  }
+
+  static def cleanupKubernetes(def context, def namespace, def 
kubeconfigLocation) {
+    context.steps {
+      shell("kubectl --kubeconfig=${kubeconfigLocation} delete namespace 
${namespace}")
+      shell("rm ${kubeconfigLocation}")
+    }
+  }
+
+  static String getKubernetesNamespace(def testName) {
+    return "${testName}-${new Date().getTime()}"
+  }
+
+  static String getKubeconfigLocationForNamespace(def namespace) {
+    return '"$WORKSPACE/' + "config-${namespace}" + '"'
+  }
+
   // Adds the standard performance test job steps.
   static def buildPerformanceTest(def context, def argMap) {
     def pkbArgs = genPerformanceArgs(argMap)
@@ -381,4 +406,27 @@ class common_job_properties {
       }
     }
   }
+
+  /**
+   * Transforms pipeline options to a string of format like below:
+   * ["--pipelineOption=123", "--pipelineOption2=abc", ...]
+   *
+   * @param pipelineOptions A map of pipeline options.
+   */
+  static String joinPipelineOptions(Map pipelineOptions) {
+    List<String> pipelineArgList = []
+    pipelineOptions.each({
+      key, value -> pipelineArgList.add("\"--$key=$value\"")
+    })
+    return "[" + pipelineArgList.join(',') + "]"
+  }
+
+
+  /**
+   * Returns absolute path to beam project's files.
+   * @param path A relative path to project resource.
+   */
+  static String makePathAbsolute(String path) {
+    return '"$WORKSPACE/' + path + '"'
+  }
 }
diff --git 
a/.test-infra/jenkins/job_beam_PerformanceTests_FileBasedIO_IT.groovy 
b/.test-infra/jenkins/job_beam_PerformanceTests_FileBasedIO_IT.groovy
index 667b11d2072..e1d8c5eb3ac 100644
--- a/.test-infra/jenkins/job_beam_PerformanceTests_FileBasedIO_IT.groovy
+++ b/.test-infra/jenkins/job_beam_PerformanceTests_FileBasedIO_IT.groovy
@@ -86,22 +86,16 @@ private void 
create_filebasedio_performance_test_job(testConfiguration) {
                 'commits@beam.apache.org',
                 false)
 
-        def pipelineArgs = [
+        def pipelineOptions = [
                 project        : 'apache-beam-testing',
                 tempRoot       : 'gs://temp-storage-for-perf-tests',
                 numberOfRecords: '1000000',
                 filenamePrefix : 
"gs://temp-storage-for-perf-tests/${testConfiguration.jobName}/\${BUILD_ID}/",
         ]
         if (testConfiguration.containsKey('extraPipelineArgs')) {
-            pipelineArgs << testConfiguration.extraPipelineArgs
+            pipelineOptions << testConfiguration.extraPipelineArgs
         }
 
-        def pipelineArgList = []
-        pipelineArgs.each({
-            key, value -> pipelineArgList.add("\"--$key=$value\"")
-        })
-        def pipelineArgsJoined = "[" + pipelineArgList.join(',') + "]"
-
         def argMap = [
                 benchmarks               : 'beam_integration_benchmark',
                 beam_it_timeout          : '1200',
@@ -110,7 +104,7 @@ private void 
create_filebasedio_performance_test_job(testConfiguration) {
                 beam_sdk                 : 'java',
                 beam_it_module           : 'sdks/java/io/file-based-io-tests',
                 beam_it_class            : testConfiguration.itClass,
-                beam_it_options          : pipelineArgsJoined,
+                beam_it_options          : 
common_job_properties.joinPipelineOptions(pipelineOptions),
                 beam_extra_mvn_properties: '["filesystem=gcs"]',
                 bigquery_table           : testConfiguration.bqTable,
         ]
diff --git 
a/.test-infra/jenkins/job_beam_PerformanceTests_HadoopInputFormat.groovy 
b/.test-infra/jenkins/job_beam_PerformanceTests_HadoopInputFormat.groovy
new file mode 100644
index 00000000000..093390dc930
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PerformanceTests_HadoopInputFormat.groovy
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+job('beam_PerformanceTests_HadoopInputFormat') {
+    // Set default Beam job properties.
+    common_job_properties.setTopLevelMainJobProperties(delegate)
+
+    // Run job in postcommit every 6 hours, don't trigger every push, and
+    // don't email individual committers.
+    common_job_properties.setPostCommit(
+            delegate,
+            '0 */6 * * *',
+            false,
+            'commits@beam.apache.org',
+            false)
+
+    common_job_properties.enablePhraseTriggeringFromPullRequest(
+            delegate,
+            'Java HadoopInputFormatIO Performance Test',
+            'Run Java HadoopInputFormatIO Performance Test')
+
+    def pipelineOptions = [
+            tempRoot       : 'gs://temp-storage-for-perf-tests',
+            project        : 'apache-beam-testing',
+            postgresPort   : '5432',
+            numberOfRecords: '600000'
+    ]
+
+    String namespace = 
common_job_properties.getKubernetesNamespace('hadoopinputformatioit')
+    String kubeconfig = 
common_job_properties.getKubeconfigLocationForNamespace(namespace)
+
+    def testArgs = [
+            kubeconfig              : kubeconfig,
+            beam_it_timeout         : '1200',
+            benchmarks              : 'beam_integration_benchmark',
+            beam_it_profile         : 'io-it',
+            beam_prebuilt           : 'true',
+            beam_sdk                : 'java',
+            beam_it_module          : 'sdks/java/io/hadoop-input-format',
+            beam_it_class           : 
'org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOIT',
+            beam_it_options         : 
common_job_properties.joinPipelineOptions(pipelineOptions),
+            beam_kubernetes_scripts : 
common_job_properties.makePathAbsolute('src/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml'),
+            beam_options_config_file: 
common_job_properties.makePathAbsolute('src/.test-infra/kubernetes/postgres/pkb-config-local.yml'),
+            bigquery_table          : 
'beam_performance.hadoopinputformatioit_pkb_results'
+    ]
+
+    common_job_properties.setupKubernetes(delegate, namespace, kubeconfig)
+    common_job_properties.buildPerformanceTest(delegate, testArgs)
+    common_job_properties.cleanupKubernetes(delegate, namespace, kubeconfig)
+}
+
diff --git a/.test-infra/jenkins/job_beam_PerformanceTests_JDBC.groovy 
b/.test-infra/jenkins/job_beam_PerformanceTests_JDBC.groovy
index 1e5131f3ddf..401a9104e6a 100644
--- a/.test-infra/jenkins/job_beam_PerformanceTests_JDBC.groovy
+++ b/.test-infra/jenkins/job_beam_PerformanceTests_JDBC.groovy
@@ -18,7 +18,6 @@
 
 import common_job_properties
 
-// This job runs the Beam performance tests on PerfKit Benchmarker.
 job('beam_PerformanceTests_JDBC') {
     // Set default Beam job properties.
     common_job_properties.setTopLevelMainJobProperties(delegate)
@@ -37,15 +36,18 @@ job('beam_PerformanceTests_JDBC') {
             'Java JdbcIO Performance Test',
             'Run Java JdbcIO Performance Test')
 
-    def pipelineArgs = [
+    def pipelineOptions = [
             tempRoot       : 'gs://temp-storage-for-perf-tests',
             project        : 'apache-beam-testing',
             postgresPort   : '5432',
             numberOfRecords: '5000000'
     ]
 
+    String namespace = common_job_properties.getKubernetesNamespace('jdbcioit')
+    String kubeconfig = 
common_job_properties.getKubeconfigLocationForNamespace(namespace)
+
     def testArgs = [
-            kubeconfig              : '"$HOME/.kube/config"',
+            kubeconfig              : kubeconfig,
             beam_it_timeout         : '1800',
             benchmarks              : 'beam_integration_benchmark',
             beam_it_profile         : 'io-it',
@@ -53,29 +55,14 @@ job('beam_PerformanceTests_JDBC') {
             beam_sdk                : 'java',
             beam_it_module          : 'sdks/java/io/jdbc',
             beam_it_class           : 'org.apache.beam.sdk.io.jdbc.JdbcIOIT',
-            beam_it_options         : joinPipelineOptions(pipelineArgs),
-            beam_kubernetes_scripts : 
makePathAbsolute('src/.test-infra/kubernetes/postgres/postgres.yml')
-                    + ',' + 
makePathAbsolute('src/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml'),
-            beam_options_config_file: 
makePathAbsolute('src/.test-infra/kubernetes/postgres/pkb-config-local.yml'),
+            beam_it_options         : 
common_job_properties.joinPipelineOptions(pipelineOptions),
+            beam_kubernetes_scripts : 
common_job_properties.makePathAbsolute('src/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml'),
+            beam_options_config_file: 
common_job_properties.makePathAbsolute('src/.test-infra/kubernetes/postgres/pkb-config-local.yml'),
             bigquery_table          : 'beam_performance.jdbcioit_pkb_results'
     ]
 
-    steps {
-        // create .kube/config file for perfkit (if not exists)
-        shell('gcloud container clusters get-credentials io-datastores 
--zone=us-central1-a --verbosity=debug')
-    }
-
+    common_job_properties.setupKubernetes(delegate, namespace, kubeconfig)
     common_job_properties.buildPerformanceTest(delegate, testArgs)
+    common_job_properties.cleanupKubernetes(delegate, namespace, kubeconfig)
 }
 
-static String joinPipelineOptions(Map pipelineArgs) {
-    List<String> pipelineArgList = []
-    pipelineArgs.each({
-        key, value -> pipelineArgList.add("\"--$key=$value\"")
-    })
-    return "[" + pipelineArgList.join(',') + "]"
-}
-
-static String makePathAbsolute(String path) {
-    return '"$WORKSPACE/' + path + '"'
-}
\ No newline at end of file
diff --git a/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml 
b/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml
index 5d2c6648590..7ba106a73d3 100644
--- a/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml
+++ b/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml
@@ -26,3 +26,30 @@ spec:
   selector:
     name: postgres
   type: LoadBalancer
+
+---
+
+apiVersion: v1
+kind: ReplicationController
+metadata:
+  name: postgres
+spec:
+  replicas: 1
+  selector:
+    name: postgres
+  template:
+    metadata:
+      name: postgres
+      labels:
+        name: postgres
+    spec:
+      containers:
+        - name: postgres
+          image: postgres
+          env:
+            - name: POSTGRES_PASSWORD
+              value: uuinkks
+            - name: PGDATA
+              value: /var/lib/postgresql/data/pgdata
+          ports:
+            - containerPort: 5432
diff --git a/sdks/java/io/hadoop-input-format/pom.xml 
b/sdks/java/io/hadoop-input-format/pom.xml
index 4d86f4574b4..d53afae6fe7 100644
--- a/sdks/java/io/hadoop-input-format/pom.xml
+++ b/sdks/java/io/hadoop-input-format/pom.xml
@@ -253,7 +253,7 @@
                 
<argument>-beam_it_module=sdks/java/io/hadoop-input-format</argument>
                 
<argument>-beam_it_class=org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOIT</argument>
                 
<argument>-beam_options_config_file=${beamRootProjectDir}/.test-infra/kubernetes/postgres/pkb-config-local.yml</argument>
-                
<argument>-beam_kubernetes_scripts=${beamRootProjectDir}/.test-infra/kubernetes/postgres/postgres.yml,${beamRootProjectDir}/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml</argument>
+                
<argument>-beam_kubernetes_scripts=${beamRootProjectDir}/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml</argument>
                 <!-- arguments typically defined by user -->
                 
<argument>-beam_it_options=${integrationTestPipelineOptions}</argument>
               </arguments>
diff --git a/sdks/java/io/jdbc/pom.xml b/sdks/java/io/jdbc/pom.xml
index c4cc9c5b4b7..5029ab8b8f6 100644
--- a/sdks/java/io/jdbc/pom.xml
+++ b/sdks/java/io/jdbc/pom.xml
@@ -220,7 +220,7 @@
                 <argument>${pkbBeamRunnerOption}</argument>
                 <!-- specific to this IO -->
                 
<argument>-beam_options_config_file=${beamRootProjectDir}/.test-infra/kubernetes/postgres/pkb-config-local.yml</argument>
-                
<argument>-beam_kubernetes_scripts=${beamRootProjectDir}/.test-infra/kubernetes/postgres/postgres.yml,${beamRootProjectDir}/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml</argument>
+                
<argument>-beam_kubernetes_scripts=${beamRootProjectDir}/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml</argument>
                 <argument>-beam_it_module=sdks/java/io/jdbc</argument>
                 
<argument>-beam_it_class=org.apache.beam.sdk.io.jdbc.JdbcIOIT</argument>
                 <!-- arguments typically defined by user -->


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 79321)
    Time Spent: 6h  (was: 5h 50m)

> Add a performance test for HadoopInputFormatIO
> ----------------------------------------------
>
>                 Key: BEAM-3217
>                 URL: https://issues.apache.org/jira/browse/BEAM-3217
>             Project: Beam
>          Issue Type: Test
>          Components: io-java-hadoop
>            Reporter: Chamikara Jayalath
>            Assignee: Łukasz Gajowy
>            Priority: Major
>          Time Spent: 6h
>  Remaining Estimate: 0h
>
> We should add a large-scale performance test for HadoopInputFormatIO. We 
> should use the PerfKitBenchmarker-based performance testing framework [1] to 
> manage a Kubernetes-based multi-node data store and to publish benchmark results.
> Example input format implementation to use: DBInputFormat to connect to a 
> Postgres instance.
> https://github.com/hanborq/hadoop/blob/master/src/mapred/org/apache/hadoop/mapreduce/lib/db/DBInputFormat.java
>  
> Example docker image to use: https://hub.docker.com/_/postgres/
> [1] https://beam.apache.org/documentation/io/testing/



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to