This is an automated email from the ASF dual-hosted git repository. dimberman pushed a commit to branch v1-10-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 7c7766e5345278203250ddde1348d2889fdf778d Author: Aneesh Joseph <[email protected]> AuthorDate: Mon Jul 6 01:27:15 2020 +0530 Add git sync option and unit tests for the Helm chart (#9371) * add git sync sidecars * add a helm test * add more tests * allow users to provide git username and pass via a k8s secrets * set default values for airflow worker repository & tag * change ci timeout * fix link * add credentials_secret to airflow.cfg configmap * set GIT_SYNC_ADD_USER on kubernetes worker pods, set uid * add fsGroup to webserver and kubernete workers * move gitSync to dags.gitSync * rename valueFields * turn off git sync and dag persistence by default * provide option to specify known_hosts * add git-sync details into the chart documentation * Update .gitignore Co-authored-by: Ash Berlin-Taylor <[email protected]> * make git sync max failures configurable * Apply suggestions from code review Co-authored-by: Jarek Potiuk <[email protected]> * add back requirements.lock Co-authored-by: Ash Berlin-Taylor <[email protected]> Co-authored-by: Jarek Potiuk <[email protected]> (cherry picked from commit d93555b863a03754b1bf1d3a3362108be275de9e) --- .github/workflows/ci.yml | 11 ++ CI.rst | 2 +- airflow/kubernetes/worker_configuration.py | 4 + chart/README.md | 40 +++++- chart/templates/_helpers.yaml | 95 +++++++++++++++ chart/templates/configmap.yaml | 40 +++++- chart/templates/dags-persistent-volume-claim.yaml | 41 +++++++ .../templates/scheduler/scheduler-deployment.yaml | 16 +++ .../templates/webserver/webserver-deployment.yaml | 19 +++ chart/templates/workers/worker-deployment.yaml | 18 +++ chart/tests/dags-persistent-volume-claim_test.yaml | 64 ++++++++++ chart/tests/git-sync-scheduler_test.yaml | 135 +++++++++++++++++++++ chart/tests/git-sync-webserver_test.yaml | 66 ++++++++++ chart/tests/git-sync-worker_test.yaml | 70 +++++++++++ chart/values.yaml | 74 ++++++++++- scripts/ci/ci_run_helm_testing.sh | 28 +++++ tests/kubernetes/test_worker_configuration.py | 4 + 17 files changed, 721 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 195f7f7..b5948d3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -306,6 +306,17 @@ ${{ hashFiles('requirements/requirements-python${{matrix.python-version}}.txt') - name: "Tests" run: ./scripts/ci/ci_run_airflow_testing.sh + helm-tests: + timeout-minutes: 5 + name: "Checks: Helm tests" + runs-on: ubuntu-latest + env: + CI_JOB_TYPE: "Tests" + steps: + - uses: actions/checkout@master + - name: "Helm Tests" + run: ./scripts/ci/ci_run_helm_testing.sh + requirements: timeout-minutes: 80 name: "Requirements" diff --git a/CI.rst b/CI.rst index c8986cc..210cf90 100644 --- a/CI.rst +++ b/CI.rst @@ -33,7 +33,7 @@ environments we use. Most of our CI jobs are written as bash scripts which are e the CI jobs and we are mapping all the CI-specific environment variables to generic "CI" variables. The only two places where CI-specific code might be are: -- CI-specific declaration file (for example it is `<.github/workflow/ci.yml>`_ for GitHub Actions +- CI-specific declaration file (for example it is `<.github/workflows/ci.yml>`_ for GitHub Actions - The ``get_environment_for_builds_on_ci`` function in `<scripts/ci/libraries/_build_images.sh>`_ where mapping is performed from the CI-environment specific to generic values. Example for that is CI_EVENT_TYPE variable which determines whether we are running a ``push``. ``schedule`` or ``pull_request`` kind of CI job. For diff --git a/airflow/kubernetes/worker_configuration.py b/airflow/kubernetes/worker_configuration.py index cf232a1..4eae2ef 100644 --- a/airflow/kubernetes/worker_configuration.py +++ b/airflow/kubernetes/worker_configuration.py @@ -135,6 +135,10 @@ class WorkerConfiguration(LoggingMixin): value='/etc/git-secret/ssh' ), k8s.V1EnvVar( + name='GIT_SYNC_ADD_USER', + value='true' + ), + k8s.V1EnvVar( name='GIT_SYNC_SSH', value='true' ) diff --git a/chart/README.md b/chart/README.md index 6cc361e..e7ef973 100644 --- a/chart/README.md +++ b/chart/README.md @@ -66,7 +66,7 @@ The command removes all the Kubernetes components associated with the chart and ## Updating DAGs -The recommended way to update your DAGs with this chart is to build a new docker image with the latest code (`docker build -t my-company/airflow:8a0da78 .`), push it to an accessible registry (`docker push my-company/airflow:8a0da78`), then update the Airflow pods with that image: +The recommended way to update your DAGs with this chart is to build a new docker image with the latest DAG code (`docker build -t my-company/airflow:8a0da78 .`), push it to an accessible registry (`docker push my-company/airflow:8a0da78`), then update the Airflow pods with that image: ```bash helm upgrade airflow . \ @@ -77,6 +77,42 @@ helm upgrade airflow . \ For local development purppose you can also u You can also build the image locally and use it via deployment method described by Breeze. +## Mounting DAGS using Git-Sync side car with Persistence enabled + +This option will use a Persistent Volume Claim with an accessMode of `ReadWriteMany`. The scheduler pod will sync DAGs from a git repository onto the PVC every configured number of seconds. The other pods will read the synced DAGs. Not all volume plugins have support for `ReadWriteMany` accessMode. Refer [Persistent Volume Access Modes](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes) for details + +```bash +helm upgrade airflow . \ + --set dags.persistence.enabled=true \ + --set dags.gitSync.enabled=true + # you can also override the other persistence or gitSync values + # by setting the dags.persistence.* and dags.gitSync.* values + # Please refer to values.yaml for details +``` + +## Mounting DAGS using Git-Sync side car without Persistence +This option will use an always running Git-Sync side car on every scheduler,webserver and worker pods. The Git-Sync side car containers will sync DAGs from a git repository every configured number of seconds. If you are using the KubernetesExecutor, Git-sync will run as an initContainer on your worker pods. + +```bash +helm upgrade airflow . \ + --set dags.persistence.enabled=false \ + --set dags.gitSync.enabled=true + # you can also override the other gitSync values + # by setting the dags.gitSync.* values + # Refer values.yaml for details +``` + +## Mounting DAGS from an externally populated PVC +In this approach, Airflow will read the DAGs from a PVC which has `ReadOnlyMany` or `ReadWriteMany` accessMode. You will have to ensure that the PVC is populated/updated with the required DAGs(this won't be handled by the chart). You can pass in the name of the volume claim to the chart + +```bash +helm upgrade airflow . \ + --set dags.persistence.enabled=true \ + --set dags.persistence.existingClaim=my-volume-claim + --set dags.gitSync.enabled=false +``` + + ## Parameters The following tables lists the configurable parameters of the Airflow chart and their default values. @@ -160,6 +196,8 @@ The following tables lists the configurable parameters of the Airflow chart and | `webserver.resources.requests.cpu` | CPU Request of webserver | `~` | | `webserver.resources.requests.memory` | Memory Request of webserver | `~` | | `webserver.defaultUser` | Optional default airflow user information | `{}` | +| `dags.persistence.*` | Dag persistence configutation | Please refer to `values.yaml` | +| `dags.gitSync.*` | Git sync configuration | Please refer to `values.yaml` | Specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. For example, diff --git a/chart/templates/_helpers.yaml b/chart/templates/_helpers.yaml index ac121a4..898924f 100644 --- a/chart/templates/_helpers.yaml +++ b/chart/templates/_helpers.yaml @@ -93,6 +93,80 @@ {{ end }} {{- end }} +{{/* Git ssh key volume */}} +{{- define "git_sync_ssh_key_volume"}} +- name: git-sync-ssh-key + secret: + secretName: {{ .Values.dags.gitSync.sshKeySecret }} + defaultMode: 288 +{{- end }} + +{{/* Git sync container */}} +{{- define "git_sync_container"}} +- name: {{ .Values.dags.gitSync.containerName }} + image: "{{ .Values.dags.gitSync.containerRepository }}:{{ .Values.dags.gitSync.containerTag }}" + env: + {{- if .Values.dags.gitSync.sshKeySecret }} + - name: GIT_SSH_KEY_FILE + value: "/etc/git-secret/ssh" + - name: GIT_SYNC_SSH + value: "true" + {{- if .Values.dags.gitSync.knownHosts }} + - name: GIT_KNOWN_HOSTS + value: "true" + - name: GIT_SSH_KNOWN_HOSTS_FILE + value: "/etc/git-secret/known_hosts" + {{- else }} + - name: GIT_KNOWN_HOSTS + value: "false" + {{- end }} + {{ else if .Values.dags.gitSync.credentialsSecret }} + - name: GIT_SYNC_USERNAME + valueFrom: + secretKeyRef: + name: {{ .Values.dags.gitSync.credentialsSecret | quote }} + key: GIT_SYNC_USERNAME + - name: GIT_SYNC_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.dags.gitSync.credentialsSecret | quote }} + key: GIT_SYNC_PASSWORD + {{- end }} + - name: GIT_SYNC_REV + value: {{ .Values.dags.gitSync.rev | quote }} + - name: GIT_SYNC_BRANCH + value: {{ .Values.dags.gitSync.branch | quote }} + - name: GIT_SYNC_REPO + value: {{ .Values.dags.gitSync.repo | quote }} + - name: GIT_SYNC_DEPTH + value: {{ .Values.dags.gitSync.depth | quote }} + - name: GIT_SYNC_ROOT + value: {{ .Values.dags.gitSync.root | quote }} + - name: GIT_SYNC_DEST + value: {{ .Values.dags.gitSync.dest | quote }} + - name: GIT_SYNC_ADD_USER + value: "true" + - name: GIT_SYNC_WAIT + value: {{ .Values.dags.gitSync.wait | quote }} + - name: GIT_SYNC_MAX_SYNC_FAILURES + value: {{ .Values.dags.gitSync.maxFailures | quote }} + volumeMounts: + - name: dags + mountPath: {{ .Values.dags.gitSync.root }} + {{- if and .Values.dags.gitSync.enabled .Values.dags.gitSync.sshKeySecret }} + - name: git-sync-ssh-key + mountPath: /etc/git-secret/ssh + readOnly: true + subPath: gitSshKey + {{- if .Values.dags.gitSync.knownHosts }} + - name: config + mountPath: /etc/git-secret/known_hosts + readOnly: true + subPath: known_hosts + {{- end }} + {{- end }} +{{- end }} + # This helper will change when customers deploy a new image. {{ define "airflow_image" -}} {{ printf "%s:%s" (.Values.images.airflow.repository | default .Values.defaultAirflowRepository) (.Values.images.airflow.tag | default .Values.defaultAirflowTag) }} @@ -185,9 +259,30 @@ log_connections = {{ .Values.pgbouncer.logConnections }} {{ (printf "%s/logs" .Values.airflowHome) | quote }} {{- end }} +{{ define "airflow_dags" -}} +{{- if .Values.dags.gitSync.enabled -}} +{{ (printf "%s/dags/%s/%s" .Values.airflowHome .Values.dags.gitSync.dest .Values.dags.gitSync.subPath ) }} +{{- else -}} +{{ (printf "%s/dags" .Values.airflowHome) }} +{{- end -}} +{{- end -}} + +{{ define "airflow_dags_volume_claim" -}} +{{- if and .Values.dags.persistence.enabled .Values.dags.persistence.existingClaim -}} +{{ .Values.dags.persistence.existingClaim }} +{{- else -}} +{{ .Release.Name }}-dags +{{- end -}} +{{- end -}} + +{{ define "airflow_dags_mount_path" -}} +{{ (printf "%s/dags" .Values.airflowHome) }} +{{- end }} + {{ define "airflow_config_path" -}} {{ (printf "%s/airflow.cfg" .Values.airflowHome) | quote }} {{- end }} + {{ define "airflow_webserver_config_path" -}} {{ (printf "%s/webserver_config.py" .Values.airflowHome) | quote }} {{- end }} diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index 9a0c000..00332b7 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -35,6 +35,7 @@ data: # These are system-specified config overrides. airflow.cfg: | [core] + dags_folder = {{ include "airflow_dags" . }} load_examples = False colored_console_log = False executor = {{ .Values.executor }} @@ -84,13 +85,42 @@ data: namespace = {{ .Release.Namespace }} airflow_configmap = {{ include "airflow_config" . }} airflow_local_settings_configmap = {{ include "airflow_config" . }} - worker_container_repository = {{ .Values.images.airflow.repository }} - worker_container_tag = {{ .Values.images.airflow.tag }} + worker_container_repository = {{ .Values.images.airflow.repository | default .Values.defaultAirflowRepository }} + worker_container_tag = {{ .Values.images.airflow.tag | default .Values.defaultAirflowTag }} worker_container_image_pull_policy = {{ .Values.images.airflow.pullPolicy }} worker_service_account_name = {{ .Release.Name }}-worker-serviceaccount image_pull_secrets = {{ template "registry_secret" . }} - dags_in_image = True + dags_in_image = {{ if or .Values.dags.gitSync.enabled .Values.dags.persistence.enabled }}False{{ else }}True{{ end }} delete_worker_pods = True + run_as_user = {{ .Values.uid }} + fs_group = {{ .Values.gid }} + {{- if or .Values.dags.gitSync.enabled .Values.dags.persistence.enabled }} + git_dags_folder_mount_point = {{ include "airflow_dags_mount_path" . }} + dags_volume_mount_point = {{ include "airflow_dags_mount_path" . }} + {{- if .Values.dags.persistence.enabled }} + dags_volume_claim = {{ .Release.Name }}-dags + dags_volume_subpath = {{.Values.dags.gitSync.dest }}/{{ .Values.dags.gitSync.subPath }} + {{- else }} + git_repo = {{ .Values.dags.gitSync.repo }} + git_branch = {{ .Values.dags.gitSync.branch }} + git_sync_rev = {{ .Values.dags.gitSync.rev }} + git_sync_depth = {{ .Values.dags.gitSync.depth }} + git_sync_root = {{ .Values.dags.gitSync.root }} + git_sync_dest = {{ .Values.dags.gitSync.dest }} + git_sync_container_repository = {{ .Values.dags.gitSync.containerRepository }} + git_sync_container_tag = {{ .Values.dags.gitSync.containerTag }} + git_sync_init_container_name = {{ .Values.dags.gitSync.containerName }} + git_sync_run_as_user = {{ .Values.uid }} + {{- if .Values.dags.gitSync.knownHosts }} + git_ssh_known_hosts_configmap_name = {{ include "airflow_config" . }} + {{- end }} + {{- if .Values.dags.gitSync.sshKeySecret }} + git_ssh_key_secret_name = {{ .Values.dags.gitSync.sshKeySecret }} + {{- else if .Values.dags.gitSync.credentialsSecret }} + git_sync_credentials_secret = {{ .Values.dags.gitSync.credentialsSecret }} + {{- end }} + {{- end }} + {{- end }} [kubernetes_secrets] AIRFLOW__CORE__SQL_ALCHEMY_CONN = {{ printf "%s=connection" (include "airflow_metadata_secret" .) }} @@ -117,3 +147,7 @@ data: airflow_local_settings.py: | {{ .Values.scheduler.airflowLocalSettings | nindent 4 }} {{- end }} +{{- if and .Values.dags.gitSync.enabled .Values.dags.gitSync.knownHosts }} + known_hosts: | + {{ .Values.dags.gitSync.knownHosts | nindent 4 }} +{{- end }} diff --git a/chart/templates/dags-persistent-volume-claim.yaml b/chart/templates/dags-persistent-volume-claim.yaml new file mode 100644 index 0000000..53ea550 --- /dev/null +++ b/chart/templates/dags-persistent-volume-claim.yaml @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{{- if and (not .Values.dags.persistence.existingClaim ) .Values.dags.persistence.enabled }} +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: {{ .Release.Name }}-dags + labels: + tier: airflow + component: dags-pvc + release: {{ .Release.Name }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service }} +spec: + accessModes: [{{ .Values.dags.persistence.accessMode | quote }}] + resources: + requests: + storage: {{ .Values.dags.persistence.size | quote }} + {{- if .Values.dags.persistence.storageClass }} + {{- if (eq "-" .Values.dags.persistence.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.dags.persistence.storageClass }}" + {{- end }} + {{- end }} +{{- end }} diff --git a/chart/templates/scheduler/scheduler-deployment.yaml b/chart/templates/scheduler/scheduler-deployment.yaml index d5c3a06..9331556 100644 --- a/chart/templates/scheduler/scheduler-deployment.yaml +++ b/chart/templates/scheduler/scheduler-deployment.yaml @@ -145,6 +145,11 @@ spec: subPath: airflow_local_settings.py readOnly: true {{- end }} +{{- if .Values.dags.gitSync.enabled }} + - name: dags + mountPath: {{ template "airflow_dags_mount_path" . }} + {{- include "git_sync_container" . | indent 8 }} +{{- end }} # Always start the garbage collector sidecar. - name: scheduler-gc image: {{ template "airflow_image" . }} @@ -177,6 +182,17 @@ spec: - name: config configMap: name: {{ template "airflow_config" . }} + {{- if .Values.dags.persistence.enabled }} + - name: dags + persistentVolumeClaim: + claimName: {{ template "airflow_dags_volume_claim" . }} + {{- else if .Values.dags.gitSync.enabled }} + - name: dags + emptyDir: {} + {{- end }} + {{- if and .Values.dags.gitSync.enabled .Values.dags.gitSync.sshKeySecret }} + {{- include "git_sync_ssh_key_volume" . | indent 8 }} + {{- end }} {{- if not $stateful }} - name: logs emptyDir: {} diff --git a/chart/templates/webserver/webserver-deployment.yaml b/chart/templates/webserver/webserver-deployment.yaml index b4c9714..9ea2bc1 100644 --- a/chart/templates/webserver/webserver-deployment.yaml +++ b/chart/templates/webserver/webserver-deployment.yaml @@ -68,6 +68,7 @@ spec: restartPolicy: Always securityContext: runAsUser: {{ .Values.uid }} + fsGroup: {{ .Values.gid }} {{- if or .Values.registry.secretName .Values.registry.connection }} imagePullSecrets: - name: {{ template "registry_secret" . }} @@ -82,6 +83,9 @@ spec: {{- include "custom_airflow_environment" . | indent 10 }} {{- include "standard_airflow_environment" . | indent 10 }} containers: +{{- if and (.Values.dags.gitSync.enabled) (not .Values.dags.persistence.enabled) }} +{{- include "git_sync_container" . | indent 8 }} +{{- end }} - name: webserver image: {{ template "airflow_image" . }} imagePullPolicy: {{ .Values.images.airflow.pullPolicy }} @@ -105,6 +109,10 @@ spec: subPath: airflow_local_settings.py readOnly: true {{- end }} +{{- if or .Values.dags.gitSync.enabled .Values.dags.persistence.enabled }} + - name: dags + mountPath: {{ template "airflow_dags_mount_path" . }} +{{- end }} {{- if .Values.webserver.extraVolumeMounts }} {{ toYaml .Values.webserver.extraVolumeMounts | indent 12 }} {{- end }} @@ -134,6 +142,17 @@ spec: - name: config configMap: name: {{ template "airflow_config" . }} + {{- if .Values.dags.persistence.enabled }} + - name: dags + persistentVolumeClaim: + claimName: {{ .Release.Name }}-dags + {{- else if .Values.dags.gitSync.enabled }} + - name: dags + emptyDir: {} + {{- if .Values.dags.gitSync.sshKeySecret }} + {{- include "git_sync_ssh_key_volume" . | indent 8 }} + {{- end }} + {{- end }} {{- if .Values.webserver.extraVolumes }} {{ toYaml .Values.webserver.extraVolumes | indent 8 }} {{- end }} diff --git a/chart/templates/workers/worker-deployment.yaml b/chart/templates/workers/worker-deployment.yaml index d5043a7..439843d 100644 --- a/chart/templates/workers/worker-deployment.yaml +++ b/chart/templates/workers/worker-deployment.yaml @@ -105,6 +105,9 @@ spec: {{- include "custom_airflow_environment" . | indent 10 }} {{- include "standard_airflow_environment" . | indent 10 }} containers: + {{- if and (.Values.dags.gitSync.enabled) (not .Values.dags.persistence.enabled) }} + {{- include "git_sync_container" . | indent 8 }} + {{- end }} - name: worker image: {{ template "airflow_image" . }} imagePullPolicy: {{ .Values.images.airflow.pullPolicy }} @@ -127,6 +130,10 @@ spec: subPath: airflow_local_settings.py readOnly: true {{- end }} +{{- if or .Values.dags.persistence.enabled .Values.dags.gitSync.enabled }} + - name: dags + mountPath: {{ template "airflow_dags_mount_path" . }} +{{- end }} env: {{- include "custom_airflow_environment" . | indent 10 }} {{- include "standard_airflow_environment" . | indent 10 }} @@ -142,6 +149,17 @@ spec: - name: config configMap: name: {{ template "airflow_config" . }} + {{- if .Values.dags.persistence.enabled }} + - name: dags + persistentVolumeClaim: + claimName: {{ template "airflow_dags_volume_claim" . }} + {{- else if .Values.dags.gitSync.enabled }} + - name: dags + emptyDir: {} + {{- if .Values.dags.gitSync.sshKeySecret }} + {{- include "git_sync_ssh_key_volume" . | indent 8 }} + {{- end }} + {{- end }} {{- if not $persistence }} - name: logs emptyDir: {} diff --git a/chart/tests/dags-persistent-volume-claim_test.yaml b/chart/tests/dags-persistent-volume-claim_test.yaml new file mode 100644 index 0000000..2e24615 --- /dev/null +++ b/chart/tests/dags-persistent-volume-claim_test.yaml @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +--- +templates: + - dags-persistent-volume-claim.yaml +tests: + - it: should not generate a document if persistence is disabled + set: + dags: + persistence: + enabled: false + asserts: + - hasDocuments: + count: 0 + - it: should not generate a document when using an existingClaim + set: + dags: + persistence: + enabled: true + existingClaim: test-claim + asserts: + - hasDocuments: + count: 0 + - it: should generate a document if persistence is enabled & not using an existingClaim + set: + dags: + persistence: + enabled: true + existingClaim: ~ + asserts: + - hasDocuments: + count: 1 + - it: should set PVC details correctly + set: + dags: + persistence: + enabled: true + size: 1G + existingClaim: ~ + storageClass: "MyStorageClass" + accessMode: ReadWriteMany + asserts: + - equal: + path: spec + value: + accessModes: ["ReadWriteMany"] + resources: + requests: + storage: 1G + storageClassName: "MyStorageClass" diff --git a/chart/tests/git-sync-scheduler_test.yaml b/chart/tests/git-sync-scheduler_test.yaml new file mode 100644 index 0000000..10d2288 --- /dev/null +++ b/chart/tests/git-sync-scheduler_test.yaml @@ -0,0 +1,135 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +--- +templates: + - scheduler/scheduler-deployment.yaml +tests: + - it: should add dags volume + set: + dags: + gitSync: + enabled: true + asserts: + - equal: + path: spec.template.spec.volumes[1].name + value: dags + - it: validate the git sync container spec + set: + dags: + gitSync: + enabled: true + containerName: git-sync-test + containerTag: test-tag + containerRepository: test-registry/test-repo + wait: 66 + maxFailures: 70 + subPath: "path1/path2" + dest: "test-dest" + root: "/git-root" + rev: HEAD + depth: 1 + repo: https://github.com/apache/airflow.git + branch: test-branch + sshKeySecret: ~ + credentialsSecret: ~ + knownHosts: ~ + persistence: + enabled: true + asserts: + - equal: + path: spec.template.spec.containers[1] + value: + name: git-sync-test + image: test-registry/test-repo:test-tag + env: + - name: GIT_SYNC_REV + value: HEAD + - name: GIT_SYNC_BRANCH + value: test-branch + - name: GIT_SYNC_REPO + value: https://github.com/apache/airflow.git + - name: GIT_SYNC_DEPTH + value: "1" + - name: GIT_SYNC_ROOT + value: /git-root + - name: GIT_SYNC_DEST + value: test-dest + - name: GIT_SYNC_ADD_USER + value: "true" + - name: GIT_SYNC_WAIT + value: "66" + - name: GIT_SYNC_MAX_SYNC_FAILURES + value: "70" + volumeMounts: + - mountPath: /git-root + name: dags + - it: validate if ssh params are added + set: + dags: + gitSync: + enabled: true + containerName: git-sync-test + sshKeySecret: ssh-secret + knownHosts: ~ + branch: test-branch + asserts: + - contains: + path: spec.template.spec.containers[1].env + content: + name: GIT_SSH_KEY_FILE + value: "/etc/git-secret/ssh" + - contains: + path: spec.template.spec.containers[1].env + content: + name: GIT_SYNC_SSH + value: "true" + - contains: + path: spec.template.spec.containers[1].env + content: + name: GIT_KNOWN_HOSTS + value: "false" + - contains: + path: spec.template.spec.volumes + content: + name: git-sync-ssh-key + secret: + secretName: ssh-secret + defaultMode: 288 + - it: should set username and pass env variables + set: + dags: + gitSync: + enabled: true + credentialsSecret: user-pass-secret + sshKeySecret: ~ + asserts: + - contains: + path: spec.template.spec.containers[1].env + content: + name: GIT_SYNC_USERNAME + valueFrom: + secretKeyRef: + name: user-pass-secret + key: GIT_SYNC_USERNAME + - contains: + path: spec.template.spec.containers[1].env + content: + name: GIT_SYNC_PASSWORD + valueFrom: + secretKeyRef: + name: user-pass-secret + key: GIT_SYNC_PASSWORD diff --git a/chart/tests/git-sync-webserver_test.yaml b/chart/tests/git-sync-webserver_test.yaml new file mode 100644 index 0000000..bdbb7c6 --- /dev/null +++ b/chart/tests/git-sync-webserver_test.yaml @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +--- +templates: + - webserver/webserver-deployment.yaml +tests: + - it: should add dags volume to the webserver if git sync & peristence is enabled + set: + dags: + gitSync: + enabled: true + persistence: + enabled: true + asserts: + - equal: + path: spec.template.spec.volumes[1].name + value: dags + - it: should add dags volume to the webserver if git sync is enabled & peristence is disabled + set: + dags: + gitSync: + enabled: true + persistence: + enabled: false + asserts: + - equal: + path: spec.template.spec.volumes[1].name + value: dags + - it: should add git sync container to webserver if persistence is not enabled, but git sync is + set: + dags: + gitSync: + enabled: true + containerName: git-sync + persistence: + enabled: false + asserts: + - equal: + path: spec.template.spec.containers[0].name + value: git-sync + - it: should not add sync container to webserver if git sync and persistence are enabled + set: + dags: + gitSync: + enabled: true + container_name: git-sync + persistence: + enabled: true + asserts: + - notEqual: + path: spec.template.spec.containers[0].name + value: git-sync diff --git a/chart/tests/git-sync-worker_test.yaml b/chart/tests/git-sync-worker_test.yaml new file mode 100644 index 0000000..847a4dc --- /dev/null +++ b/chart/tests/git-sync-worker_test.yaml @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +--- +templates: + - workers/worker-deployment.yaml +tests: + - it: should add dags volume to the worker if git sync & peristence is enabled + set: + executor: CeleryExecutor + dags: + persistence: + enabled: true + gitSync: + enabled: true + asserts: + - equal: + path: spec.template.spec.volumes[1].name + value: dags + - it: should add dags volume to the worker if git sync is enabled & peristence is disabled + set: + executor: CeleryExecutor + dags: + gitSync: + enabled: true + persistence: + enabled: false + asserts: + - equal: + path: spec.template.spec.volumes[1].name + value: dags + - it: should add git sync container to worker if persistence is not enabled, but git sync is + set: + executor: CeleryExecutor + dags: + gitSync: + enabled: true + containerName: git-sync + persistence: + enabled: false + asserts: + - equal: + path: spec.template.spec.containers[0].name + value: git-sync + - it: should not add sync container to worker if git sync and persistence are enabled + set: + executor: CeleryExecutor + dags: + gitSync: + enabled: true + containerName: git-sync + persistence: + enabled: true + asserts: + - notEqual: + path: spec.template.spec.containers[0].name + value: git-sync diff --git a/chart/values.yaml b/chart/values.yaml index 1918419..8fa7ab6 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - +--- # Default values for airflow. # This is a YAML-formatted file. # Declare variables to be passed into your templates. @@ -434,3 +434,75 @@ postgresql: enabled: true postgresqlPassword: postgres postgresqlUsername: postgres + +# Authentication backend used for the experimental API +api: + authBackend: airflow.api.auth.backend.deny_all + +# Git sync +dags: + persistence: + # Enable persistent volume for storing dags + enabled: false + # Volume size for dags + size: 1Gi + # If using a custom storageClass, pass name here + storageClassName: + # access mode of the persistent volume + accessMode: ReadWriteMany + ## the name of an existing PVC to use + existingClaim: ~ + gitSync: + enabled: false + # git repo clone url + # ssh examples ssh://[email protected]/apache/airflow.git + # [email protected]:apache/airflow.git + # https example: https://github.com/apache/airflow.git + repo: https://github.com/apache/airflow.git + branch: v1-10-stable + rev: HEAD + root: "/git" + dest: "repo" + depth: 1 + # the number of consecutive failures allowed before aborting + maxFailures: 0 + # subpath within the repo where dags are located + # should be "" if dags are at repo root + subPath: "tests/dags" + # if your repo needs a user name password + # you can load them to a k8s secret like the one below + # --- + # apiVersion: v1 + # kind: Secret + # metadata: + # name: git-credentials + # data: + # GIT_SYNC_USERNAME: <base64_encoded_git_username> + # GIT_SYNC_PASSWORD: <base64_encoded_git_password> + # and specify the name of the secret below + #credentialsSecret: git-credentials + # + # + # If you are using an ssh clone url, you can load + # the ssh private key to a k8s secret like the one below + # --- + # apiVersion: v1 + # kind: Secret + # metadata: + # name: airflow-ssh-secret + # data: + # # key needs to be gitSshKey + # gitSshKey: <base64_encoded_data> + # and specify the name of the secret below + #sshKeySecret: airflow-ssh-secret + # If you are using an ssh private key, you can additionally + # specify the content of your known_hosts file, example: + #knownHosts: | + # <host1>,<ip1> <key1> + # <host2>,<ip2> <key2> + # interval between git sync attempts in seconds + wait: 60 + # git sync image details + containerRepository: k8s.gcr.io/git-sync + containerTag: v3.1.6 + containerName: git-sync diff --git a/scripts/ci/ci_run_helm_testing.sh b/scripts/ci/ci_run_helm_testing.sh new file mode 100755 index 0000000..0a267d4 --- /dev/null +++ b/scripts/ci/ci_run_helm_testing.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +echo "Running helm tests" + +CHART_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/../../chart/" + +echo "Chart directory is $CHART_DIR" + +docker run -w /airflow-chart -v "$CHART_DIR":/airflow-chart \ + --entrypoint /bin/sh \ + aneeshkj/helm-unittest \ + -c "helm repo add stable https://kubernetes-charts.storage.googleapis.com; helm dependency update ; helm unittest ." diff --git a/tests/kubernetes/test_worker_configuration.py b/tests/kubernetes/test_worker_configuration.py index e57ae68..a94a112 100644 --- a/tests/kubernetes/test_worker_configuration.py +++ b/tests/kubernetes/test_worker_configuration.py @@ -216,6 +216,7 @@ class TestKubernetesWorkerConfiguration(unittest.TestCase): env = init_containers[0].env self.assertIn(k8s.V1EnvVar(name='GIT_SSH_KEY_FILE', value='/etc/git-secret/ssh'), env) + self.assertIn(k8s.V1EnvVar(name='GIT_SYNC_ADD_USER', value='true'), env) self.assertIn(k8s.V1EnvVar(name='GIT_KNOWN_HOSTS', value='false'), env) self.assertIn(k8s.V1EnvVar(name='GIT_SYNC_SSH', value='true'), env) @@ -236,6 +237,7 @@ class TestKubernetesWorkerConfiguration(unittest.TestCase): env = init_containers[0].env self.assertIn(k8s.V1EnvVar(name='GIT_SSH_KEY_FILE', value='/etc/git-secret/ssh'), env) + self.assertIn(k8s.V1EnvVar(name='GIT_SYNC_ADD_USER', value='true'), env) self.assertIn(k8s.V1EnvVar(name='GIT_KNOWN_HOSTS', value='true'), env) self.assertIn(k8s.V1EnvVar( name='GIT_SSH_KNOWN_HOSTS_FILE', @@ -262,6 +264,7 @@ class TestKubernetesWorkerConfiguration(unittest.TestCase): env = init_containers[0].env self.assertNotIn(k8s.V1EnvVar(name='GIT_SSH_KEY_FILE', value='/etc/git-secret/ssh'), env) + self.assertNotIn(k8s.V1EnvVar(name='GIT_SYNC_ADD_USER', value='true'), env) self.assertIn(k8s.V1EnvVar(name='GIT_SYNC_USERNAME', value='git_user'), env) self.assertIn(k8s.V1EnvVar(name='GIT_SYNC_PASSWORD', value='git_password'), env) self.assertIn(k8s.V1EnvVar(name='GIT_KNOWN_HOSTS', value='false'), env) @@ -290,6 +293,7 @@ class TestKubernetesWorkerConfiguration(unittest.TestCase): env = init_containers[0].env self.assertNotIn(k8s.V1EnvVar(name='GIT_SSH_KEY_FILE', value='/etc/git-secret/ssh'), env) + self.assertNotIn(k8s.V1EnvVar(name='GIT_SYNC_ADD_USER', value='true'), env) self.assertIn(k8s.V1EnvVar(name='GIT_SYNC_USERNAME', value='git_user'), env) self.assertIn(k8s.V1EnvVar(name='GIT_SYNC_PASSWORD', value='git_password'), env) self.assertIn(k8s.V1EnvVar(name='GIT_KNOWN_HOSTS', value='true'), env)
