Hi folks,
I am trying to create multiple instances (containers) from a single image. I
also need a Docker volume for communication between the containers.
This works perfectly fine when I run the containers
manually (from the Linux command line), but when I try to orchestrate
them through Airflow, I can't see any files inside the volume (my local host
directory).
PS: Please find the attached dag file and suggest any changes needed.
Thanks
Regards
Shikhar Sharma
from airflow import DAG
from airflow.operators.docker_operator import DockerOperator
from datetime import datetime, timedelta
# Default task arguments, passed to the DAG constructor as ``default_args``.
default_args = dict(
    owner='Shikhar Sharma',
    depends_on_past=False,
    start_date=datetime(2018, 6, 4),
)
# The DAG every task below attaches to; scheduled with the '@once' preset.
dag = DAG(
    dag_id='docker_sample',
    default_args=default_args,
    schedule_interval='@once',
)
# Task 1: run exploratory.py in the container and copy the resulting *.pkl
# files into /result, which is bind-mounted to the host directory below.
#
# The whole pipeline must be ONE script argument to `sh -c`. Previously each
# step was passed as its own argument, so the shell executed only the first
# one (`cd ...`) and treated the rest as positional parameters ($0, $1, ...);
# nothing was ever copied into the volume.
#
# Steps are chained with `&&` so a failing step fails the task instead of
# being silently skipped. `mkdir -p` is used because /result already exists
# as the mount point, and a plain `mkdir` would abort the chain.
# NOTE(review): `cd` uses the absolute path that the original `cp` source
# already referenced - confirm the repo lives at that path in the image.
t1 = DockerOperator(
    api_version='1.37',
    task_id='docker_1',
    image='model_1:python',
    command=(
        "/bin/sh -c '"
        "cd /Docker-assignment-multi-container"
        " && python exploratory.py"
        " && mkdir -p /result"
        " && cp /Docker-assignment-multi-container/*.pkl /result"
        "'"
    ),
    dag=dag,
    # host_path:container_path bind mount shared by all tasks in this DAG.
    volumes=['/home/shikhar/container_output:/result'],
)
# Task 2: update the repo, run model_1.py and copy its score file into
# /result, which is bind-mounted to the host directory below.
#
# The whole pipeline must be ONE script argument to `sh -c`. Previously each
# step was passed as its own argument, so the shell executed only the first
# one (`cd ...`) and treated the rest as positional parameters ($0, $1, ...);
# nothing was ever copied into the volume.
#
# Steps are chained with `&&` so a failing step fails the task instead of
# being silently skipped. `mkdir -p` is used because /result already exists
# as the mount point, and a plain `mkdir` would abort the chain.
# NOTE(review): `cd` uses the absolute path that the original `cp` source
# already referenced - confirm the repo lives at that path in the image.
t2 = DockerOperator(
    api_version='1.37',
    task_id='docker_2',
    image='model_1:python',
    command=(
        "/bin/sh -c '"
        "cd /Docker-assignment-multi-container"
        " && git pull"
        " && python model_1.py"
        " && mkdir -p /result"
        " && cp /Docker-assignment-multi-container/model_1_score_file.pkl /result"
        "'"
    ),
    dag=dag,
    # host_path:container_path bind mount shared by all tasks in this DAG.
    volumes=['/home/shikhar/container_output:/result'],
)
# Task 3: update the repo, run model_2.py and copy its score file into
# /result, which is bind-mounted to the host directory below.
#
# The whole pipeline must be ONE script argument to `sh -c`. Previously each
# step was passed as its own argument, so the shell executed only the first
# one (`cd ...`) and treated the rest as positional parameters ($0, $1, ...);
# nothing was ever copied into the volume.
#
# Steps are chained with `&&` so a failing step fails the task instead of
# being silently skipped. `mkdir -p` is used because /result already exists
# as the mount point, and a plain `mkdir` would abort the chain.
# NOTE(review): `cd` uses the absolute path that the original `cp` source
# already referenced - confirm the repo lives at that path in the image.
t3 = DockerOperator(
    api_version='1.37',
    task_id='docker_3',
    image='model_1:python',
    command=(
        "/bin/sh -c '"
        "cd /Docker-assignment-multi-container"
        " && git pull"
        " && python model_2.py"
        " && mkdir -p /result"
        " && cp /Docker-assignment-multi-container/model_2_score_file.pkl /result"
        "'"
    ),
    dag=dag,
    # host_path:container_path bind mount shared by all tasks in this DAG.
    volumes=['/home/shikhar/container_output:/result'],
)
# Task 4: update the repo, run output.py and copy the dataOn*.txt files into
# /result, which is bind-mounted to the host directory below.
#
# The whole pipeline must be ONE script argument to `sh -c`. Previously each
# step was passed as its own argument, so the shell executed only the first
# one (`cd ...`) and treated the rest as positional parameters ($0, $1, ...);
# nothing was ever copied into the volume.
#
# Steps are chained with `&&` so a failing step fails the task instead of
# being silently skipped. `mkdir -p` is used because /result already exists
# as the mount point, and a plain `mkdir` would abort the chain.
# NOTE(review): `cd` uses the absolute path that the original `cp` source
# already referenced - confirm the repo lives at that path in the image.
t4 = DockerOperator(
    api_version='1.37',
    task_id='docker_4',
    image='model_1:python',
    command=(
        "/bin/sh -c '"
        "cd /Docker-assignment-multi-container"
        " && git pull"
        " && python output.py"
        " && mkdir -p /result"
        " && cp /Docker-assignment-multi-container/dataOn*.txt /result"
        "'"
    ),
    dag=dag,
    # host_path:container_path bind mount shared by all tasks in this DAG.
    volumes=['/home/shikhar/container_output:/result'],
)
# Diamond-shaped graph: t1 fans out to t2 and t3, which both feed t4.
t1.set_downstream([t2, t3])
t4.set_upstream([t3, t2])