http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/28a3eb60/_modules/airflow/operators/docker_operator.html ---------------------------------------------------------------------- diff --git a/_modules/airflow/operators/docker_operator.html b/_modules/airflow/operators/docker_operator.html index 06aa65e..10ff19a 100644 --- a/_modules/airflow/operators/docker_operator.html +++ b/_modules/airflow/operators/docker_operator.html @@ -13,6 +13,8 @@ + + @@ -30,6 +32,9 @@ + <link rel="index" title="Index" + href="../../../genindex.html"/> + <link rel="search" title="Search" href="../../../search.html"/> <link rel="top" title="Airflow Documentation" href="../../../index.html"/> <link rel="up" title="Module code" href="../../index.html"/> @@ -40,6 +45,7 @@ <body class="wy-body-for-nav" role="document"> + <div class="wy-grid-for-nav"> @@ -76,7 +82,10 @@ - <ul> + + + + <ul> <li class="toctree-l1"><a class="reference internal" href="../../../project.html">Project</a></li> <li class="toctree-l1"><a class="reference internal" href="../../../license.html">License</a></li> <li class="toctree-l1"><a class="reference internal" href="../../../start.html">Quick Start</a></li> @@ -90,6 +99,8 @@ <li class="toctree-l1"><a class="reference internal" href="../../../scheduler.html">Scheduling & Triggers</a></li> <li class="toctree-l1"><a class="reference internal" href="../../../plugins.html">Plugins</a></li> <li class="toctree-l1"><a class="reference internal" href="../../../security.html">Security</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../api.html">Experimental Rest API</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../integration.html">Integration</a></li> <li class="toctree-l1"><a class="reference internal" href="../../../faq.html">FAQ</a></li> <li class="toctree-l1"><a class="reference internal" href="../../../code.html">API Reference</a></li> </ul> @@ -104,8 +115,10 @@ <nav class="wy-nav-top" 
role="navigation" aria-label="top navigation"> - <i data-toggle="wy-nav-top" class="fa fa-bars"></i> - <a href="../../../index.html">Airflow</a> + + <i data-toggle="wy-nav-top" class="fa fa-bars"></i> + <a href="../../../index.html">Airflow</a> + </nav> @@ -118,19 +131,36 @@ + + + + + + + + + + <div role="navigation" aria-label="breadcrumbs navigation"> + <ul class="wy-breadcrumbs"> - <li><a href="../../../index.html">Docs</a> »</li> - + + <li><a href="../../../index.html">Docs</a> »</li> + <li><a href="../../index.html">Module code</a> »</li> - - <li>airflow.operators.docker_operator</li> + + <li>airflow.operators.docker_operator</li> + + <li class="wy-breadcrumbs-aside"> - + </li> + </ul> + + <hr/> </div> <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> @@ -152,12 +182,13 @@ <span class="c1"># limitations under the License.</span> <span class="kn">import</span> <span class="nn">json</span> -<span class="kn">import</span> <span class="nn">logging</span> -<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="kn">import</span> <span class="n">AirflowException</span> -<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="kn">import</span> <span class="n">BaseOperator</span> -<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="kn">import</span> <span class="n">apply_defaults</span> -<span class="kn">from</span> <span class="nn">airflow.utils.file</span> <span class="kn">import</span> <span class="n">TemporaryDirectory</span> -<span class="kn">from</span> <span class="nn">docker</span> <span class="kn">import</span> <span class="n">Client</span><span class="p">,</span> <span class="n">tls</span> + +<span class="kn">from</span> <span class="nn">airflow.hooks.docker_hook</span> <span class="k">import</span> <span class="n">DockerHook</span> +<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span 
class="k">import</span> <span class="n">AirflowException</span> +<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="k">import</span> <span class="n">BaseOperator</span> +<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="k">import</span> <span class="n">apply_defaults</span> +<span class="kn">from</span> <span class="nn">airflow.utils.file</span> <span class="k">import</span> <span class="n">TemporaryDirectory</span> +<span class="kn">from</span> <span class="nn">docker</span> <span class="k">import</span> <span class="n">Client</span><span class="p">,</span> <span class="n">tls</span> <span class="kn">import</span> <span class="nn">ast</span> @@ -169,9 +200,14 @@ <span class="sd"> that together exceed the default disk size of 10GB in a container. The path to the mounted</span> <span class="sd"> directory can be accessed via the environment variable ``AIRFLOW_TMP_DIR``.</span> +<span class="sd"> If a login to a private registry is required prior to pulling the image, a</span> +<span class="sd"> Docker connection needs to be configured in Airflow and the connection ID</span> +<span class="sd"> be provided with the parameter ``docker_conn_id``.</span> + <span class="sd"> :param image: Docker image from which to create the container.</span> <span class="sd"> :type image: str</span> -<span class="sd"> :param api_version: Remote API version.</span> +<span class="sd"> :param api_version: Remote API version. 
Set to ``auto`` to automatically</span> +<span class="sd"> detect the server's version.</span> <span class="sd"> :type api_version: str</span> <span class="sd"> :param command: Command to be run in the container.</span> <span class="sd"> :type command: str or list</span> @@ -180,10 +216,11 @@ <span class="sd"> https://docs.docker.com/engine/reference/run/#cpu-share-constraint</span> <span class="sd"> :type cpus: float</span> <span class="sd"> :param docker_url: URL of the host running the docker daemon.</span> +<span class="sd"> Default is unix://var/run/docker.sock</span> <span class="sd"> :type docker_url: str</span> <span class="sd"> :param environment: Environment variables to set in the container.</span> <span class="sd"> :type environment: dict</span> -<span class="sd"> :param force_pull: Pull the docker image on every run.</span> +<span class="sd"> :param force_pull: Pull the docker image on every run. Default is false.</span> <span class="sd"> :type force_pull: bool</span> <span class="sd"> :param mem_limit: Maximum amount of memory the container can use. Either a float value, which</span> <span class="sd"> represents the limit in bytes, or a string like ``128m`` or ``1g``.</span> @@ -209,11 +246,16 @@ <span class="sd"> :type user: int or str</span> <span class="sd"> :param volumes: List of volumes to mount into the container, e.g.</span> <span class="sd"> ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.</span> +<span class="sd"> :param working_dir: Working directory to set on the container (equivalent to the -w switch</span> +<span class="sd"> the docker client)</span> +<span class="sd"> :type working_dir: str</span> <span class="sd"> :param xcom_push: Does the stdout will be pushed to the next step using XCom.</span> <span class="sd"> The default is False.</span> <span class="sd"> :type xcom_push: bool</span> <span class="sd"> :param xcom_all: Push all the stdout or just the last line. 
The default is False (last line).</span> <span class="sd"> :type xcom_all: bool</span> +<span class="sd"> :param docker_conn_id: ID of the Airflow connection to use</span> +<span class="sd"> :type docker_conn_id: str</span> <span class="sd"> """</span> <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'command'</span><span class="p">,)</span> <span class="n">template_ext</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'.sh'</span><span class="p">,</span> <span class="s1">'.bash'</span><span class="p">,)</span> @@ -222,28 +264,30 @@ <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span> <span class="bp">self</span><span class="p">,</span> <span class="n">image</span><span class="p">,</span> - <span class="n">api_version</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> - <span class="n">command</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> + <span class="n">api_version</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">command</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">cpus</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">docker_url</span><span class="o">=</span><span class="s1">'unix://var/run/docker.sock'</span><span class="p">,</span> - <span class="n">environment</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> - <span class="n">force_pull</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> - <span class="n">mem_limit</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> - <span class="n">network_mode</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> - <span class="n">tls_ca_cert</span><span 
class="o">=</span><span class="bp">None</span><span class="p">,</span> - <span class="n">tls_client_cert</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> - <span class="n">tls_client_key</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> - <span class="n">tls_hostname</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> - <span class="n">tls_ssl_version</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> + <span class="n">environment</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">force_pull</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> + <span class="n">mem_limit</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">network_mode</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">tls_ca_cert</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">tls_client_cert</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">tls_client_key</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">tls_hostname</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">tls_ssl_version</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">tmp_dir</span><span class="o">=</span><span class="s1">'/tmp/airflow'</span><span class="p">,</span> - <span class="n">user</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> - <span class="n">volumes</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> - <span class="n">xcom_push</span><span class="o">=</span><span class="bp">False</span><span 
class="p">,</span> - <span class="n">xcom_all</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> + <span class="n">user</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">volumes</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">working_dir</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="n">xcom_push</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> + <span class="n">xcom_all</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> + <span class="n">docker_conn_id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> - <span class="nb">super</span><span class="p">(</span><span class="n">DockerOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> + <span class="nb">super</span><span class="p">(</span><span class="n">DockerOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">api_version</span> <span class="o">=</span> <span class="n">api_version</span> <span class="bp">self</span><span class="o">.</span><span class="n">command</span> <span 
class="o">=</span> <span class="n">command</span> <span class="bp">self</span><span class="o">.</span><span class="n">cpus</span> <span class="o">=</span> <span class="n">cpus</span> @@ -261,27 +305,35 @@ <span class="bp">self</span><span class="o">.</span><span class="n">tmp_dir</span> <span class="o">=</span> <span class="n">tmp_dir</span> <span class="bp">self</span><span class="o">.</span><span class="n">user</span> <span class="o">=</span> <span class="n">user</span> <span class="bp">self</span><span class="o">.</span><span class="n">volumes</span> <span class="o">=</span> <span class="n">volumes</span> <span class="ow">or</span> <span class="p">[]</span> - <span class="bp">self</span><span class="o">.</span><span class="n">xcom_push</span> <span class="o">=</span> <span class="n">xcom_push</span> + <span class="bp">self</span><span class="o">.</span><span class="n">working_dir</span> <span class="o">=</span> <span class="n">working_dir</span> + <span class="bp">self</span><span class="o">.</span><span class="n">xcom_push_flag</span> <span class="o">=</span> <span class="n">xcom_push</span> <span class="bp">self</span><span class="o">.</span><span class="n">xcom_all</span> <span class="o">=</span> <span class="n">xcom_all</span> + <span class="bp">self</span><span class="o">.</span><span class="n">docker_conn_id</span> <span class="o">=</span> <span class="n">docker_conn_id</span> - <span class="bp">self</span><span class="o">.</span><span class="n">cli</span> <span class="o">=</span> <span class="bp">None</span> - <span class="bp">self</span><span class="o">.</span><span class="n">container</span> <span class="o">=</span> <span class="bp">None</span> + <span class="bp">self</span><span class="o">.</span><span class="n">cli</span> <span class="o">=</span> <span class="kc">None</span> + <span class="bp">self</span><span class="o">.</span><span class="n">container</span> <span class="o">=</span> <span class="kc">None</span> + + <span class="k">def</span> <span 
class="nf">get_hook</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="k">return</span> <span class="n">DockerHook</span><span class="p">(</span> + <span class="n">docker_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">docker_conn_id</span><span class="p">,</span> + <span class="n">base_url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">base_url</span><span class="p">,</span> + <span class="n">version</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">api_version</span><span class="p">,</span> + <span class="n">tls</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__get_tls_config</span><span class="p">()</span> + <span class="p">)</span> <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span> - <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Starting docker container from image '</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">image</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Starting docker container from image </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">image</span><span class="p">)</span> - <span class="n">tls_config</span> <span class="o">=</span> <span class="bp">None</span> - <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_ca_cert</span> <span 
class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_cert</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_key</span><span class="p">:</span> - <span class="n">tls_config</span> <span class="o">=</span> <span class="n">tls</span><span class="o">.</span><span class="n">TLSConfig</span><span class="p">(</span> - <span class="n">ca_cert</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_ca_cert</span><span class="p">,</span> - <span class="n">client_cert</span><span class="o">=</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_client_cert</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_key</span><span class="p">),</span> - <span class="n">verify</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> - <span class="n">ssl_version</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_ssl_version</span><span class="p">,</span> - <span class="n">assert_hostname</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_hostname</span> - <span class="p">)</span> - <span class="bp">self</span><span class="o">.</span><span class="n">docker_url</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">docker_url</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'tcp://'</span><span class="p">,</span> <span class="s1">'https://'</span><span class="p">)</span> + <span class="n">tls_config</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__get_tls_config</span><span class="p">()</span> - <span class="bp">self</span><span class="o">.</span><span 
class="n">cli</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span><span class="n">base_url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">docker_url</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">api_version</span><span class="p">,</span> <span class="n">tls</span><span class="o">=</span><span class="n">tls_config</span><span class="p">)</span> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">docker_conn_id</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">cli</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_hook</span><span class="p">()</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span> + <span class="k">else</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">cli</span> <span class="o">=</span> <span class="n">Client</span><span class="p">(</span> + <span class="n">base_url</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">docker_url</span><span class="p">,</span> + <span class="n">version</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">api_version</span><span class="p">,</span> + <span class="n">tls</span><span class="o">=</span><span class="n">tls_config</span> + <span class="p">)</span> <span class="k">if</span> <span class="s1">':'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">image</span><span class="p">:</span> <span class="n">image</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">image</span> <span 
class="o">+</span> <span class="s1">':latest'</span> @@ -289,16 +341,16 @@ <span class="n">image</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">image</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">force_pull</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">images</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">image</span><span class="p">))</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> - <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Pulling docker image '</span> <span class="o">+</span> <span class="n">image</span><span class="p">)</span> - <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">pull</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="n">stream</span><span class="o">=</span><span class="bp">True</span><span class="p">):</span> - <span class="n">output</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">l</span><span class="p">)</span> - <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"{}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">output</span><span class="p">[</span><span class="s1">'status'</span><span class="p">]))</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span 
class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Pulling docker image </span><span class="si">%s</span><span class="s1">'</span><span class="p">,</span> <span class="n">image</span><span class="p">)</span> + <span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">pull</span><span class="p">(</span><span class="n">image</span><span class="p">,</span> <span class="n">stream</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> + <span class="n">output</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">l</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">))</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"</span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">output</span><span class="p">[</span><span class="s1">'status'</span><span class="p">])</span> <span class="n">cpu_shares</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="nb">round</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cpus</span> <span class="o">*</span> <span class="mi">1024</span><span class="p">))</span> <span class="k">with</span> <span class="n">TemporaryDirectory</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="s1">'airflowtmp'</span><span class="p">)</span> <span class="k">as</span> <span class="n">host_tmp_dir</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">environment</span><span class="p">[</span><span class="s1">'AIRFLOW_TMP_DIR'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp_dir</span> - <span class="bp">self</span><span class="o">.</span><span class="n">volumes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">'{0}:{1}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">host_tmp_dir</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp_dir</span><span class="p">))</span> + <span class="bp">self</span><span class="o">.</span><span class="n">volumes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">'</span><span class="si">{0}</span><span class="s1">:</span><span class="si">{1}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">host_tmp_dir</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmp_dir</span><span class="p">))</span> <span class="bp">self</span><span class="o">.</span><span class="n">container</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">create_container</span><span class="p">(</span> <span class="n">command</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">get_command</span><span class="p">(),</span> @@ -308,35 +360,55 @@ <span class="n">network_mode</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">network_mode</span><span class="p">),</span> <span class="n">image</span><span class="o">=</span><span class="n">image</span><span class="p">,</span> <span class="n">mem_limit</span><span 
class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">mem_limit</span><span class="p">,</span> - <span class="n">user</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">user</span> + <span class="n">user</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">user</span><span class="p">,</span> + <span class="n">working_dir</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">working_dir</span> <span class="p">)</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">start</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">])</span> <span class="n">line</span> <span class="o">=</span> <span class="s1">''</span> - <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">logs</span><span class="p">(</span><span class="n">container</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">],</span> <span class="n">stream</span><span class="o">=</span><span class="bp">True</span><span class="p">):</span> - <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"{}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()))</span> + <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="bp">self</span><span 
class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">logs</span><span class="p">(</span><span class="n">container</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">],</span> <span class="n">stream</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> + <span class="n">line</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> + <span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">line</span><span class="p">,</span> <span class="s1">'decode'</span><span class="p">):</span> + <span class="n">line</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">line</span><span class="p">)</span> <span class="n">exit_code</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">wait</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">])</span> <span class="k">if</span> <span class="n">exit_code</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span> <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s1">'docker container failed'</span><span class="p">)</span> - <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span 
class="n">xcom_push</span><span class="p">:</span> - <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">logs</span><span class="p">(</span><span class="n">container</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">])</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">xcom_all</span> <span class="k">else</span> <span class="nb">str</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">xcom_push_flag</span><span class="p">:</span> + <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">logs</span><span class="p">(</span><span class="n">container</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">])</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">xcom_all</span> <span class="k">else</span> <span class="nb">str</span><span class="p">(</span><span class="n">line</span><span class="p">)</span> <span class="k">def</span> <span class="nf">get_command</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> - <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">command</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">command</span><span 
class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'['</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">command</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">command</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'['</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="n">commands</span> <span class="o">=</span> <span class="n">ast</span><span class="o">.</span><span class="n">literal_eval</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">command</span><span class="p">)</span> <span class="k">else</span><span class="p">:</span> <span class="n">commands</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">command</span> <span class="k">return</span> <span class="n">commands</span> <span class="k">def</span> <span class="nf">on_kill</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> - <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span> - <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Stopping docker container'</span><span class="p">)</span> - <span class="bp">self</span><span class="o">.</span><span 
class="n">cli</span><span class="o">.</span><span class="n">stop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">])</span></div> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cli</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Stopping docker container'</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">cli</span><span class="o">.</span><span class="n">stop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">container</span><span class="p">[</span><span class="s1">'Id'</span><span class="p">])</span> + + <span class="k">def</span> <span class="nf">__get_tls_config</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span> + <span class="n">tls_config</span> <span class="o">=</span> <span class="kc">None</span> + <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_ca_cert</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_cert</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_key</span><span class="p">:</span> + <span class="n">tls_config</span> <span class="o">=</span> <span class="n">tls</span><span class="o">.</span><span class="n">TLSConfig</span><span class="p">(</span> + <span class="n">ca_cert</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_ca_cert</span><span class="p">,</span> + <span 
class="n">client_cert</span><span class="o">=</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_client_cert</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tls_client_key</span><span class="p">),</span> + <span class="n">verify</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> + <span class="n">ssl_version</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_ssl_version</span><span class="p">,</span> + <span class="n">assert_hostname</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tls_hostname</span> + <span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">docker_url</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">docker_url</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'tcp://'</span><span class="p">,</span> <span class="s1">'https://'</span><span class="p">)</span> + <span class="k">return</span> <span class="n">tls_config</span></div> </pre></div> </div> + <div class="articleComments"> + + </div> </div> <footer> @@ -369,7 +441,8 @@ VERSION:'', COLLAPSE_INDEX:false, FILE_SUFFIX:'.html', - HAS_SOURCE: true + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt' }; </script> <script type="text/javascript" src="../../../_static/jquery.js"></script>
http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/28a3eb60/_modules/airflow/operators/redshift_to_s3_operator.html ---------------------------------------------------------------------- diff --git a/_modules/airflow/operators/redshift_to_s3_operator.html b/_modules/airflow/operators/redshift_to_s3_operator.html new file mode 100644 index 0000000..6b8e248 --- /dev/null +++ b/_modules/airflow/operators/redshift_to_s3_operator.html @@ -0,0 +1,337 @@ + + +<!DOCTYPE html> +<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> +<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> +<head> + <meta charset="utf-8"> + + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <title>airflow.operators.redshift_to_s3_operator — Airflow Documentation</title> + + + + + + + + + + + + + + + + + + <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" /> + + + + + + <link rel="index" title="Index" + href="../../../genindex.html"/> + <link rel="search" title="Search" href="../../../search.html"/> + <link rel="top" title="Airflow Documentation" href="../../../index.html"/> + <link rel="up" title="Module code" href="../../index.html"/> + + + <script src="../../../_static/js/modernizr.min.js"></script> + +</head> + +<body class="wy-body-for-nav" role="document"> + + + <div class="wy-grid-for-nav"> + + + <nav data-toggle="wy-nav-shift" class="wy-nav-side"> + <div class="wy-side-scroll"> + <div class="wy-side-nav-search"> + + + + <a href="../../../index.html" class="icon icon-home"> Airflow + + + + </a> + + + + + + + +<div role="search"> + <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get"> + <input type="text" name="q" placeholder="Search docs" /> + <input type="hidden" name="check_keywords" value="yes" /> + <input type="hidden" name="area" value="default" /> + </form> +</div> + + + </div> + + <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" 
aria-label="main navigation"> + + + + + + + <ul> +<li class="toctree-l1"><a class="reference internal" href="../../../project.html">Project</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../license.html">License</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../start.html">Quick Start</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../installation.html">Installation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../tutorial.html">Tutorial</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../configuration.html">Configuration</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../ui.html">UI / Screenshots</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../concepts.html">Concepts</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../profiling.html">Data Profiling</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../cli.html">Command Line Interface</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../scheduler.html">Scheduling & Triggers</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../plugins.html">Plugins</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../security.html">Security</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../api.html">Experimental Rest API</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../integration.html">Integration</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../faq.html">FAQ</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../code.html">API Reference</a></li> +</ul> + + + + </div> + </div> + </nav> + + <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> + + + <nav class="wy-nav-top" role="navigation" 
aria-label="top navigation"> + + <i data-toggle="wy-nav-top" class="fa fa-bars"></i> + <a href="../../../index.html">Airflow</a> + + </nav> + + + + <div class="wy-nav-content"> + <div class="rst-content"> + + + + + + + + + + + + + + + + +<div role="navigation" aria-label="breadcrumbs navigation"> + + <ul class="wy-breadcrumbs"> + + <li><a href="../../../index.html">Docs</a> »</li> + + <li><a href="../../index.html">Module code</a> »</li> + + <li>airflow.operators.redshift_to_s3_operator</li> + + + <li class="wy-breadcrumbs-aside"> + + + + </li> + + </ul> + + + <hr/> +</div> + <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> + <div itemprop="articleBody"> + + <h1>Source code for airflow.operators.redshift_to_s3_operator</h1><div class="highlight"><pre> +<span></span><span class="c1"># -*- coding: utf-8 -*-</span> +<span class="c1">#</span> +<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span> +<span class="c1"># you may not use this file except in compliance with the License.</span> +<span class="c1"># You may obtain a copy of the License at</span> +<span class="c1">#</span> +<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span> +<span class="c1">#</span> +<span class="c1"># Unless required by applicable law or agreed to in writing, software</span> +<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span> +<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> +<span class="c1"># See the License for the specific language governing permissions and</span> +<span class="c1"># limitations under the License.</span> +<span class="kn">from</span> <span class="nn">airflow.hooks.postgres_hook</span> <span class="k">import</span> <span class="n">PostgresHook</span> +<span class="kn">from</span> <span class="nn">airflow.hooks.S3_hook</span> <span class="k">import</span> <span class="n">S3Hook</span> 
+<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="k">import</span> <span class="n">BaseOperator</span> +<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="k">import</span> <span class="n">apply_defaults</span> + + +<div class="viewcode-block" id="RedshiftToS3Transfer"><a class="viewcode-back" href="../../../integration.html#airflow.operators.redshift_to_s3_operator.RedshiftToS3Transfer">[docs]</a><span class="k">class</span> <span class="nc">RedshiftToS3Transfer</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Executes an UNLOAD command to s3 as a CSV with headers</span> +<span class="sd"> :param schema: reference to a specific schema in redshift database</span> +<span class="sd"> :type schema: string</span> +<span class="sd"> :param table: reference to a specific table in redshift database</span> +<span class="sd"> :type table: string</span> +<span class="sd"> :param s3_bucket: reference to a specific S3 bucket</span> +<span class="sd"> :type s3_bucket: string</span> +<span class="sd"> :param s3_key: reference to a specific S3 key</span> +<span class="sd"> :type s3_key: string</span> +<span class="sd"> :param redshift_conn_id: reference to a specific redshift database</span> +<span class="sd"> :type redshift_conn_id: string</span> +<span class="sd"> :param aws_conn_id: reference to a specific S3 connection</span> +<span class="sd"> :type aws_conn_id: string</span> +<span class="sd"> :param options: reference to a list of UNLOAD options</span> +<span class="sd"> :type options: list</span> +<span class="sd"> """</span> + + <span class="n">template_fields</span> <span class="o">=</span> <span class="p">()</span> + <span class="n">template_ext</span> <span class="o">=</span> <span class="p">()</span> + <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">'#ededed'</span> + + <span 
class="nd">@apply_defaults</span> + <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span> + <span class="bp">self</span><span class="p">,</span> + <span class="n">schema</span><span class="p">,</span> + <span class="n">table</span><span class="p">,</span> + <span class="n">s3_bucket</span><span class="p">,</span> + <span class="n">s3_key</span><span class="p">,</span> + <span class="n">redshift_conn_id</span><span class="o">=</span><span class="s1">'redshift_default'</span><span class="p">,</span> + <span class="n">aws_conn_id</span><span class="o">=</span><span class="s1">'aws_default'</span><span class="p">,</span> + <span class="n">unload_options</span><span class="o">=</span><span class="nb">tuple</span><span class="p">(),</span> + <span class="n">autocommit</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> + <span class="n">parameters</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> + <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> + <span class="nb">super</span><span class="p">(</span><span class="n">RedshiftToS3Transfer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">schema</span> <span class="o">=</span> <span class="n">schema</span> + <span class="bp">self</span><span class="o">.</span><span class="n">table</span> <span class="o">=</span> <span class="n">table</span> + <span class="bp">self</span><span class="o">.</span><span class="n">s3_bucket</span> <span class="o">=</span> <span class="n">s3_bucket</span> + <span 
class="bp">self</span><span class="o">.</span><span class="n">s3_key</span> <span class="o">=</span> <span class="n">s3_key</span> + <span class="bp">self</span><span class="o">.</span><span class="n">redshift_conn_id</span> <span class="o">=</span> <span class="n">redshift_conn_id</span> + <span class="bp">self</span><span class="o">.</span><span class="n">aws_conn_id</span> <span class="o">=</span> <span class="n">aws_conn_id</span> + <span class="bp">self</span><span class="o">.</span><span class="n">unload_options</span> <span class="o">=</span> <span class="n">unload_options</span> + <span class="bp">self</span><span class="o">.</span><span class="n">autocommit</span> <span class="o">=</span> <span class="n">autocommit</span> + <span class="bp">self</span><span class="o">.</span><span class="n">parameters</span> <span class="o">=</span> <span class="n">parameters</span> + + <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span> + <span class="bp">self</span><span class="o">.</span><span class="n">hook</span> <span class="o">=</span> <span class="n">PostgresHook</span><span class="p">(</span><span class="n">postgres_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">redshift_conn_id</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">s3</span> <span class="o">=</span> <span class="n">S3Hook</span><span class="p">(</span><span class="n">aws_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">aws_conn_id</span><span class="p">)</span> + <span class="n">a_key</span><span class="p">,</span> <span class="n">s_key</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3</span><span class="o">.</span><span 
class="n">get_credentials</span><span class="p">()</span> + <span class="n">unload_options</span> <span class="o">=</span> <span class="s1">'</span><span class="se">\n\t\t\t</span><span class="s1">'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">unload_options</span><span class="p">)</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Retrieving headers from </span><span class="si">%s</span><span class="s2">.</span><span class="si">%s</span><span class="s2">..."</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="p">)</span> + + <span class="n">columns_query</span> <span class="o">=</span> <span class="s2">"""SELECT column_name</span> +<span class="s2"> FROM information_schema.columns</span> +<span class="s2"> WHERE table_schema = '</span><span class="si">{0}</span><span class="s2">'</span> +<span class="s2"> AND table_name = '</span><span class="si">{1}</span><span class="s2">'</span> +<span class="s2"> ORDER BY ordinal_position</span> +<span class="s2"> """</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="p">)</span> + + <span class="n">cursor</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hook</span><span class="o">.</span><span class="n">get_conn</span><span class="p">()</span><span class="o">.</span><span class="n">cursor</span><span class="p">()</span> + <span 
class="n">cursor</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span><span class="n">columns_query</span><span class="p">)</span> + <span class="n">rows</span> <span class="o">=</span> <span class="n">cursor</span><span class="o">.</span><span class="n">fetchall</span><span class="p">()</span> + <span class="n">columns</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">row</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">rows</span><span class="p">)</span> + <span class="n">column_names</span> <span class="o">=</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">c</span><span class="p">:</span> <span class="s2">"</span><span class="se">\\</span><span class="s2">'</span><span class="si">{0}</span><span class="se">\\</span><span class="s2">'"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">c</span><span class="p">),</span> <span class="n">columns</span><span class="p">))</span> + <span class="n">column_castings</span> <span class="o">=</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">c</span><span class="p">:</span> <span class="s2">"CAST(</span><span class="si">{0}</span><span class="s2"> AS text) AS </span><span class="si">{0}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">c</span><span class="p">),</span> + <span class="n">columns</span><span class="p">))</span> + + <span class="n">unload_query</span> <span class="o">=</span> 
<span class="s2">"""</span> +<span class="s2"> UNLOAD ('SELECT </span><span class="si">{0}</span><span class="s2"></span> +<span class="s2"> UNION ALL</span> +<span class="s2"> SELECT </span><span class="si">{1}</span><span class="s2"> FROM </span><span class="si">{2}</span><span class="s2">.</span><span class="si">{3}</span><span class="s2"></span> +<span class="s2"> ORDER BY 1 DESC')</span> +<span class="s2"> TO 's3://</span><span class="si">{4}</span><span class="s2">/</span><span class="si">{5}</span><span class="s2">/</span><span class="si">{3}</span><span class="s2">_'</span> +<span class="s2"> with</span> +<span class="s2"> credentials 'aws_access_key_id=</span><span class="si">{6}</span><span class="s2">;aws_secret_access_key=</span><span class="si">{7}</span><span class="s2">'</span> +<span class="s2"> </span><span class="si">{8}</span><span class="s2">;</span> +<span class="s2"> """</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">column_names</span><span class="p">,</span> <span class="n">column_castings</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">schema</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">table</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">s3_bucket</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">s3_key</span><span class="p">,</span> <span class="n">a_key</span><span class="p">,</span> <span class="n">s_key</span><span class="p">,</span> <span class="n">unload_options</span><span class="p">)</span> + + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Executing UNLOAD command...'</span><span class="p">)</span> + <span class="bp">self</span><span 
class="o">.</span><span class="n">hook</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">unload_query</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">autocommit</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"UNLOAD command complete..."</span><span class="p">)</span></div> +</pre></div> + + </div> + <div class="articleComments"> + + </div> + </div> + <footer> + + + <hr/> + + <div role="contentinfo"> + <p> + + </p> + </div> + Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. + +</footer> + + </div> + </div> + + </section> + + </div> + + + + + + <script type="text/javascript"> + var DOCUMENTATION_OPTIONS = { + URL_ROOT:'../../../', + VERSION:'', + COLLAPSE_INDEX:false, + FILE_SUFFIX:'.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt' + }; + </script> + <script type="text/javascript" src="../../../_static/jquery.js"></script> + <script type="text/javascript" src="../../../_static/underscore.js"></script> + <script type="text/javascript" src="../../../_static/doctools.js"></script> + + + + + + <script type="text/javascript" src="../../../_static/js/theme.js"></script> + + + + + <script type="text/javascript"> + jQuery(function () { + SphinxRtdTheme.StickyNav.enable(); + }); + </script> + + +</body> +</html> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-airflow-site/blob/28a3eb60/_modules/airflow/operators/s3_file_transform_operator.html ---------------------------------------------------------------------- diff --git a/_modules/airflow/operators/s3_file_transform_operator.html b/_modules/airflow/operators/s3_file_transform_operator.html new file 
mode 100644 index 0000000..8db7bc2 --- /dev/null +++ b/_modules/airflow/operators/s3_file_transform_operator.html @@ -0,0 +1,342 @@ + + +<!DOCTYPE html> +<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> +<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> +<head> + <meta charset="utf-8"> + + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + + <title>airflow.operators.s3_file_transform_operator — Airflow Documentation</title> + + + + + + + + + + + + + + + + + + <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" /> + + + + + + <link rel="index" title="Index" + href="../../../genindex.html"/> + <link rel="search" title="Search" href="../../../search.html"/> + <link rel="top" title="Airflow Documentation" href="../../../index.html"/> + <link rel="up" title="Module code" href="../../index.html"/> + + + <script src="../../../_static/js/modernizr.min.js"></script> + +</head> + +<body class="wy-body-for-nav" role="document"> + + + <div class="wy-grid-for-nav"> + + + <nav data-toggle="wy-nav-shift" class="wy-nav-side"> + <div class="wy-side-scroll"> + <div class="wy-side-nav-search"> + + + + <a href="../../../index.html" class="icon icon-home"> Airflow + + + + </a> + + + + + + + +<div role="search"> + <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get"> + <input type="text" name="q" placeholder="Search docs" /> + <input type="hidden" name="check_keywords" value="yes" /> + <input type="hidden" name="area" value="default" /> + </form> +</div> + + + </div> + + <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> + + + + + + + <ul> +<li class="toctree-l1"><a class="reference internal" href="../../../project.html">Project</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../license.html">License</a></li> +<li class="toctree-l1"><a class="reference internal" 
href="../../../start.html">Quick Start</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../installation.html">Installation</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../tutorial.html">Tutorial</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../configuration.html">Configuration</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../ui.html">UI / Screenshots</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../concepts.html">Concepts</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../profiling.html">Data Profiling</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../cli.html">Command Line Interface</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../scheduler.html">Scheduling & Triggers</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../plugins.html">Plugins</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../security.html">Security</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../api.html">Experimental Rest API</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../integration.html">Integration</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../faq.html">FAQ</a></li> +<li class="toctree-l1"><a class="reference internal" href="../../../code.html">API Reference</a></li> +</ul> + + + + </div> + </div> + </nav> + + <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> + + + <nav class="wy-nav-top" role="navigation" aria-label="top navigation"> + + <i data-toggle="wy-nav-top" class="fa fa-bars"></i> + <a href="../../../index.html">Airflow</a> + + </nav> + + + + <div class="wy-nav-content"> + <div class="rst-content"> + + + + + + + + + + + + + + + + +<div role="navigation" aria-label="breadcrumbs navigation"> + + <ul 
class="wy-breadcrumbs"> + + <li><a href="../../../index.html">Docs</a> »</li> + + <li><a href="../../index.html">Module code</a> »</li> + + <li>airflow.operators.s3_file_transform_operator</li> + + + <li class="wy-breadcrumbs-aside"> + + + + </li> + + </ul> + + + <hr/> +</div> + <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> + <div itemprop="articleBody"> + + <h1>Source code for airflow.operators.s3_file_transform_operator</h1><div class="highlight"><pre> +<span></span><span class="c1"># -*- coding: utf-8 -*-</span> +<span class="c1">#</span> +<span class="c1"># Licensed under the Apache License, Version 2.0 (the "License");</span> +<span class="c1"># you may not use this file except in compliance with the License.</span> +<span class="c1"># You may obtain a copy of the License at</span> +<span class="c1">#</span> +<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span> +<span class="c1">#</span> +<span class="c1"># Unless required by applicable law or agreed to in writing, software</span> +<span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span> +<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> +<span class="c1"># See the License for the specific language governing permissions and</span> +<span class="c1"># limitations under the License.</span> + +<span class="kn">from</span> <span class="nn">tempfile</span> <span class="k">import</span> <span class="n">NamedTemporaryFile</span> +<span class="kn">import</span> <span class="nn">subprocess</span> + +<span class="kn">from</span> <span class="nn">airflow.exceptions</span> <span class="k">import</span> <span class="n">AirflowException</span> +<span class="kn">from</span> <span class="nn">airflow.hooks.S3_hook</span> <span class="k">import</span> <span class="n">S3Hook</span> +<span class="kn">from</span> <span class="nn">airflow.models</span> <span class="k">import</span> 
<span class="n">BaseOperator</span> +<span class="kn">from</span> <span class="nn">airflow.utils.decorators</span> <span class="k">import</span> <span class="n">apply_defaults</span> + + +<div class="viewcode-block" id="S3FileTransformOperator"><a class="viewcode-back" href="../../../integration.html#airflow.operators.s3_file_transform_operator.S3FileTransformOperator">[docs]</a><span class="k">class</span> <span class="nc">S3FileTransformOperator</span><span class="p">(</span><span class="n">BaseOperator</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> Copies data from a source S3 location to a temporary location on the</span> +<span class="sd"> local filesystem. Runs a transformation on this file as specified by</span> +<span class="sd"> the transformation script and uploads the output to a destination S3</span> +<span class="sd"> location.</span> + +<span class="sd"> The locations of the source and the destination files in the local</span> +<span class="sd"> filesystem is provided as an first and second arguments to the</span> +<span class="sd"> transformation script. The transformation script is expected to read the</span> +<span class="sd"> data from source , transform it and write the output to the local</span> +<span class="sd"> destination file. 
The operator then takes over control and uploads the</span> +<span class="sd"> local destination file to S3.</span> + +<span class="sd"> :param source_s3_key: The key to be retrieved from S3</span> +<span class="sd"> :type source_s3_key: str</span> +<span class="sd"> :param source_aws_conn_id: source s3 connection</span> +<span class="sd"> :type source_aws_conn_id: str</span> +<span class="sd"> :param dest_s3_key: The key to be written to S3</span> +<span class="sd"> :type dest_s3_key: str</span> +<span class="sd"> :param dest_aws_conn_id: destination s3 connection</span> +<span class="sd"> :type dest_aws_conn_id: str</span> +<span class="sd"> :param replace: Replace dest S3 key if it already exists</span> +<span class="sd"> :type replace: bool</span> +<span class="sd"> :param transform_script: location of the executable transformation script</span> +<span class="sd"> :type transform_script: str</span> +<span class="sd"> """</span> + + <span class="n">template_fields</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'source_s3_key'</span><span class="p">,</span> <span class="s1">'dest_s3_key'</span><span class="p">)</span> + <span class="n">template_ext</span> <span class="o">=</span> <span class="p">()</span> + <span class="n">ui_color</span> <span class="o">=</span> <span class="s1">'#f9c915'</span> + + <span class="nd">@apply_defaults</span> + <span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span> + <span class="bp">self</span><span class="p">,</span> + <span class="n">source_s3_key</span><span class="p">,</span> + <span class="n">dest_s3_key</span><span class="p">,</span> + <span class="n">transform_script</span><span class="p">,</span> + <span class="n">source_aws_conn_id</span><span class="o">=</span><span class="s1">'aws_default'</span><span class="p">,</span> + <span class="n">dest_aws_conn_id</span><span class="o">=</span><span class="s1">'aws_default'</span><span class="p">,</span> + <span 
class="n">replace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> + <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> + <span class="nb">super</span><span class="p">(</span><span class="n">S3FileTransformOperator</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">source_s3_key</span> <span class="o">=</span> <span class="n">source_s3_key</span> + <span class="bp">self</span><span class="o">.</span><span class="n">source_aws_conn_id</span> <span class="o">=</span> <span class="n">source_aws_conn_id</span> + <span class="bp">self</span><span class="o">.</span><span class="n">dest_s3_key</span> <span class="o">=</span> <span class="n">dest_s3_key</span> + <span class="bp">self</span><span class="o">.</span><span class="n">dest_aws_conn_id</span> <span class="o">=</span> <span class="n">dest_aws_conn_id</span> + <span class="bp">self</span><span class="o">.</span><span class="n">replace</span> <span class="o">=</span> <span class="n">replace</span> + <span class="bp">self</span><span class="o">.</span><span class="n">transform_script</span> <span class="o">=</span> <span class="n">transform_script</span> + + <span class="k">def</span> <span class="nf">execute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">context</span><span class="p">):</span> + <span class="n">source_s3</span> <span class="o">=</span> <span class="n">S3Hook</span><span class="p">(</span><span class="n">aws_conn_id</span><span class="o">=</span><span class="bp">self</span><span 
class="o">.</span><span class="n">source_aws_conn_id</span><span class="p">)</span> + <span class="n">dest_s3</span> <span class="o">=</span> <span class="n">S3Hook</span><span class="p">(</span><span class="n">aws_conn_id</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dest_aws_conn_id</span><span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Downloading source S3 file </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">source_s3_key</span><span class="p">)</span> + <span class="k">if</span> <span class="ow">not</span> <span class="n">source_s3</span><span class="o">.</span><span class="n">check_for_key</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">source_s3_key</span><span class="p">):</span> + <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"The source key </span><span class="si">{0}</span><span class="s2"> does not exist"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">source_s3_key</span><span class="p">))</span> + <span class="n">source_s3_key_object</span> <span class="o">=</span> <span class="n">source_s3</span><span class="o">.</span><span class="n">get_key</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">source_s3_key</span><span class="p">)</span> + <span class="k">with</span> <span class="n">NamedTemporaryFile</span><span class="p">(</span><span class="s2">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">f_source</span><span class="p">,</span> <span 
class="n">NamedTemporaryFile</span><span class="p">(</span><span class="s2">"w"</span><span class="p">)</span> <span class="k">as</span> <span class="n">f_dest</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span> + <span class="s2">"Dumping S3 file </span><span class="si">%s</span><span class="s2"> contents to local file </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> + <span class="bp">self</span><span class="o">.</span><span class="n">source_s3_key</span><span class="p">,</span> <span class="n">f_source</span><span class="o">.</span><span class="n">name</span> + <span class="p">)</span> + <span class="n">source_s3_key_object</span><span class="o">.</span><span class="n">get_contents_to_file</span><span class="p">(</span><span class="n">f_source</span><span class="p">)</span> + <span class="n">f_source</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span> + <span class="n">source_s3</span><span class="o">.</span><span class="n">connection</span><span class="o">.</span><span class="n">close</span><span class="p">()</span> + <span class="n">transform_script_process</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">Popen</span><span class="p">(</span> + <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">transform_script</span><span class="p">,</span> <span class="n">f_source</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">f_dest</span><span class="o">.</span><span class="n">name</span><span class="p">],</span> + <span class="n">stdout</span><span class="o">=</span><span class="n">subprocess</span><span class="o">.</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span 
class="n">subprocess</span><span class="o">.</span><span class="n">PIPE</span><span class="p">)</span> + <span class="p">(</span><span class="n">transform_script_stdoutdata</span><span class="p">,</span> <span class="n">transform_script_stderrdata</span><span class="p">)</span> <span class="o">=</span> <span class="n">transform_script_process</span><span class="o">.</span><span class="n">communicate</span><span class="p">()</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Transform script stdout </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">transform_script_stdoutdata</span><span class="p">)</span> + <span class="k">if</span> <span class="n">transform_script_process</span><span class="o">.</span><span class="n">returncode</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> + <span class="k">raise</span> <span class="n">AirflowException</span><span class="p">(</span><span class="s2">"Transform script failed </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">transform_script_stderrdata</span><span class="p">)</span> + <span class="k">else</span><span class="p">:</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span> + <span class="s2">"Transform script successful. 
Output temporarily located at </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> + <span class="n">f_dest</span><span class="o">.</span><span class="n">name</span> + <span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Uploading transformed file to S3"</span><span class="p">)</span> + <span class="n">f_dest</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span> + <span class="n">dest_s3</span><span class="o">.</span><span class="n">load_file</span><span class="p">(</span> + <span class="n">filename</span><span class="o">=</span><span class="n">f_dest</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> + <span class="n">key</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dest_s3_key</span><span class="p">,</span> + <span class="n">replace</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">replace</span> + <span class="p">)</span> + <span class="bp">self</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Upload successful"</span><span class="p">)</span> + <span class="n">dest_s3</span><span class="o">.</span><span class="n">connection</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></div> +</pre></div> + + </div> + <div class="articleComments"> + + </div> + </div> + <footer> + + + <hr/> + + <div role="contentinfo"> + <p> + + </p> + </div> + Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 
+ +</footer> + + </div> + </div> + + </section> + + </div> + + + + + + <script type="text/javascript"> + var DOCUMENTATION_OPTIONS = { + URL_ROOT:'../../../', + VERSION:'', + COLLAPSE_INDEX:false, + FILE_SUFFIX:'.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt' + }; + </script> + <script type="text/javascript" src="../../../_static/jquery.js"></script> + <script type="text/javascript" src="../../../_static/underscore.js"></script> + <script type="text/javascript" src="../../../_static/doctools.js"></script> + + + + + + <script type="text/javascript" src="../../../_static/js/theme.js"></script> + + + + + <script type="text/javascript"> + jQuery(function () { + SphinxRtdTheme.StickyNav.enable(); + }); + </script> + + +</body> +</html> \ No newline at end of file
