[
https://issues.apache.org/jira/browse/AIRFLOW-3264?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16666764#comment-16666764
]
ASF GitHub Bot commented on AIRFLOW-3264:
-----------------------------------------
jghoman closed pull request #4109: [AIRFLOW-3264] URL decoding when parsing URI
for connection
URL: https://github.com/apache/incubator-airflow/pull/4109
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/airflow/hooks/base_hook.py b/airflow/hooks/base_hook.py
index 103fa6260b..ef44f6469d 100644
--- a/airflow/hooks/base_hook.py
+++ b/airflow/hooks/base_hook.py
@@ -80,7 +80,7 @@ def get_connection(cls, conn_id):
conn = random.choice(cls.get_connections(conn_id))
if conn.host:
log = LoggingMixin().log
- log.info("Using connection to: %s", conn.host)
+ log.info("Using connection to: %s", conn.debug_info())
return conn
@classmethod
diff --git a/airflow/models.py b/airflow/models.py
index 3594ca204a..82b87c6d53 100755
--- a/airflow/models.py
+++ b/airflow/models.py
@@ -22,6 +22,7 @@
from __future__ import print_function
from __future__ import unicode_literals
+
from future.standard_library import install_aliases
from builtins import str, object, bytes, ImportError as BuiltinImportError
@@ -58,7 +59,8 @@
import uuid
from datetime import datetime
-from urllib.parse import urlparse, quote, parse_qsl
+from urllib.parse import urlparse, quote, parse_qsl, unquote
+
from sqlalchemy import (
Boolean, Column, DateTime, Float, ForeignKey, ForeignKeyConstraint, Index,
Integer, LargeBinary, PickleType, String, Text, UniqueConstraint,
@@ -685,16 +687,17 @@ def __init__(
def parse_from_uri(self, uri):
temp_uri = urlparse(uri)
hostname = temp_uri.hostname or ''
- if '%2f' in hostname:
- hostname = hostname.replace('%2f', '/').replace('%2F', '/')
conn_type = temp_uri.scheme
if conn_type == 'postgresql':
conn_type = 'postgres'
self.conn_type = conn_type
- self.host = hostname
- self.schema = temp_uri.path[1:]
- self.login = temp_uri.username
- self.password = temp_uri.password
+ self.host = unquote(hostname) if hostname else hostname
+ quoted_schema = temp_uri.path[1:]
+ self.schema = unquote(quoted_schema) if quoted_schema else
quoted_schema
+ self.login = unquote(temp_uri.username) \
+ if temp_uri.username else temp_uri.username
+ self.password = unquote(temp_uri.password) \
+ if temp_uri.password else temp_uri.password
self.port = temp_uri.port
if temp_uri.query:
self.extra = json.dumps(dict(parse_qsl(temp_uri.query)))
@@ -808,6 +811,17 @@ def get_hook(self):
def __repr__(self):
return self.conn_id
+ def debug_info(self):
+ return ("id: {}. Host: {}, Port: {}, Schema: {}, "
+ "Login: {}, Password: {}, extra: {}".
+ format(self.conn_id,
+ self.host,
+ self.port,
+ self.schema,
+ self.login,
+ "XXXXXXXX" if self.password else None,
+ self.extra_dejson))
+
@property
def extra_dejson(self):
"""Returns the extra property by deserializing json."""
diff --git a/tests/models.py b/tests/models.py
index 5d0243dee0..d4cb738386 100644
--- a/tests/models.py
+++ b/tests/models.py
@@ -2824,6 +2824,59 @@ def test_connection_from_uri_with_extras(self):
self.assertDictEqual(connection.extra_dejson, {'extra1': 'a value',
'extra2': '/path/'})
+ def test_connection_from_uri_with_colon_in_hostname(self):
+ uri = 'scheme://user:password@host%2flocation%3ax%3ay:1234/schema?' \
+ 'extra1=a%20value&extra2=%2fpath%2f'
+ connection = Connection(uri=uri)
+ self.assertEqual(connection.conn_type, 'scheme')
+ self.assertEqual(connection.host, 'host/location:x:y')
+ self.assertEqual(connection.schema, 'schema')
+ self.assertEqual(connection.login, 'user')
+ self.assertEqual(connection.password, 'password')
+ self.assertEqual(connection.port, 1234)
+ self.assertDictEqual(connection.extra_dejson, {'extra1': 'a value',
+ 'extra2': '/path/'})
+
+ def test_connection_from_uri_with_encoded_password(self):
+ uri =
'scheme://user:password%20with%20space@host%2flocation%3ax%3ay:1234/schema'
+ connection = Connection(uri=uri)
+ self.assertEqual(connection.conn_type, 'scheme')
+ self.assertEqual(connection.host, 'host/location:x:y')
+ self.assertEqual(connection.schema, 'schema')
+ self.assertEqual(connection.login, 'user')
+ self.assertEqual(connection.password, 'password with space')
+ self.assertEqual(connection.port, 1234)
+
+ def test_connection_from_uri_with_encoded_user(self):
+ uri =
'scheme://domain%2fuser:password@host%2flocation%3ax%3ay:1234/schema'
+ connection = Connection(uri=uri)
+ self.assertEqual(connection.conn_type, 'scheme')
+ self.assertEqual(connection.host, 'host/location:x:y')
+ self.assertEqual(connection.schema, 'schema')
+ self.assertEqual(connection.login, 'domain/user')
+ self.assertEqual(connection.password, 'password')
+ self.assertEqual(connection.port, 1234)
+
+ def test_connection_from_uri_with_encoded_schema(self):
+ uri = 'scheme://user:password%20with%20space@host:1234/schema%2ftest'
+ connection = Connection(uri=uri)
+ self.assertEqual(connection.conn_type, 'scheme')
+ self.assertEqual(connection.host, 'host')
+ self.assertEqual(connection.schema, 'schema/test')
+ self.assertEqual(connection.login, 'user')
+ self.assertEqual(connection.password, 'password with space')
+ self.assertEqual(connection.port, 1234)
+
+ def test_connection_from_uri_no_schema(self):
+ uri = 'scheme://user:password%20with%20space@host:1234'
+ connection = Connection(uri=uri)
+ self.assertEqual(connection.conn_type, 'scheme')
+ self.assertEqual(connection.host, 'host')
+ self.assertEqual(connection.schema, '')
+ self.assertEqual(connection.login, 'user')
+ self.assertEqual(connection.password, 'password with space')
+ self.assertEqual(connection.port, 1234)
+
class TestSkipMixin(unittest.TestCase):
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> Only '/' is URL-decoded when parsing hostname in the Connection
> ---------------------------------------------------------------
>
> Key: AIRFLOW-3264
> URL: https://issues.apache.org/jira/browse/AIRFLOW-3264
> Project: Apache Airflow
> Issue Type: Bug
> Components: database
> Reporter: Jarek Potiuk
> Assignee: Jarek Potiuk
> Priority: Major
>
> Parsing of the URL for a connection (using AIRFLOW_CONN_ environment variables)
> does not perform a full URL-decode on the URL. It only handles the hard-coded %2f
> encoding to support "/" in the hostname. However, there are valid cases where the
> hostname, login, password, and query parameters can contain URL-encoded
> values. For example, in cloud-sql-proxy the generated socket path contains ":"
> (for example {{/cloudsql/myProject:us-central1:myInstance}}).
> We need to URL-encode ":" because otherwise urlparse will treat those ":" as
> the separator for the port number. Similarly, the user/password can contain
> URL-encoded characters.
> I think we should fully URL-decode all relevant URL fields (including query
> parameters, user, password, hostname, path). However, it is a potentially
> breaking change (if someone has a user/password/hostname with %), so maybe we
> should make some compromises around that (for example, not decoding passwords,
> which are likely to contain '%' characters), although that would violate the
> URL encoding/decoding specification.
> I will provide a proposed fix shortly.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)