Repository: incubator-impala
Updated Branches:
  refs/heads/master 3403037b9 -> 36b524f68


Add Kudu test helpers

Changes:

1) Add the python Kudu module to the virtualenv. Building the virtualenv
is much slower now because Cython and numpy are required. To help with
the rebuild time, the pip option --no-cache-dir was removed. That option
was originally added for the dev version of impyla: its version number
could stay the same while the module contents changed, so the cache
would serve the old module contents.

2) Add some py.test fixtures to help create Kudu and Impala connections.

Change-Id: I8e5e22b38d5bd09a36238e66a69aa42d1a941de7
Reviewed-on: http://gerrit.cloudera.org:8080/2855
Reviewed-by: Casey Ching <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/36b524f6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/36b524f6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/36b524f6

Branch: refs/heads/master
Commit: 36b524f68c925c89389aca0ba242391935614170
Parents: 3403037
Author: casey <[email protected]>
Authored: Fri Apr 22 17:48:47 2016 -0700
Committer: Tim Armstrong <[email protected]>
Committed: Thu May 19 19:45:48 2016 -0700

----------------------------------------------------------------------
 bin/impala-ipython                   |   9 +-
 bin/impala-py.test                   |  17 +--
 bin/impala-python                    |   4 +-
 bin/impala-python-common.sh          |  25 +++++
 infra/python/bootstrap_virtualenv.py | 152 ++++++++++++++++++++++++---
 infra/python/deps/requirements.txt   |  15 +++
 tests/common/__init__.py             |   3 +-
 tests/conftest.py                    | 167 +++++++++++++++++++++++++++++-
 8 files changed, 348 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/36b524f6/bin/impala-ipython
----------------------------------------------------------------------
diff --git a/bin/impala-ipython b/bin/impala-ipython
index 8833384..e7f9806 100755
--- a/bin/impala-ipython
+++ b/bin/impala-ipython
@@ -1,10 +1,3 @@
 #!/bin/bash
-
-set -eu -o pipefail
-
-PY_DIR=$(dirname "$0")/../infra/python
-
-# impala-python will build or upgrade the python virtualenv automatically.
-impala-python -c ""
-
+source $(dirname "$0")/impala-python-common.sh
 exec "$PY_DIR/env/bin/ipython" "$@"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/36b524f6/bin/impala-py.test
----------------------------------------------------------------------
diff --git a/bin/impala-py.test b/bin/impala-py.test
index 55756a0..48d01e9 100755
--- a/bin/impala-py.test
+++ b/bin/impala-py.test
@@ -1,14 +1,3 @@
-#!/usr/bin/env impala-python
-# EASY-INSTALL-ENTRY-SCRIPT: 'pytest==2.7.2','console_scripts','py.test'
-
-# This was copied straight from py.test, the only modification is to use 
impala-python
-# instead of python above
-
-__requires__ = 'pytest==2.7.2'
-import sys
-from pkg_resources import load_entry_point
-
-if __name__ == '__main__':
-    sys.exit(
-        load_entry_point('pytest==2.7.2', 'console_scripts', 'py.test')()
-    )
+#!/bin/bash
+source $(dirname "$0")/impala-python-common.sh
+exec "$PY_DIR/env/bin/py.test" "$@"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/36b524f6/bin/impala-python
----------------------------------------------------------------------
diff --git a/bin/impala-python b/bin/impala-python
index 67c7be3..5ff4190 100755
--- a/bin/impala-python
+++ b/bin/impala-python
@@ -1,5 +1,3 @@
 #!/bin/bash
-set -eu -o pipefail
-PY_DIR=$(dirname "$0")/../infra/python
-python "$PY_DIR/bootstrap_virtualenv.py"
+source $(dirname "$0")/impala-python-common.sh
 exec "$PY_DIR/env/bin/python" "$@"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/36b524f6/bin/impala-python-common.sh
----------------------------------------------------------------------
diff --git a/bin/impala-python-common.sh b/bin/impala-python-common.sh
new file mode 100644
index 0000000..298a0ca
--- /dev/null
+++ b/bin/impala-python-common.sh
@@ -0,0 +1,25 @@
+# Copyright 2016 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file is intended to be sourced to perform common setup for
+# $IMPALA_HOME/bin/impala-py* executables.
+
+set -euo pipefail
+trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)' ERR
+
+LD_LIBRARY_PATH+=":$(python $IMPALA_HOME/infra/python/bootstrap_virtualenv.py \
+  --print-ld-library-path)"
+
+PY_DIR=$(dirname "$0")/../infra/python
+python "$PY_DIR/bootstrap_virtualenv.py"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/36b524f6/infra/python/bootstrap_virtualenv.py
----------------------------------------------------------------------
diff --git a/infra/python/bootstrap_virtualenv.py 
b/infra/python/bootstrap_virtualenv.py
index 11065db..6f20064 100644
--- a/infra/python/bootstrap_virtualenv.py
+++ b/infra/python/bootstrap_virtualenv.py
@@ -62,17 +62,28 @@ def create_virtualenv():
   shutil.rmtree(build_dir)
 
 
-def exec_cmd(args):
+def exec_cmd(args, **kwargs):
   '''Executes a command and waits for it to finish, raises an exception if the 
return
-     status is not zero.
+     status is not zero. The command output is returned.
 
-     'args' uses the same format as subprocess.Popen().
+     'args' and 'kwargs' use the same format as subprocess.Popen().
   '''
-  process = subprocess.Popen(args, stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT)
+  process = subprocess.Popen(args, stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT,
+      **kwargs)
   output = process.communicate()[0]
   if process.returncode != 0:
     raise Exception("Command returned non-zero status\nCommand: %s\nOutput: %s"
         % (args, output))
+  return output
+
+
+def exec_pip_install(args, **popen_kwargs):
+  # Don't call the virtualenv pip directly, it uses a hashbang to to call the 
python
+  # virtualenv using an absolute path. If the path to the virtualenv is very 
long, the
+  # hashbang won't work.
+  exec_cmd([os.path.join(ENV_DIR, "bin", "python"), os.path.join(ENV_DIR, 
"bin", "pip"),
+    "install", "--no-index", "--find-links",
+    "file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR))] + args, 
**popen_kwargs)
 
 
 def find_file(*paths):
@@ -108,19 +119,118 @@ def detect_python_cmd():
 
 
 def install_deps():
-  LOG.info("Installing packages into virtualenv")
-  # Don't call the virtualenv pip directly, it uses a hashbang to to call the 
python
-  # virtualenv using an absolute path. If the path to the virtualenv is very 
long, the
-  # hashbang won't work.
-  # --no-cache-dir is used because the dev version of Impyla may be the same 
even though
-  # the contents are different. Since the version doesn't change, pip may use 
its cached
-  # build.
-  exec_cmd([os.path.join(ENV_DIR, "bin", "python"), os.path.join(ENV_DIR, 
"bin", "pip"),
-    "install", "--no-cache-dir", "--no-index", "--find-links",
-    "file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR)), "-r", 
REQS_PATH])
+  LOG.info("Installing packages into the virtualenv")
+  exec_pip_install(["-r", REQS_PATH])
   shutil.copyfile(REQS_PATH, INSTALLED_REQS_PATH)
 
 
+def install_kudu_client_if_possible():
+  """Installs the Kudu python module if possible. The Kudu module is the only 
one that
+     requires the toolchain. If the toolchain isn't in use or hasn't been 
populated
+     yet, nothing will be done. Also nothing will be done if the Kudu client 
lib required
+     by the module isn't available (as determined by KUDU_IS_SUPPORTED).
+  """
+  if os.environ["KUDU_IS_SUPPORTED"] != "true":
+    LOG.debug("Skipping Kudu: Kudu is not supported")
+    return
+  impala_toolchain_dir = os.environ.get("IMPALA_TOOLCHAIN")
+  if not impala_toolchain_dir:
+    LOG.debug("Skipping Kudu: IMPALA_TOOLCHAIN not set")
+    return
+  toolchain_kudu_dir = os.path.join(
+      impala_toolchain_dir, "kudu-" + os.environ["IMPALA_KUDU_VERSION"])
+  if not os.path.exists(toolchain_kudu_dir):
+    LOG.debug("Skipping Kudu: %s doesn't exist" % toolchain_kudu_dir)
+    return
+
+  # The "pip" command could be used to provide the version of Kudu installed 
(if any)
+  # but it's a little too slow. Running the virtualenv python to detect the 
installed
+  # version is faster.
+  actual_version_string = exec_cmd([os.path.join(ENV_DIR, "bin", "python"), 
"-c",
+      textwrap.dedent("""
+      try:
+        import kudu
+        print kudu.__version__
+      except ImportError:
+        pass""")]).strip()
+  actual_version = [int(v) for v in actual_version_string.split(".") if v]
+
+  reqs_file = open(REQS_PATH)
+  try:
+    for line in reqs_file:
+      if not line.startswith("# kudu-python=="):
+        continue
+      expected_version_string = line.split()[1].split("==")[1]
+      break
+    else:
+      raise Exception("Unable to find kudu-python version in requirements 
file")
+  finally:
+    reqs_file.close()
+  expected_version = [int(v) for v in expected_version_string.split(".")]
+
+  if actual_version and actual_version == expected_version:
+    LOG.debug("Skipping Kudu: Installed %s == required %s"
+        % (actual_version_string, expected_version_string))
+    return
+  LOG.debug("Kudu installation required. Actual version %s. Required version 
%s.",
+      actual_version, expected_version)
+
+  LOG.info("Installing Kudu into the virtualenv")
+  # The installation requires that KUDU_HOME/build/latest exists. An empty 
directory
+  # structure will be made to satisfy that. The Kudu client headers and lib 
will be made
+  # available through GCC environment variables.
+  fake_kudu_build_dir = os.path.join(tempfile.gettempdir(), "virtualenv-kudu")
+  try:
+    artifact_dir = os.path.join(fake_kudu_build_dir, "build", "latest")
+    if not os.path.exists(artifact_dir):
+      os.makedirs(artifact_dir)
+    env = dict(os.environ)
+    env["KUDU_HOME"] = fake_kudu_build_dir
+    kudu_client_dir = find_kudu_client_install_dir()
+    env["CPLUS_INCLUDE_PATH"] = os.path.join(kudu_client_dir, "include")
+    env["LIBRARY_PATH"] = "{0}{1}lib{2}{0}{1}lib64".format(
+        kudu_client_dir, os.path.sep, os.path.pathsep)
+    exec_pip_install(["kudu-python==" + expected_version_string], env=env)
+  finally:
+    try:
+      shutil.rmtree(fake_kudu_build_dir)
+    except Exception:
+      LOG.debug("Error removing temp Kudu build dir", exc_info=True)
+
+
+def find_kudu_client_install_dir():
+  custom_client_dir = os.environ["KUDU_CLIENT_DIR"]
+  if custom_client_dir:
+    install_dir = os.path.join(custom_client_dir, "usr", "local")
+    error_if_kudu_client_not_found(install_dir)
+  else:
+    # If the toolchain appears to have been setup already, then the Kudu 
client is
+    # required to exist. It's possible that the toolchain won't be setup yet 
though
+    # since the toolchain bootstrap script depends on the virtualenv.
+    kudu_base_dir = os.path.join(os.environ["IMPALA_TOOLCHAIN"],
+        "kudu-%s" % os.environ["IMPALA_KUDU_VERSION"])
+    install_dir = os.path.join(kudu_base_dir, "debug")
+    if os.path.exists(kudu_base_dir):
+      error_if_kudu_client_not_found(install_dir)
+  return install_dir
+
+
+def error_if_kudu_client_not_found(install_dir):
+  header_path = os.path.join(install_dir, "include", "kudu", "client", 
"client.h")
+  if not os.path.exists(header_path):
+    raise Exception("Kudu client header not found at %s" % header_path)
+
+  kudu_client_lib = "libkudu_client.so"
+  lib_dir = os.path.join(install_dir, "lib64")
+  if not os.path.exists(lib_dir):
+    lib_dir = os.path.join(install_dir, "lib")
+  for _, _, files in os.walk(lib_dir):
+    for file in files:
+      if file == kudu_client_lib:
+        return
+  raise Exception("%s not found at %s" % (kudu_client_lib, lib_dir))
+
+
 def deps_are_installed():
   if not os.path.exists(INSTALLED_REQS_PATH):
     return False
@@ -148,11 +258,23 @@ def setup_virtualenv_if_not_exists():
 
 
 if __name__ == "__main__":
-  logging.basicConfig(level=logging.INFO)
   parser = optparse.OptionParser()
+  parser.add_option("-l", "--log-level", default="INFO",
+      choices=("DEBUG", "INFO", "WARN", "ERROR"))
   parser.add_option("-r", "--rebuild", action="store_true", help="Force a 
rebuild of"
       " the virtualenv even if it exists and appears to be completely 
up-to-date.")
+  parser.add_option("--print-ld-library-path", action="store_true", 
help="Print the"
+      " LD_LIBRARY_PATH that should be used when running python from the 
virtualenv.")
   options, args = parser.parse_args()
+
+  if options.print_ld_library_path:
+    kudu_client_dir = find_kudu_client_install_dir()
+    print("{0}{1}lib{2}{0}{1}lib64".format(
+        kudu_client_dir, os.path.sep, os.path.pathsep))
+    exit()
+
+  logging.basicConfig(level=getattr(logging, options.log_level))
   if options.rebuild:
     delete_virtualenv_if_exist()
   setup_virtualenv_if_not_exists()
+  install_kudu_client_if_possible()

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/36b524f6/infra/python/deps/requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/requirements.txt 
b/infra/python/deps/requirements.txt
index 2725344..5409027 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -53,5 +53,20 @@ requests == 2.7.0
 sh == 1.11
 sqlparse == 0.1.15
 texttable == 0.8.3
+
+# kudu-python is needed but cannot be listed as usual. The Kudu client lib 
(.so file)
+# is needed for compilation/installation but the client lib is provided by the 
toolchain.
+# The virtualenv may need to be functional even if the toolchain isn't 
present. The
+# bootstap_virtualenv.py script special-cases kudu-python, the line below is 
actually
+# functional and determines the expected kudu-python version. The version must 
be listed
+# in the format below including # and spacing. Keep this formatting!
+# kudu-python==0.1.1
+  Cython == 0.23.4
+  numpy == 1.10.4
+  # These should eventually be removed  
https://issues.apache.org/jira/browse/KUDU-1456
+  unittest2 == 1.1.0
+    linecache2 == 1.0.0
+    traceback2 == 1.4.0
+
 # For dev purposes, not used in scripting. Version 1.2.1 is the latest that 
supports 2.6.
 ipython == 1.2.1

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/36b524f6/tests/common/__init__.py
----------------------------------------------------------------------
diff --git a/tests/common/__init__.py b/tests/common/__init__.py
index 946a474..010f973 100644
--- a/tests/common/__init__.py
+++ b/tests/common/__init__.py
@@ -1 +1,2 @@
-# This file is needed to make the files in this directory a python module
+KUDU_MASTER_HOST = "127.0.0.1"
+KUDU_MASTER_PORT = 7051

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/36b524f6/tests/conftest.py
----------------------------------------------------------------------
diff --git a/tests/conftest.py b/tests/conftest.py
index c6a22c0..3fb7f04 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,17 +1,21 @@
 # Copyright (c) 2012 Cloudera, Inc. All rights reserved.
 # py.test configuration module
 #
+from impala.dbapi import connect as impala_connect
+from kudu import connect as kudu_connect
+from random import choice, sample
+from string import ascii_lowercase, digits
+from zlib import crc32
+import contextlib
 import logging
 import os
 import pytest
 
-from zlib import crc32
-
+from common import KUDU_MASTER_HOST, KUDU_MASTER_PORT
 from common.test_result_verifier import QueryTestResult
 from tests.common.patterns import is_valid_impala_identifier
 from tests.util.filesystem_utils import FILESYSTEM, ISILON_WEBHDFS_PORT
 
-
 logging.basicConfig(level=logging.INFO, format='%(threadName)s: %(message)s')
 LOG = logging.getLogger('test_configuration')
 
@@ -203,6 +207,9 @@ def unique_database(request, testid_checksum):
 
     name_prefix: string (defaults to test function __name__) - prefix to be 
used for the
     database name
+
+  For a similar DB-API 2 compliant connection/cursor that use HS2 see the 
'conn' and
+  'unique_cursor' fixtures below.
   """
 
   # Test cases are at the function level, so no one should "accidentally" 
re-scope this.
@@ -235,3 +242,157 @@ def unique_database(request, testid_checksum):
   LOG.info('Created database "{0}" for test ID "{1}"'.format(db_name,
                                                              
str(request.node.nodeid)))
   return db_name
+
+
+@pytest.yield_fixture
+def kudu_client():
+  """Provides a new Kudu client as a pytest fixture. The client only exists 
for the
+     duration of the method it is used in.
+  """
+  kudu_client = kudu_connect(KUDU_MASTER_HOST, KUDU_MASTER_PORT)
+  try:
+    yield kudu_client
+  finally:
+    try:
+      kudu_client.close()
+    except Exception as e:
+      LOG.warn("Error closing Kudu client: %s", e)
+
+
+@pytest.yield_fixture(scope="class")
+def conn(request):
+  """Provides a new DB-API compliant connection to Impala as a pytest fixture. 
The
+     same connection is used for all test methods in a class. The class may 
provide the
+     following customizations:
+       - get_db_name(): The name of the database to connect to.
+       - auto_create_db(): If declared and the method returns True, the 
database will
+         be created before tests run and dropped afterwards. If a database 
name is
+         provided by get_db_name(), it must not exist. Classes that use both
+         auto_create_db() and get_db_name() should generate a random name in
+         get_db_name() and cache it.
+     The returned connection will have a 'db_name' property.
+
+     See the 'unique_database' fixture above if you want to use Impala's 
custom python
+     API instead of DB-API.
+  """
+  db_name = __call_cls_method_if_exists(request.cls, "get_db_name")
+  use_unique_conn = __call_cls_method_if_exists(request.cls, "auto_create_db")
+  if use_unique_conn:
+    with __unique_conn(db_name=db_name) as conn:
+      yield conn
+  else:
+    with __auto_closed_conn(db_name=db_name) as conn:
+      yield conn
+
+
+def __call_cls_method_if_exists(cls, method_name):
+  """Returns the result of calling the method 'method_name' on class 'class' 
if the class
+     defined such a method, otherwise returns None.
+  """
+  method = getattr(cls, method_name, None)
+  if method:
+    return method()
+
+
+@contextlib.contextmanager
+def __unique_conn(db_name=None):
+  """Connects to Impala and creates a new database, then returns a connection 
to it.
+     This is intended to be used in a "with" block. Upon exit, the database 
will be
+     dropped. A database name can be provided by 'db_name', a database by that 
name
+     must not exist prior to calling this method.
+
+     with __unique_conn() as conn:
+       # Use conn
+     # The database no longer exists and the conn is closed.
+
+     The returned connection will have a 'db_name' property.
+  """
+  if not db_name:
+    db_name = choice(ascii_lowercase) + "".join(sample(ascii_lowercase + 
digits, 5))
+  with __auto_closed_conn() as conn:
+    with __auto_closed_cursor(conn) as cur:
+      cur.execute("CREATE DATABASE %s" % db_name)
+  with __auto_closed_conn(db_name=db_name) as conn:
+    try:
+      yield conn
+    finally:
+      try:
+        with __auto_closed_cursor(conn) as cur:
+          try:
+            cur.execute("USE DEFAULT")
+            cur.execute("DROP DATABASE IF EXISTS %s CASCADE" % db_name)
+          except Exception as e:
+            LOG.warn("Error dropping database: %s", e)
+      except Exception as e:
+        LOG.warn("Error creating a cursor: %s", e)
+
+
+@contextlib.contextmanager
+def __auto_closed_conn(db_name=None):
+  """Returns a connection to Impala. This is intended to be used in a "with" 
block. The
+     connection will be closed upon exiting the block.
+
+     The returned connection will have a 'db_name' property.
+  """
+  conn = impala_connect(database=db_name)
+  try:
+    conn.db_name = db_name
+    yield conn
+  finally:
+    try:
+      conn.close()
+    except Exception as e:
+      LOG.warn("Error closing Impala connection: %s", e)
+
+
+@pytest.yield_fixture
+def cursor(conn):
+  """Provides a new DB-API compliant cursor from a connection provided by the 
conn()
+     fixture. The cursor only exists for the duration of the method it is used 
in.
+
+     The returned cursor will have a 'conn' property. The 'conn' will have a 
'db_name'
+     property.
+  """
+  with __auto_closed_cursor(conn) as cur:
+    yield cur
+
+@pytest.yield_fixture(scope="class")
+def cls_cursor(conn):
+  """Provides a new DB-API compliant cursor from a connection provided by the 
conn()
+     fixture. The cursor exists for the duration of the class it is used in.
+
+     The returned cursor will have a 'conn' property. The 'conn' will have a 
'db_name'
+     property.
+  """
+  with __auto_closed_cursor(conn) as cur:
+    yield cur
+
+
+@pytest.yield_fixture
+def unique_cursor():
+  """Provides a new DB-API compliant cursor to a newly created Impala 
database. The
+     cursor only exists for the duration of the method it is used in. The 
database will
+     be dropped after the test executes.
+
+     The returned cursor will have a 'conn' property. The 'conn' will have a 
'db_name'
+     property.
+  """
+  with __unique_conn() as conn:
+    with __auto_closed_cursor(conn) as cur:
+      yield cur
+
+
+@contextlib.contextmanager
+def __auto_closed_cursor(conn):
+  """Returns a cursor created from conn. This is intended to be used in a 
"with" block.
+     The cursor will be closed upon exiting the block.
+  """
+  cursor = conn.cursor()
+  cursor.conn = conn
+  try:
+    yield cursor
+  finally:
+    try:
+      cursor.close()
+    except Exception as e:
+      LOG.warn("Error closing Impala cursor: %s", e)

Reply via email to