HADOOP-13583. Incorporate checkcompatibility script which runs Java API Compliance Checker.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0cd22d66
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0cd22d66
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0cd22d66

Branch: refs/heads/HADOOP-13345
Commit: 0cd22d66691fdbc3fb3e0a35ad1625236c3ae3f7
Parents: aacf214
Author: Andrew Wang <w...@apache.org>
Authored: Tue Nov 1 16:36:10 2016 -0700
Committer: Andrew Wang <w...@apache.org>
Committed: Tue Nov 1 16:36:10 2016 -0700

----------------------------------------------------------------------
 BUILDING.txt                          |  11 +
 dev-support/bin/checkcompatibility.py | 345 +++++++++++++++++++++++++++++
 2 files changed, 356 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/0cd22d66/BUILDING.txt
----------------------------------------------------------------------
diff --git a/BUILDING.txt b/BUILDING.txt
index 1fbf8bb..d6f6db7 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -389,3 +389,14 @@ http://www.zlib.net/
 Building distributions:
 
 * Build distribution with native code    : mvn package [-Pdist][-Pdocs][-Psrc][-Dtar]
+
+----------------------------------------------------------------------------------
+Running compatibility checks with checkcompatibility.py
+
+Invoke `./dev-support/bin/checkcompatibility.py` to run Java API Compliance Checker
+to compare the public Java APIs of two git objects. This can be used by release
+managers to compare the compatibility of a previous and current release.
+
+As an example, this invocation will check the compatibility of interfaces annotated as Public or LimitedPrivate:
+
+./dev-support/bin/checkcompatibility.py --annotation org.apache.hadoop.classification.InterfaceAudience.Public --annotation org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate --include "hadoop.*" branch-2.7.2 trunk

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0cd22d66/dev-support/bin/checkcompatibility.py
----------------------------------------------------------------------
diff --git a/dev-support/bin/checkcompatibility.py b/dev-support/bin/checkcompatibility.py
new file mode 100755
index 0000000..ad1e9cb
--- /dev/null
+++ b/dev-support/bin/checkcompatibility.py
@@ -0,0 +1,345 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Script which checks Java API compatibility between two revisions of the
+# Java client.
+#
+# Originally sourced from Apache Kudu, which was based on the
+# compatibility checker from the Apache HBase project, but ported to
+# Python for better readability.
+
+import logging
+import os
+import re
+import shutil
+import subprocess
+import sys
+import urllib2
+try:
+  import argparse
+except ImportError:
+  sys.stderr.write("Please install argparse, e.g. via `pip install argparse`.")
+  sys.exit(2)
+
+# Various relative paths
+REPO_DIR = os.getcwd()
+
+def check_output(*popenargs, **kwargs):
+  r"""Run command with arguments and return its output as a byte string.
+  Backported from Python 2.7 as it's implemented as pure python on stdlib.
+  >>> check_output(['/usr/bin/python', '--version'])
+  Python 2.6.2
+  """
+  process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
+  output, _ = process.communicate()
+  retcode = process.poll()
+  if retcode:
+    cmd = kwargs.get("args")
+    if cmd is None:
+      cmd = popenargs[0]
+    error = subprocess.CalledProcessError(retcode, cmd)
+    error.output = output
+    raise error
+  return output
+
+def get_repo_dir():
+  """ Return the path to the top of the repo. """
+  dirname, _ = os.path.split(os.path.abspath(__file__))
+  return os.path.join(dirname, "../..")
+
+def get_scratch_dir():
+  """ Return the path to the scratch dir that we build within. """
+  scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
+  if not os.path.exists(scratch_dir):
+    os.makedirs(scratch_dir)
+  return scratch_dir
+
+def get_java_acc_dir():
+  """ Return the path where we check out the Java API Compliance Checker. """
+  return os.path.join(get_repo_dir(), "target", "java-acc")
+
+
+def clean_scratch_dir(scratch_dir):
+  """ Clean up and re-create the scratch directory. """
+  if os.path.exists(scratch_dir):
+    logging.info("Removing scratch dir %s...", scratch_dir)
+    shutil.rmtree(scratch_dir)
+  logging.info("Creating empty scratch dir %s...", scratch_dir)
+  os.makedirs(scratch_dir)
+
+
+def checkout_java_tree(rev, path):
+  """ Check out the Java source tree for the given revision into
+  the given path. """
+  logging.info("Checking out %s in %s", rev, path)
+  os.makedirs(path)
+  # Extract java source
+  subprocess.check_call(["bash", '-o', 'pipefail', "-c",
+                         ("git archive --format=tar %s | " +
+                          "tar -C \"%s\" -xf -") % (rev, path)],
+                        cwd=get_repo_dir())
+
+def get_git_hash(revname):
+  """ Convert 'revname' to its SHA-1 hash. """
+  return check_output(["git", "rev-parse", revname],
+                      cwd=get_repo_dir()).strip()
+
+def get_repo_name():
+  """Get the name of the repo based on the git remote."""
+  remotes = check_output(["git", "remote", "-v"],
+                         cwd=get_repo_dir()).strip().split("\n")
+  # Example output:
+  # origin     https://github.com/apache/hadoop.git (fetch)
+  # origin     https://github.com/apache/hadoop.git (push)
+  remote_url = remotes[0].split("\t")[1].split(" ")[0]
+  remote = remote_url.split("/")[-1]
+  if remote.endswith(".git"):
+    remote = remote[:-4]
+  return remote
+
+def build_tree(java_path):
+  """ Run the Java build within 'path'. """
+  logging.info("Building in %s...", java_path)
+  subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true",
+                         "package"],
+                        cwd=java_path)
+
+
+def checkout_java_acc(force):
+  """
+  Check out the Java API Compliance Checker. If 'force' is true, will
+  re-download even if the directory exists.
+  """
+  acc_dir = get_java_acc_dir()
+  if os.path.exists(acc_dir):
+    logging.info("Java ACC is already downloaded.")
+    if not force:
+      return
+    logging.info("Forcing re-download.")
+    shutil.rmtree(acc_dir)
+
+  logging.info("Downloading Java ACC...")
+
+  url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz";
+  scratch_dir = get_scratch_dir()
+  path = os.path.join(scratch_dir, os.path.basename(url))
+  jacc = urllib2.urlopen(url)
+  with open(path, 'wb') as w:
+    w.write(jacc.read())
+
+  subprocess.check_call(["tar", "xzf", path],
+                        cwd=scratch_dir)
+
+  shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-1.8"),
+              os.path.join(acc_dir))
+
+
+def find_jars(path):
+  """ Return a list of jars within 'path' to be checked for compatibility. """
+  all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines())
+
+  return [j for j in all_jars if (
+      "-tests" not in j and
+      "-sources" not in j and
+      "-with-dependencies" not in j)]
+
+def write_xml_file(path, version, jars):
+  """Write the XML manifest file for JACC."""
+  with open(path, "wt") as f:
+    f.write("<version>" + version + "</version>\n")
+    f.write("<archives>")
+    for j in jars:
+      f.write(j + "\n")
+    f.write("</archives>")
+
+def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations):
+  """ Run the compliance checker to compare 'src' and 'dst'. """
+  logging.info("Will check compatibility between original jars:\n\t%s\n" +
+               "and new jars:\n\t%s",
+               "\n\t".join(src_jars),
+               "\n\t".join(dst_jars))
+
+  java_acc_path = os.path.join(get_java_acc_dir(),
+                               "japi-compliance-checker.pl")
+
+  src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
+  dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
+  write_xml_file(src_xml_path, src_name, src_jars)
+  write_xml_file(dst_xml_path, dst_name, dst_jars)
+
+  out_path = os.path.join(get_scratch_dir(), "report.html")
+
+  args = ["perl", java_acc_path,
+          "-l", get_repo_name(),
+          "-d1", src_xml_path,
+          "-d2", dst_xml_path,
+          "-report-path", out_path]
+
+  if annotations is not None:
+    annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
+    with file(annotations_path, "w") as f:
+      for ann in annotations:
+        print >>f, ann
+    args += ["-annotations-list", annotations_path]
+
+  subprocess.check_call(args)
+
+def filter_jars(jars, include_filters, exclude_filters):
+  """Filter the list of JARs based on include and exclude filters."""
+  filtered = []
+  # Apply include filters
+  for j in jars:
+    found = False
+    basename = os.path.basename(j)
+    for f in include_filters:
+      if f.match(basename):
+        found = True
+        break
+    if found:
+      filtered += [j]
+    else:
+      logging.debug("Ignoring JAR %s", j)
+  # Apply exclude filters
+  exclude_filtered = []
+  for j in filtered:
+    basename = os.path.basename(j)
+    found = False
+    for f in exclude_filters:
+      if f.match(basename):
+        found = True
+        break
+    if found:
+      logging.debug("Ignoring JAR %s", j)
+    else:
+      exclude_filtered += [j]
+
+  return exclude_filtered
+
+
+def main():
+  """Main function."""
+  logging.basicConfig(level=logging.INFO)
+  parser = argparse.ArgumentParser(
+      description="Run Java API Compliance Checker.")
+  parser.add_argument("-f", "--force-download",
+                      action="store_true",
+                      help="Download dependencies (i.e. Java ACC) " +
+                      "even if they are already present")
+  parser.add_argument("-i", "--include-file",
+                      action="append",
+                      dest="include_files",
+                      help="Regex filter for JAR files to be included. " +
+                      "Applied before the exclude filters. " +
+                      "Can be specified multiple times.")
+  parser.add_argument("-e", "--exclude-file",
+                      action="append",
+                      dest="exclude_files",
+                      help="Regex filter for JAR files to be excluded. " +
+                      "Applied after the include filters. " +
+                      "Can be specified multiple times.")
+  parser.add_argument("-a", "--annotation",
+                      action="append",
+                      dest="annotations",
+                      help="Fully-qualified Java annotation. " +
+                      "Java ACC will only check compatibility of " +
+                      "annotated classes. Can be specified multiple times.")
+  parser.add_argument("--skip-clean",
+                      action="store_true",
+                      help="Skip cleaning the scratch directory.")
+  parser.add_argument("--skip-build",
+                      action="store_true",
+                      help="Skip building the projects.")
+  parser.add_argument("src_rev", nargs=1, help="Source revision.")
+  parser.add_argument("dst_rev", nargs="?", default="HEAD",
+                      help="Destination revision. " +
+                      "If not specified, will use HEAD.")
+
+  if len(sys.argv) == 1:
+    parser.print_help()
+    sys.exit(1)
+
+  args = parser.parse_args()
+
+  src_rev, dst_rev = args.src_rev[0], args.dst_rev
+
+  logging.info("Source revision: %s", src_rev)
+  logging.info("Destination revision: %s", dst_rev)
+
+  # Construct the JAR regex patterns for filtering.
+  include_filters = []
+  if args.include_files is not None:
+    for f in args.include_files:
+      logging.info("Applying JAR filename include filter: %s", f)
+      include_filters += [re.compile(f)]
+  else:
+    include_filters = [re.compile(".*")]
+
+  exclude_filters = []
+  if args.exclude_files is not None:
+    for f in args.exclude_files:
+      logging.info("Applying JAR filename exclude filter: %s", f)
+      exclude_filters += [re.compile(f)]
+
+  # Construct the annotation list
+  annotations = args.annotations
+  if annotations is not None:
+    logging.info("Filtering classes using %d annotation(s):", len(annotations))
+    for a in annotations:
+      logging.info("\t%s", a)
+
+  # Download deps.
+  checkout_java_acc(args.force_download)
+
+  # Set up the build.
+  scratch_dir = get_scratch_dir()
+  src_dir = os.path.join(scratch_dir, "src")
+  dst_dir = os.path.join(scratch_dir, "dst")
+
+  if args.skip_clean:
+    logging.info("Skipping cleaning the scratch directory")
+  else:
+    clean_scratch_dir(scratch_dir)
+    # Check out the src and dst source trees.
+    checkout_java_tree(get_git_hash(src_rev), src_dir)
+    checkout_java_tree(get_git_hash(dst_rev), dst_dir)
+
+  # Run the build in each.
+  if args.skip_build:
+    logging.info("Skipping the build")
+  else:
+    build_tree(src_dir)
+    build_tree(dst_dir)
+
+  # Find the JARs.
+  src_jars = find_jars(src_dir)
+  dst_jars = find_jars(dst_dir)
+
+  # Filter the JARs.
+  src_jars = filter_jars(src_jars, include_filters, exclude_filters)
+  dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)
+
+  if len(src_jars) == 0 or len(dst_jars) == 0:
+    logging.error("No JARs found! Are your filters too strong?")
+    sys.exit(1)
+
+  run_java_acc(src_rev, src_jars,
+               dst_rev, dst_jars, annotations)
+
+
+if __name__ == "__main__":
+  main()
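
As a rough sketch of what run_java_acc() above assembles: write_xml_file() emits a small XML descriptor for each side, and japi-compliance-checker.pl is then invoked via perl with those descriptors. The jar paths below are hypothetical placeholders, paths are shown relative to the repo root rather than absolute, and the -annotations-list argument is only added when --annotation is passed:

<version>branch-2.7.2</version>
<archives>/path/to/hadoop/target/compat-check/src/hadoop-common-project/hadoop-common/target/hadoop-common-2.7.2.jar
/path/to/hadoop/target/compat-check/src/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-2.7.2.jar
</archives>

perl target/java-acc/japi-compliance-checker.pl -l hadoop \
    -d1 target/compat-check/src.xml -d2 target/compat-check/dst.xml \
    -report-path target/compat-check/report.html \
    -annotations-list target/compat-check/annotations.txt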

