[
https://issues.apache.org/jira/browse/BEAM-5071?focusedWorklogId=134788&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-134788
]
ASF GitHub Bot logged work on BEAM-5071:
----------------------------------------
Author: ASF GitHub Bot
Created on: 14/Aug/18 23:25
Start Date: 14/Aug/18 23:25
Worklog Time Spent: 10m
Work Description: chamikaramj closed pull request #6167: [BEAM-5071]
Replace the bigquery with restful APIs to query dependenc…
URL: https://github.com/apache/beam/pull/6167
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request (one opened
from a fork) once it has been merged, the diff is reproduced below for the
sake of provenance:
diff --git a/.test-infra/jenkins/dependency_check/dependency_check_report_generator.py b/.test-infra/jenkins/dependency_check/dependency_check_report_generator.py
index 6171ad1e093..11e0a1d3b1a 100644
--- a/.test-infra/jenkins/dependency_check/dependency_check_report_generator.py
+++ b/.test-infra/jenkins/dependency_check/dependency_check_report_generator.py
@@ -21,11 +21,14 @@
import re
import traceback
import logging
+import requests
+import time
from datetime import datetime
from dependency_check.bigquery_client_utils import BigQueryClientUtils
from jira_utils.jira_manager import JiraManager
from dependency_check.report_generator_config import ReportGeneratorConfig
-
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
_MAX_STALE_DAYS = 360
_MAX_MINOR_VERSION_DIFF = 3
@@ -105,18 +108,26 @@ def prioritize_dependencies(deps, sdk_type):
try:
logging.info("\n\nStart processing: " + dep)
dep_name, curr_ver, latest_ver = extract_single_dep(dep)
- curr_release_date, latest_release_date =
query_dependency_release_dates(bigquery_client,
-
dep_name,
-
curr_ver,
-
latest_ver)
+ curr_release_date = None
+ latest_release_date = None
group_id = None
+
if sdk_type == 'Java':
# extract the groupid and artifactid
group_id, artifact_id = dep_name.split(":")
dep_details_url = "{0}g:\"{1}\" AND
a:\"{2}\"".format(_MAVEN_CENTRAL_URL, group_id, artifact_id)
+ curr_release_date = find_release_time_from_maven_central(group_id,
artifact_id, curr_ver)
+ latest_release_date = find_release_time_from_maven_central(group_id,
artifact_id, latest_ver)
else:
dep_details_url = _PYPI_URL + dep_name
-
+ curr_release_date =
find_release_time_from_python_compatibility_checking_service(dep_name, curr_ver)
+ latest_release_date =
find_release_time_from_python_compatibility_checking_service(dep_name, curr_ver)
+
+ if not curr_release_date or not latest_release_date:
+ curr_release_date, latest_release_date =
query_dependency_release_dates_from_bigquery(bigquery_client,
+
dep_name,
+
curr_ver,
+
latest_ver)
dep_info = """<tr>
<td><a href=\'{0}\'>{1}</a></td>
<td>{2}</td>
@@ -176,7 +187,90 @@ def compare_dependency_versions(curr_ver, latest_ver):
return False
-def query_dependency_release_dates(bigquery_client, dep_name,
curr_ver_in_beam, latest_ver):
+def find_release_time_from_maven_central(group_id, artifact_id, version):
+ """
+ Find release dates from Maven Central REST API.
+ Args:
+ group_id:
+ artifact_id:
+ version:
+ Return:
+ release date
+ """
+ url =
"http://search.maven.org/solrsearch/select?q=g:{0}+AND+a:{1}+AND+v:{2}".format(
+ group_id,
+ artifact_id,
+ version
+ )
+ logging.info('Finding release date of {0}:{1} {2} from the Maven
Central').format(
+ group_id,
+ artifact_id,
+ version
+ )
+ try:
+ response = request_session_with_retries().get(url)
+ if not response.ok:
+ logging.error("""Failed finding the release date of {0}:{1} {2}.
+ The response status code is not ok: {4}""".format(group_id,
+ artifact_id,
+ version,
+
str(response.status_code)))
+ return None
+ response_data = response.json()
+ release_timestamp_mills = response_data['response']['docs'][0]['timestamp']
+ release_date = datetime.fromtimestamp(release_timestamp_mills/1000).date()
+ return release_date
+ except Exception as e:
+ logging.error("Errors while extracting the release date: " + str(e))
+ return None
+
+
+def find_release_time_from_python_compatibility_checking_service(dep_name,
version):
+ """
+ Query release dates by using Python compatibility checking service.
+ Args:
+ dep_name:
+ version:
+ Return:
+ release date
+ """
+ url = 'http://104.197.8.72/?package={0}=={1}&python-version=2'.format(
+ dep_name,
+ version
+ )
+ logging.info('Finding release time of {0} {1} from the python compatibility
checking service.').format(
+ dep_name,
+ version
+ )
+ try:
+ response = request_session_with_retries().get(url)
+ if not response.ok:
+ logging.error("""Failed finding the release date of {0} {2}.
+ The response status code is not ok: {3}""".format(dep_name,
+ version,
+
str(response.status_code)))
+ return None
+ response_data = response.json()
+ release_datetime =
response_data['dependency_info'][dep_name]['installed_version_time']
+ release_date = datetime.strptime(release_datetime,
'%Y-%m-%dT%H:%M:%S').date()
+ return release_date
+ except Exception as e:
+ logging.error("Errors while extracting the release date: " + str(e))
+ return None
+
+
+def request_session_with_retries():
+ """
+ Create a http session with retries
+ """
+ session = requests.Session()
+ retries = Retry(total=3)
+ session.mount('http://', HTTPAdapter(max_retries=retries))
+ session.mount('https://', HTTPAdapter(max_retries=retries))
+ return session
+
+
+def query_dependency_release_dates_from_bigquery(bigquery_client, dep_name,
curr_ver_in_beam, latest_ver):
"""
Query release dates of current version and the latest version from BQ tables.
Args:
diff --git a/.test-infra/jenkins/dependency_check/dependency_check_report_generator_test.py b/.test-infra/jenkins/dependency_check/dependency_check_report_generator_test.py
index b89907581f5..c53f469c49c 100644
--- a/.test-infra/jenkins/dependency_check/dependency_check_report_generator_test.py
+++ b/.test-infra/jenkins/dependency_check/dependency_check_report_generator_test.py
@@ -28,7 +28,7 @@
_PROJECT_ID = 'mock-apache-beam-testing'
_DATASET_ID = 'mock-beam_dependency_states'
_TABLE_ID = 'mock-java_dependency_states'
-_SDK_TYPE = 'JAVA'
+_SDK_TYPE = 'Java'
# initialize current/latest version release dates for low-priority (LP) and
high-priority (HP) dependencies
_LP_CURR_VERSION_DATE = datetime.strptime('2000-01-01', '%Y-%m-%d')
@@ -40,6 +40,7 @@
@patch('jira_utils.jira_manager.JiraManager')
@patch('jira_utils.jira_manager.JiraClient')
@patch('jira_utils.jira_manager.JiraManager.run')
+@patch('dependency_check.bigquery_client_utils.BigQueryClientUtils.clean_stale_records_from_table')
class DependencyCheckReportGeneratorTest(unittest.TestCase):
"""Tests for `dependency_check_report_generator.py`."""
@@ -58,11 +59,11 @@ def test_empty_dep_input(self, *args):
self.assertEqual(len(report), 0)
-
@patch('dependency_check.bigquery_client_utils.BigQueryClientUtils.query_dep_info_by_version',
- side_effect = [(_LP_CURR_VERSION_DATE, True), (_LATEST_VERSION_DATE,
False),
- (_LP_CURR_VERSION_DATE, True), (_LATEST_VERSION_DATE,
False),
- (_HP_CURR_VERSION_DATE, True), (_LATEST_VERSION_DATE,
False),
- (_LP_CURR_VERSION_DATE, True), (_LATEST_VERSION_DATE,
False),])
+
@patch('dependency_check.dependency_check_report_generator.find_release_time_from_maven_central',
+ side_effect = [_LP_CURR_VERSION_DATE, _LATEST_VERSION_DATE,
+ _LP_CURR_VERSION_DATE, _LATEST_VERSION_DATE,
+ _HP_CURR_VERSION_DATE, _LATEST_VERSION_DATE,
+ _LP_CURR_VERSION_DATE, _LATEST_VERSION_DATE,])
def test_normal_dep_input(self, *args):
"""
Test on a normal outdated dependencies set.
@@ -82,9 +83,9 @@ def test_normal_dep_input(self, *args):
self.assertIn('group3:artifact3', report[2])
-
@patch('dependency_check.bigquery_client_utils.BigQueryClientUtils.query_dep_info_by_version',
- side_effect = [(_LP_CURR_VERSION_DATE, True),
- (_LATEST_VERSION_DATE, False),])
+
@patch('dependency_check.dependency_check_report_generator.find_release_time_from_maven_central',
+ side_effect = [_LP_CURR_VERSION_DATE,
+ _LATEST_VERSION_DATE,])
def test_dep_with_nondigit_major_versions(self, *args):
"""
Test on a outdated dependency with non-digit major number.
@@ -97,9 +98,9 @@ def test_dep_with_nondigit_major_versions(self, *args):
self.assertIn('group1:artifact1', report[0])
-
@patch('dependency_check.bigquery_client_utils.BigQueryClientUtils.query_dep_info_by_version',
- side_effect = [(_LP_CURR_VERSION_DATE, True),
- (_LATEST_VERSION_DATE, False),])
+
@patch('dependency_check.dependency_check_report_generator.find_release_time_from_maven_central',
+ side_effect = [_LP_CURR_VERSION_DATE,
+ _LATEST_VERSION_DATE,])
def test_dep_with_nondigit_minor_versions(self, *args):
"""
Test on a outdated dependency with non-digit minor number.
@@ -112,11 +113,8 @@ def test_dep_with_nondigit_minor_versions(self, *args):
self.assertIn('group1:artifact1', report[0])
-
@patch('dependency_check.bigquery_client_utils.BigQueryClientUtils.insert_dep_to_table')
-
@patch('dependency_check.bigquery_client_utils.BigQueryClientUtils.delete_dep_from_table')
-
@patch('dependency_check.bigquery_client_utils.BigQueryClientUtils.query_currently_used_dep_info_in_db',
side_effect = [(None, None)])
-
@patch('dependency_check.bigquery_client_utils.BigQueryClientUtils.query_dep_info_by_version',
- side_effect = [(_HP_CURR_VERSION_DATE, True), (_LATEST_VERSION_DATE,
False),])
+
@patch('dependency_check.dependency_check_report_generator.find_release_time_from_maven_central',
+ side_effect = [_HP_CURR_VERSION_DATE,_LATEST_VERSION_DATE,])
def test_invalid_dep_input(self, *args):
"""
Test on a invalid outdated dependencies format.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
Issue Time Tracking
-------------------
Worklog Id: (was: 134788)
Time Spent: 4h 10m (was: 4h)
> Using the restful API in Beam dependency check system, get rid of bigquery
> --------------------------------------------------------------------------
>
> Key: BEAM-5071
> URL: https://issues.apache.org/jira/browse/BEAM-5071
> Project: Beam
> Issue Type: Bug
> Components: dependencies
> Reporter: yifan zou
> Assignee: yifan zou
> Priority: Major
> Time Spent: 4h 10m
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)