This is an automated email from the ASF dual-hosted git repository.

jevans pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet-ci.git


The following commit(s) were added to refs/heads/master by this push:
     new 2a3f3eb  deprecate PredictLabels (#40)
2a3f3eb is described below

commit 2a3f3eb0fdd75199b5f482e055ab62a892eb35e3
Author: waytrue17 <[email protected]>
AuthorDate: Thu Sep 30 15:49:09 2021 -0700

    deprecate PredictLabels (#40)
    
    Co-authored-by: Wei Chu <[email protected]>
---
 services/github-bots/PredictLabels/DataFetcher.py  | 135 -----
 services/github-bots/PredictLabels/Dockerfile      |  26 -
 .../github-bots/PredictLabels/Dockerrun.aws.json   |   7 -
 .../PredictLabels/LabelBotPredictionService.ipynb  | 612 ---------------------
 services/github-bots/PredictLabels/Predictor.py    | 137 -----
 services/github-bots/PredictLabels/README.md       |  42 --
 .../github-bots/PredictLabels/SentenceParser.py    | 134 -----
 services/github-bots/PredictLabels/Trainer.py      | 102 ----
 services/github-bots/PredictLabels/application.py  | 120 ----
 services/github-bots/PredictLabels/cron.yaml       |  21 -
 .../github-bots/PredictLabels/model_handler.py     | 119 ----
 .../github-bots/PredictLabels/plot_piechart.py     |  47 --
 .../github-bots/PredictLabels/requirements.txt     |  33 --
 services/github-bots/PredictLabels/stopwords.txt   |   1 -
 .../github-bots/PredictLabels/test_datafetcher.py  | 119 ----
 .../github-bots/PredictLabels/test_predictor.py    |  98 ----
 .../PredictLabels/test_sentenceparse.py            |  66 ---
 services/github-bots/PredictLabels/test_trainer.py |  66 ---
 18 files changed, 1885 deletions(-)

diff --git a/services/github-bots/PredictLabels/DataFetcher.py b/services/github-bots/PredictLabels/DataFetcher.py
deleted file mode 100644
index 26aadc5..0000000
--- a/services/github-bots/PredictLabels/DataFetcher.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This script is served to fetch GitHub issues into a json file
-from __future__ import print_function
-import os
-import requests
-import json
-import re
-import pandas as pd
-import logging
-
-
-class DataFetcher:
-
-    def __init__(self,
-                 github_user=os.environ.get("github_user"),
-                 github_oauth_token=os.environ.get("github_oauth_token"),
-                 repo=os.environ.get("repo")):
-        """
-        This DataFetcher serves to fetch issues data
-        Args:
-            github_user(str): the github id. ie: "CathyZhang0822"
-            github_oauth_token(str): the github oauth token, paired with 
github_user to realize authorization
-            repo(str): the repo name
-        """
-        self.github_user = github_user
-        self.github_oauth_token = github_oauth_token
-        self.repo = repo
-        self.auth = (self.github_user, self.github_oauth_token)
-        self.json_data = None
-
-    def cleanstr(self, raw_string, sub_string):
-        """
-        This method is to convert all non-alphanumeric characters from 
-        raw_string into substring
-        """
-        clean = re.sub("[^0-9a-zA-Z]", sub_string, raw_string)
-        return clean.lower()
-
-    def count_pages(self, state):
-        """
-        This method is to count how many pages of issues/labels in total
-        state can be "open"/"closed"/"all"
-        """
-        url = 'https://api.github.com/repos/%s/issues' % self.repo
-        response = requests.get(url, {'state': state},
-                                auth=self.auth)
-        assert response.status_code == 200, "Authorization failed"
-        if "link" not in response.headers:
-            return 1
-        return int(self.cleanstr(response.headers['link'], " ").split()[-3])
-    
-    def fetch_issues(self, issue_nums):
-        """
-        This method is to fetch issues data
-        issue_num: a list of issue ids
-        return issues' data in pandas dataframe format
-        """
-        assert issue_nums != [], "Empty Input!"
-        logging.info("Reading issues:{}".format(", ".join([str(num) for num in 
issue_nums])))
-        data = []
-        for number in issue_nums:
-            url = 'https://api.github.com/repos/' + self.repo + '/issues/' + 
str(number)
-            response = requests.get(url, auth=self.auth)
-            item = response.json()
-            assert 'title' in item, "{} issues doesn't 
exist!".format(str(number))
-            data += [{'id': str(number), 'title': item['title'], 'body': 
item['body']}]
-        return pd.DataFrame(data)
-
-    def data2json(self, state, labels=None, other_labels=False):
-        """
-        This method is to store issues' data into a json file, return json 
file's name
-        state can be either "open"/"closed"/"all"
-        labels is a list of target labels we are interested in
-        other_labels can be either "True"/"False"
-        """
-        assert state in set(['all', 'open', 'closed']), "Invalid State!"
-        logging.info("Reading {} issues..".format(state))
-        pages = self.count_pages(state)
-        data = []
-        for x in range(1, pages+1):
-            url = 'https://api.github.com/repos/' + self.repo + 
'/issues?page=' + str(x) \
-                  + '&per_page=30'.format(repo=self.repo)
-            response = requests.get(url,
-                                    {'state': state,
-                                     'base': 'master',
-                                     'sort': 'created'},
-                                    auth=self.auth)
-            for item in response.json():
-                if "pull_request" in item:
-                    continue
-                if "labels" in item:
-                    issue_labels=list(set([item['labels'][i]['name'] for i in 
range(len(item['labels']))]))
-                else:
-                    continue
-                if labels is not None:
-                    # fetch issue which has at least one target label
-                    for label in labels:
-                        if label in issue_labels:
-                            if other_labels:
-                                # besides target labels, we still want other 
labels
-                                data += [{'id': item['number'],'title': 
item['title'], 'body': item['body'], 'labels': issue_labels}]
-                            else:
-                                # only record target labels
-                                if(label in set(["Feature", "Call for 
Contribution", "Feature request"])):
-                                    label = "Feature"
-                                data += [{'id': item['number'], 'title': 
item['title'], 'body': item['body'], 'labels': label}]
-                            # if have this break, then we only pick up the 
first target label
-                            break
-                else:
-                    # fetch all issues
-                    data += [{'id': item['number'], 'title': item['title'], 
'body': item['body'], 'labels': issue_labels}]
-        self.json_data = data
-        s_labels = "_".join(labels) if labels is not None else "all_labels"
-        filename = "{}_data.json_{}".format(state, s_labels)
-        logging.info("Writing json file..")
-        with open(filename, 'w') as write_file:
-            json.dump(data, write_file)
-        logging.info("{} json file is ready!".format(filename))
-        return filename
diff --git a/services/github-bots/PredictLabels/Dockerfile b/services/github-bots/PredictLabels/Dockerfile
deleted file mode 100644
index 16f9c31..0000000
--- a/services/github-bots/PredictLabels/Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-FROM python:3.6.6
-
-# Update packages
-RUN apt-get update
-
-# Install prerequisite for matplotlib
-RUN apt-get -y install libxft-dev libfreetype6 libfreetype6-dev
-
-# Bundle app source
-COPY . /src
-
-EXPOSE 8000
-WORKDIR /src
-
-#install Python modules
-RUN pip install -r requirements.txt
-
-# Environment Variables
-ENV github_user your_github_id
-ENV github_oauth_token your_github_read_only_token
-ENV repo repo_name
-
-# Run it
-ENTRYPOINT ["python", "application.py"]
-
-
diff --git a/services/github-bots/PredictLabels/Dockerrun.aws.json b/services/github-bots/PredictLabels/Dockerrun.aws.json
deleted file mode 100755
index 73464c7..0000000
--- a/services/github-bots/PredictLabels/Dockerrun.aws.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "AWSEBDockerrunVersion": "1",
-  "Logging": "/tmp/sample-app",
-  "Image": {
-       "Update": "false"
-  }
-}
diff --git a/services/github-bots/PredictLabels/LabelBotPredictionService.ipynb b/services/github-bots/PredictLabels/LabelBotPredictionService.ipynb
deleted file mode 100644
index 66c22c2..0000000
--- a/services/github-bots/PredictLabels/LabelBotPredictionService.ipynb
+++ /dev/null
@@ -1,612 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Label Bot Prediction Service #"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Sentence Parser ###"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Licensed to the Apache Software Foundation (ASF) under one\n",
-    "# or more contributor license agreements.  See the NOTICE file\n",
-    "# distributed with this work for additional information\n",
-    "# regarding copyright ownership.  The ASF licenses this file\n",
-    "# to you under the Apache License, Version 2.0 (the\n",
-    "# \"License\"); you may not use this file except in compliance\n",
-    "# with the License.  You may obtain a copy of the License at\n",
-    "#\n",
-    "#   http://www.apache.org/licenses/LICENSE-2.0\n";,
-    "#\n",
-    "# Unless required by applicable law or agreed to in writing,\n",
-    "# software distributed under the License is distributed on an\n",
-    "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
-    "# KIND, either express or implied.  See the License for the\n",
-    "# specific language governing permissions and limitations\n",
-    "# under the License.\n",
-    "\n",
-    "# This script serves to do data cleaning\n",
-    "from bs4 import BeautifulSoup\n",
-    "import nltk\n",
-    "# fix ssl certificate errors\n",
-    "import ssl\n",
-    "try:\n",
-    "    _create_unverified_https_context = ssl._create_unverified_context\n",
-    "except AttributeError:\n",
-    "    pass\n",
-    "else:\n",
-    "    ssl._create_default_https_context = 
_create_unverified_https_context\n",
-    "import os.path\n",
-    "import pandas as pd\n",
-    "import re\n",
-    "import sys\n",
-    "import logging\n",
-    "\n",
-    "\n",
-    "class SentenceParser:\n",
-    "\n",
-    "    regex_str = [\n",
-    "        r'<[^>]+>',                                                       
              # HTML tags\n",
-    "        r'(?:@[\\w_]+)',                                                  
               # @-mentions\n",
-    "        r\"(?:\\#+[\\w_]+[\\w\\'_\\-]*[\\w_]+)\",                         
                      # hash-tags\n",
-    "        
r'http[s]?://(?:[a-z]|[0-9]|[$-_@.&amp;+]|[!*\\(\\),]|(?:%[0-9a-f][0-9a-f]))+', 
  # URLs\n",
-    "        r'(?:(?:\\d+,?)+(?:\\.?\\d+)?)',                                  
                 # numbers\n",
-    "        r\"(?:[a-z][a-z'\\-_]+[a-z])\",                                   
                 # words with - and '\n",
-    "        r'(?:[\\w_]+)',                                                   
               # other words\n",
-    "        r'(?:\\S)'                                                        
               # anything else\n",
-    "    ]\n",
-    "    # English Stopwords\n",
-    "    with open('stopwords.txt') as file:\n",
-    "        stopwords = file.read().split()\n",
-    "    file.close()\n",
-    "\n",
-    "    def __init__(self):\n",
-    "        \"\"\"\n",
-    "        SentenceParser serves to clean text content\n",
-    "        \"\"\"\n",
-    "        self.data = None\n",
-    "        # extract words stem\n",
-    "        self.porter = nltk.PorterStemmer()\n",
-    "        # a set of stopwords\n",
-    "        self.stops = set(self.stopwords)\n",
-    "\n",
-    "    def read_file(self, filepath, filetype, encod='ISO-8859-1', 
header=None):\n",
-    "        \"\"\"\n",
-    "        This method is to read csv/json/xlsx files\n",
-    "        \"\"\"\n",
-    "        logging.info('Start reading File')\n",
-    "        if not os.path.isfile(filepath):\n",
-    "            logging.error(\"File Not Exist!\")\n",
-    "            sys.exit()\n",
-    "        if filetype == 'csv':\n",
-    "            df = pd.read_csv(filepath, encoding=encod, header=header)\n",
-    "        elif filetype == 'json':\n",
-    "            df = pd.read_json(filepath, encoding=encod, lines=False)\n",
-    "        elif filetype == 'xlsx':\n",
-    "            df = pd.read_excel(filepath, encoding=encod, 
header=header)\n",
-    "        else:\n",
-    "            logging.error(\"Extension Type not Accepted!\")\n",
-    "            sys.exit()\n",
-    "\n",
-    "        logging.debug(df)\n",
-    "        self.data = df\n",
-    "\n",
-    "    def merge_column(self, columns, name):\n",
-    "        \"\"\"\n",
-    "        This method is to merge columns of a pandas dataframe\n",
-    "        \"\"\"\n",
-    "        logging.info('Merge headers %s to %s', str(columns), name)\n",
-    "        self.data[name] = ''\n",
-    "        for header in columns:\n",
-    "            self.data[name] += ' ' + self.data[header]\n",
-    "  \n",
-    "    def clean_body(self, column, remove_template=True, 
remove_code=True):\n",
-    "        \"\"\"\n",
-    "        This methods is to remove template and code from issue's body\n",
-    "        \"\"\"\n",
-    "        logging.info(\"Start Removing Templates..\")\n",
-    "        for i in range(len(self.data)):\n",
-    "            # remove 'Environment info' part\n",
-    "            if remove_template and \"## Environment info\" in 
self.data[column][i]:\n",
-    "                index = self.data.loc[i, column].find(\"## Environment 
info\")\n",
-    "                self.data.loc[i, column] = self.data.loc[i, 
column][:index]\n",
-    "            # remove code\n",
-    "            if remove_code and \"```\" in self.data[column][i]:\n",
-    "                sample = self.data[column][i].split(\"```\")\n",
-    "                sample = [sample[i*2] for i in range(0, 
int((len(sample)+1)/2))]\n",
-    "                self.data.loc[i, column] = \" \".join(sample)\n",
-    "\n",
-    "    def process_text(self, column, remove_symbol=True, 
remove_stopwords=False, stemming=False):\n",
-    "        \"\"\"\n",
-    "        This method is to remove symbols/remove stopwords/extract words 
stem\n",
-    "        \"\"\"\n",
-    "        logging.info(\"Start Data Cleaning...\")\n",
-    "        # remove some symbols\n",
-    "        self.data[column] = 
self.data[column].str.replace(r'[\\n\\r\\t]+', ' ')\n",
-    "        # remove URLs\n",
-    "        self.data[column] = 
self.data[column].str.replace(self.regex_str[3], ' ')\n",
-    "        tempcol = self.data[column].values.tolist()\n",
-    "\n",
-    "        for i in range(len(tempcol)):\n",
-    "            row = BeautifulSoup(tempcol[i], 
'html.parser').get_text().lower()\n",
-    "            # remove symbols\n",
-    "            if remove_symbol:\n",
-    "                row = re.sub('[^a-zA-Z]', ' ', row)\n",
-    "            words = row.split()\n",
-    "            # remove stopwords\n",
-    "            if remove_stopwords:\n",
-    "                words = [w for w in words if w not in self.stops and not 
w.replace('.', '', 1).isdigit()]\n",
-    "            # extract words stem\n",
-    "            if stemming:\n",
-    "                words = [self.porter.stem(w) for w in words] \n",
-    "            row = ' '.join(words)\n",
-    "            tempcol[i] = row.lower()\n",
-    "        return tempcol\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### DataFetcher ###"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Licensed to the Apache Software Foundation (ASF) under one\n",
-    "# or more contributor license agreements.  See the NOTICE file\n",
-    "# distributed with this work for additional information\n",
-    "# regarding copyright ownership.  The ASF licenses this file\n",
-    "# to you under the Apache License, Version 2.0 (the\n",
-    "# \"License\"); you may not use this file except in compliance\n",
-    "# with the License.  You may obtain a copy of the License at\n",
-    "#\n",
-    "#   http://www.apache.org/licenses/LICENSE-2.0\n";,
-    "#\n",
-    "# Unless required by applicable law or agreed to in writing,\n",
-    "# software distributed under the License is distributed on an\n",
-    "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
-    "# KIND, either express or implied.  See the License for the\n",
-    "# specific language governing permissions and limitations\n",
-    "# under the License.\n",
-    "\n",
-    "# This scipt is served to fetch GitHub issues into a json file\n",
-    "from __future__ import print_function\n",
-    "import os\n",
-    "import requests\n",
-    "import json\n",
-    "import re\n",
-    "import pandas as pd\n",
-    "import logging\n",
-    "\n",
-    "\n",
-    "class DataFetcher:\n",
-    "\n",
-    "    def __init__(self,\n",
-    "                 github_user=os.environ.get(\"github_user\"),\n",
-    "                 
github_oauth_token=os.environ.get(\"github_oauth_token\"),\n",
-    "                 repo=os.environ.get(\"repo\")):\n",
-    "        \"\"\"\n",
-    "        This DataFetcher serves to fetch issues data\n",
-    "        Args:\n",
-    "            github_user(str): the github id. ie: \"CathyZhang0822\"\n",
-    "            github_oauth_token(str): the github oauth token, paired with 
github_user to realize authorization\n",
-    "            repo(str): the repo name\n",
-    "        \"\"\"\n",
-    "        self.github_user = github_user\n",
-    "        self.github_oauth_token = github_oauth_token\n",
-    "        self.repo = repo\n",
-    "        self.auth = (self.github_user, self.github_oauth_token)\n",
-    "        self.json_data = None\n",
-    "\n",
-    "    def cleanstr(self, raw_string, sub_string):\n",
-    "        \"\"\"\n",
-    "        This method is to convert all non-alphanumeric charaters from \n",
-    "        raw_string into substring\n",
-    "        \"\"\"\n",
-    "        clean = re.sub(\"[^0-9a-zA-Z]\", sub_string, raw_string)\n",
-    "        return clean.lower()\n",
-    "\n",
-    "    def count_pages(self, state):\n",
-    "        \"\"\"\n",
-    "        This method is to count how many pages of issues/labels in 
total\n",
-    "        state can be \"open\"/\"closed\"/\"all\"\n",
-    "        \"\"\"\n",
-    "        url = 'https://api.github.com/repos/%s/issues' % self.repo\n",
-    "        response = requests.get(url, {'state': state},\n",
-    "                                auth=self.auth)\n",
-    "        assert response.status_code == 200, \"Authorization failed\"\n",
-    "        if \"link\" not in response.headers:\n",
-    "            return 1\n",
-    "        return int(self.cleanstr(response.headers['link'], \" 
\").split()[-3])\n",
-    "    \n",
-    "    def fetch_issues(self, issue_nums):\n",
-    "        \"\"\"\n",
-    "        This method is to fetch issues data\n",
-    "        issue_num: a list of issue ids\n",
-    "        return issues' data in pandas dataframe format\n",
-    "        \"\"\"\n",
-    "        assert issue_nums != [], \"Empty Input!\"\n",
-    "        logging.info(\"Reading issues:{}\".format(\", \".join([str(num) 
for num in issue_nums])))\n",
-    "        data = []\n",
-    "        for number in issue_nums:\n",
-    "            url = 'https://api.github.com/repos/' + self.repo + 
'/issues/' + str(number)\n",
-    "            response = requests.get(url, auth=self.auth)\n",
-    "            item = response.json()\n",
-    "            assert 'title' in item, \"{} issues doesn't 
exist!\".format(str(number))\n",
-    "            data += [{'id': str(number), 'title': item['title'], 'body': 
item['body']}]\n",
-    "        return pd.DataFrame(data)\n",
-    "\n",
-    "    def data2json(self, state, labels=None, other_labels=False):\n",
-    "        \"\"\"\n",
-    "        This method is to store issues' data into a json file, return 
json file's name\n",
-    "        state can be either \"open\"/\"closed\"/\"all\"\n",
-    "        labels is a list of target labels we are interested in\n",
-    "        other_labels can be either \"True\"/\"False\"\n",
-    "        \"\"\"\n",
-    "        assert state in set(['all', 'open', 'closed']), \"Invalid 
State!\"\n",
-    "        logging.info(\"Reading {} issues..\".format(state))\n",
-    "        pages = self.count_pages(state)\n",
-    "        data = []\n",
-    "        for x in range(1, pages+1):\n",
-    "            url = 'https://api.github.com/repos/' + self.repo + 
'/issues?page=' + str(x) \\\n",
-    "                  + '&per_page=30'.format(repo=self.repo)\n",
-    "            response = requests.get(url,\n",
-    "                                    {'state': state,\n",
-    "                                     'base': 'master',\n",
-    "                                     'sort': 'created'},\n",
-    "                                    auth=self.auth)\n",
-    "            for item in response.json():\n",
-    "                if \"pull_request\" in item:\n",
-    "                    continue\n",
-    "                if \"labels\" in item:\n",
-    "                    issue_labels=list(set([item['labels'][i]['name'] for 
i in range(len(item['labels']))]))\n",
-    "                else:\n",
-    "                    continue\n",
-    "                if labels is not None:\n",
-    "                    # fetch issue which has at least one target label\n",
-    "                    for label in labels:\n",
-    "                        if label in issue_labels:\n",
-    "                            if other_labels:\n",
-    "                                # besides target labels, we still want 
other labels\n",
-    "                                data += [{'id': item['number'],'title': 
item['title'], 'body': item['body'], 'labels': issue_labels}]\n",
-    "                            else:\n",
-    "                                # only record target labels\n",
-    "                                if(label in set([\"Feature\", \"Call for 
Contribution\", \"Feature request\"])):\n",
-    "                                    label = \"Feature\"\n",
-    "                                data += [{'id': item['number'], 'title': 
item['title'], 'body': item['body'], 'labels': label}]\n",
-    "                            # if have this break, then we only pick up 
the first target label\n",
-    "                            break\n",
-    "                else:\n",
-    "                    # fetch all issues\n",
-    "                    data += [{'id': item['number'], 'title': 
item['title'], 'body': item['body'], 'labels': issue_labels}]\n",
-    "        self.json_data = data\n",
-    "        s_labels = \"_\".join(labels) if labels is not None else 
\"all_labels\"\n",
-    "        filename = \"{}_data.json_{}\".format(state, s_labels)\n",
-    "        logging.info(\"Writing json file..\")\n",
-    "        with open(filename, 'w') as write_file:\n",
-    "            json.dump(data, write_file)\n",
-    "        logging.info(\"{} json file is ready!\".format(filename))\n",
-    "        return filename\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Trainer ###"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Licensed to the Apache Software Foundation (ASF) under one\n",
-    "# or more contributor license agreements.  See the NOTICE file\n",
-    "# distributed with this work for additional information\n",
-    "# regarding copyright ownership.  The ASF licenses this file\n",
-    "# to you under the Apache License, Version 2.0 (the\n",
-    "# \"License\"); you may not use this file except in compliance\n",
-    "# with the License.  You may obtain a copy of the License at\n",
-    "#\n",
-    "#   http://www.apache.org/licenses/LICENSE-2.0\n";,
-    "#\n",
-    "# Unless required by applicable law or agreed to in writing,\n",
-    "# software distributed under the License is distributed on an\n",
-    "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
-    "# KIND, either express or implied.  See the License for the\n",
-    "# specific language governing permissions and limitations\n",
-    "# under the License.\n",
-    "\n",
-    "# This script is served to train Machine Learning models\n",
-    "\n",
-    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
-    "from sklearn.svm import SVC\n",
-    "from sklearn.preprocessing import LabelEncoder\n",
-    "import tempfile\n",
-    "import pickle\n",
-    "import logging\n",
-    "import os\n",
-    "\n",
-    "\n",
-    "class Trainer:\n",
-    "    # target labels that we are interested in\n",
-    "    labels = [\"Performance\", \"Test\", \"Question\",\n",
-    "               \"Feature request\", \"Call for contribution\",\n",
-    "               \"Feature\", \"Example\", \"Doc\",\n",
-    "               \"Installation\", \"Build\", \"Bug\"]\n",
-    "\n",
-    "    def __init__(self, \n",
-    "                 tv=TfidfVectorizer(min_df=0.00009, ngram_range=(1, 3), 
max_features=10000), \n",
-    "                 clf=SVC(gamma=0.5, C=100, probability=True),\n",
-    "                 tmp_dir = tempfile.TemporaryDirectory()\n",
-    "                 ):\n",
-    "        \"\"\"\n",
-    "        Trainer is to train issues using Machine Learning methods.\n",
-    "        self.labels(list): a list of target labels\n",
-    "        self.tv: TFIDF model (trigram, max_features = 10000)\n",
-    "        self.clf: Classifier (SVC, kenerl = 'rbf')\n",
-    "        self.tmp_tv_file: tempfile to store Vectorizer\n",
-    "        self.tmp_clf_file: tempfile to store Classifier\n",
-    "        self.tmp_labels_file: tempfile to store Labels\n",
-    "        \"\"\"\n",
-    "        self.tv = tv\n",
-    "        self.clf = clf\n",
-    "        self.tmp_dir = tmp_dir\n",
-    "\n",
-    "    def train(self):\n",
-    "        \"\"\"\n",
-    "        This method is to train and save models.\n",
-    "        It has 5 steps:\n",
-    "        1. Fetch issues\n",
-    "        2. Clean data\n",
-    "        3. Word embedding\n",
-    "        4. Train models\n",
-    "        5. Save models\n",
-    "        \"\"\"\n",
-    "        logging.info(\"Start training issues of general labels\")\n",
-    "        # Step1: Fetch issues with general labels\n",
-    "        logging.info(\"Fetching Data..\")\n",
-    "        DF = DataFetcher()\n",
-    "        filename = DF.data2json('all', self.labels, False)\n",
-    "        # Step2: Clean data\n",
-    "        logging.info(\"Cleaning Data..\")\n",
-    "        SP = SentenceParser()\n",
-    "        SP.read_file(filename, 'json')\n",
-    "        SP.clean_body('body', True, True)\n",
-    "        SP.merge_column(['title', 'title', 'title', 'body'], 'train')\n",
-    "        text = SP.process_text('train', True, False, True)\n",
-    "        df = SP.data\n",
-    "        # Step3: Word Embedding\n",
-    "        logging.info(\"Word Embedding..\")\n",
-    "        # tv = TfidfVectorizer(min_df=0.00009, ngram_range=(1, 3), 
max_features=10000)\n",
-    "        tv = self.tv\n",
-    "        X = tv.fit_transform(text).toarray()\n",
-    "        # Labels\n",
-    "        labels = SP.data['labels']\n",
-    "        le = LabelEncoder()\n",
-    "        Y = le.fit_transform(labels)\n",
-    "        # Step4: Train Classifier\n",
-    "        # SVC, kernel = 'rbf'\n",
-    "        logging.info(\"Training Data..\")\n",
-    "        # clf = SVC(gamma=0.5, C=100, probability=True)\n",
-    "        clf = self.clf\n",
-    "        clf.fit(X, Y)\n",
-    "        # Step5: save models\n",
-    "        logging.info(\"Saving Models..\")\n",
-    "        with open(os.path.join(self.tmp_dir.name,'Vectorizer.p'), 'wb') 
as tv_file:\n",
-    "            pickle.dump(tv, tv_file)\n",
-    "        with open(os.path.join(self.tmp_dir.name,'Classifier.p'), 'wb') 
as clf_file:\n",
-    "            pickle.dump(clf, clf_file)\n",
-    "        with open(os.path.join(self.tmp_dir.name,'Labels.p'), 'wb') as 
labels_file:\n",
-    "            pickle.dump(labels, labels_file)\n",
-    "        logging.info(\"Completed!\")\n",
-    "        return self.tmp_dir\n",
-    "    "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Predictor ###"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Licensed to the Apache Software Foundation (ASF) under one\n",
-    "# or more contributor license agreements.  See the NOTICE file\n",
-    "# distributed with this work for additional information\n",
-    "# regarding copyright ownership.  The ASF licenses this file\n",
-    "# to you under the Apache License, Version 2.0 (the\n",
-    "# \"License\"); you may not use this file except in compliance\n",
-    "# with the License.  You may obtain a copy of the License at\n",
-    "#\n",
-    "#   http://www.apache.org/licenses/LICENSE-2.0\n";,
-    "#\n",
-    "# Unless required by applicable law or agreed to in writing,\n",
-    "# software distributed under the License is distributed on an\n",
-    "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
-    "# KIND, either express or implied.  See the License for the\n",
-    "# specific language governing permissions and limitations\n",
-    "# under the License.\n",
-    "\n",
-    "from sklearn.preprocessing import LabelEncoder\n",
-    "import numpy as np\n",
-    "import pickle\n",
-    "import re\n",
-    "import logging\n",
-    "import os\n",
-    "\n",
-    "\n",
-    "class Predictor:\n",
-    "    # keywords will be used to apply rule-based algorithms\n",
-    "    keywords = {\"ci\": [\"ci\", \"ccache\", \"jenkins\"],\n",
-    "                \"flaky\": [\"flaky\"],\n",
-    "                \"gluon\": [\"gluon\"],\n",
-    "                \"coda\": [\"cuda\", \"cudnn\"],\n",
-    "                \"scala\": [\"scala\"],\n",
-    "                \"mkldnn\": [\"mkldnn, mkl\"],\n",
-    "                \"onnx\": [\"onnx\"]}\n",
-    "\n",
-    "    def __init__(self):\n",
-    "        \"\"\"\n",
-    "        Predictor serves to apply rule-based and ML algorithms to predict 
labels\n",
-    "        \"\"\"\n",
-    "        self.tv = None\n",
-    "        self.labels = None\n",
-    "        self.clf = None\n",
-    "\n",
-    "    def reload(self, tmp_dir):\n",
-    "        \"\"\"\n",
-    "        This method is to load models\n",
-    "        \"\"\"\n",
-    "        with open(os.path.join(tmp_dir.name,'Vectorizer.p'), \"rb\") as 
tv:\n",
-    "            self.tv = pickle.load(tv)\n",
-    "        with open(os.path.join(tmp_dir.name,'Classifier.p'), \"rb\") as 
clf:\n",
-    "            self.clf = pickle.load(clf)\n",
-    "        with open(os.path.join(tmp_dir.name,'Labels.p'), \"rb\") as 
labels:\n",
-    "            self.labels = pickle.load(labels)\n",
-    "\n",
-    "    def tokenize(self, row):\n",
-    "        \"\"\"\n",
-    "        This method is to tokenize a sentence into a list of words\n",
-    "        Args:\n",
-    "            row(string): a sentence\n",
-    "        Return:\n",
-    "            words(list): a list of words\n",
-    "        \"\"\"\n",
-    "        row = re.sub('[^a-zA-Z0-9]', ' ', row).lower()\n",
-    "        words = set(row.split())\n",
-    "        return words\n",
-    "\n",
-    "    def rule_based(self, issues):\n",
-    "        \"\"\"\n",
-    "        This method applies rule_based algorithms to predict labels\n",
-    "        Args:\n",
-    "            issues(list): a list of issue numbers\n",
-    "        Return:\n",
-    "            rule_based_predictions(list of lists): labels which satisfy 
rules\n",
-    "        \"\"\"\n",
-    "        DF = DataFetcher()\n",
-    "        df_test = DF.fetch_issues(issues)\n",
-    "        rule_based_predictions = []\n",
-    "        for i in range(len(issues)):\n",
-    "            # extract every issue's title\n",
-    "            row = df_test.loc[i, 'title']\n",
-    "            # apply rule-based algorithms\n",
-    "            single_issue_predictions = []\n",
-    "            if \"feature request\" in row.lower():\n",
-    "                single_issue_predictions.append(\"Feature\")\n",
-    "            if \"c++\" in row.lower():\n",
-    "                single_issue_predictions.append(\"C++\")\n",
-    "            tokens = self.tokenize(row)\n",
-    "            for k, v in self.keywords.items():\n",
-    "                for keyword in v:\n",
-    "                    if keyword in tokens:\n",
-    "                        single_issue_predictions.append(k)\n",
-    "            rule_based_predictions.append(single_issue_predictions)\n",
-    "        return rule_based_predictions\n",
-    "\n",
-    "    def ml_predict(self, issues, threshold=0.3):\n",
-    "        \"\"\"\n",
-    "        This method applies machine learning algorithms to predict 
labels\n",
-    "        Args:\n",
-    "            issues(list): a list of issue numbers\n",
-    "            threshold(float): threshold of probability\n",
-    "        Return:\n",
-    "            ml_predictions(list of lists): predictions\n",
-    "        \"\"\"\n",
-    "        # step1: fetch data\n",
-    "        DF = DataFetcher()\n",
-    "        df_test = DF.fetch_issues(issues)\n",
-    "        # step2: data cleaning\n",
-    "        SP = SentenceParser()\n",
-    "        SP.data = df_test\n",
-    "        SP.clean_body('body', True, True)\n",
-    "        SP.merge_column(['title', 'title', 'title', 'body'], 'train')\n",
-    "        test_text = SP.process_text('train', True, False, True)\n",
-    "        # step3: word embedding\n",
-    "        test_data_tfidf = self.tv.transform(test_text).toarray()\n",
-    "        le = LabelEncoder()\n",
-    "        le.fit_transform(self.labels)\n",
-    "        # step4: classification\n",
-    "        probs = self.clf.predict_proba(test_data_tfidf)\n",
-    "        # pick up top 2 predictions which exceeds threshold\n",
-    "        best_n = np.argsort(probs, axis=1)[:, -2:]\n",
-    "        ml_predictions = []\n",
-    "        for i in range(len(best_n)):\n",
-    "            # 
INFO:Predictor:issue:11919,Performance:0.47353076240017744,Question:0.2440056213336274\n",
-    "            logging.info(\"issue:{}, {}:{}, 
{}:{}\".format(str(issues[i]), str(le.classes_[best_n[i][-1]]), 
str(probs[i][best_n[i][-1]]),\n",
-    "                        str(le.classes_[best_n[i][-2]]), 
str(probs[i][best_n[i][-2]])))\n",
-    "            single_issue_predictions = [le.classes_[best_n[i][j]] for j 
in range(-1, -3, -1) if probs[i][best_n[i][j]] > threshold]\n",
-    "            ml_predictions.append(single_issue_predictions)\n",
-    "        return ml_predictions\n",
-    "\n",
-    "    def predict(self, issues):\n",
-    "        # return predictions of both rule_base algorithms and machine 
learning methods\n",
-    "        rule_based_predictions = self.rule_based(issues)\n",
-    "        ml_predictions = self.ml_predict(issues)\n",
-    "        predictions = 
[list(set(rule_based_predictions[i]+ml_predictions[i])) for i in 
range(len(ml_predictions))]\n",
-    "        return predictions\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "conda_python3",
-   "language": "python",
-   "name": "conda_python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/services/github-bots/PredictLabels/Predictor.py b/services/github-bots/PredictLabels/Predictor.py
deleted file mode 100644
index 3b42761..0000000
--- a/services/github-bots/PredictLabels/Predictor.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from sklearn.preprocessing import LabelEncoder
-from SentenceParser import SentenceParser
-from DataFetcher import DataFetcher
-import numpy as np
-import pickle
-import re
-import logging
-import os
-
-
-class Predictor:
-    # keywords will be used to apply rule-based algorithms
-    keywords = {"ci": ["ci", "ccache", "jenkins"],
-                "flaky": ["flaky"],
-                "gluon": ["gluon"],
-                "coda": ["cuda", "cudnn"],
-                "scala": ["scala"],
-                "mkldnn": ["mkldnn, mkl"],
-                "onnx": ["onnx"]}
-
-    def __init__(self):
-        """
-        Predictor serves to apply rule-based and ML algorithms to predict 
labels
-        """
-        self.tv = None
-        self.labels = None
-        self.clf = None
-
-    def reload(self, tmp_dir):
-        """
-        This method is to load models
-        """
-        with open(os.path.join(tmp_dir.name,'Vectorizer.p'), "rb") as tv:
-            self.tv = pickle.load(tv)
-        with open(os.path.join(tmp_dir.name,'Classifier.p'), "rb") as clf:
-            self.clf = pickle.load(clf)
-        with open(os.path.join(tmp_dir.name,'Labels.p'), "rb") as labels:
-            self.labels = pickle.load(labels)
-
-    def tokenize(self, row):
-        """
-        This method is to tokenize a sentence into a list of words
-        Args:
-            row(string): a sentence
-        Return:
-            words(list): a list of words
-        """
-        row = re.sub('[^a-zA-Z0-9]', ' ', row).lower()
-        words = set(row.split())
-        return words
-
-    def rule_based(self, issues):
-        """
-        This method applies rule_based algorithms to predict labels
-        Args:
-            issues(list): a list of issue numbers
-        Return:
-            rule_based_predictions(list of lists): labels which satisfy rules
-        """
-        DF = DataFetcher()
-        df_test = DF.fetch_issues(issues)
-        rule_based_predictions = []
-        for i in range(len(issues)):
-            # extract every issue's title
-            row = df_test.loc[i, 'title']
-            # apply rule-based algorithms
-            single_issue_predictions = []
-            if "feature request" in row.lower():
-                single_issue_predictions.append("Feature request")
-            if "c++" in row.lower():
-                single_issue_predictions.append("C++")
-            tokens = self.tokenize(row)
-            for k, v in self.keywords.items():
-                for keyword in v:
-                    if keyword in tokens:
-                        single_issue_predictions.append(k)
-            rule_based_predictions.append(single_issue_predictions)
-        return rule_based_predictions
-
-    def ml_predict(self, issues, threshold=0.3):
-        """
-        This method applies machine learning algorithms to predict labels
-        Args:
-            issues(list): a list of issue numbers
-            threshold(float): threshold of probability
-        Return:
-            ml_predictions(list of lists): predictions
-        """
-        # step1: fetch data
-        DF = DataFetcher()
-        df_test = DF.fetch_issues(issues)
-        # step2: data cleaning
-        SP = SentenceParser()
-        SP.data = df_test
-        SP.clean_body('body', True, True)
-        SP.merge_column(['title', 'title', 'title', 'body'], 'train')
-        test_text = SP.process_text('train', True, False, True)
-        # step3: word embedding
-        test_data_tfidf = self.tv.transform(test_text).toarray()
-        le = LabelEncoder()
-        le.fit_transform(self.labels)
-        # step4: classification
-        probs = self.clf.predict_proba(test_data_tfidf)
-        # pick up top 2 predictions which exceeds threshold
-        best_n = np.argsort(probs, axis=1)[:, -2:]
-        ml_predictions = []
-        for i in range(len(best_n)):
-            # 
INFO:Predictor:issue:11919,Performance:0.47353076240017744,Question:0.2440056213336274
-            logging.info("issue:{}, {}:{}, {}:{}".format(str(issues[i]), 
str(le.classes_[best_n[i][-1]]), str(probs[i][best_n[i][-1]]),
-                        str(le.classes_[best_n[i][-2]]), 
str(probs[i][best_n[i][-2]])))
-            single_issue_predictions = [le.classes_[best_n[i][j]] for j in 
range(-1, -3, -1) if probs[i][best_n[i][j]] > threshold]
-            ml_predictions.append(single_issue_predictions)
-        return ml_predictions
-
-    def predict(self, issues):
-        # return predictions of both rule_base algorithms and machine learning 
methods
-        rule_based_predictions = self.rule_based(issues)
-        ml_predictions = self.ml_predict(issues)
-        predictions = [list(set(rule_based_predictions[i]+ml_predictions[i])) 
for i in range(len(ml_predictions))]
-        return predictions
diff --git a/services/github-bots/PredictLabels/README.md b/services/github-bots/PredictLabels/README.md
deleted file mode 100644
index a6a69e2..0000000
--- a/services/github-bots/PredictLabels/README.md
+++ /dev/null
@@ -1,42 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# Elastic Beanstalk Web Server
-
-A web server built on [AWS Elastic 
Beanstalk](https://aws.amazon.com/elasticbeanstalk/) which can response to 
GET/POST requests and realize self-maintenance. It mainly has 2 features:
-  * Train models: it will retrain Machine Learning models every 24 hours 
automatically using latest data.
-  * Predict labels: once it receives GET/POST requests with issues ID, it will 
send predictions back.
-
-## Set up
-*Make sure you are in current directory.*
-* Configure Dockerfile: In `Dockerfile`. Set environment variables (last 3 
lines) with real `github_user`, `github_oauth_token`(READ only token) and 
`repo`.
-* Open terminal, run:
-```bash
-zip eb.zip application.py cron.yaml DataFetcher.py \
-Dockerfile Dockerrun.aws.json plot_piechart.py Predictor.py SentenceParser.py 
Trainer.py \
-requirements.txt stopwords.txt
-```
-It will zip all needed files into `eb.zip`
-* Manually create a new Elastic Beanstalk application.
-    1. Go to AWS Elastic Beanstalk console, click ***Create New 
Application***. Fill in *Application Name* and *Description*, click 
***Create***.
-    2. Under ***Select environment tier***, select ***Web server 
environment***, click ***Select***.
-    3. Under **Base configuration**, select **Preconfigured platform**. In its 
dropdown, select **Docker**. Then select ***Upload your code***, upload 
`eb.zip`.
-    4. Click ***Configure more options***. Modify Intances, in the dropdown of 
Instance type, select t2.large. Click ***Create Environment*** (No need to 
select a security group, EB will create one.)
-    5. It will take about 10 minutes to setup the environment. 
-    6. Once the environment is setup, it will take 5-10 minutes to generate 
models. 
-    7. Write down URL. (ie: 
http://labelbot-env.pgc55xzpte.us-east-1.elasticbeanstalk.com)
-    
\ No newline at end of file
diff --git a/services/github-bots/PredictLabels/SentenceParser.py b/services/github-bots/PredictLabels/SentenceParser.py
deleted file mode 100644
index 786d4a9..0000000
--- a/services/github-bots/PredictLabels/SentenceParser.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This script serves to do data cleaning
-from bs4 import BeautifulSoup
-import nltk
-# fix ssl certificate errors
-import ssl
-try:
-    _create_unverified_https_context = ssl._create_unverified_context
-except AttributeError:
-    pass
-else:
-    ssl._create_default_https_context = _create_unverified_https_context
-import os.path
-import pandas as pd
-import re
-import sys
-import logging
-
-
-class SentenceParser:
-
-    regex_str = [
-        r'<[^>]+>',                                                            
         # HTML tags
-        r'(?:@[\w_]+)',                                                        
         # @-mentions
-        r"(?:\#+[\w_]+[\w\'_\-]*[\w_]+)",                                      
         # hash-tags
-        
r'http[s]?://(?:[a-z]|[0-9]|[$-_@.&amp;+]|[!*\(\),]|(?:%[0-9a-f][0-9a-f]))+',   
# URLs
-        r'(?:(?:\d+,?)+(?:\.?\d+)?)',                                          
         # numbers
-        r"(?:[a-z][a-z'\-_]+[a-z])",                                           
         # words with - and '
-        r'(?:[\w_]+)',                                                         
         # other words
-        r'(?:\S)'                                                              
         # anything else
-    ]
-    # English Stopwords
-    with open('stopwords.txt') as file:
-        stopwords = file.read().split()
-    file.close()
-
-    def __init__(self):
-        """
-        SentenceParser serves to clean text content
-        """
-        self.data = None
-        # extract words stem
-        self.porter = nltk.PorterStemmer()
-        # a set of stopwords
-        self.stops = set(self.stopwords)
-
-    def read_file(self, filepath, filetype, encod='ISO-8859-1', header=None):
-        """
-        This method is to read csv/json/xlsx files
-        """
-        logging.info('Start reading File')
-        if not os.path.isfile(filepath):
-            logging.error("File Not Exist!")
-            sys.exit()
-        if filetype == 'csv':
-            df = pd.read_csv(filepath, encoding=encod, header=header)
-        elif filetype == 'json':
-            df = pd.read_json(filepath, encoding=encod, lines=False)
-        elif filetype == 'xlsx':
-            df = pd.read_excel(filepath, encoding=encod, header=header)
-        else:
-            logging.error("Extension Type not Accepted!")
-            sys.exit()
-
-        logging.debug(df)
-        self.data = df
-
-    def merge_column(self, columns, name):
-        """
-        This method is to merge columns of a pandas dataframe
-        """
-        logging.info('Merge headers %s to %s', str(columns), name)
-        self.data[name] = ''
-        for header in columns:
-            self.data[name] += ' ' + self.data[header]
-  
-    def clean_body(self, column, remove_template=True, remove_code=True):
-        """
-        This method is to remove template and code from issue's body
-        """
-        logging.info("Start Removing Templates..")
-        for i in range(len(self.data)):
-            # remove 'Environment info' part
-            if remove_template and "## Environment info" in 
self.data[column][i]:
-                index = self.data.loc[i, column].find("## Environment info")
-                self.data.loc[i, column] = self.data.loc[i, column][:index]
-            # remove code
-            if remove_code and "```" in self.data[column][i]:
-                sample = self.data[column][i].split("```")
-                sample = [sample[i*2] for i in range(0, 
int((len(sample)+1)/2))]
-                self.data.loc[i, column] = " ".join(sample)
-
-    def process_text(self, column, remove_symbol=True, remove_stopwords=False, 
stemming=False):
-        """
-        This method is to remove symbols/remove stopwords/extract words stem
-        """
-        logging.info("Start Data Cleaning...")
-        # remove some symbols
-        self.data[column] = self.data[column].str.replace(r'[\n\r\t]+', ' ')
-        # remove URLs
-        self.data[column] = self.data[column].str.replace(self.regex_str[3], ' 
')
-        tempcol = self.data[column].values.tolist()
-
-        for i in range(len(tempcol)):
-            row = BeautifulSoup(tempcol[i], 'html.parser').get_text().lower()
-            # remove symbols
-            if remove_symbol:
-                row = re.sub('[^a-zA-Z]', ' ', row)
-            words = row.split()
-            # remove stopwords
-            if remove_stopwords:
-                words = [w for w in words if w not in self.stops and not 
w.replace('.', '', 1).isdigit()]
-            # extract words stem
-            if stemming:
-                words = [self.porter.stem(w) for w in words] 
-            row = ' '.join(words)
-            tempcol[i] = row.lower()
-        return tempcol
diff --git a/services/github-bots/PredictLabels/Trainer.py b/services/github-bots/PredictLabels/Trainer.py
deleted file mode 100644
index 839709d..0000000
--- a/services/github-bots/PredictLabels/Trainer.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This script is served to train Machine Learning models
-from DataFetcher import DataFetcher
-from SentenceParser import SentenceParser
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.svm import SVC
-from sklearn.preprocessing import LabelEncoder
-import tempfile
-import pickle
-import logging
-import os
-
-
-class Trainer:
-    # target labels that we are interested in
-    labels = ["Performance", "Test", "Question",
-               "Feature request", "Call for contribution",
-               "Feature", "Example", "Doc",
-               "Installation", "Build", "Bug"]
-
-    def __init__(self, 
-                 tv=TfidfVectorizer(min_df=0.00009, ngram_range=(1, 3), 
max_features=10000), 
-                 clf=SVC(gamma=0.5, C=100, probability=True),
-                 tmp_dir = tempfile.TemporaryDirectory()
-                 ):
-        """
-        Trainer is to train issues using Machine Learning methods.
-        self.labels(list): a list of target labels
-        self.tv: TFIDF model (trigram, max_features = 10000)
-        self.clf: Classifier (SVC, kernel = 'rbf')
-        self.tmp_tv_file: tempfile to store Vectorizer
-        self.tmp_clf_file: tempfile to store Classifier
-        self.tmp_labels_file: tempfile to store Labels
-        """
-        self.tv = tv
-        self.clf = clf
-        self.tmp_dir = tmp_dir
-
-    def train(self):
-        """
-        This method is to train and save models.
-        It has 5 steps:
-        1. Fetch issues
-        2. Clean data
-        3. Word embedding
-        4. Train models
-        5. Save models
-        """
-        logging.info("Start training issues of general labels")
-        # Step1: Fetch issues with general labels
-        logging.info("Fetching Data..")
-        DF = DataFetcher()
-        filename = DF.data2json('all', self.labels, False)
-        # Step2: Clean data
-        logging.info("Cleaning Data..")
-        SP = SentenceParser()
-        SP.read_file(filename, 'json')
-        SP.clean_body('body', True, True)
-        SP.merge_column(['title', 'title', 'title', 'body'], 'train')
-        text = SP.process_text('train', True, False, True)
-        df = SP.data
-        # Step3: Word Embedding
-        logging.info("Word Embedding..")
-        # tv = TfidfVectorizer(min_df=0.00009, ngram_range=(1, 3), 
max_features=10000)
-        tv = self.tv
-        X = tv.fit_transform(text).toarray()
-        # Labels
-        labels = SP.data['labels']
-        le = LabelEncoder()
-        Y = le.fit_transform(labels)
-        # Step4: Train Classifier
-        # SVC, kernel = 'rbf'
-        logging.info("Training Data..")
-        # clf = SVC(gamma=0.5, C=100, probability=True)
-        clf = self.clf
-        clf.fit(X, Y)
-        # Step5: save models
-        logging.info("Saving Models..")
-        with open(os.path.join(self.tmp_dir.name,'Vectorizer.p'), 'wb') as 
tv_file:
-            pickle.dump(tv, tv_file)
-        with open(os.path.join(self.tmp_dir.name,'Classifier.p'), 'wb') as 
clf_file:
-            pickle.dump(clf, clf_file)
-        with open(os.path.join(self.tmp_dir.name,'Labels.p'), 'wb') as 
labels_file:
-            pickle.dump(labels, labels_file)
-        logging.info("Completed!")
-        return self.tmp_dir
diff --git a/services/github-bots/PredictLabels/application.py 
b/services/github-bots/PredictLabels/application.py
deleted file mode 100644
index 57bf064..0000000
--- a/services/github-bots/PredictLabels/application.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This is a web server built on the Flask framework and deployed via the AWS Elastic Beanstalk service.
-# It responds to HTTP GET/POST requests.
-from flask import Flask, jsonify, request, send_file
-from apscheduler.schedulers.background import BackgroundScheduler
-from apscheduler.triggers.interval import IntervalTrigger
-from Predictor import Predictor
-from Trainer import Trainer
-import plot_piechart
-import timeit
-import atexit
-import logging
-import os.path
-
-logging.getLogger().setLevel(logging.INFO)
-
-application = Flask(__name__)
-
-predictor = Predictor()
-
-
-# GET '/'
[email protected]('/')
-def index():
-    return "Hello!  -Bot"
-
-
-# GET '/issues/<issue>'
-# return predictions of an issue
[email protected]('/issues/<issue>')
-def get_prediction(issue):
-    predictions = predictor.predict([issue])
-    return " ".join(predictions[0])
-
-
-# POST '/predict'
-# return predictions of issues
[email protected]('/predict', methods=['POST'])
-def predict():
-    # get prediction results of multiple issues
-    # data would be a json file {"issues":[1,2,3]}
-    data = request.get_json()["issues"]
-    # predictions = predict_labels.predict(data)
-    predictions = []
-    if len(data) != 0:
-        predictions = predictor.predict(data)
-    response = []
-    for i in range(len(data)):
-        response.append({"number": data[i], "predictions": predictions[i]})
-    return jsonify(response)
-
-
-# POST '/draw'
-# return an image's binary code
[email protected]('/draw', methods=['POST'])
-def plot():
-    # requests.post(url,json={"fracs":[], "labels":[]})
-    data = request.get_json()
-    fracs = data["fracs"]
-    labels = data["labels"]
-    filename = plot_piechart.draw_pie(fracs, labels)
-    return send_file(filename, mimetype='image/png')
-
-
-# helper function
-def train_models():
-    start = timeit.default_timer()
-    trainer = Trainer()
-    tmp_dir = trainer.train()
-    stop = timeit.default_timer()
-    # reload models
-    predictor.reload(tmp_dir=tmp_dir)
-    time = int(stop - start)
-    logging.info("Training completed! Time cost: {} min, {} 
seconds".format(str(int(time/60)), str(time % 60)))
-    return 
-
-
-# Once the server is running, it will retrain ML models every 24 hours
[email protected]_first_request
-def initialize():
-    scheduler = BackgroundScheduler()
-    scheduler.start()
-    scheduler.add_job(
-        func=train_models,
-        trigger=IntervalTrigger(hours=24),
-        id='Training_Job',
-        name='Update models every 24 hours',
-        replace_existing=True)
-    # Shut down the scheduler when exiting the app
-    atexit.register(lambda: scheduler.shutdown())
-
-
-# train initial models
-train_models()
-
-initialize()
-
-# run the app.
-if __name__ == "__main__":
-    # Setting debug to True enables debug output.
-    # 'application.debug = True' should be removed before deploying a production app.
-    application.debug = True
-    application.threaded = True
-    application.run('0.0.0.0', 8000)
diff --git a/services/github-bots/PredictLabels/cron.yaml 
b/services/github-bots/PredictLabels/cron.yaml
deleted file mode 100644
index f47da88..0000000
--- a/services/github-bots/PredictLabels/cron.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-version: 1
-cron: 
-  - name: "task1"
-    url: "/scheduled"
-    schedule: "* * * * *"
diff --git a/services/github-bots/PredictLabels/model_handler.py 
b/services/github-bots/PredictLabels/model_handler.py
deleted file mode 100644
index 8c3bd9a..0000000
--- a/services/github-bots/PredictLabels/model_handler.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# custom service file
-
-# model_handler.py
-
-"""
-ModelHandler defines a base model handler.
-"""
-import logging
-import data_transformer
-import keras
-import sys
-import numpy as np
-import defs
-import mxnet as mx
-
-
-class ModelHandler(object):
-    """
-    A base Model handler implementation.
-    """
-
-    def __init__(self):
-        self.error = None
-        self._context = None
-        self._batch_size = 0
-        self.initialized = False
-        self.mod = None
-
-    def initialize(self, context):
-        """
-        Initialize model. This will be called during model loading time
-        :param context: Initial context contains model server system 
properties.
-        :return:
-        """
-        self._context = context
-        self._batch_size = context.system_properties["batch_size"]
-        self.initialized = True
-
-        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix='./prog', epoch=0)
-        self.mod = mx.mod.Module(symbol=sym,
-                                 data_names=['/dropout_1_input1'],
-                                 context=mx.cpu(),
-                                 label_names=None)
-        self.mod.bind(for_training=False,
-                      data_shapes=[('/dropout_1_input1', (1, 2048, 70), 'float32', 'NTC')],
-                      label_shapes=self.mod._label_shapes)
-
-        self.mod.set_params(arg_params, aux_params)
-
-    def preprocess(self, batch):
-        """
-        Transform raw input into model input data.
-        :param batch: list of raw requests, should match batch size
-        :return: list of preprocessed model input data
-        """
-        assert self._batch_size == len(batch), "Invalid input batch size: {}".format(len(batch))
-        # with open('tmp_file', 'wb') as f:
-        #     f.write(batch[0].get('body'))
-        # return mx.nd.array(data_transformer.file_to_vec('tmp_file', file_vector_size=defs.file_chars_trunc_limit))
-        return mx.nd.array(data_transformer.file_to_vec(batch[0].get('body'), file_vector_size=defs.file_chars_trunc_limit))
-
-    def inference(self, model_input):
-        """
-        Internal inference method
-        :param model_input: transformed model input data
-        :return: list of inference output in NDArray
-        """
-        return self.mod.predict(model_input)
-
-    def postprocess(self, inference_output):
-        """
-        Return predict result in batch.
-        :param inference_output: list of inference output
-        :return: list of predict results
-        """
-        y = inference_output
-        results = []
-        for i in range(0, len(defs.langs)):
-            results.append("{} - {}:     {}%".format(' ' if (y[0][i] < 0.5) else '*', defs.langs[i],
-                                                     (100 * y[0][i])).strip('<NDArray 1 @cpu(0)>%'))
-        return [results]
-
-    def handle(self, data, context):
-        """
-        Custom service entry point function.
-        :param data: list of objects, raw input from request
-        :param context: model server context
-        :return: list of outputs to be sent back to the client
-        """
-
-        try:
-            data = self.preprocess(data)
-            data = self.inference(data)
-            data = self.postprocess(data)
-            print("after", data)
-            return data
-        except Exception as e:
-            logging.error(e, exc_info=True)
-            request_processor = context.request_processor
-            request_processor.report_status(500, "Unknown inference error")
-            return [str(e)] * self._batch_size
-
diff --git a/services/github-bots/PredictLabels/plot_piechart.py 
b/services/github-bots/PredictLabels/plot_piechart.py
deleted file mode 100644
index 6f3cd20..0000000
--- a/services/github-bots/PredictLabels/plot_piechart.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import datetime
-import matplotlib
-# set 'agg' as matplotlib backend
-matplotlib.use('agg', warn=False, force=True)
-from matplotlib import pyplot as plt
-import logging
-
-
-def make_autopct(values):
-    def my_autopct(pct):
-        total = sum(values)
-        val = int(round(pct * total / 100.0))
-        return '{p:.2f}% ({v:d})'.format(p=pct, v=val)
-
-    return my_autopct
-
-
-def draw_pie(fracs, labels):
-    """
-    Plot the pie chart of labels, then save it into the '/tmp/' folder.
-    """
-    logging.info("Drawing the pie chart..")
-    fig = plt.figure()
-    plt.pie(fracs, labels=labels, autopct=make_autopct(fracs), shadow=True)
-    plt.title("Top 10 labels for newly opened issues")
-    figname = "piechart_{}_{}.png".format(str(datetime.datetime.today().date()),
-                                          str(datetime.datetime.today().time()))
-    pic_path = "/tmp/{}".format(figname)
-    fig.savefig(pic_path)
-    return pic_path
diff --git a/services/github-bots/PredictLabels/requirements.txt 
b/services/github-bots/PredictLabels/requirements.txt
deleted file mode 100644
index 0da49d0..0000000
--- a/services/github-bots/PredictLabels/requirements.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-APScheduler==3.5.1
-beautifulsoup4==4.6.0
-boto3==1.7.59
-botocore==1.10.59
-bs4==0.0.1
-certifi==2018.4.16
-chardet==3.0.4
-click==6.7
-cycler==0.10.0
-DateTime==4.2
-docutils==0.14
-Flask==1.0.2
-idna==2.7
-itsdangerous==0.24
-Jinja2==2.10.1
-jmespath==0.9.3
-kiwisolver==1.0.1
-matplotlib==2.2.2
-MarkupSafe==1.0
-nltk==3.4.5
-numpy==1.14.5
-pandas==0.23.3
-pyparsing==2.2.0
-python-dateutil==2.7.3
-pytz==2018.5
-requests==2.20.1
-scikit-learn==0.19.2
-scipy==1.1.0
-six==1.11.0
-sklearn==0.0
-urllib3==1.24.2
-Werkzeug==0.15.3
-zope.interface==4.5.0
diff --git a/services/github-bots/PredictLabels/stopwords.txt 
b/services/github-bots/PredictLabels/stopwords.txt
deleted file mode 100644
index c41ef55..0000000
--- a/services/github-bots/PredictLabels/stopwords.txt
+++ /dev/null
@@ -1 +0,0 @@
-i me my myself we our ours ourselves you you're you've you'll you'd your yours 
yourself yourselves he him his himself she she's her hers herself it it's its 
itself they them their theirs themselves what which who whom this that that'll 
these those am is are was were be been being have has had having do does did 
doing a an the and but if or because as until while of at by for with about 
against between into through during before after above below to from up down in 
out on off over under a [...]
\ No newline at end of file
diff --git a/services/github-bots/PredictLabels/test_datafetcher.py 
b/services/github-bots/PredictLabels/test_datafetcher.py
deleted file mode 100644
index e2da9a1..0000000
--- a/services/github-bots/PredictLabels/test_datafetcher.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import requests
-import boto3
-from botocore.exceptions import ClientError
-from botocore.exceptions import NoCredentialsError
-from DataFetcher import DataFetcher
-import unittest
-import pandas as pd
-from pandas.util.testing import assert_frame_equal
-# some version issue
-try:
-    from unittest.mock import patch
-except ImportError:
-    from mock import patch
-
-# test coverage: 93%
-class TestLabelBot(unittest.TestCase):
-
-    def setUp(self):
-        self.df = DataFetcher()
-        self.df.repo = "apache/incubator-mxnet"
-        self.df.github_user = "cathy"
-        self.df.github_oauth_token = "123"
-
-    def tearDown(self):
-        pass
-
-    def test_cleanstr(self):
-        new_string = self.df.cleanstr("a_b", "")
-        self.assertEqual(new_string, "ab")
-
-    def test_count_pages(self):
-        with patch('DataFetcher.requests.get') as mocked_get:
-            mocked_get.return_value.status_code = 200
-            mocked_get.return_value.json.return_value = [{
-                                                "body": "issue's body",
-                                                "created_at": 
"2018-07-28T18:27:17Z",
-                                                "comments": "0",
-                                                "number": 11925,
-                                                "labels": [{'name': 'Doc'}],
-                                                "state": "open",
-                                                "title": "issue's title",
-                                                "html_url": 
"https://github.com/apache/incubator-mxnet/issues/11925";,
-                                                },
-                                               {"body": "issue's body",
-                                                "created_at": 
"2018-07-28T18:27:17Z",
-                                                "comments": "0",
-                                                "number": 11924,
-                                                "labels": [],
-                                                "state": "closed",
-                                                "title": "issue's title",
-                                                "html_url": 
"https://github.com/apache/incubator-mxnet/issues/11925";,
-                                              }]
-            page = self.df.count_pages('all')
-            self.assertEqual(page, 1)
-
-    def test_fetch_issues(self):
-        with patch('DataFetcher.requests.get') as mocked_get:
-            mocked_get.return_value.status_code = 200
-            mocked_get.return_value.json.return_value = {
-                                                "body": "issue's body",
-                                                "created_at":  
"2018-07-28T18:27:17Z",
-                                                "comments": "0",
-                                                "number": 11925,
-                                                "labels": [{'name': 
'Feature'}],
-                                                "state": "open",
-                                                "title": "issue's title",
-                                                "html_url": 
"https://github.com/apache/incubator-mxnet/issues/11925";,
-                                              }
-            data = self.df.fetch_issues([11925])
-            expected_data = [{'id': "11925", 'title': "issue's title", 'body': 
"issue's body"}]
-            assert_frame_equal(data, pd.DataFrame(expected_data))
-
-    def test_data2json(self):
-        with patch('DataFetcher.requests.get') as mocked_get:
-            mocked_get.return_value.status_code = 200
-            mocked_get.return_value.json.return_value = [{
-                                                "body": "issue's body",
-                                                "created_at": 
"2018-07-28T18:27:17Z",
-                                                "comments": "0",
-                                                "number": 11925,
-                                                "labels": [{'name': 
'Feature'}],
-                                                "state": "open",
-                                                "title": "issue's title",
-                                                "html_url": 
"https://github.com/apache/incubator-mxnet/issues/11925";,
-                                              },
-                                               {"body": "issue's body",
-                                                "created_at": 
"2018-07-28T18:27:17Z",
-                                                "comments": "0",
-                                                "number": 11924,
-                                                "labels": [],
-                                                "state": "closed",
-                                                "title": "issue's title",
-                                                "html_url": 
"https://github.com/apache/incubator-mxnet/issues/11925";,
-                                                }]
-            self.df.data2json('all', labels=["Feature"], other_labels=False)
-            expected_data = [{'id': 11925, 'title': "issue's title", 'body': 
"issue's body", 'labels': 'Feature'}]
-            self.assertEqual(expected_data, self.df.json_data)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/services/github-bots/PredictLabels/test_predictor.py 
b/services/github-bots/PredictLabels/test_predictor.py
deleted file mode 100644
index 502c80d..0000000
--- a/services/github-bots/PredictLabels/test_predictor.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import requests
-import boto3
-from botocore.exceptions import ClientError
-from botocore.exceptions import NoCredentialsError
-from DataFetcher import DataFetcher
-import unittest
-from Predictor import Predictor
-# some version issue
-try:
-    from unittest.mock import patch
-except ImportError:
-    from mock import patch
-
-
-# test coverage: 100%
-class TestLabelBot(unittest.TestCase):
-
-    def setUp(self):
-        self.pr = Predictor()
-
-    def test_tokenize(self):
-        words = self.pr.tokenize("hello_world")
-        self.assertEqual(words, set(['hello','world']))
-
-    def test_rule_based(self):
-        with patch('DataFetcher.requests.get') as mocked_get:
-            mocked_get.return_value.status_code = 200
-            mocked_get.return_value.json.return_value = {
-                                                "body": "issue's body",
-                                                "created_at": 
"2018-07-28T18:27:17Z",
-                                                "comments": "0",
-                                                "number": 11925,
-                                                "labels": [{'name': 'Doc'}],
-                                                "state": "open",
-                                                "title": "a feature requests 
for scala package",
-                                                "html_url": 
"https://github.com/apache/incubator-mxnet/issues/11925";,
-                                              }
-            predictions = self.pr.rule_based([11925])
-            self.assertEqual([['Feature', 'scala']], predictions)
-
-    def test_ml_predict(self):
-        self.pr.reload(tv_file='Vectorizer.p', 
-                       clf_file='Classifier.p',
-                       labels_file='Labels.p')
-        with patch('DataFetcher.requests.get') as mocked_get:
-            mocked_get.return_value.status_code = 200
-            mocked_get.return_value.json.return_value = {
-                                                "body": "test",
-                                                "created_at": 
"2018-07-28T18:27:17Z",
-                                                "comments": "0",
-                                                "number": 11925,
-                                                "labels": [{'name': 'Doc'}],
-                                                "state": "open",
-                                                "title": "a feature requests 
for scala package",
-                                                "html_url": 
"https://github.com/apache/incubator-mxnet/issues/11925";,
-                                              }
-            predictions = self.pr.ml_predict([11925])
-            self.assertEqual([['Feature']], predictions)
-
-    def test_predict(self):
-        self.pr.reload(tv_file='Vectorizer.p', 
-                       clf_file='Classifier.p',
-                       labels_file='Labels.p')
-        with patch('DataFetcher.requests.get') as mocked_get:
-            mocked_get.return_value.status_code = 200
-            mocked_get.return_value.json.return_value = {
-                                                "body": "test",
-                                                "created_at": 
"2018-07-28T18:27:17Z",
-                                                "comments": "0",
-                                                "number": 11925,
-                                                "labels": [{'name': 'Doc'}],
-                                                "state": "open",
-                                                "title": "a feature requests 
for scala package",
-                                                "html_url": 
"https://github.com/apache/incubator-mxnet/issues/11925";,
-                                              }
-            predictions = self.pr.predict([11925])
-            self.assertEqual([['Feature', 'scala']], predictions)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/services/github-bots/PredictLabels/test_sentenceparse.py 
b/services/github-bots/PredictLabels/test_sentenceparse.py
deleted file mode 100644
index a81b3c4..0000000
--- a/services/github-bots/PredictLabels/test_sentenceparse.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import requests
-import boto3
-from botocore.exceptions import ClientError
-from botocore.exceptions import NoCredentialsError
-import unittest
-import pandas as pd
-from SentenceParser import SentenceParser
-from pandas.util.testing import assert_frame_equal
-# some version issue
-try:
-    from unittest.mock import patch
-except ImportError:
-    from mock import patch
-
-# test coverage: 88%
-class TestSentenceParser(unittest.TestCase):
-
-    def setUp(self):
-        self.sp = SentenceParser()
-        self.sp.data = pd.DataFrame([{'id': 11925, 'title': "issue's title",
-                                      'body': " bug ``` import pandas``` ## 
Environment info",
-                                      'labels': ['Doc']}])
-
-    def test_read_file(self):
-        self.sp.read_file('all_data.json_Feature', 'json')
-        expected_data = [{'id': 11925, 'title': "issue's title", 'body': 
"issue's body", 'labels': ['Doc']},
-                         {'id': 11924, 'title': "issue's title", 'body': 
"issue's body", 'labels': []}]
-        assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
-
-    def test_merge_column(self):
-        self.sp.merge_column(['title', 'body'], 'train')
-        expected_data = [{'id': 11925, 'title': "issue's title", 'body': " bug 
``` import pandas``` ## Environment info",
-                          'labels': ['Doc'],
-                          'train': " issue's title  bug ``` import pandas``` 
## Environment info"}]
-        assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
-
-    def test_clean_body(self):
-        self.sp.clean_body('body', True, True)
-        expected_data = [{'id': 11925, 'title': "issue's title", 'body': " bug 
  ", 'labels': ['Doc']}]
-        assert_frame_equal(self.sp.data, pd.DataFrame(expected_data))
-
-    def test_process_text(self):
-        data = self.sp.process_text('body', True, True, True)
-        expected_data = ['bug import panda environ info']
-        self.assertEqual(data, expected_data)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/services/github-bots/PredictLabels/test_trainer.py 
b/services/github-bots/PredictLabels/test_trainer.py
deleted file mode 100644
index 0d94d47..0000000
--- a/services/github-bots/PredictLabels/test_trainer.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-import unittest
-import requests
-import boto3
-from botocore.exceptions import ClientError
-from botocore.exceptions import NoCredentialsError
-from DataFetcher import DataFetcher
-from Trainer import Trainer
-# some version issue
-try:
-    from unittest.mock import patch
-except ImportError:
-    from mock import patch
-
-
-# test coverage: 100%
-class TestTrainer(unittest.TestCase):
-
-    def setUp(self):
-        self.trainer = Trainer()
-
-    def test_train(self):
-        with patch('DataFetcher.requests.get') as mocked_get:
-            mocked_get.return_value.status_code = 200
-            mocked_get.return_value.json.return_value = [{
-                                                "body": "I was looking at the mxnet.metric source code and documentation",
-                                                "created_at": 
"2018-07-28T18:27:17Z",
-                                                "comments": "0",
-                                                "number": 11925,
-                                                "labels": [{'name': 'Doc'}],
-                                                "state": "open",
-                                                "title": "Confusion in 
documentation/implementation of F1, MCC metrics",
-                                                "html_url": 
"https://github.com/apache/incubator-mxnet/issues/11925";,
-                                              },
-                                              { "body": "I train a CNN with 
python under mxnet gluon mys C++ code crash when i call MXPredsetInput.",
-                                                "created_at": 
"2018-07-28T18:27:17Z",
-                                                "comments": "0",
-                                                "number": 11924,
-                                                "labels": [{'name':'Bug'}],
-                                                "state": "closed",
-                                                "title": "Issue in exporting 
gluon model",
-                                                "html_url": 
"https://github.com/apache/incubator-mxnet/issues/11924";,
-                                              }]
-            self.trainer.train()
-
-
-if __name__ == "__main__":
-    unittest.main()
