Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-sortinghat for openSUSE:Factory checked in at 2022-10-08 01:26:12 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-sortinghat (Old) and /work/SRC/openSUSE:Factory/.python-sortinghat.new.2275 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-sortinghat" Sat Oct 8 01:26:12 2022 rev:4 rq:1008876 version:0.7.20 Changes: -------- --- /work/SRC/openSUSE:Factory/python-sortinghat/python-sortinghat.changes 2022-05-12 23:00:13.556785974 +0200 +++ /work/SRC/openSUSE:Factory/.python-sortinghat.new.2275/python-sortinghat.changes 2022-10-08 01:26:34.870416282 +0200 @@ -1,0 +2,9 @@ +Fri Oct 7 16:13:30 UTC 2022 - Yogalakshmi Arunachalam <yarunacha...@suse.com> + +- Update to version 0.7.20 + Bug fixes: + * [gitdm] Skip invalid format lines + * Gitdm parser won't fail reading files with an invalid format. Instead, + * it will ignore invalid content. + +------------------------------------------------------------------- Old: ---- sortinghat-0.7.19.tar.gz New: ---- sortinghat-0.7.20.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-sortinghat.spec ++++++ --- /var/tmp/diff_new_pack.IIo3ns/_old 2022-10-08 01:26:35.258417172 +0200 +++ /var/tmp/diff_new_pack.IIo3ns/_new 2022-10-08 01:26:35.266417191 +0200 @@ -21,7 +21,7 @@ %define skip_python2 1 %define skip_python36 1 Name: python-sortinghat -Version: 0.7.19 +Version: 0.7.20 Release: 0 Summary: A tool to manage identities License: GPL-3.0-only ++++++ sortinghat-0.7.19.tar.gz -> sortinghat-0.7.20.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/AUTHORS new/sortinghat-0.7.20/AUTHORS --- old/sortinghat-0.7.19/AUTHORS 2022-03-18 17:52:34.000000000 +0100 +++ new/sortinghat-0.7.20/AUTHORS 2022-06-02 19:00:13.235980300 +0200 @@ -1,8 +1,8 @@ Santiago Due??as <sdue...@bitergia.com> - Contributors: Andy Grunwald <andygrunw...@gmail.com> Jesus M. 
Gonzalez-Barahona <j...@gsyc.es> Luis Cañas Díaz <lca...@bitergia.com> Miguel Angel Fernandez <mafe...@bitergia.com> Valerio Cosentino <val...@bitergia.com> + Quan Zhou <q...@bitergia.com> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/MANIFEST.in new/sortinghat-0.7.20/MANIFEST.in --- old/sortinghat-0.7.19/MANIFEST.in 2022-03-18 17:52:34.000000000 +0100 +++ new/sortinghat-0.7.20/MANIFEST.in 1970-01-01 01:00:00.000000000 +0100 @@ -1,6 +0,0 @@ -include README.md -include NEWS -include LICENSE -include AUTHORS -include tests/*.py -recursive-include tests/data/ * diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/NEWS new/sortinghat-0.7.20/NEWS --- old/sortinghat-0.7.19/NEWS 2022-03-18 17:52:34.000000000 +0100 +++ new/sortinghat-0.7.20/NEWS 2022-06-02 19:00:13.235980300 +0200 @@ -1,5 +1,14 @@ # Releases +## Sorting Hat 0.7.20 - (2022-06-02) + +**Bug fixes:** + + * [gitdm] Skip invalid format lines\ + Gitdm parser won't fail reading files with an invalid format. Instead, + it will ignore invalid content. + + ## Sorting Hat 0.7 - (2018-10-02) **NOTICE: Database schema generated by SortingHat < 0.7.0 is still @@ -251,3 +260,4 @@ This has been modified to accept any type of aliases. Thus, the input file passed to `gidm2sh` script will be a list of valid aliases instead of email aliases. 
+ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/PKG-INFO new/sortinghat-0.7.20/PKG-INFO --- old/sortinghat-0.7.19/PKG-INFO 2022-03-18 17:52:36.000000000 +0100 +++ new/sortinghat-0.7.20/PKG-INFO 2022-06-02 19:00:27.531397800 +0200 @@ -1,23 +1,33 @@ Metadata-Version: 2.1 Name: sortinghat -Version: 0.7.19 -Summary: A tool to manage identities -Home-page: https://github.com/grimoirelab/sortinghat -Author: Bitergia -Author-email: sdue...@bitergia.com -License: GPLv3 -Keywords: development repositories analytics -Platform: UNKNOWN +Version: 0.7.20 +Summary: A tool to manage identities. +Home-page: https://chaoss.github.io/grimoirelab/ +License: GPL-3.0+ +Keywords: development,grimoirelab +Author: GrimoireLab Developers +Requires-Python: >=3.7,<4.0 Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers -Classifier: Topic :: Software Development Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+) Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Topic :: Software Development +Requires-Dist: PyMySQL (>=0.7.0) +Requires-Dist: jinja2 (>=3.0.1,<3.1.0) +Requires-Dist: numpy (<1.21.1) +Requires-Dist: pandas (>=0.22,<=0.25.3) +Requires-Dist: python-dateutil (>=2.6.0,<3.0.0) +Requires-Dist: pyyaml (>=3.12) +Requires-Dist: requests (>=2.9,<3.0) +Requires-Dist: sqlalchemy (>=1.3.0,<1.4.0) +Requires-Dist: urllib3 (>=1.22,<2.0) +Project-URL: Bug Tracker, https://github.com/chaoss/grimoirelab-sortinghat/issues +Project-URL: Repository, https://github.com/chaoss/grimoirelab-sortinghat Description-Content-Type: text/markdown -License-File: LICENSE -License-File: AUTHORS # Sorting Hat 
[](https://github.com/chaoss/grimoirelab-sortinghat/actions?query=workflow:tests+branch:master+event:push) [](https://coveralls.io/r/chaoss/grimoirelab-sortinghat?branch=master) @@ -696,4 +706,3 @@ Licensed under GNU General Public License (GPL), version 3 or later. - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/pyproject.toml new/sortinghat-0.7.20/pyproject.toml --- old/sortinghat-0.7.19/pyproject.toml 2022-03-18 17:52:34.000000000 +0100 +++ new/sortinghat-0.7.20/pyproject.toml 2022-06-02 19:00:13.235980300 +0200 @@ -1,6 +1,6 @@ [tool.poetry] name = "sortinghat" -version = "0.7.18" +version = "0.7.20" description = "A tool to manage identities." authors = [ "GrimoireLab Developers" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/setup.cfg new/sortinghat-0.7.20/setup.cfg --- old/sortinghat-0.7.19/setup.cfg 2022-03-18 17:52:36.000000000 +0100 +++ new/sortinghat-0.7.20/setup.cfg 1970-01-01 01:00:00.000000000 +0100 @@ -1,12 +0,0 @@ -[metadata] -description-file = README.md - -[flake8] -exclude = .git, .eggs, __pycache__, build, dist, docs, docker -ignore = E129, E402, F841, C901, W504, E731 -max-line-length = 130 - -[egg_info] -tag_build = -tag_date = 0 - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/setup.py new/sortinghat-0.7.20/setup.py --- old/sortinghat-0.7.19/setup.py 2022-03-18 17:52:34.000000000 +0100 +++ new/sortinghat-0.7.20/setup.py 2022-06-02 19:00:27.529466600 +0200 @@ -1,121 +1,57 @@ -#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# -# Copyright (C) 2014-2019 Bitergia -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# -# Authors: -# Santiago Dueñas <sdue...@bitergia.com> -# - -import codecs -import os.path -import re -import sys -import unittest - -# Always prefer setuptools over distutils from setuptools import setup -from setuptools.command.test import test as TestClass -here = os.path.abspath(os.path.dirname(__file__)) -readme_md = os.path.join(here, 'README.md') -version_py = os.path.join(here, 'sortinghat', '_version.py') - -# Get the package description from the README.md file -with codecs.open(readme_md, encoding='utf-8') as f: - long_description = f.read() - -with codecs.open(version_py, 'r', encoding='utf-8') as fd: - version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', - fd.read(), re.MULTILINE).group(1) - - -class TestCommand(TestClass): - - user_options = [] - __dir__ = os.path.dirname(os.path.realpath(__file__)) - - def initialize_options(self): - super().initialize_options() - sys.path.insert(0, os.path.join(self.__dir__, 'tests')) - - def run_tests(self): - test_suite = unittest.TestLoader().discover('.', pattern='test*.py') - result = unittest.TextTestRunner(buffer=True).run(test_suite) - sys.exit(not result.wasSuccessful()) - - -cmdclass = {'test': TestCommand} - -setup(name="sortinghat", - description="A tool to manage identities", - long_description=long_description, - long_description_content_type='text/markdown', - url="https://github.com/grimoirelab/sortinghat", - version=version, - author="Bitergia", - author_email="sdue...@bitergia.com", - license="GPLv3", - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', 
- 'Topic :: Software Development', - 'License :: OSI Approved :: ' - 'GNU General Public License v3 or later (GPLv3+)', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8'], - keywords="development repositories analytics", - packages=['sortinghat', 'sortinghat.db', 'sortinghat.cmd', - 'sortinghat.matching', 'sortinghat.parsing', - 'sortinghat.templates', 'sortinghat.data', - 'sortinghat.bin', 'sortinghat.misc'], - package_data={'sortinghat.templates': ['*.tmpl'], - 'sortinghat.data': ['*'], - }, - entry_points={ - 'console_scripts': [ - 'sortinghat=sortinghat.bin.sortinghat:main', - 'mg2sh=sortinghat.bin.mg2sh:main', - 'sh2mg=sortinghat.bin.sh2mg:main', - 'eclipse2sh=sortinghat.misc.eclipse2sh:main', - 'gitdm2sh=sortinghat.misc.gitdm2sh:main', - 'grimoirelab2sh=sortinghat.misc.grimoirelab2sh:main', - 'mailmap2sh=sortinghat.misc.mailmap2sh:main', - 'mozilla2sh=sortinghat.misc.mozilla2sh:main', - 'stackalytics2sh=sortinghat.misc.stackalytics2sh:main' - ] - }, - setup_requires=[ - 'wheel', - 'pandoc'], - tests_require=[ - 'httpretty>=0.9.5' - ], - install_requires=[ - 'PyMySQL>=0.7.0', - 'sqlalchemy>=1.2,<1.4', - 'jinja2>=3.0.3,<3.1.0', - 'python-dateutil>=2.6.0', - 'pandas>=0.22.0,<=0.25.3', - 'numpy<1.21.1', - 'pyyaml>=3.12', - 'requests>=2.9', - 'urllib3>=1.22' - ], - cmdclass=cmdclass, - zip_safe=False - ) +packages = \ +['sortinghat', + 'sortinghat.bin', + 'sortinghat.cmd', + 'sortinghat.db', + 'sortinghat.matching', + 'sortinghat.misc', + 'sortinghat.parsing', + 'tests'] + +package_data = \ +{'': ['*'], 'sortinghat': ['data/*', 'templates/*'], 'tests': ['data/*']} + +install_requires = \ +['PyMySQL>=0.7.0', + 'jinja2>=3.0.1,<3.1.0', + 'numpy<1.21.1', + 'pandas>=0.22,<=0.25.3', + 'python-dateutil>=2.6.0,<3.0.0', + 'pyyaml>=3.12', + 'requests>=2.9,<3.0', + 'sqlalchemy>=1.3.0,<1.4.0', + 'urllib3>=1.22,<2.0'] + +entry_points = \ +{'console_scripts': ['eclipse2sh = 
sortinghat.misc.eclipse2sh:main', + 'gitdm2sh = sortinghat.misc.gitdm2sh:main', + 'grimoirelab2sh = sortinghat.misc.grimoirelab2sh:main', + 'mailmap2sh = sortinghat.misc.mailmap2sh:main', + 'mg2sh = sortinghat.bin.mg2sh:main', + 'mozilla2sh = sortinghat.misc.mozilla2sh:main', + 'sh2mg = sortinghat.bin.sh2mg:main', + 'sortinghat = sortinghat.bin.sortinghat:main', + 'stackalytics2sh = sortinghat.misc.stackalytics2sh:main']} + +setup_kwargs = { + 'name': 'sortinghat', + 'version': '0.7.20', + 'description': 'A tool to manage identities.', + 'long_description': '# Sorting Hat [](https://github.com/chaoss/grimoirelab-sortinghat/actions?query=workflow:tests+branch:master+event:push) [](https://coveralls.io/r/chaoss/grimoirelab-sortinghat?branch=master)\n\n## Description\n\nA tool to manage identities.\n\nSorting Hat maintains an SQL database of unique identities of communities members across (potentially) many different sources. Identities corresponding to the same real person can be merged in the same unique identity with a unique uuid. For each unique identity, a profile can be defined, with the name and other data shown for the corresponding person by default.\n\nIn addition, each unique identity can be related to one or more affiliations, for different time periods. 
This will usually correspond to different organizations in which the person was employed during those time periods.\n\nSorting Hat is a part of the [GrimoireLab toolset](https://grimoirelab.github.io), which provides Python modules and scripts to analyze data sources with information about software development, and allows the production of interactive dashboards to visualize that information.\n\nIn the context of GrimoireLab, Sorting Hat is usually run after data is retrieved with [Perceval](https://github.com/chaoss/grimoirelab-perceval), to store the identities obtained into its database, and later merge them into unique identities (and maybe affiliate them).\n\n## Source code and contributions\n\nAll the source code is available in the [Sorting Hat GitHub repository](https://github.com/chaoss/grimoirelab-sortinghat). Please, submit pull requests if you have proposals to change the source code, and open an issue if you want to report a bug, ask for a new feature, or just provide feedback.\n\n## Usage\n\n```\nusage: sortinghat [--help] [-c <file>] [-u <us er>] [-p <password>]\n [--host <host>] [--port <port>] [-d <name>]\n command [<cmd_args>]\n\nThe most commonly used sortinghat commands are:\n\n add Add identities\n affiliate Affiliate identities\n autogender Auto complete gender data\n autoprofile Auto complete profiles\n blacklist List, add or delete entries from the blacklist\n config Get and set configuration parameters\n countries List information about countries\n enroll Enroll identities into organizations\n export Export data (i.e identities) from the registry\n init Create an empty registry\n load Import data (i.e identities, organizations) on the registry\n merge Merge unique identities\n mv Move an identity into a unique identity\n log List enrollment information available in the registry\n orgs List, add or delete organizations and domains\n profile Edit profile\n rm Remove identities from the registry\n show Show information about a unique 
identity\n unify Merge identities using a matching algorithm\n withdraw Remove identities from organizations\n\nGeneral options:\n -h, --help show this help message and exit\n -c FILE, --config FILE\n set configuration file\n -u USER, --user USER database user name\n -p PASSWORD, --password PASSWORD\n database user password\n -d DATABASE, --database DATABASE\n name of the database where the registry will be stored\n --host HOST name of the host where the database server is running\n --port PORT port of the host where the database server is running\n\nRun \'sortinghat <command> --help\' to get information about a specific command.\n```\n\n## Installation\n\n### From pypi\n\nYou can install sortinghat as a package from the pypi repository:\n\n` ``\n$ pip install sortinghat\n```\n\n### Native\n\nYou can install sortinghat just by running setup.py script:\n\n```\n$ python setup.py install\n```\n\nThis will install it in the python default directories in your system.\n\nIf you don\'t install sortinghat with root privileges, or don\'t want to install it in the default directories, you can also use the source code directory, as cloned from the main git repo. It is enough to\nconfigure your `$PATH` and `$PYTHONPATH` so that sortinghat, and the Python modules it needs, are found.\n\nAdd to your `$PATH` the directory which contains the sortinghat executables:\n\n```\n$ export PATH=$PATH:sortinghatdir/bin\n```\n\nIn `$PYTHONPATH`, you need to include sortinghat as well. 
If sortinghatdir is the path where sortinghat is installed:\n\n```\n$ export PYTHONPATH=$PYTHONPATH:sortinghatdir\n```\n\nYou are ready to use sortinghat!\n\n### Docker\n\nYou can use our image from [DockerHub](https://hub.docker.com/r/grimoirelab/sortinghat/) (`gri moirelab/sortinghat`) and skip the `docker build` step.\nIf you prefer to build the image yourself execute:\n\n```sh\n$ docker build -t grimoirelab/sortinghat .\n```\n\nNext step would be to start a MySQL docker container for data storage:\n\n```sh\n$ docker run --name mysql \\\n -e MYSQL_ROOT_PASSWORD=sortinghat \\\n -d mysql\n```\n\nRun the sortinghat docker container in interactive mode:\n\n```sh\n$ docker run -i -t --rm \\\n --link mysql:mysql \\\n -e SORTINGHAT_DB_HOST=mysql \\\n -e SORTINGHAT_DB_PASSWORD=sortinghat \\\n -e SORTINGHAT_DB_DATABASE=sortinghat \\\n grimoirelab/sortinghat \\\n /bin/bash\n```\n\nNow you can initialize sortinghat with the database name `sortinghat`:\n\n```\n$ sortinghat init sortinghat\n```\n\nYou are ready to use sortinghat and explore the commands documented below.\nHave fun!\n\n## Configuration\n\nSet the database parameters via the `config` command:\n\ n```\n $ sortinghat config set db.host <mysql-host>\n $ sortinghat config set db.user <user>\n $ sortinghat config set db.password <password>\n $ sortinghat config set db.database <name>\n $ sortinghat config set db.port <port>\n```\n\nAlternatively you can set environment variables:\n\n```\n $ export SORTINGHAT_DB_HOST=<mysql-host>\n $ export SORTINGHAT_DB_USER=<user>\n $ export SORTINGHAT_DB_PASSWORD=<password>\n $ export SORTINGHAT_DB_DATABASE=<name>\n $ export SORTINGHAT_DB_PORT=<port>\n```\n\nAfter this initialize a new database:\n\n```\n $ sortinghat init <name>\n```\n\n## Compatibility between versions\n\nPython 2.7 is no longer supported. Any code using this version will\nnot work. 
Please update your code to 3.4 or newer versions.\n\nSortingHat databases previous to 0.7.0 are compatible but UTF-8 encoded 4-bytes\ncharacters will not be inserted in the database and will cause errors. For this\nreason, it is recommended to update its schema. The fastest way is to\ndump the data into a file, regenerate the database with `init` command\nand restore the data from the dump.\n\nSortingHat databases previous to 0.6.0 are no longer compatible.\nThe database schema changed in `profiles` table to add the fields `gender`\nand `gender_acc`.\n\nThe next MySQL statements should be run to update the schema\n\n```\nmysql> ALTER TABLE profiles ADD COLUMN gender VARCHAR(32) DEFAULT NULL\nmysql> ALTER TABLE profiles ADD COLUMN gender_acc INT(11) DEFAULT NULL\n```\n\nSortingHat databases previous to 0.5.0 are no longer compatible. The\ndatabase schema changed in `uidentites` and `identities` tables to add the\nfield `last_modified` to log when a record was updated.\n\nThe next MySQL statements should be run to update the schema\n\n```\nmysql> ALTER TABLE uidentities ADD COLUMN last_modified DATETIME(6) DEFAULT NULL\nmysql> ALTER TABLE identities ADD COLUMN last_modified DATETIME(6) DEFAULT NULL\n```\n\nSortingHat databases previous to 0.3.0 are no longer compatible. The\nseed used to generate identities UUIDs changed and for that reason, these\nids should be re-generated.\n\nThe next steps will restore the database generating new UUIDs for each identity\nbut keeping the data and relationships between them.\n\n1. Export data\n```\n$ sortinghat export --orgs orgs.json\n$ sortinghat export --identities identities.json\n```\n1. Remove the database and/or create a new one with `sortinghat init`\n1. 
Load data, this will regenerate the UUIDs\n```\n$ sortinghat load orgs.json\n$ sortinghat load identities.json\n```\n\n## Basic commands\n\n* Add some unique identities\n```\n $ sortinghat add --name "John Smith" --email "jsm...@example.com" --username "jsmith" --source scm\n New identity a9b403e150dd4af8953a52a4bb841051e4b705d9 to a9b403e150dd4af8953a52a4bb841051e4b705d9\n\n $ sortinghat add --name "John Doe" --email "j...@example.com" --source scm\n New identity 3de180633322e853861f9ee5f50a87e007b51058 added to 3de180633322e853861f9ee5f50a87e00 7b51058\n```\n\n* Set a profile\n```\n $ sortinghat profile --name "John Smith" --email "jsm...@example.com" --country US a9b403e150dd4af8953a52a4bb841051e4b705d9\n unique identity a9b403e150dd4af8953a52a4bb841051e4b705d9\n\n Profile:\n * Name: John Smith\n * E-Mail: jsm...@example.com\n * Bot: No\n * Country: US - United States of America\n```\n\n* Add an identity to an existing unique identity\n```\n $ sortinghat add --username "jsmith" --source mls --uuid a9b403e150dd4af8953a52a4bb841051e4b705d9\n New identity 2612aad107cae121b45c1f46041650abc8e39421 added to a9b403e150dd4af8953a52a4bb841051e4b705d9\n```\n\n* Merge two identities\n```\n $ sortinghat merge a7637bb1737bc2a83f3a3e25b9b441cba62d97c2 a9b403e150dd4af8953a52a4bb841051e4b705d9\n Unique identity 3de180633322e853861f9ee5f50a87e007b51058 merged on a9b403e150dd4af8953a52a4bb841051e4b705d9\n```\n\n* Move an identity into a unique identity\n```\n $ sortinghat mv 3de180633322e853861f9ee5f50a87e007b51 058 3de180633322e853861f9ee5f50a87e007b51058\n New unique identity 3de180633322e853861f9ee5f50a87e007b51058 created. 
Identity moved\n```\n\n* Remove a unique identity\n```\n $ sortinghat rm 3de180633322e853861f9ee5f50a87e007b51058\n Unique identity 3de180633322e853861f9ee5f50a87e007b51058 removed\n```\n\n* Show identities information\n```\n $ sortinghat show\n unique identity a9b403e150dd4af8953a52a4bb841051e4b705d9\n\n Profile:\n * Name: John Smith\n * E-Mail: jsm...@example.com\n * Bot: No\n * Country: US - United States of America\n\n Identities:\n 2612aad107cae121b45c1f46041650abc8e39421\t-\t-\tjsmith\tmls\n a9b403e150dd4af8953a52a4bb841051e4b705d9\tJohn Smith\tjsm...@example.com\tjsmith\tscm\n\n No enrollments\n```\n\n* Add some organizations\n```\n $ sortinghat orgs -a Example\n $ sortinghat orgs -a Bitergia\n $ sortinghat orgs -a Individual\n```\n\n* Add some domains to the organizations\n```\n $ sortinghat orgs -a Example example.com --t op-domain\n $ sortinghat orgs -a Example web.example.com\n $ sortinghat orgs -a Bitergia bitergia.com --top-domain\n```\n\n* List organizations\n```\n $ sortinghat orgs\n Bitergia\tbitergia.com *\n Example\texample.com *\n Example\tweb.example.com\n Individual\n```\n\n* Remove domains\n```\n $ sortinghat orgs -d Example web.example.com\n```\n\n* Remove organizations\n```\n $ sortinghat orgs -d Bitergia\n```\n\n* Enroll\n```\n $ sortinghat enroll --from 2014-06-01 --to 2015-09-01 a9b403e150dd4af8953a52a4bb841051e4b705d9 Example\n $ sortinghat enroll --from 2015-09-01 a9b403e150dd4af8953a52a4bb841051e4b705d9 Individual\n```\n\n* Show enrollments information\n```\n $ sortinghat show a9b403e150dd4af8953a52a4bb841051e4b705d9\n unique identity a9b403e150dd4af8953a52a4bb841051e4b705d9\n\n Profile:\n * Name: John Smith\n * E-Mail: jsm...@example.com\n * Bot: No\n * Country: US - United States of America\n\n Identities:\n 2612aad107cae121b45c1f46041650abc 8e39421\t-\t-\tjsmith\tmls\n a9b403e150dd4af8953a52a4bb841051e4b705d9\tJohn Smith\tjsm...@example.com\tjsmith\tscm\n\n Enrollments:\n Example\t2014-06-01 00:00:00\t2015-09-01 00:00:00\n 
Individual\t2015-09-01 00:00:00\t2100-01-01 00:00:00\n```\n\n* Withdraw\n```\n $ sortinghat withdraw --from 2014-06-01 --to 2015-09-01 a9b403e150dd4af8953a52a4bb841051e4b705d9 Example\n```\n\n## Basic API calls\n\nSortinghat can be integrated on your Python scripts by leveraging on its API. Each API call requires as a parameter\nthe database in which the operations will be performed. A database object should thus be created by specifying\nthe `user`, `password`, `database` and optional `host` and `port`.\n```\nfrom sortinghat import api\nfrom sortinghat.db.database import Database\n\ndb = Database(\'root\', \'*****\', \'test_db\')\n```\n \n#### Key terms\n\n* `identity_id`: Identifier assigned to the identity.\n* `entity`: Entity can be any term, word or value to blacklist.\n* `from_date` : Starting date which is a datetime objects. The method `str_to_datetime` can be used to convert the\n string date and time parameter to datetime object. \n* `matcher`: Criteria used to match identities.\n* `source`: Source of the identities.\n* `term`: Term to match with an attribute(e.g organization, country name). \n* `to_date`: Ending date which is a datetime objects. The method `str_to_datetime` can be used to convert the\n string date and time parameter to datetime object. 
\n* `uuid`: Unique identifier for the identity.\n \n#### Usage\n \n* Add a unique identity to the registry\n```\napi.add_unique_identity(db=db, uuid=\'a9b403e150dd4af8953a52a4bb841051e4b705d9\')\n```\n\n* Add an identity to the registry\n ```\nsource = \'git\'\nemail = \'jsm...@example.com\'\nname = \'John Smith\'\nusername = \'jsmith\'\nuuid = \'a9b403e150dd4af8953a52a4bb841051e4b705d9\'\n \napi.add_identity(db=db, source=source, email=email, name=name, username=username, uuid=uuid)\n```\n\n* Add an organization to the registry\n```\napi.add_organization(db=db, organization=\'ExampleOrg\')\n```\n\n* Add a new domain to the given organization\n\n To set the domain as the top domain pass `is_top_domain = True`. The domain for an organization can be updated by\n passing `overwrite=True`. \n```\napi.add_domain(db=db, organization=\'ExampleOrg\', domain=\'example.com\', is_top_domain=True, overwrite=False)\n```\n\n* Enroll a unique identity to an organization\n```\nfrom sortinghat.utils import str_to_datetime\n \nuuid = \'a9b403e150dd4af8953a52a4bb841051e4b705d9\'\norganization = \'ExampleOrg\'\nfrom_date = str_to_datetime(\'2020-04-01\')\nto_date = str_to_datetime(\'2020-04-05\')\n \napi.add_enrollment(db=db, uuid=uuid, organization=organization, from_date=from_date, to_date=to_date)\n```\n\n* Add entity to the matching blacklist\n```\napi.add_to_matching_blacklist(db=db, entity=\'example\')\n```\n\n* List the blacklisted entities available in the registry\n\n The API returns a list of blacklisted entities sorted by their name.\n```\napi.blacklist(db=db, term=\'example\')\n```\n\n* List the countries available in the registry\n\n The API returns a list of countries sorted by their country id.\n```\napi.countries(db=db, code=\'US\', term=\'United States of America\')\n```\n\n* Remove a unique identity from the registry\n```\napi.delete_unique_identity(db=db, uuid=\'a9b403e150dd4af8953a52a4bb841051e4b705d9\')\n```\n\n* Remove an identity from the 
registry\n```\napi.delete_identity(db=db, identity_id=\'a9b403e150dd4af8953a52a4bb841051e4b705d9\')\n```\n\n* Remove an organization from the registry\n```\napi.delete_organization(db=db, organization=\'ExampleOrg\')\n```\n\n* Remove the given organization domain from the registry\n```\napi.delete_domain(db=db, organization=\'ExampleOrg\', domain=\'example.com\')\n```\n\n* Withdraw a unique identity from an organization\n```\nfrom sortinghat.utils import str_to_datetime\n \nuuid = \'a9b403e150dd4 af8953a52a4bb841051e4b705d9\'\norganization = \'ExampleOrg\'\nfrom_date = str_to_datetime(\'2020-04-01\')\nto_date = str_to_datetime(\'2020-04-05\')\n \napi.delete_enrollment(db=db, uuid=uuid, organization=organization, from_date=from_date, to_date=to_date)\n```\n\n* Remove a blacklisted entity from the registry\n```\napi.delete_from_matching_blacklist(db=db, entity=\'example\')\n```\n\n* List the domains available in the registry\n\n The API returns a list of domains.\n```\napi.domains(db=db, domain=\'example.com\')\n```\n\n* Edit unique identity profile\n\n The allowed keywords are, `name`, `email`,`gender`, `gender_acc`, `is_bot` and `country_code`. Any other keyword will be\n ignored. 
\n```\nkwargs = {\n \'name\': \'John Doe\',\n \'email\': \'d...@example.com\',\n \'gender\': \'Female\',\n \'gender_acc\': 50,\n \'is_bot\': False,\n \'country_code\': \'IN\'\n}\napi.edit_profile(db=db, uuid=\'a9b403e150dd4af8953a52a4bb841051e4b705d9\', **kwargs)\n``` \n\n* List the enrollment information available in the registry\n\n The API returns a list of enrollments sorted by uuid or by organization.\n```\nfrom sortinghat.utils import str_to_datetime\n \nuuid = \'a9b403e150dd4af8953a52a4bb841051e4b705d9\'\norganization = \'ExampleOrg\'\nfrom_date = str_to_datetime(\'2020-04-01\')\nto_date = str_to_datetime(\'2020-04-05\')\n \napi.enrollments(db=db, uuid=uuid, organization=organization, from_date=from_date, to_date=to_date)\n```\n\n* Search for similar unique identities\n\n The API requires a Matcher object to be passed a parameter. The object can be created using the\n `create_identity_matcher` method.\n```\nfrom sortinghat.matcher import create_identity_matcher\n\nmatcher = create_identity_matcher()\n\napi.match_identities(db=db, uuid=\'a9b403e150dd4af8953a52a4bb841051e4b705d9\', matcher=matcher)\n```\n\n* Merge one unique identity into another\n```\nfrom_uuid = \'a9b403e150dd4af8953a52a4bb841051e4b705d9\'\nto_uuid = \'3de18 0633322e853861f9ee5f50a87e007b51058\'\n \napi.merge_unique_identities(db=db, from_uuid=from_uuid, to_uuid=to_uuid)\n```\n\n* Merge overlapping enrollments\n```\napi.merge_enrollments(db=db, uuid=\'a9b403e150dd4af8953a52a4bb841051e4b705d9\', organization=\'ExampleOrg\')\n```\n\n* Move an identity to a unique identity\n```\nfrom_id = \'a9b403e150dd4af8953a52a4bb841051e4b705d9\'\nto_uuid = \'a9b403e150dd4af8953a52a4bb841051e4b705d9\'\n \napi.move_identity(db=db, from_id=from_id, to_uuid=to_uuid )\n```\n\n* List the organizations available in the registry\n\n The API returns a list of organizations sorted by their name.\n```\napi.registry(db=db, term=\'example\')\n```\n\n* Search for the uuids of identities modified on or after a given 
date\n\n The API returns a list of uuids of identities modified.\n```\napi.search_last_modified_identities(db=db, after=\'2020-04-01\')\n```\n\n* Search for the uuids of unique identities modified on or after a given date\n\n The API returns a list of uuids of unique identities modified.\n```\napi.search_last_modified_unique_identities(db=db, after=\'2020-04-01\')\n```\n\n* List unique identities profiles\n \n The API returns a list of profile entities. To return only the entities having no gender set `no_gender=True`.\n```\napi.search_profiles(db, no_gender=False)\n```\n\n* Search for unique identities\n\n The API returns a list of unique identities. The term will be compared with name, email, username and source values\n of each identity. When `source` is given, this search will be only performed on identities linked to this source.\n```\napi.search_unique_identities(db=db, term=\'example\', source=\'scm\')\n```\n\n* Search for unique identities using slicing\n\n The API returns a list of unique identities starting from `offset` and limiting a maximum number of identities specified by\n `limit`. 
The term will be compared with name, email, username and source values of each identity.\n```\napi.s earch_unique_identities_slice(db=db, term=\'example\', offset=4, limit=20)\n```\n\n* List the unique identities available in the registry\n\n The function returns a list of unique identities.\n```\napi.unique_identities(db=db, uuid=\'a9b403e150dd4af8953a52a4bb841051e4b705d9\', source=\'scm\')\n```\n\n## Import / Export\n\n* Import data from a Sorting Hat JSON file\n```\n $ sortinghat load sh.json\n Loading blacklist...\n Entry added to the blacklist\n 1/1 blacklist entries loaded\n Loading unique identities...\n + 00000ba7f563234e5f239e912f2df1021695122e (old 00000ba7f563234e5f239e912f2df1021695122e) loaded\n + 00003e37e7586be36c64ce4f9eafa89f11be2448 (old 00003e37e7586be36c64ce4f9eafa89f11be2448) loaded\n ...\n + fa84729382093928570aef849483948489238498 (old fa84729382093928570aef849483948489238498) loaded\n 100/100 unique identities loaded\n```\n\n* Export identities\n```\n $ sortinghat export --identities sh_ids.json\n```\n\n* Export organizations\n```\n $ sorting hat export --orgs sh_orgs.json\n```\n\n## Requirements\n\n* Python >= 3.4\n* MySQL >= 5.6 or MariaDB 10.0\n* SQLAlchemy >= 1.2\n* Jinja2 >= 2.7\n* python-dateutil >= 2.6\n* python-yaml >= 3.12\n* requests >= 2.9\n* urllib3 >= 1.22\n\nYou will also need a MySQL Python driver to connect with the database server. 
We recommend using one these packages:\n\n* PyMySQL\n\nOptionally, you can install Pandas library to speed up the matching process:\n\n* python-pandas >= 0.15\n\n## Running tests\n\nSortingHat comes with a comprehensive list of unit tests.\nTo run them, copy the file \'tests/tests.conf.sample\' to \'tests/tests.conf\'\nand edit it to suit your configuration:\n\n* `name`: Name of the database to use for testing\n* `host`, `port`: How to access the database server (MySQL, MariaDB)\n* `user`, `password`: Credentials for the database server\n* `create`: Whether the database for testing will be created (`True`)\n or not (`False`, by default). If `True`, tests will fail if database \n already exists. If `False`, tests will fail if database does not exist.\n\nYou can run the tests through `setup.py` (no need to install dependencies\n or something else, `setup.py` will take care of that):\n\n```\n$ python3 setup.py test\n```\n\n## Troubleshooting\n\nOnce SortingHat has been installed, some errors may pop up when running the test suite due to the underlying MySQL\ndatabase configuration.\n\nMySQL command should be executed without superuser privilege (sudo):\n```\nmysql> GRANT ALL PRIVILEGES ON *.* TO \'root\'@\'localhost\' WITH GRANT OPTION;\nmysql> FLUSH PRIVILEGES;\n```\n\nMySQL strict mode should be disabled:\n```\nmysql> SET @@global.sql_mode= \'\';\n```\n\n## License\n\nLicensed under GNU General Public License (GPL), version 3 or later.\n', + 'author': 'GrimoireLab Developers', + 'author_email': None, + 'maintainer': None, + 'maintainer_email': None, + 'url': 'https://chaoss.github.io/grimoirelab/', + 'packages': packages, + 'package_data': package_data, + 'install_requires': install_requires, + 'entry_points': entry_points, + 'python_requires': '>=3.7,<4.0', +} + + +setup(**setup_kwargs) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/sortinghat/_version.py new/sortinghat-0.7.20/sortinghat/_version.py 
--- old/sortinghat-0.7.19/sortinghat/_version.py 2022-03-18 17:52:34.000000000 +0100 +++ new/sortinghat-0.7.20/sortinghat/_version.py 2022-06-02 19:00:13.235980300 +0200 @@ -1,2 +1,2 @@ -# Versions compliant with PEP 440 https://www.python.org/dev/peps/pep-0440 -__version__ = "0.7.19" +# File auto-generated by semverup on 2022-06-02 16:57:42.559387 +__version__ = "0.7.20" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/sortinghat/parsing/gitdm.py new/sortinghat-0.7.20/sortinghat/parsing/gitdm.py --- old/sortinghat-0.7.19/sortinghat/parsing/gitdm.py 2022-03-18 17:52:34.000000000 +0100 +++ new/sortinghat-0.7.20/sortinghat/parsing/gitdm.py 2022-06-02 19:00:13.235980300 +0200 @@ -296,8 +296,9 @@ m = re.match(self.VALID_LINE_REGEX, line, re.UNICODE) if not m: - cause = "line %s: invalid format" % str(nline) - raise InvalidFormatError(cause=cause) + cause = "Skip: '%s' -> line %s: invalid line format" % (line, str(nline)) + logger.warning(cause) + continue try: result = parse_line(m.group(1), m.group(2)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/sortinghat.egg-info/PKG-INFO new/sortinghat-0.7.20/sortinghat.egg-info/PKG-INFO --- old/sortinghat-0.7.19/sortinghat.egg-info/PKG-INFO 2022-03-18 17:52:36.000000000 +0100 +++ new/sortinghat-0.7.20/sortinghat.egg-info/PKG-INFO 1970-01-01 01:00:00.000000000 +0100 @@ -1,699 +0,0 @@ -Metadata-Version: 2.1 -Name: sortinghat -Version: 0.7.19 -Summary: A tool to manage identities -Home-page: https://github.com/grimoirelab/sortinghat -Author: Bitergia -Author-email: sdue...@bitergia.com -License: GPLv3 -Keywords: development repositories analytics -Platform: UNKNOWN -Classifier: Development Status :: 5 - Production/Stable -Classifier: Intended Audience :: Developers -Classifier: Topic :: Software Development -Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+) -Classifier: 
Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Description-Content-Type: text/markdown -License-File: LICENSE -License-File: AUTHORS - -# Sorting Hat [](https://github.com/chaoss/grimoirelab-sortinghat/actions?query=workflow:tests+branch:master+event:push) [](https://coveralls.io/r/chaoss/grimoirelab-sortinghat?branch=master) - -## Description - -A tool to manage identities. - -Sorting Hat maintains an SQL database of unique identities of communities members across (potentially) many different sources. Identities corresponding to the same real person can be merged in the same unique identity with a unique uuid. For each unique identity, a profile can be defined, with the name and other data shown for the corresponding person by default. - -In addition, each unique identity can be related to one or more affiliations, for different time periods. This will usually correspond to different organizations in which the person was employed during those time periods. - -Sorting Hat is a part of the [GrimoireLab toolset](https://grimoirelab.github.io), which provides Python modules and scripts to analyze data sources with information about software development, and allows the production of interactive dashboards to visualize that information. - -In the context of GrimoireLab, Sorting Hat is usually run after data is retrieved with [Perceval](https://github.com/chaoss/grimoirelab-perceval), to store the identities obtained into its database, and later merge them into unique identities (and maybe affiliate them). - -## Source code and contributions - -All the source code is available in the [Sorting Hat GitHub repository](https://github.com/chaoss/grimoirelab-sortinghat). Please, submit pull requests if you have proposals to change the source code, and open an issue if you want to report a bug, ask for a new feature, or just provide feedback. 
- -## Usage - -``` -usage: sortinghat [--help] [-c <file>] [-u <user>] [-p <password>] - [--host <host>] [--port <port>] [-d <name>] - command [<cmd_args>] - -The most commonly used sortinghat commands are: - - add Add identities - affiliate Affiliate identities - autogender Auto complete gender data - autoprofile Auto complete profiles - blacklist List, add or delete entries from the blacklist - config Get and set configuration parameters - countries List information about countries - enroll Enroll identities into organizations - export Export data (i.e identities) from the registry - init Create an empty registry - load Import data (i.e identities, organizations) on the registry - merge Merge unique identities - mv Move an identity into a unique identity - log List enrollment information available in the registry - orgs List, add or delete organizations and domains - profile Edit profile - rm Remove identities from the registry - show Show information about a unique identity - unify Merge identities using a matching algorithm - withdraw Remove identities from organizations - -General options: - -h, --help show this help message and exit - -c FILE, --config FILE - set configuration file - -u USER, --user USER database user name - -p PASSWORD, --password PASSWORD - database user password - -d DATABASE, --database DATABASE - name of the database where the registry will be stored - --host HOST name of the host where the database server is running - --port PORT port of the host where the database server is running - -Run 'sortinghat <command> --help' to get information about a specific command. -``` - -## Installation - -### From pypi - -You can install sortinghat as a package from the pypi repository: - -``` -$ pip install sortinghat -``` - -### Native - -You can install sortinghat just by running setup.py script: - -``` -$ python setup.py install -``` - -This will install it in the python default directories in your system. 
- -If you don't install sortinghat with root privileges, or don't want to install it in the default directories, you can also use the source code directory, as cloned from the main git repo. It is enough to -configure your `$PATH` and `$PYTHONPATH` so that sortinghat, and the Python modules it needs, are found. - -Add to your `$PATH` the directory which contains the sortinghat executables: - -``` -$ export PATH=$PATH:sortinghatdir/bin -``` - -In `$PYTHONPATH`, you need to include sortinghat as well. If sortinghatdir is the path where sortinghat is installed: - -``` -$ export PYTHONPATH=$PYTHONPATH:sortinghatdir -``` - -You are ready to use sortinghat! - -### Docker - -You can use our image from [DockerHub](https://hub.docker.com/r/grimoirelab/sortinghat/) (`grimoirelab/sortinghat`) and skip the `docker build` step. -If you prefer to build the image yourself execute: - -```sh -$ docker build -t grimoirelab/sortinghat . -``` - -Next step would be to start a MySQL docker container for data storage: - -```sh -$ docker run --name mysql \ - -e MYSQL_ROOT_PASSWORD=sortinghat \ - -d mysql -``` - -Run the sortinghat docker container in interactive mode: - -```sh -$ docker run -i -t --rm \ - --link mysql:mysql \ - -e SORTINGHAT_DB_HOST=mysql \ - -e SORTINGHAT_DB_PASSWORD=sortinghat \ - -e SORTINGHAT_DB_DATABASE=sortinghat \ - grimoirelab/sortinghat \ - /bin/bash -``` - -Now you can initialize sortinghat with the database name `sortinghat`: - -``` -$ sortinghat init sortinghat -``` - -You are ready to use sortinghat and explore the commands documented below. -Have fun! 
- -## Configuration - -Set the database parameters via the `config` command: - -``` - $ sortinghat config set db.host <mysql-host> - $ sortinghat config set db.user <user> - $ sortinghat config set db.password <password> - $ sortinghat config set db.database <name> - $ sortinghat config set db.port <port> -``` - -Alternatively you can set environment variables: - -``` - $ export SORTINGHAT_DB_HOST=<mysql-host> - $ export SORTINGHAT_DB_USER=<user> - $ export SORTINGHAT_DB_PASSWORD=<password> - $ export SORTINGHAT_DB_DATABASE=<name> - $ export SORTINGHAT_DB_PORT=<port> -``` - -After this initialize a new database: - -``` - $ sortinghat init <name> -``` - -## Compatibility between versions - -Python 2.7 is no longer supported. Any code using this version will -not work. Please update your code to 3.4 or newer versions. - -SortingHat databases previous to 0.7.0 are compatible but UTF-8 encoded 4-bytes -characters will not be inserted in the database and will cause errors. For this -reason, it is recommended to update its schema. The fastest way is to -dump the data into a file, regenerate the database with `init` command -and restore the data from the dump. - -SortingHat databases previous to 0.6.0 are no longer compatible. -The database schema changed in `profiles` table to add the fields `gender` -and `gender_acc`. - -The next MySQL statements should be run to update the schema - -``` -mysql> ALTER TABLE profiles ADD COLUMN gender VARCHAR(32) DEFAULT NULL -mysql> ALTER TABLE profiles ADD COLUMN gender_acc INT(11) DEFAULT NULL -``` - -SortingHat databases previous to 0.5.0 are no longer compatible. The -database schema changed in `uidentites` and `identities` tables to add the -field `last_modified` to log when a record was updated. 
- -The next MySQL statements should be run to update the schema - -``` -mysql> ALTER TABLE uidentities ADD COLUMN last_modified DATETIME(6) DEFAULT NULL -mysql> ALTER TABLE identities ADD COLUMN last_modified DATETIME(6) DEFAULT NULL -``` - -SortingHat databases previous to 0.3.0 are no longer compatible. The -seed used to generate identities UUIDs changed and for that reason, these -ids should be re-generated. - -The next steps will restore the database generating new UUIDs for each identity -but keeping the data and relationships between them. - -1. Export data -``` -$ sortinghat export --orgs orgs.json -$ sortinghat export --identities identities.json -``` -1. Remove the database and/or create a new one with `sortinghat init` -1. Load data, this will regenerate the UUIDs -``` -$ sortinghat load orgs.json -$ sortinghat load identities.json -``` - -## Basic commands - -* Add some unique identities -``` - $ sortinghat add --name "John Smith" --email "jsm...@example.com" --username "jsmith" --source scm - New identity a9b403e150dd4af8953a52a4bb841051e4b705d9 to a9b403e150dd4af8953a52a4bb841051e4b705d9 - - $ sortinghat add --name "John Doe" --email "j...@example.com" --source scm - New identity 3de180633322e853861f9ee5f50a87e007b51058 added to 3de180633322e853861f9ee5f50a87e007b51058 -``` - -* Set a profile -``` - $ sortinghat profile --name "John Smith" --email "jsm...@example.com" --country US a9b403e150dd4af8953a52a4bb841051e4b705d9 - unique identity a9b403e150dd4af8953a52a4bb841051e4b705d9 - - Profile: - * Name: John Smith - * E-Mail: jsm...@example.com - * Bot: No - * Country: US - United States of America -``` - -* Add an identity to an existing unique identity -``` - $ sortinghat add --username "jsmith" --source mls --uuid a9b403e150dd4af8953a52a4bb841051e4b705d9 - New identity 2612aad107cae121b45c1f46041650abc8e39421 added to a9b403e150dd4af8953a52a4bb841051e4b705d9 -``` - -* Merge two identities -``` - $ sortinghat merge 
a7637bb1737bc2a83f3a3e25b9b441cba62d97c2 a9b403e150dd4af8953a52a4bb841051e4b705d9 - Unique identity 3de180633322e853861f9ee5f50a87e007b51058 merged on a9b403e150dd4af8953a52a4bb841051e4b705d9 -``` - -* Move an identity into a unique identity -``` - $ sortinghat mv 3de180633322e853861f9ee5f50a87e007b51058 3de180633322e853861f9ee5f50a87e007b51058 - New unique identity 3de180633322e853861f9ee5f50a87e007b51058 created. Identity moved -``` - -* Remove a unique identity -``` - $ sortinghat rm 3de180633322e853861f9ee5f50a87e007b51058 - Unique identity 3de180633322e853861f9ee5f50a87e007b51058 removed -``` - -* Show identities information -``` - $ sortinghat show - unique identity a9b403e150dd4af8953a52a4bb841051e4b705d9 - - Profile: - * Name: John Smith - * E-Mail: jsm...@example.com - * Bot: No - * Country: US - United States of America - - Identities: - 2612aad107cae121b45c1f46041650abc8e39421 - - jsmith mls - a9b403e150dd4af8953a52a4bb841051e4b705d9 John Smith jsm...@example.com jsmith scm - - No enrollments -``` - -* Add some organizations -``` - $ sortinghat orgs -a Example - $ sortinghat orgs -a Bitergia - $ sortinghat orgs -a Individual -``` - -* Add some domains to the organizations -``` - $ sortinghat orgs -a Example example.com --top-domain - $ sortinghat orgs -a Example web.example.com - $ sortinghat orgs -a Bitergia bitergia.com --top-domain -``` - -* List organizations -``` - $ sortinghat orgs - Bitergia bitergia.com * - Example example.com * - Example web.example.com - Individual -``` - -* Remove domains -``` - $ sortinghat orgs -d Example web.example.com -``` - -* Remove organizations -``` - $ sortinghat orgs -d Bitergia -``` - -* Enroll -``` - $ sortinghat enroll --from 2014-06-01 --to 2015-09-01 a9b403e150dd4af8953a52a4bb841051e4b705d9 Example - $ sortinghat enroll --from 2015-09-01 a9b403e150dd4af8953a52a4bb841051e4b705d9 Individual -``` - -* Show enrollments information -``` - $ sortinghat show a9b403e150dd4af8953a52a4bb841051e4b705d9 - unique identity 
a9b403e150dd4af8953a52a4bb841051e4b705d9 - - Profile: - * Name: John Smith - * E-Mail: jsm...@example.com - * Bot: No - * Country: US - United States of America - - Identities: - 2612aad107cae121b45c1f46041650abc8e39421 - - jsmith mls - a9b403e150dd4af8953a52a4bb841051e4b705d9 John Smith jsm...@example.com jsmith scm - - Enrollments: - Example 2014-06-01 00:00:00 2015-09-01 00:00:00 - Individual 2015-09-01 00:00:00 2100-01-01 00:00:00 -``` - -* Withdraw -``` - $ sortinghat withdraw --from 2014-06-01 --to 2015-09-01 a9b403e150dd4af8953a52a4bb841051e4b705d9 Example -``` - -## Basic API calls - -Sortinghat can be integrated on your Python scripts by leveraging on its API. Each API call requires as a parameter -the database in which the operations will be performed. A database object should thus be created by specifying -the `user`, `password`, `database` and optional `host` and `port`. -``` -from sortinghat import api -from sortinghat.db.database import Database - -db = Database('root', '*****', 'test_db') -``` - -#### Key terms - -* `identity_id`: Identifier assigned to the identity. -* `entity`: Entity can be any term, word or value to blacklist. -* `from_date`: Starting date which is a datetime objects. The method `str_to_datetime` can be used to convert the - string date and time parameter to datetime object. -* `matcher`: Criteria used to match identities. -* `source`: Source of the identities. -* `term`: Term to match with an attribute(e.g organization, country name). -* `to_date`: Ending date which is a datetime objects. The method `str_to_datetime` can be used to convert the - string date and time parameter to datetime object. -* `uuid`: Unique identifier for the identity. 
- -#### Usage - -* Add a unique identity to the registry -``` -api.add_unique_identity(db=db, uuid='a9b403e150dd4af8953a52a4bb841051e4b705d9') -``` - -* Add an identity to the registry - ``` -source = 'git' -email = 'jsm...@example.com' -name = 'John Smith' -username = 'jsmith' -uuid = 'a9b403e150dd4af8953a52a4bb841051e4b705d9' - -api.add_identity(db=db, source=source, email=email, name=name, username=username, uuid=uuid) -``` - -* Add an organization to the registry -``` -api.add_organization(db=db, organization='ExampleOrg') -``` - -* Add a new domain to the given organization - - To set the domain as the top domain pass `is_top_domain = True`. The domain for an organization can be updated by - passing `overwrite=True`. -``` -api.add_domain(db=db, organization='ExampleOrg', domain='example.com', is_top_domain=True, overwrite=False) -``` - -* Enroll a unique identity to an organization -``` -from sortinghat.utils import str_to_datetime - -uuid = 'a9b403e150dd4af8953a52a4bb841051e4b705d9' -organization = 'ExampleOrg' -from_date = str_to_datetime('2020-04-01') -to_date = str_to_datetime('2020-04-05') - -api.add_enrollment(db=db, uuid=uuid, organization=organization, from_date=from_date, to_date=to_date) -``` - -* Add entity to the matching blacklist -``` -api.add_to_matching_blacklist(db=db, entity='example') -``` - -* List the blacklisted entities available in the registry - - The API returns a list of blacklisted entities sorted by their name. -``` -api.blacklist(db=db, term='example') -``` - -* List the countries available in the registry - - The API returns a list of countries sorted by their country id. 
-``` -api.countries(db=db, code='US', term='United States of America') -``` - -* Remove a unique identity from the registry -``` -api.delete_unique_identity(db=db, uuid='a9b403e150dd4af8953a52a4bb841051e4b705d9') -``` - -* Remove an identity from the registry -``` -api.delete_identity(db=db, identity_id='a9b403e150dd4af8953a52a4bb841051e4b705d9') -``` - -* Remove an organization from the registry -``` -api.delete_organization(db=db, organization='ExampleOrg') -``` - -* Remove the given organization domain from the registry -``` -api.delete_domain(db=db, organization='ExampleOrg', domain='example.com') -``` - -* Withdraw a unique identity from an organization -``` -from sortinghat.utils import str_to_datetime - -uuid = 'a9b403e150dd4af8953a52a4bb841051e4b705d9' -organization = 'ExampleOrg' -from_date = str_to_datetime('2020-04-01') -to_date = str_to_datetime('2020-04-05') - -api.delete_enrollment(db=db, uuid=uuid, organization=organization, from_date=from_date, to_date=to_date) -``` - -* Remove a blacklisted entity from the registry -``` -api.delete_from_matching_blacklist(db=db, entity='example') -``` - -* List the domains available in the registry - - The API returns a list of domains. -``` -api.domains(db=db, domain='example.com') -``` - -* Edit unique identity profile - - The allowed keywords are, `name`, `email`,`gender`, `gender_acc`, `is_bot` and `country_code`. Any other keyword will be - ignored. -``` -kwargs = { - 'name': 'John Doe', - 'email': 'd...@example.com', - 'gender': 'Female', - 'gender_acc': 50, - 'is_bot': False, - 'country_code': 'IN' -} -api.edit_profile(db=db, uuid='a9b403e150dd4af8953a52a4bb841051e4b705d9', **kwargs) -``` - -* List the enrollment information available in the registry - - The API returns a list of enrollments sorted by uuid or by organization. 
-``` -from sortinghat.utils import str_to_datetime - -uuid = 'a9b403e150dd4af8953a52a4bb841051e4b705d9' -organization = 'ExampleOrg' -from_date = str_to_datetime('2020-04-01') -to_date = str_to_datetime('2020-04-05') - -api.enrollments(db=db, uuid=uuid, organization=organization, from_date=from_date, to_date=to_date) -``` - -* Search for similar unique identities - - The API requires a Matcher object to be passed a parameter. The object can be created using the - `create_identity_matcher` method. -``` -from sortinghat.matcher import create_identity_matcher - -matcher = create_identity_matcher() - -api.match_identities(db=db, uuid='a9b403e150dd4af8953a52a4bb841051e4b705d9', matcher=matcher) -``` - -* Merge one unique identity into another -``` -from_uuid = 'a9b403e150dd4af8953a52a4bb841051e4b705d9' -to_uuid = '3de180633322e853861f9ee5f50a87e007b51058' - -api.merge_unique_identities(db=db, from_uuid=from_uuid, to_uuid=to_uuid) -``` - -* Merge overlapping enrollments -``` -api.merge_enrollments(db=db, uuid='a9b403e150dd4af8953a52a4bb841051e4b705d9', organization='ExampleOrg') -``` - -* Move an identity to a unique identity -``` -from_id = 'a9b403e150dd4af8953a52a4bb841051e4b705d9' -to_uuid = 'a9b403e150dd4af8953a52a4bb841051e4b705d9' - -api.move_identity(db=db, from_id=from_id, to_uuid=to_uuid ) -``` - -* List the organizations available in the registry - - The API returns a list of organizations sorted by their name. -``` -api.registry(db=db, term='example') -``` - -* Search for the uuids of identities modified on or after a given date - - The API returns a list of uuids of identities modified. -``` -api.search_last_modified_identities(db=db, after='2020-04-01') -``` - -* Search for the uuids of unique identities modified on or after a given date - - The API returns a list of uuids of unique identities modified. 
-``` -api.search_last_modified_unique_identities(db=db, after='2020-04-01') -``` - -* List unique identities profiles - - The API returns a list of profile entities. To return only the entities having no gender set `no_gender=True`. -``` -api.search_profiles(db, no_gender=False) -``` - -* Search for unique identities - - The API returns a list of unique identities. The term will be compared with name, email, username and source values - of each identity. When `source` is given, this search will be only performed on identities linked to this source. -``` -api.search_unique_identities(db=db, term='example', source='scm') -``` - -* Search for unique identities using slicing - - The API returns a list of unique identities starting from `offset` and limiting a maximum number of identities specified by - `limit`. The term will be compared with name, email, username and source values of each identity. -``` -api.search_unique_identities_slice(db=db, term='example', offset=4, limit=20) -``` - -* List the unique identities available in the registry - - The function returns a list of unique identities. -``` -api.unique_identities(db=db, uuid='a9b403e150dd4af8953a52a4bb841051e4b705d9', source='scm') -``` - -## Import / Export - -* Import data from a Sorting Hat JSON file -``` - $ sortinghat load sh.json - Loading blacklist... - Entry added to the blacklist - 1/1 blacklist entries loaded - Loading unique identities... - + 00000ba7f563234e5f239e912f2df1021695122e (old 00000ba7f563234e5f239e912f2df1021695122e) loaded - + 00003e37e7586be36c64ce4f9eafa89f11be2448 (old 00003e37e7586be36c64ce4f9eafa89f11be2448) loaded - ... 
- + fa84729382093928570aef849483948489238498 (old fa84729382093928570aef849483948489238498) loaded - 100/100 unique identities loaded -``` - -* Export identities -``` - $ sortinghat export --identities sh_ids.json -``` - -* Export organizations -``` - $ sortinghat export --orgs sh_orgs.json -``` - -## Requirements - -* Python >= 3.4 -* MySQL >= 5.6 or MariaDB 10.0 -* SQLAlchemy >= 1.2 -* Jinja2 >= 2.7 -* python-dateutil >= 2.6 -* python-yaml >= 3.12 -* requests >= 2.9 -* urllib3 >= 1.22 - -You will also need a MySQL Python driver to connect with the database server. We recommend using one these packages: - -* PyMySQL - -Optionally, you can install Pandas library to speed up the matching process: - -* python-pandas >= 0.15 - -## Running tests - -SortingHat comes with a comprehensive list of unit tests. -To run them, copy the file 'tests/tests.conf.sample' to 'tests/tests.conf' -and edit it to suit your configuration: - -* `name`: Name of the database to use for testing -* `host`, `port`: How to access the database server (MySQL, MariaDB) -* `user`, `password`: Credentials for the database server -* `create`: Whether the database for testing will be created (`True`) - or not (`False`, by default). If `True`, tests will fail if database - already exists. If `False`, tests will fail if database does not exist. - -You can run the tests through `setup.py` (no need to install dependencies - or something else, `setup.py` will take care of that): - -``` -$ python3 setup.py test -``` - -## Troubleshooting - -Once SortingHat has been installed, some errors may pop up when running the test suite due to the underlying MySQL -database configuration. 
- -MySQL command should be executed without superuser privilege (sudo): -``` -mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'localhost' WITH GRANT OPTION; -mysql> FLUSH PRIVILEGES; -``` - -MySQL strict mode should be disabled: -``` -mysql> SET @@global.sql_mode= ''; -``` - -## License - -Licensed under GNU General Public License (GPL), version 3 or later. - - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/sortinghat.egg-info/SOURCES.txt new/sortinghat-0.7.20/sortinghat.egg-info/SOURCES.txt --- old/sortinghat-0.7.19/sortinghat.egg-info/SOURCES.txt 2022-03-18 17:52:36.000000000 +0100 +++ new/sortinghat-0.7.20/sortinghat.egg-info/SOURCES.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,196 +0,0 @@ -AUTHORS -LICENSE -MANIFEST.in -NEWS -README.md -pyproject.toml -setup.cfg -setup.py -sortinghat/__init__.py -sortinghat/_version.py -sortinghat/api.py -sortinghat/command.py -sortinghat/exceptions.py -sortinghat/matcher.py -sortinghat/parser.py -sortinghat/utils.py -sortinghat.egg-info/PKG-INFO -sortinghat.egg-info/SOURCES.txt -sortinghat.egg-info/dependency_links.txt -sortinghat.egg-info/entry_points.txt -sortinghat.egg-info/not-zip-safe -sortinghat.egg-info/requires.txt -sortinghat.egg-info/top_level.txt -sortinghat/bin/mg2sh.py -sortinghat/bin/sh2mg.py -sortinghat/bin/sortinghat.py -sortinghat/cmd/__init__.py -sortinghat/cmd/add.py -sortinghat/cmd/affiliate.py -sortinghat/cmd/autogender.py -sortinghat/cmd/autoprofile.py -sortinghat/cmd/blacklist.py -sortinghat/cmd/config.py -sortinghat/cmd/countries.py -sortinghat/cmd/enroll.py -sortinghat/cmd/export.py -sortinghat/cmd/init.py -sortinghat/cmd/load.py -sortinghat/cmd/log.py -sortinghat/cmd/merge.py -sortinghat/cmd/move.py -sortinghat/cmd/organizations.py -sortinghat/cmd/profile.py -sortinghat/cmd/remove.py -sortinghat/cmd/show.py -sortinghat/cmd/unify.py -sortinghat/cmd/withdraw.py -sortinghat/data/countries.csv -sortinghat/db/__init__.py -sortinghat/db/api.py 
-sortinghat/db/database.py -sortinghat/db/model.py -sortinghat/matching/__init__.py -sortinghat/matching/email.py -sortinghat/matching/email_name.py -sortinghat/matching/github.py -sortinghat/matching/username.py -sortinghat/misc/eclipse2sh.py -sortinghat/misc/gitdm2sh.py -sortinghat/misc/grimoirelab2sh.py -sortinghat/misc/mailmap2sh.py -sortinghat/misc/mozilla2sh.py -sortinghat/misc/stackalytics2sh.py -sortinghat/parsing/__init__.py -sortinghat/parsing/eclipse.py -sortinghat/parsing/gitdm.py -sortinghat/parsing/grimoirelab.py -sortinghat/parsing/mailmap.py -sortinghat/parsing/mozilla.py -sortinghat/parsing/sh.py -sortinghat/parsing/stackalytics.py -sortinghat/templates/add.tmpl -sortinghat/templates/affiliate.tmpl -sortinghat/templates/autogender.tmpl -sortinghat/templates/autoprofile.tmpl -sortinghat/templates/blacklist.tmpl -sortinghat/templates/config.tmpl -sortinghat/templates/countries.tmpl -sortinghat/templates/load_blacklist.tmpl -sortinghat/templates/load_domains.tmpl -sortinghat/templates/log.tmpl -sortinghat/templates/match.tmpl -sortinghat/templates/merge.tmpl -sortinghat/templates/move.tmpl -sortinghat/templates/organizations.tmpl -sortinghat/templates/profile.tmpl -sortinghat/templates/remove.tmpl -sortinghat/templates/show.tmpl -sortinghat/templates/unify.tmpl -tests/__init__.py -tests/base.py -tests/run_tests.py -tests/test_api.py -tests/test_cmd_add.py -tests/test_cmd_affiliate.py -tests/test_cmd_autogender.py -tests/test_cmd_autoprofile.py -tests/test_cmd_blacklist.py -tests/test_cmd_config.py -tests/test_cmd_countries.py -tests/test_cmd_enroll.py -tests/test_cmd_export.py -tests/test_cmd_init.py -tests/test_cmd_load.py -tests/test_cmd_log.py -tests/test_cmd_merge.py -tests/test_cmd_move.py -tests/test_cmd_organizations.py -tests/test_cmd_profile.py -tests/test_cmd_remove.py -tests/test_cmd_show.py -tests/test_cmd_unify.py -tests/test_cmd_withdraw.py -tests/test_db_api.py -tests/test_exceptions.py -tests/test_matcher.py 
-tests/test_matcher_email.py -tests/test_matcher_email_name.py -tests/test_matcher_github.py -tests/test_matcher_username.py -tests/test_model.py -tests/test_parser_eclipse.py -tests/test_parser_gitdm.py -tests/test_parser_grimoirelab.py -tests/test_parser_mailmap.py -tests/test_parser_mozilla.py -tests/test_parser_sh.py -tests/test_parser_stackalytics.py -tests/test_utils.py -tests/data/eclipse_ids_missing_keys.json -tests/data/eclipse_invalid.json -tests/data/eclipse_invalid_datetime.json -tests/data/eclipse_orgs_missing_keys.json -tests/data/eclipse_valid.json -tests/data/gitdm_email_aliases_valid.txt -tests/data/gitdm_email_to_employer_invalid.txt -tests/data/gitdm_email_to_employer_valid.txt -tests/data/gitdm_orgs_invalid_comments.txt -tests/data/gitdm_orgs_invalid_entries.txt -tests/data/gitdm_orgs_invalid_entries_alt.txt -tests/data/gitdm_orgs_valid.txt -tests/data/gitdm_orgs_valid_alt.txt -tests/data/grimoire_identities_missing_keys.json -tests/data/grimoire_identities_valid.json -tests/data/grimoirelab_invalid.yml -tests/data/grimoirelab_invalid_blacklist_empty_entry.yml -tests/data/grimoirelab_invalid_blacklist_no_list.yml -tests/data/grimoirelab_invalid_datetime.yml -tests/data/grimoirelab_invalid_email.yml -tests/data/grimoirelab_invalid_enrollment_periods.yml -tests/data/grimoirelab_invalid_missing_accounts.yml -tests/data/grimoirelab_invalid_missing_organization_name.yml -tests/data/grimoirelab_invalid_missing_profile.yml -tests/data/grimoirelab_invalid_missing_profile_name_isbot.yml -tests/data/grimoirelab_invalid_structure.yml -tests/data/grimoirelab_orgs_invalid_domains_list_with_empty_value.yml -tests/data/grimoirelab_orgs_invalid_empty_domains.yml -tests/data/grimoirelab_orgs_invalid_key_with_no_value.yml -tests/data/grimoirelab_orgs_invalid_missing_key.yml -tests/data/grimoirelab_orgs_invalid_wrong_domains_type.yml -tests/data/grimoirelab_orgs_valid.yml -tests/data/grimoirelab_valid.yml -tests/data/identities_format_not_supported.json 
-tests/data/mailmap_identities.txt -tests/data/mailmap_invalid.txt -tests/data/mailmap_orgs.txt -tests/data/mock_config_file.cfg -tests/data/mozillians_ids_missing_keys.json -tests/data/mozillians_invalid.json -tests/data/mozillians_valid.json -tests/data/sortinghat_blacklist_empty_strings.json -tests/data/sortinghat_identities_profiles.json -tests/data/sortinghat_identities_source.json -tests/data/sortinghat_identities_valid.json -tests/data/sortinghat_ids_dates_out_of_bounds.json -tests/data/sortinghat_ids_invalid_date.json -tests/data/sortinghat_ids_invalid_is_bot.json -tests/data/sortinghat_ids_invalid_range_gender_acc.json -tests/data/sortinghat_ids_invalid_type_gender_acc.json -tests/data/sortinghat_ids_missing_keys.json -tests/data/sortinghat_invalid.json -tests/data/sortinghat_no_strict_valid.json -tests/data/sortinghat_orgs.json -tests/data/sortinghat_orgs_invalid_json.json -tests/data/sortinghat_orgs_invalid_top.json -tests/data/sortinghat_orgs_missing_keys.json -tests/data/sortinghat_orgs_valid.json -tests/data/sortinghat_orgs_valid_alt.json -tests/data/sortinghat_valid.json -tests/data/sortinghat_valid_no_gender.json -tests/data/sortinghat_valid_updated.json -tests/data/stackalytics_ids_missing_keys.json -tests/data/stackalytics_invalid.json -tests/data/stackalytics_orgs_missing_keys.json -tests/data/stackalytics_valid.json -tests/data/unify_matches.log \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/sortinghat.egg-info/dependency_links.txt new/sortinghat-0.7.20/sortinghat.egg-info/dependency_links.txt --- old/sortinghat-0.7.19/sortinghat.egg-info/dependency_links.txt 2022-03-18 17:52:36.000000000 +0100 +++ new/sortinghat-0.7.20/sortinghat.egg-info/dependency_links.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' 
old/sortinghat-0.7.19/sortinghat.egg-info/entry_points.txt new/sortinghat-0.7.20/sortinghat.egg-info/entry_points.txt --- old/sortinghat-0.7.19/sortinghat.egg-info/entry_points.txt 2022-03-18 17:52:36.000000000 +0100 +++ new/sortinghat-0.7.20/sortinghat.egg-info/entry_points.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,11 +0,0 @@ -[console_scripts] -eclipse2sh = sortinghat.misc.eclipse2sh:main -gitdm2sh = sortinghat.misc.gitdm2sh:main -grimoirelab2sh = sortinghat.misc.grimoirelab2sh:main -mailmap2sh = sortinghat.misc.mailmap2sh:main -mg2sh = sortinghat.bin.mg2sh:main -mozilla2sh = sortinghat.misc.mozilla2sh:main -sh2mg = sortinghat.bin.sh2mg:main -sortinghat = sortinghat.bin.sortinghat:main -stackalytics2sh = sortinghat.misc.stackalytics2sh:main - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/sortinghat.egg-info/not-zip-safe new/sortinghat-0.7.20/sortinghat.egg-info/not-zip-safe --- old/sortinghat-0.7.19/sortinghat.egg-info/not-zip-safe 2022-03-18 17:52:36.000000000 +0100 +++ new/sortinghat-0.7.20/sortinghat.egg-info/not-zip-safe 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/sortinghat.egg-info/requires.txt new/sortinghat-0.7.20/sortinghat.egg-info/requires.txt --- old/sortinghat-0.7.19/sortinghat.egg-info/requires.txt 2022-03-18 17:52:36.000000000 +0100 +++ new/sortinghat-0.7.20/sortinghat.egg-info/requires.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,9 +0,0 @@ -PyMySQL>=0.7.0 -sqlalchemy<1.4,>=1.2 -jinja2<3.1.0,>=3.0.3 -python-dateutil>=2.6.0 -pandas<=0.25.3,>=0.22.0 -numpy<1.21.1 -pyyaml>=3.12 -requests>=2.9 -urllib3>=1.22 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/sortinghat.egg-info/top_level.txt new/sortinghat-0.7.20/sortinghat.egg-info/top_level.txt --- old/sortinghat-0.7.19/sortinghat.egg-info/top_level.txt 
2022-03-18 17:52:36.000000000 +0100 +++ new/sortinghat-0.7.20/sortinghat.egg-info/top_level.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -sortinghat diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/tests/data/gitdm_email_invalid_lines.txt new/sortinghat-0.7.20/tests/data/gitdm_email_invalid_lines.txt --- old/sortinghat-0.7.19/tests/data/gitdm_email_invalid_lines.txt 1970-01-01 01:00:00.000000000 +0100 +++ new/sortinghat-0.7.20/tests/data/gitdm_email_invalid_lines.txt 2022-06-02 19:00:13.239980500 +0200 @@ -0,0 +1,6 @@ +# +# Gitdm invalid lines example +# + +jsmith.example.com Example Company < 2010-01-01# +jsmith.example.com Example# diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/tests/data/gitdm_orgs_invalid_lines.txt new/sortinghat-0.7.20/tests/data/gitdm_orgs_invalid_lines.txt --- old/sortinghat-0.7.19/tests/data/gitdm_orgs_invalid_lines.txt 1970-01-01 01:00:00.000000000 +0100 +++ new/sortinghat-0.7.20/tests/data/gitdm_orgs_invalid_lines.txt 2022-06-02 19:00:13.239980500 +0200 @@ -0,0 +1,6 @@ +# +# Gitdm invalid lines example +# + +bitergia.com B#itergia +bitergia.com B# Bitergia diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/tests/test_parser_gitdm.py new/sortinghat-0.7.20/tests/test_parser_gitdm.py --- old/sortinghat-0.7.19/tests/test_parser_gitdm.py 2022-03-18 17:52:34.000000000 +0100 +++ new/sortinghat-0.7.20/tests/test_parser_gitdm.py 2022-06-02 19:00:13.243980400 +0200 @@ -30,7 +30,6 @@ sys.path.insert(0, '..') from sortinghat.db.model import UniqueIdentity, Identity, Enrollment, Organization, Domain -from sortinghat.exceptions import InvalidFormatError from sortinghat.parsing.gitdm import GitdmParser from tests.base import datadir @@ -450,12 +449,7 @@ self.assertEqual(dom.is_top_domain, False) def test_not_valid_organizations_stream(self): - """Check whether 
it raises an error when parsing invalid streams""" - - with self.assertRaisesRegex(InvalidFormatError, - DOMAINS_INVALID_FORMAT_ERROR % {'line': '10'}): - stream = self.read_file(datadir('gitdm_orgs_invalid_comments.txt')) - GitdmParser(domain_to_employer=stream) + """Check whether it skips an error when parsing invalid organization streams""" expected_log = [ "Skip: 'example.org ' -> line 8: invalid organization format: ' '" @@ -466,6 +460,45 @@ self.assertEqual(len(captured.records), 1) self.assertEqual(captured.records[0].getMessage(), expected_log[0]) + expected_log = [ + "Skip: 'bitergia.com Bitergia# Comment' -> line 10: invalid line format" + ] + with self.assertLogs() as captured: + stream = self.read_file(datadir('gitdm_orgs_invalid_comments.txt')) + parser = GitdmParser(domain_to_employer=stream) + + # Parsed unique organizations + orgs = parser.organizations + self.assertEqual(len(orgs), 3) + self.assertEqual(len(captured.records), 1) + self.assertEqual(captured.records[0].getMessage(), expected_log[0]) + + def test_skip_lines(self): + """Check whether it skips when parsing invalid lines streams""" + + expected_log = [ + "Skip: 'jsmith.example.com Example Company < 2010-01-01#' -> line 5: invalid line format", + "Skip: 'jsmith.example.com Example#' -> line 6: invalid line format" + ] + with self.assertLogs() as captured: + stream = self.read_file(datadir('gitdm_email_invalid_lines.txt')) + GitdmParser(email_to_employer=stream, + source='unknown', email_validation=True) + self.assertEqual(len(captured.records), 2) + self.assertEqual(captured.records[0].getMessage(), expected_log[0]) + self.assertEqual(captured.records[1].getMessage(), expected_log[1]) + + expected_log = [ + "Skip: 'bitergia.com B#itergia' -> line 5: invalid line format", + "Skip: 'bitergia.com B# Bitergia' -> line 6: invalid line format" + ] + with self.assertLogs() as captured: + stream = self.read_file(datadir('gitdm_orgs_invalid_lines.txt')) + GitdmParser(domain_to_employer=stream) + 
self.assertEqual(len(captured.records), 2) + self.assertEqual(captured.records[0].getMessage(), expected_log[0]) + self.assertEqual(captured.records[1].getMessage(), expected_log[1]) + class TestGitdmRegEx(unittest.TestCase): """Test regular expressions used while parsing Gitdm inputs""" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sortinghat-0.7.19/tests/tests.conf.sample new/sortinghat-0.7.20/tests/tests.conf.sample --- old/sortinghat-0.7.19/tests/tests.conf.sample 1970-01-01 01:00:00.000000000 +0100 +++ new/sortinghat-0.7.20/tests/tests.conf.sample 2022-06-02 19:00:13.243980400 +0200 @@ -0,0 +1,10 @@ +[Database] +name=testhat + +host=127.0.0.1 +port=3306 + +user=root +password=root + +create=False