Re: [gentoo-portage-dev] [PATCH] repoman: add --include-profiles=PROFILES

2019-11-20 Thread Zac Medico
On 11/18/19 11:41 PM, Sergei Trofimovich wrote:
> On Mon, 18 Nov 2019 16:45:58 -0800
> Zac Medico  wrote:
> 
>> On 11/18/19 4:21 PM, Sergei Trofimovich wrote:
>>> repoman slows down ~linearly with amount of profiles being scanned.
>>> In case of amd64 we have 28 stable profiles.
>>>
>>> To speed up processing and fit into the time budget of various CIs we can
>>> split the work across different processes that handle different profiles.
>>>
>>> Example benchmark on ::haskell overlay:
>>> $ ./repoman full --include-arches=amd64
>>> ~65 minutes
>>> $ ./repoman full --include-profiles=default/linux/amd64/17.0
>>> ~4 minutes
>>> This allows for a crude sharding of work across processes and allows for
>>> cheap tree-wide scans for early failures.
>>>
>>> Bug: https://bugs.gentoo.org/700456
>>> Signed-off-by: Sergei Trofimovich 
>>> ---
>>>  repoman/lib/repoman/actions.py  | 4 
>>>  repoman/lib/repoman/argparser.py| 7 +++
>>>  repoman/lib/repoman/modules/scan/depend/__init__.py | 3 ++-
>>>  repoman/lib/repoman/modules/scan/depend/profile.py  | 9 +++--
>>>  repoman/lib/repoman/scanner.py  | 5 +
>>>  repoman/man/repoman.1   | 4 
>>>  6 files changed, 29 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/repoman/lib/repoman/actions.py b/repoman/lib/repoman/actions.py
>>> index 1c9989a72..92d4d4e94 100644
>>> --- a/repoman/lib/repoman/actions.py
>>> +++ b/repoman/lib/repoman/actions.py
>>> @@ -412,6 +412,10 @@ the whole commit message to abort.
>>> report_options.append(
>>> "--include-arches=\"%s\"" %
>>> " ".join(sorted(self.scanner.include_arches)))
>>> +   if self.scanner.include_profiles is not None:
>>> +   report_options.append(
>>> +   "--include-profiles=\"%s\"" %
>>> +   " ".join(sorted(self.scanner.include_profiles)))
>>>  
>>> if portage_version is None:
>>> sys.stderr.write("Failed to insert portage version in 
>>> message!\n")
>>> diff --git a/repoman/lib/repoman/argparser.py 
>>> b/repoman/lib/repoman/argparser.py
>>> index fa0e6ff90..670a0e91d 100644
>>> --- a/repoman/lib/repoman/argparser.py
>>> +++ b/repoman/lib/repoman/argparser.py
>>> @@ -164,6 +164,13 @@ def parse_args(argv, repoman_default_opts):
>>> 'A space separated list of arches used to '
>>> 'filter the selection of profiles for dependency 
>>> checks'))
>>>  
>>> +   parser.add_argument(
>>> +   '--include-profiles',
>>> +   dest='include_profiles', metavar='PROFILES', action='append',
>>> +   help=(
>>> +   'A space separated list of profiles used to '
>>> +   'define the selection of profiles for dependency 
>>> checks'))
>>> +
>>> parser.add_argument(
>>> '-d', '--include-dev', dest='include_dev', action='store_true',
>>> default=False,
>>> diff --git a/repoman/lib/repoman/modules/scan/depend/__init__.py 
>>> b/repoman/lib/repoman/modules/scan/depend/__init__.py
>>> index c3cc0ddeb..9068760bb 100644
>>> --- a/repoman/lib/repoman/modules/scan/depend/__init__.py
>>> +++ b/repoman/lib/repoman/modules/scan/depend/__init__.py
>>> @@ -19,7 +19,8 @@ module_spec = {
>>> 'func_desc': {
>>> },
>>> 'mod_kwargs': ['qatracker', 'portdb', 'profiles', 
>>> 'options',
>>> -   'repo_metadata', 'repo_settings', 
>>> 'include_arches', 'caches',
>>> +   'repo_metadata', 'repo_settings', 
>>> 'include_arches',
>>> +   'include_profiles', 'caches',
>>> 'repoman_incrementals', 'env', 'have', 
>>> 'dev_keywords'
>>> ],
>>> 'func_kwargs': {
>>> diff --git a/repoman/lib/repoman/modules/scan/depend/profile.py 
>>> b/repoman/lib/repoman/modules/scan/depend/profile.py
>>> index d980f4eca..0b1d74483 100644
>>> --- a/repoman/lib/repoman/modules/scan/depend/profile.py
>>> +++ b/repoman/lib/repoman/modules/scan/depend/profile.py
>>> @@ -33,6 +33,7 @@ class ProfileDependsChecks(ScanBase):
>>> @param options: cli options
>>> @param repo_settings: repository settings instance
>>> @param include_arches: set
>>> +   @param include_profiles: set
>>> @param caches: dictionary of our caches
>>> @param repoman_incrementals: tuple
>>> @param env: the environment
>>> @@ -46,6 +47,7 @@ class ProfileDependsChecks(ScanBase):
>>> self.options = kwargs.get('options')
>>> self.repo_settings = kwargs.get('repo_settings')
>>> self.include_arches = kwargs.get('include_arches')
>>> +   self.include_profiles = kwargs.get('include_profiles')
>>>

Re: [gentoo-portage-dev] [PATCH] repoman: add --include-profiles=PROFILES

2019-11-20 Thread Zac Medico
On 11/18/19 4:21 PM, Sergei Trofimovich wrote:
> diff --git a/repoman/lib/repoman/modules/scan/depend/profile.py 
> b/repoman/lib/repoman/modules/scan/depend/profile.py
> index d980f4eca..0b1d74483 100644
> --- a/repoman/lib/repoman/modules/scan/depend/profile.py
> +++ b/repoman/lib/repoman/modules/scan/depend/profile.py
> @@ -33,6 +33,7 @@ class ProfileDependsChecks(ScanBase):
>   @param options: cli options
>   @param repo_settings: repository settings instance
>   @param include_arches: set
> + @param include_profiles: set
>   @param caches: dictionary of our caches
>   @param repoman_incrementals: tuple
>   @param env: the environment
> @@ -46,6 +47,7 @@ class ProfileDependsChecks(ScanBase):
>   self.options = kwargs.get('options')
>   self.repo_settings = kwargs.get('repo_settings')
>   self.include_arches = kwargs.get('include_arches')
> + self.include_profiles = kwargs.get('include_profiles')
>   self.caches = kwargs.get('caches')
>   self.repoman_incrementals = kwargs.get('repoman_incrementals')
>   self.env = kwargs.get('env')
> @@ -81,8 +83,11 @@ class ProfileDependsChecks(ScanBase):
>   if arch not in self.include_arches:
>   continue
>  
> - relevant_profiles.extend(
> - (keyword, groups, prof) for prof in 
> self.profiles[arch])
> + for prof in self.profiles[arch]:
> + if self.include_profiles is not None:
> + if prof not in self.include_profiles:

Since prof is an instance of repoman.profile.ProfileDesc, you actually
have to use prof.sub_path for this containment check. That means your
benchmark skipped the profile that you intended to include.

> + continue
> + relevant_profiles.append((keyword, groups, 
> prof))
-- 
Thanks,
Zac



signature.asc
Description: OpenPGP digital signature


Re: [gentoo-portage-dev] [PATCH] repoman: add --include-profiles=PROFILES

2019-11-19 Thread Zac Medico
On 11/19/19 8:34 AM, Pacho Ramos wrote:
> El mar, 19-11-2019 a las 00:21 +0000, Sergei Trofimovich escribió:
>> repoman slows down ~linearly with amount of profiles being scanned.
>> In case of amd64 we have 28 stable profiles.
>>
>> To speed up processing and fit into the time budget of various CIs we can
>> split the work across different processes that handle different profiles.
>>
>> Example benchmark on ::haskell overlay:
>> $ ./repoman full --include-arches=amd64
>> ~65 minutes
>> $ ./repoman full --include-profiles=default/linux/amd64/17.0
>> ~4 minutes
>> This allows for a crude sharding of work across processes and allows for
>> cheap tree-wide scans for early failures.
>>
> 
> Just out of curiosity (as I guess there is a technical issue preventing that),
> why doesn't repoman try to check one profile per core in parallel by default
> by itself?

Some things to consider when implementing that:

* Some metadata cache backends do not work across forks (sqlite).

* You'll need a master process to aggregate serialized results from
child processes.

* Some work which is currently done only once would likely have to be
repeated in parallel child processes, for example multiple processes
would be simultaneously parsing the same metadata cache entries or even
generating metadata for the same package simultaneously.

> Thanks a lot for the info :)
-- 
Thanks,
Zac



signature.asc
Description: OpenPGP digital signature


Re: [gentoo-portage-dev] [PATCH] repoman: add --include-profiles=PROFILES

2019-11-19 Thread Pacho Ramos
El mar, 19-11-2019 a las 00:21 +0000, Sergei Trofimovich escribió:
> repoman slows down ~linearly with amount of profiles being scanned.
> In case of amd64 we have 28 stable profiles.
> 
> To speed up processing and fit into the time budget of various CIs we can
> split the work across different processes that handle different profiles.
> 
> Example benchmark on ::haskell overlay:
> $ ./repoman full --include-arches=amd64
> ~65 minutes
> $ ./repoman full --include-profiles=default/linux/amd64/17.0
> ~4 minutes
> This allows for a crude sharding of work across processes and allows for
> cheap tree-wide scans for early failures.
> 

Just out of curiosity (as I guess there is a technical issue preventing that),
why doesn't repoman try to check one profile per core in parallel by default
by itself?

Thanks a lot for the info :)


signature.asc
Description: This is a digitally signed message part


Re: [gentoo-portage-dev] [PATCH] repoman: add --include-profiles=PROFILES

2019-11-18 Thread Sergei Trofimovich
On Mon, 18 Nov 2019 16:45:58 -0800
Zac Medico  wrote:

> On 11/18/19 4:21 PM, Sergei Trofimovich wrote:
> > repoman slows down ~linearly with amount of profiles being scanned.
> > In case of amd64 we have 28 stable profiles.
> > 
> > To speed up processing and fit into the time budget of various CIs we can
> > split the work across different processes that handle different profiles.
> > 
> > Example benchmark on ::haskell overlay:
> > $ ./repoman full --include-arches=amd64
> > ~65 minutes
> > $ ./repoman full --include-profiles=default/linux/amd64/17.0
> > ~4 minutes
> > This allows for a crude sharding of work across processes and allows for
> > cheap tree-wide scans for early failures.
> > 
> > Bug: https://bugs.gentoo.org/700456
> > Signed-off-by: Sergei Trofimovich 
> > ---
> >  repoman/lib/repoman/actions.py  | 4 
> >  repoman/lib/repoman/argparser.py| 7 +++
> >  repoman/lib/repoman/modules/scan/depend/__init__.py | 3 ++-
> >  repoman/lib/repoman/modules/scan/depend/profile.py  | 9 +++--
> >  repoman/lib/repoman/scanner.py  | 5 +
> >  repoman/man/repoman.1   | 4 
> >  6 files changed, 29 insertions(+), 3 deletions(-)
> > 
> > diff --git a/repoman/lib/repoman/actions.py b/repoman/lib/repoman/actions.py
> > index 1c9989a72..92d4d4e94 100644
> > --- a/repoman/lib/repoman/actions.py
> > +++ b/repoman/lib/repoman/actions.py
> > @@ -412,6 +412,10 @@ the whole commit message to abort.
> > report_options.append(
> > "--include-arches=\"%s\"" %
> > " ".join(sorted(self.scanner.include_arches)))
> > +   if self.scanner.include_profiles is not None:
> > +   report_options.append(
> > +   "--include-profiles=\"%s\"" %
> > +   " ".join(sorted(self.scanner.include_profiles)))
> >  
> > if portage_version is None:
> > sys.stderr.write("Failed to insert portage version in 
> > message!\n")
> > diff --git a/repoman/lib/repoman/argparser.py 
> > b/repoman/lib/repoman/argparser.py
> > index fa0e6ff90..670a0e91d 100644
> > --- a/repoman/lib/repoman/argparser.py
> > +++ b/repoman/lib/repoman/argparser.py
> > @@ -164,6 +164,13 @@ def parse_args(argv, repoman_default_opts):
> > 'A space separated list of arches used to '
> > 'filter the selection of profiles for dependency 
> > checks'))
> >  
> > +   parser.add_argument(
> > +   '--include-profiles',
> > +   dest='include_profiles', metavar='PROFILES', action='append',
> > +   help=(
> > +   'A space separated list of profiles used to '
> > +   'define the selection of profiles for dependency 
> > checks'))
> > +
> > parser.add_argument(
> > '-d', '--include-dev', dest='include_dev', action='store_true',
> > default=False,
> > diff --git a/repoman/lib/repoman/modules/scan/depend/__init__.py 
> > b/repoman/lib/repoman/modules/scan/depend/__init__.py
> > index c3cc0ddeb..9068760bb 100644
> > --- a/repoman/lib/repoman/modules/scan/depend/__init__.py
> > +++ b/repoman/lib/repoman/modules/scan/depend/__init__.py
> > @@ -19,7 +19,8 @@ module_spec = {
> > 'func_desc': {
> > },
> > 'mod_kwargs': ['qatracker', 'portdb', 'profiles', 
> > 'options',
> > -   'repo_metadata', 'repo_settings', 
> > 'include_arches', 'caches',
> > +   'repo_metadata', 'repo_settings', 
> > 'include_arches',
> > +   'include_profiles', 'caches',
> > 'repoman_incrementals', 'env', 'have', 
> > 'dev_keywords'
> > ],
> > 'func_kwargs': {
> > diff --git a/repoman/lib/repoman/modules/scan/depend/profile.py 
> > b/repoman/lib/repoman/modules/scan/depend/profile.py
> > index d980f4eca..0b1d74483 100644
> > --- a/repoman/lib/repoman/modules/scan/depend/profile.py
> > +++ b/repoman/lib/repoman/modules/scan/depend/profile.py
> > @@ -33,6 +33,7 @@ class ProfileDependsChecks(ScanBase):
> > @param options: cli options
> > @param repo_settings: repository settings instance
> > @param include_arches: set
> > +   @param include_profiles: set
> > @param caches: dictionary of our caches
> > @param repoman_incrementals: tuple
> > @param env: the environment
> > @@ -46,6 +47,7 @@ class ProfileDependsChecks(ScanBase):
> > self.options = kwargs.get('options')
> > self.repo_settings = kwargs.get('repo_settings')
> > self.include_arches = kwargs.get('include_arches')
> > +   self.include_profiles = kwargs.get('include_profiles')
> > self.caches = kwargs.get('caches')
> >

Re: [gentoo-portage-dev] [PATCH] repoman: add --include-profiles=PROFILES

2019-11-18 Thread Zac Medico
On 11/18/19 4:21 PM, Sergei Trofimovich wrote:
> repoman slows down ~linearly with amount of profiles being scanned.
> In case of amd64 we have 28 stable profiles.
> 
> To speed up processing and fit into the time budget of various CIs we can
> split the work across different processes that handle different profiles.
> 
> Example benchmark on ::haskell overlay:
> $ ./repoman full --include-arches=amd64
> ~65 minutes
> $ ./repoman full --include-profiles=default/linux/amd64/17.0
> ~4 minutes
> This allows for a crude sharding of work across processes and allows for
> cheap tree-wide scans for early failures.
> 
> Bug: https://bugs.gentoo.org/700456
> Signed-off-by: Sergei Trofimovich 
> ---
>  repoman/lib/repoman/actions.py  | 4 
>  repoman/lib/repoman/argparser.py| 7 +++
>  repoman/lib/repoman/modules/scan/depend/__init__.py | 3 ++-
>  repoman/lib/repoman/modules/scan/depend/profile.py  | 9 +++--
>  repoman/lib/repoman/scanner.py  | 5 +
>  repoman/man/repoman.1   | 4 
>  6 files changed, 29 insertions(+), 3 deletions(-)
> 
> diff --git a/repoman/lib/repoman/actions.py b/repoman/lib/repoman/actions.py
> index 1c9989a72..92d4d4e94 100644
> --- a/repoman/lib/repoman/actions.py
> +++ b/repoman/lib/repoman/actions.py
> @@ -412,6 +412,10 @@ the whole commit message to abort.
>   report_options.append(
>   "--include-arches=\"%s\"" %
>   " ".join(sorted(self.scanner.include_arches)))
> + if self.scanner.include_profiles is not None:
> + report_options.append(
> + "--include-profiles=\"%s\"" %
> + " ".join(sorted(self.scanner.include_profiles)))
>  
>   if portage_version is None:
>   sys.stderr.write("Failed to insert portage version in 
> message!\n")
> diff --git a/repoman/lib/repoman/argparser.py 
> b/repoman/lib/repoman/argparser.py
> index fa0e6ff90..670a0e91d 100644
> --- a/repoman/lib/repoman/argparser.py
> +++ b/repoman/lib/repoman/argparser.py
> @@ -164,6 +164,13 @@ def parse_args(argv, repoman_default_opts):
>   'A space separated list of arches used to '
>   'filter the selection of profiles for dependency 
> checks'))
>  
> + parser.add_argument(
> + '--include-profiles',
> + dest='include_profiles', metavar='PROFILES', action='append',
> + help=(
> + 'A space separated list of profiles used to '
> + 'define the selection of profiles for dependency 
> checks'))
> +
>   parser.add_argument(
>   '-d', '--include-dev', dest='include_dev', action='store_true',
>   default=False,
> diff --git a/repoman/lib/repoman/modules/scan/depend/__init__.py 
> b/repoman/lib/repoman/modules/scan/depend/__init__.py
> index c3cc0ddeb..9068760bb 100644
> --- a/repoman/lib/repoman/modules/scan/depend/__init__.py
> +++ b/repoman/lib/repoman/modules/scan/depend/__init__.py
> @@ -19,7 +19,8 @@ module_spec = {
>   'func_desc': {
>   },
>   'mod_kwargs': ['qatracker', 'portdb', 'profiles', 
> 'options',
> - 'repo_metadata', 'repo_settings', 
> 'include_arches', 'caches',
> + 'repo_metadata', 'repo_settings', 
> 'include_arches',
> + 'include_profiles', 'caches',
>   'repoman_incrementals', 'env', 'have', 
> 'dev_keywords'
>   ],
>   'func_kwargs': {
> diff --git a/repoman/lib/repoman/modules/scan/depend/profile.py 
> b/repoman/lib/repoman/modules/scan/depend/profile.py
> index d980f4eca..0b1d74483 100644
> --- a/repoman/lib/repoman/modules/scan/depend/profile.py
> +++ b/repoman/lib/repoman/modules/scan/depend/profile.py
> @@ -33,6 +33,7 @@ class ProfileDependsChecks(ScanBase):
>   @param options: cli options
>   @param repo_settings: repository settings instance
>   @param include_arches: set
> + @param include_profiles: set
>   @param caches: dictionary of our caches
>   @param repoman_incrementals: tuple
>   @param env: the environment
> @@ -46,6 +47,7 @@ class ProfileDependsChecks(ScanBase):
>   self.options = kwargs.get('options')
>   self.repo_settings = kwargs.get('repo_settings')
>   self.include_arches = kwargs.get('include_arches')
> + self.include_profiles = kwargs.get('include_profiles')
>   self.caches = kwargs.get('caches')
>   self.repoman_incrementals = kwargs.get('repoman_incrementals')
>   self.env = kwargs.get('env')
> @@ -81,8 +83,11 @@ class ProfileDependsChecks(ScanBase):
>  

[gentoo-portage-dev] [PATCH] repoman: add --include-profiles=PROFILES

2019-11-18 Thread Sergei Trofimovich
repoman slows down ~linearly with amount of profiles being scanned.
In case of amd64 we have 28 stable profiles.

To speed up processing and fit into the time budget of various CIs we can
split the work across different processes that handle different profiles.

Example benchmark on ::haskell overlay:
$ ./repoman full --include-arches=amd64
~65 minutes
$ ./repoman full --include-profiles=default/linux/amd64/17.0
~4 minutes
This allows for a crude sharding of work across processes and allows for
cheap tree-wide scans for early failures.

Bug: https://bugs.gentoo.org/700456
Signed-off-by: Sergei Trofimovich 
---
 repoman/lib/repoman/actions.py  | 4 
 repoman/lib/repoman/argparser.py| 7 +++
 repoman/lib/repoman/modules/scan/depend/__init__.py | 3 ++-
 repoman/lib/repoman/modules/scan/depend/profile.py  | 9 +++--
 repoman/lib/repoman/scanner.py  | 5 +
 repoman/man/repoman.1   | 4 
 6 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/repoman/lib/repoman/actions.py b/repoman/lib/repoman/actions.py
index 1c9989a72..92d4d4e94 100644
--- a/repoman/lib/repoman/actions.py
+++ b/repoman/lib/repoman/actions.py
@@ -412,6 +412,10 @@ the whole commit message to abort.
report_options.append(
"--include-arches=\"%s\"" %
" ".join(sorted(self.scanner.include_arches)))
+   if self.scanner.include_profiles is not None:
+   report_options.append(
+   "--include-profiles=\"%s\"" %
+   " ".join(sorted(self.scanner.include_profiles)))
 
if portage_version is None:
sys.stderr.write("Failed to insert portage version in 
message!\n")
diff --git a/repoman/lib/repoman/argparser.py b/repoman/lib/repoman/argparser.py
index fa0e6ff90..670a0e91d 100644
--- a/repoman/lib/repoman/argparser.py
+++ b/repoman/lib/repoman/argparser.py
@@ -164,6 +164,13 @@ def parse_args(argv, repoman_default_opts):
'A space separated list of arches used to '
'filter the selection of profiles for dependency 
checks'))
 
+   parser.add_argument(
+   '--include-profiles',
+   dest='include_profiles', metavar='PROFILES', action='append',
+   help=(
+   'A space separated list of profiles used to '
+   'define the selection of profiles for dependency 
checks'))
+
parser.add_argument(
'-d', '--include-dev', dest='include_dev', action='store_true',
default=False,
diff --git a/repoman/lib/repoman/modules/scan/depend/__init__.py 
b/repoman/lib/repoman/modules/scan/depend/__init__.py
index c3cc0ddeb..9068760bb 100644
--- a/repoman/lib/repoman/modules/scan/depend/__init__.py
+++ b/repoman/lib/repoman/modules/scan/depend/__init__.py
@@ -19,7 +19,8 @@ module_spec = {
'func_desc': {
},
'mod_kwargs': ['qatracker', 'portdb', 'profiles', 
'options',
-   'repo_metadata', 'repo_settings', 
'include_arches', 'caches',
+   'repo_metadata', 'repo_settings', 
'include_arches',
+   'include_profiles', 'caches',
'repoman_incrementals', 'env', 'have', 
'dev_keywords'
],
'func_kwargs': {
diff --git a/repoman/lib/repoman/modules/scan/depend/profile.py 
b/repoman/lib/repoman/modules/scan/depend/profile.py
index d980f4eca..0b1d74483 100644
--- a/repoman/lib/repoman/modules/scan/depend/profile.py
+++ b/repoman/lib/repoman/modules/scan/depend/profile.py
@@ -33,6 +33,7 @@ class ProfileDependsChecks(ScanBase):
@param options: cli options
@param repo_settings: repository settings instance
@param include_arches: set
+   @param include_profiles: set
@param caches: dictionary of our caches
@param repoman_incrementals: tuple
@param env: the environment
@@ -46,6 +47,7 @@ class ProfileDependsChecks(ScanBase):
self.options = kwargs.get('options')
self.repo_settings = kwargs.get('repo_settings')
self.include_arches = kwargs.get('include_arches')
+   self.include_profiles = kwargs.get('include_profiles')
self.caches = kwargs.get('caches')
self.repoman_incrementals = kwargs.get('repoman_incrementals')
self.env = kwargs.get('env')
@@ -81,8 +83,11 @@ class ProfileDependsChecks(ScanBase):
if arch not in self.include_arches:
continue
 
-   relevant_profiles.extend(
-