commit:     87912b460c533f8b010617018fdad42246db85f0
Author:     Siddhanth Rathod <xsiddhanthrathod <AT> gmail <DOT> com>
AuthorDate: Thu Sep 21 20:20:15 2023 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Dec  3 07:51:44 2023 +0000
URL:        https://gitweb.gentoo.org/proj/gentoolkit.git/commit/?id=87912b46

Enhancement: eclean-dist handle git checkouts

A new feature for eclean-dist to clean git3-src.
Optionally, cleaning the vcs-src can be skipped with --skip-vcs.

Bug: https://bugs.gentoo.org/622938

Signed-off-by: Siddhanth Rathod <xsiddhanthrathod <AT> gmail.com>
Closes: https://github.com/gentoo/gentoolkit/pull/33
Signed-off-by: Sam James <sam <AT> gentoo.org>

 pym/gentoolkit/eclean/clean.py  | 35 ++++++++++++++++++++++++++++++++--
 pym/gentoolkit/eclean/cli.py    | 42 ++++++++++++++++++++++++++++-------------
 pym/gentoolkit/eclean/search.py | 28 ++++++++++++++++++++++++++-
 3 files changed, 89 insertions(+), 16 deletions(-)

diff --git a/pym/gentoolkit/eclean/clean.py b/pym/gentoolkit/eclean/clean.py
index 92449b4..0347f1d 100644
--- a/pym/gentoolkit/eclean/clean.py
+++ b/pym/gentoolkit/eclean/clean.py
@@ -5,6 +5,7 @@
 
 
 import os
+import shutil
 import sys
 
 import gentoolkit.pprinter as pp
@@ -25,7 +26,7 @@ class CleanUp:
         self.controller = controller
         self.quiet = quiet
 
-    def clean_dist(self, clean_dict):
+    def clean_dist(self, clean_dict, vcs):
         """Calculate size of each entry for display, prompt user if needed,
         delete files if approved and return the total size of files that
         have been deleted.
@@ -41,6 +42,7 @@ class CleanUp:
         for key in sorted(clean_dict):
             clean_size += self._clean_files(clean_dict[key], key, file_type)
         # return total size of deleted or to delete files
+        clean_size += self._clean_vcs_src(vcs)
         return clean_size
 
     def clean_pkgs(self, clean_dict, pkgdir):
@@ -74,7 +76,7 @@ class CleanUp:
         # return total size of deleted or to delete files
         return clean_size
 
-    def pretend_clean(self, clean_dict):
+    def pretend_clean(self, clean_dict, vcs={}):
         """Shortcut function that calculates total space savings
         for the files in clean_dict.
 
@@ -85,6 +87,8 @@ class CleanUp:
         file_type = "file"
         clean_size = 0
         # tally all entries one by one; sorting helps reading
+        if vcs:
+            clean_size += self._clean_vcs_src(vcs, pretend=True)
         for key in sorted(clean_dict):
             key_size = self._get_size(clean_dict[key])
             self.controller(key_size, key, clean_dict[key], file_type)
@@ -153,3 +157,30 @@ class CleanUp:
                     print(pp.error("Could not delete " + file_), file=sys.stderr)
                     print(pp.error("Error: %s" % str(er)), file=sys.stderr)
         return clean_size
+
+    def _clean_vcs_src(self, deprecated_vcs, pretend=False):
+        clean_size = 0
+        for checkout in deprecated_vcs:
+            csize = 0
+            for path, dirs, files in os.walk(checkout):
+                for f in files:
+                    fp = os.path.join(path, f)
+                    try:
+                        statinfo = os.stat(fp)
+                    except OSError as er:
+                        print(
+                            pp.error("Could not get stat info for:" + fp),
+                            file=sys.stderr,
+                        )
+                        print(pp.error("Error: %s" % str(er)), file=sys.stderr)
+                    clean_size += statinfo.st_size
+                    csize += statinfo.st_size
+
+            try:
+                self.controller(csize, checkout, checkout, "checkout")
+                if not pretend:
+                    shutil.rmtree(checkout)
+            except OSError as er:
+                print(pp.error("Could not delete " + checkout), file=sys.stderr)
+                print(pp.error("Error: %s" % str(er)), file=sys.stderr)
+        return clean_size

diff --git a/pym/gentoolkit/eclean/cli.py b/pym/gentoolkit/eclean/cli.py
index e3dd540..52ddf2a 100644
--- a/pym/gentoolkit/eclean/cli.py
+++ b/pym/gentoolkit/eclean/cli.py
@@ -13,25 +13,25 @@ __version__ = "@VERSION@"
 __productname__ = "eclean"
 __description__ = "A cleaning tool for Gentoo distfiles and binaries."
 
+import getopt
 import os
-import sys
 import re
+import sys
 import time
-import getopt
 
 import portage
-from portage.output import white, yellow, turquoise, green, red
+from portage.output import green, red, turquoise, white, yellow
 
 import gentoolkit.pprinter as pp
+from gentoolkit.eclean.clean import CleanUp
+from gentoolkit.eclean.exclude import ParseExcludeFileException, parseExcludeFile
+from gentoolkit.eclean.output import OutputControl
 from gentoolkit.eclean.search import (
     DistfilesSearch,
     findPackages,
-    port_settings,
     pkgdir,
+    port_settings,
 )
-from gentoolkit.eclean.exclude import parseExcludeFile, ParseExcludeFileException
-from gentoolkit.eclean.clean import CleanUp
-from gentoolkit.eclean.output import OutputControl
 
 # from gentoolkit.eclean.dbapi import Dbapi
 from gentoolkit.eprefix import EPREFIX
@@ -309,6 +309,10 @@ def printUsage(_error=None, help=None, unresolved_invalids=None):
             "   " + '"two hundreds kilobytes", etc.  Units are: ' + "G, M, K and B.",
             file=out,
         )
+        print(
+            yellow(" --skip-vcs") + "  - skip cleaning of vcs_src ",
+            file=out,
+        )
         print(file=out)
     print(
         "More detailed instruction can be found in",
@@ -420,6 +424,8 @@ def parseArgs(options={}):
                 options["unique-use"] = True
             elif o in ("--no-clean-invalid"):
                 options["no-clean-invalid"] = True
+            elif o in ("--skip-vcs"):
+                options["skip-vcs"] = True
             else:
                 return_code = False
         # sanity check of --deep only options:
@@ -457,7 +463,11 @@ def parseArgs(options={}):
         "verbose",
     ]
     getopt_options["short"]["distfiles"] = "fs:"
-    getopt_options["long"]["distfiles"] = ["fetch-restricted", "size-limit="]
+    getopt_options["long"]["distfiles"] = [
+        "fetch-restricted",
+        "size-limit=",
+        "skip-vcs",
+    ]
     getopt_options["short"]["packages"] = "iu"
     getopt_options["long"]["packages"] = [
         "ignore-failure",
@@ -481,6 +491,7 @@ def parseArgs(options={}):
     options["ignore-failure"] = False
     options["no-clean-invalid"] = False
     options["unique-use"] = False
+    options["skip-vcs"] = False
     # if called by a well-named symlink, set the action accordingly:
     action = None
     # temp print line to ensure it is the svn/branch code running, etc..
@@ -546,6 +557,7 @@ def doAction(action, options, exclude={}, output=None):
         files_type = "distfiles"
     saved = {}
     deprecated = {}
+    vcs = []
     # find files to delete, depending on the action
     if not options["quiet"]:
         output.einfo("Building file list for " + action + " cleaning...")
@@ -567,7 +579,7 @@ def doAction(action, options, exclude={}, output=None):
             # portdb=Dbapi(portage.db[portage.root]["porttree"].dbapi),
             # var_dbapi=Dbapi(portage.db[portage.root]["vartree"].dbapi),
         )
-        clean_me, saved, deprecated = engine.findDistfiles(
+        clean_me, saved, deprecated, vcs = engine.findDistfiles(
             exclude=exclude,
             destructive=options["destructive"],
             fetch_restricted=options["fetch-restricted"],
@@ -581,7 +593,7 @@ def doAction(action, options, exclude={}, output=None):
     cleaner = CleanUp(output.progress_controller, options["quiet"])
 
     # actually clean files if something was found
-    if clean_me:
+    if clean_me or vcs:
         # verbose pretend message
         if options["pretend"] and not options["quiet"]:
             output.einfo("Here are the " + files_type + " that would be deleted:")
@@ -590,9 +602,13 @@ def doAction(action, options, exclude={}, output=None):
             output.einfo("Cleaning " + files_type + "...")
         # do the cleanup, and get size of deleted files
         if options["pretend"]:
-            clean_size = cleaner.pretend_clean(clean_me)
+            if options["skip-vcs"]:
+                vcs = {}
+            clean_size = cleaner.pretend_clean(clean_me, vcs)
         elif action in ["distfiles"]:
-            clean_size = cleaner.clean_dist(clean_me)
+            if options["skip-vcs"]:
+                vcs = {}
+            clean_size = cleaner.clean_dist(clean_me, vcs)
         elif action in ["packages"]:
             clean_size = cleaner.clean_pkgs(clean_me, pkgdir)
         # vocabulary for final message
@@ -602,7 +618,7 @@ def doAction(action, options, exclude={}, output=None):
             verb = "were"
         # display freed space
         if not options["quiet"]:
-            output.total("normal", clean_size, len(clean_me), verb, action)
+            output.total("normal", clean_size, len(clean_me) + len(vcs), verb, action)
     # nothing was found
     elif not options["quiet"]:
         output.einfo("Your " + action + " directory was already clean.")

diff --git a/pym/gentoolkit/eclean/search.py b/pym/gentoolkit/eclean/search.py
index 77f16af..de424c5 100644
--- a/pym/gentoolkit/eclean/search.py
+++ b/pym/gentoolkit/eclean/search.py
@@ -7,6 +7,7 @@
 import os
 import stat
 import sys
+import shlex
 from functools import partial
 from inspect import signature
 from typing import Optional
@@ -134,6 +135,7 @@ class DistfilesSearch:
         # gather the files to be cleaned
         self.output("...checking limits for %d ebuild sources" % len(pkgs))
 
+        vcs = self.vcs_check(_distdir)
         checks = self._get_default_checks(size_limit, time_limit, exclude, destructive)
         checks.extend(extra_checks)
         clean_me = self._check_limits(_distdir, checks, clean_me)
@@ -148,7 +150,7 @@ class DistfilesSearch:
                 + "%s remaining candidates to clean" % len(clean_me)
             )
             clean_me, saved = self._check_excludes(exclude, clean_me)
-        return clean_me, saved, deprecated
+        return clean_me, saved, deprecated, vcs
 
     # begin _check_limits code block
 
@@ -332,6 +334,30 @@ class DistfilesSearch:
             deprecated.update(_deprecated)
         return pkgs, deprecated
 
+    def vcs_check(self, distdir):
+        """Checks $DISTDIR/vcs-src for checkouts which are not in the vardb"""
+        # For now we only check git
+        vcs_src = os.path.join(distdir, "git3-src")
+        expected_dirs = set()
+        for i in set(self.vardb.cpv_all()):
+            if "live" in self.vardb.aux_get(i, ["PROPERTIES"]):
+                try:
+                    # try to get the dir names of the cloned
+                    # repos from the environment file.
+                    vcs_dir = {
+                        i.split("=")[-1].strip('"')
+                        for i in shlex.split(
+                            self.vardb._aux_env_search(i, ["EVCS_STORE_DIRS"])[
+                                "EVCS_STORE_DIRS"
+                            ].strip("()")
+                        )
+                    }
+                    expected_dirs.update(vcs_dir)
+                except KeyError:
+                    pass
+        actual_dirs = {os.path.join(vcs_src, i) for i in os.listdir(vcs_src)}
+        return actual_dirs.difference(expected_dirs)
+
     def _fetch_restricted(self, pkgs_, cpvs):
         """perform fetch restricted non-destructive source
         filename lookups

Reply via email to