commit:     b7eec0cd3bb59c2b21e91a71619212dbff8a5b0b
Author:     Dirkjan Ochtman <dirkjan <AT> ochtman <DOT> nl>
AuthorDate: Tue May  3 09:01:29 2016 +0000
Commit:     Brian Dolbec <dolsen <AT> gentoo <DOT> org>
CommitDate: Sun May  8 21:18:41 2016 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=b7eec0cd

repoman: Migrate from XmlLint to etree.XMLSchema for validation

Remove the no longer used repoman._xml module.
This change is based on work by Dirkjan Ochtman <djc <AT> gentoo.org>
Updated the change from XML.DTD to XMLSchema.
Additionally:
    Move the metadata.xsd path determination code to metadata.py.
    Add the DISTDIR backup location and fetching of the file if missing or stale.

 pym/repoman/_xml.py                              | 105 -----------------------
 pym/repoman/metadata.py                          |  21 +++++
 pym/repoman/modules/scan/metadata/pkgmetadata.py |   9 +-
 pym/repoman/scanner.py                           |  10 +--
 4 files changed, 26 insertions(+), 119 deletions(-)

diff --git a/pym/repoman/_xml.py b/pym/repoman/_xml.py
deleted file mode 100644
index 33a536a..0000000
--- a/pym/repoman/_xml.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# -*- coding:utf-8 -*-
-
-from __future__ import print_function, unicode_literals
-
-import sys
-import xml
-
-# import our initialized portage instance
-from repoman._portage import portage
-
-from portage import os
-from portage.output import red
-from portage.process import find_binary
-
-from repoman.metadata import fetch_metadata_xsd
-from repoman._subprocess import repoman_getstatusoutput
-
-
-class _XMLParser(xml.etree.ElementTree.XMLParser):
-
-       def __init__(self, data, **kwargs):
-               xml.etree.ElementTree.XMLParser.__init__(self, **kwargs)
-               self._portage_data = data
-               if hasattr(self, 'parser'):
-                       self._base_XmlDeclHandler = self.parser.XmlDeclHandler
-                       self.parser.XmlDeclHandler = 
self._portage_XmlDeclHandler
-                       self._base_StartDoctypeDeclHandler = \
-                               self.parser.StartDoctypeDeclHandler
-                       self.parser.StartDoctypeDeclHandler = \
-                               self._portage_StartDoctypeDeclHandler
-
-       def _portage_XmlDeclHandler(self, version, encoding, standalone):
-               if self._base_XmlDeclHandler is not None:
-                       self._base_XmlDeclHandler(version, encoding, standalone)
-               self._portage_data["XML_DECLARATION"] = (version, encoding, 
standalone)
-
-       def _portage_StartDoctypeDeclHandler(
-               self, doctypeName, systemId, publicId, has_internal_subset):
-               if self._base_StartDoctypeDeclHandler is not None:
-                       self._base_StartDoctypeDeclHandler(
-                               doctypeName, systemId, publicId, 
has_internal_subset)
-               self._portage_data["DOCTYPE"] = (doctypeName, systemId, 
publicId)
-
-
-class _MetadataTreeBuilder(xml.etree.ElementTree.TreeBuilder):
-       """
-       Implements doctype() as required to avoid deprecation warnings with
-       >=python-2.7.
-       """
-       def doctype(self, name, pubid, system):
-               pass
-
-
-class XmlLint(object):
-
-       def __init__(self, options, repoman_settings, metadata_xsd=None):
-               self.metadata_xsd = (metadata_xsd or
-                       os.path.join(repoman_settings["DISTDIR"], 
'metadata.xsd'))
-               self.options = options
-               self.repoman_settings = repoman_settings
-               self._is_capable = metadata_xsd is not None
-               self.binary = None
-               self._check_capable()
-
-       def _check_capable(self):
-               if self.options.mode == "manifest":
-                       return
-               self.binary = find_binary('xmllint')
-               if not self.binary:
-                       print(red("!!! xmllint not found. Can't check 
metadata.xml.\n"))
-               elif not self._is_capable:
-                       if not fetch_metadata_xsd(self.metadata_xsd, 
self.repoman_settings):
-                               sys.exit(1)
-                       # this can be problematic if xmllint changes their 
output
-                       self._is_capable = True
-
-       @property
-       def capable(self):
-               return self._is_capable
-
-       def check(self, checkdir, repolevel):
-               '''Runs checks on the package metadata.xml file
-
-               @param checkdir: string, path
-               @param repolevel: integer
-               @return boolean, False == bad metadata
-               '''
-               if not self.capable:
-                       if self.options.xml_parse or repolevel == 3:
-                               print("%s sorry, xmllint is needed.  failing\n" 
% red("!!!"))
-                               sys.exit(1)
-                       return True
-               # xmlint can produce garbage output even on success, so only 
dump
-               # the ouput when it fails.
-               st, out = repoman_getstatusoutput(
-                       self.binary + " --nonet --noout --schema %s %s" % (
-                               portage._shell_quote(self.metadata_xsd),
-                               portage._shell_quote(
-                                       os.path.join(checkdir, 
"metadata.xml"))))
-               if st != os.EX_OK:
-                       print(red("!!!") + " metadata.xml is invalid:")
-                       for z in out.splitlines():
-                               print(red("!!! ") + z)
-                       return False
-               return True

diff --git a/pym/repoman/metadata.py b/pym/repoman/metadata.py
index 7c64c8e..a9ad3e8 100644
--- a/pym/repoman/metadata.py
+++ b/pym/repoman/metadata.py
@@ -99,3 +99,24 @@ def fetch_metadata_xsd(metadata_xsd, repoman_settings):
                                pass
 
        return True
+
+
+def get_metadata_xsd(repo_settings):
+       '''Locate and or fetch the metadata.xsd file
+
+       @param repo_settings: RepoSettings instance
+       @returns: path to the metadata.xsd file
+       '''
+       metadata_xsd = None
+       for path in reversed(repo_settings.repo_config.eclass_db.porttrees):
+               path = os.path.join(path, 'metadata/xml-schema/metadata.xsd')
+               if os.path.exists(path):
+                       metadata_xsd = path
+                       break
+       if metadata_xsd is None:
+               metadata_xsd = os.path.join(
+                       repo_settings.repoman_settings["DISTDIR"], 'metadata.xsd'
+                       )
+
+               fetch_metadata_xsd(metadata_xsd, repo_settings.repoman_settings)
+       return metadata_xsd

diff --git a/pym/repoman/modules/scan/metadata/pkgmetadata.py b/pym/repoman/modules/scan/metadata/pkgmetadata.py
index 317ab56..3ca7897 100644
--- a/pym/repoman/modules/scan/metadata/pkgmetadata.py
+++ b/pym/repoman/modules/scan/metadata/pkgmetadata.py
@@ -26,7 +26,6 @@ from repoman._portage import portage
 from repoman.metadata import metadata_dtd_uri
 from repoman.checks.herds.herdbase import get_herd_base
 from repoman.checks.herds.metadata import check_metadata, UnknownHerdsError
-from repoman._xml import XmlLint
 from repoman.modules.scan.scanbase import ScanBase
 
 from portage.exception import InvalidAtom
@@ -110,13 +109,11 @@ class PkgMetadata(ScanBase, USEFlagChecks):
                repo_settings = kwargs.get('repo_settings')
                self.qatracker = kwargs.get('qatracker')
                self.options = kwargs.get('options')
-               metadata_xsd = kwargs.get('metadata_xsd')
+               self.metadata_xsd = kwargs.get('metadata_xsd')
                self.globalUseFlags = kwargs.get('uselist')
                self.repoman_settings = repo_settings.repoman_settings
                self.musedict = {}
                self.muselist = set()
-               self.xmllint = XmlLint(self.options, self.repoman_settings,
-                       metadata_xsd=metadata_xsd)
 
        def check(self, **kwargs):
                '''Performs the checks on the metadata.xml for the package
@@ -129,7 +126,6 @@ class PkgMetadata(ScanBase, USEFlagChecks):
                xpkg = kwargs.get('xpkg')
                checkdir = kwargs.get('checkdir')
                checkdirlist = kwargs.get('checkdirlist').get()
-               repolevel = kwargs.get('repolevel')
 
                self.musedict = {}
                if self.options.mode in ['manifest']:
@@ -221,7 +217,8 @@ class PkgMetadata(ScanBase, USEFlagChecks):
 
                # Only carry out if in package directory or check forced
                if not metadata_bad:
-                       if not self.xmllint.check(checkdir, repolevel):
+                       validator = etree.XMLSchema(file=self.metadata_xsd)
+                       if not validator.validate(_metadata_xml):
                                self.qatracker.add_error("metadata.bad", xpkg + "/metadata.xml")
                del metadata_bad
                self.muselist = frozenset(self.musedict)

diff --git a/pym/repoman/scanner.py b/pym/repoman/scanner.py
index fd07209..48d9001 100644
--- a/pym/repoman/scanner.py
+++ b/pym/repoman/scanner.py
@@ -10,6 +10,7 @@ from portage import normalize_path
 from portage import os
 from portage.output import green
 from portage.util.futures.extendedfutures import ExtendedFuture
+from repoman.metadata import get_metadata_xsd
 from repoman.modules.commit import repochecks
 from repoman.profile import check_profiles, dev_profile_keywords, setup_profile
 from repoman.repos import repo_metadata
@@ -56,13 +57,6 @@ class Scanner(object):
                        portage.util.stack_lists([self.categories], 
incremental=1))
                self.categories = self.repo_settings.repoman_settings.categories
 
-               metadata_xsd = None
-               for path in 
reversed(self.repo_settings.repo_config.eclass_db.porttrees):
-                       path = os.path.join(path, 
'metadata/xml-schema/metadata.xsd')
-                       if os.path.exists(path):
-                               metadata_xsd = path
-                               break
-
                self.portdb = repo_settings.portdb
                self.portdb.settings = self.repo_settings.repoman_settings
                # We really only need to cache the metadata that's necessary 
for visibility
@@ -187,7 +181,7 @@ class Scanner(object):
                        "qatracker": self.qatracker,
                        "vcs_settings": self.vcs_settings,
                        "options": self.options,
-                       "metadata_xsd": metadata_xsd,
+                       "metadata_xsd": get_metadata_xsd(self.repo_settings),
                        "uselist": uselist,
                        "checks": checks,
                        "repo_metadata": self.repo_metadata,

Reply via email to