Gavin Panella has proposed merging lp:~allenap/launchpad/bug-import-munging into lp:launchpad/devel.
Requested reviews: Launchpad code reviewers (launchpad-reviewers) This adds a script, utilities/massage-bug-import-xml, that attempts to repair bug import XML <https://help.launchpad.net/Bugs/ImportFormat> that is not quite right, or needs some extra work before it will import cleanly. See the docstring of massage() for more information on what that involves. I wrote this script to help import one particular project's bugs, so it is untested other than by use. I have tried to make it a bit more general, but I can imagine that developers may need to customize it for one-off imports. I don't really have time to do any more work on it, but I want it to go somewhere where people can find it rather than sitting only on my machine. -- https://code.launchpad.net/~allenap/launchpad/bug-import-munging/+merge/39505 Your team Launchpad code reviewers is requested to review the proposed merge of lp:~allenap/launchpad/bug-import-munging into lp:launchpad/devel.
#!/usr/bin/env python2.6
# -*- mode: python -*-
"""Repair Launchpad bug import XML so that it imports cleanly.

This is the script added as 'utilities/massage-bug-import-xml'.  It acts
as a filter: bug import XML (see
https://help.launchpad.net/Bugs/ImportFormat) is read from stdin, fixed
up by massage(), and written to stdout.  Diagnostics about every problem
found, and how it was resolved, go to stderr.
"""

from base64 import standard_b64encode
from optparse import OptionParser
import sys

from lxml import etree


NS = "https://launchpad.net/xmlns/2006/bugs"

# Descriptions and comments longer than this many characters are
# considered too long and are truncated.
MAX_TEXT_LENGTH = 50000

# Truncated text keeps at most this many leading lines.
MAX_TEXT_LINES = 30


def _qname(tag):
    """Return `tag` qualified with the bug import namespace."""
    return '{%s}%s' % (NS, tag)


def norm_text(elem):
    """Normalize the text of `elem` in place.

    A missing text becomes the empty string, anything else is stripped
    of leading and trailing whitespace.  Passing None is a no-op, so the
    result of a failed find() can be handed in directly.
    """
    if elem is not None:
        if elem.text is None:
            elem.text = u""
        else:
            elem.text = elem.text.strip()


def truncate(text, message=None, max_lines=MAX_TEXT_LINES,
             max_chars=MAX_TEXT_LENGTH):
    """Return `text` cut down to `max_lines` lines and `max_chars` chars.

    Text that already fits within both limits is returned unchanged.
    Otherwise the leading lines are kept and a marker is appended:
    "[Truncated]", or "[Truncated; <message>]" when `message` is given.
    The marker counts towards `max_chars`, so the result itself is never
    longer than `max_chars` (as long as the marker alone fits).

    :param text: The text to shorten.
    :param message: Optional hint for the reader, e.g. where to find the
        full text.
    :param max_lines: Maximum number of lines of `text` to keep.
    :param max_chars: Maximum length of the returned string.
    """
    lines = text.splitlines()
    if len(lines) <= max_lines and len(text) <= max_chars:
        return text
    if message is None:
        marker = "[Truncated]"
    else:
        marker = "[Truncated; %s]" % message
    tail = u"...\n\n%s" % marker
    excerpt = "\n".join(lines[:max_lines]).strip()
    # A few very long lines can exceed the character limit on their own,
    # so cap the excerpt by characters too, leaving room for the marker.
    budget = max(max_chars - len(tail), 0)
    if len(excerpt) > budget:
        excerpt = excerpt[:budget].rstrip()
    return excerpt + tail


def problem(message):
    """Report a newly found problem on stderr."""
    sys.stderr.write("{0}\n".format(message))


def problem_detail(message):
    """Report an indented detail about the current problem on stderr."""
    sys.stderr.write("  {0}\n".format(message))


def problem_resolution(message):
    """Report on stderr how the current problem is being resolved."""
    sys.stderr.write("  --> {0}\n".format(message))


def problem_resolved():
    """Finish the report of the current problem with a blank line."""
    sys.stderr.write("\n")


def _fix_nickname(bug, project_name, fix_nickname, tag_nickname):
    """Ensure `bug` has tags and nickname elements; fix up the nickname."""
    # Get or create the tags element.
    tags = bug.find(_qname('tags'))
    if tags is None:
        tags = etree.SubElement(bug, _qname('tags'))

    nickname = bug.find(_qname('nickname'))
    if nickname is None:
        # Add an empty nickname to be filled in below.
        nickname = etree.SubElement(bug, _qname('nickname'))
    elif tag_nickname and nickname.text and nickname.text.strip():
        # Add the original nickname as a tag.  An empty nickname is
        # skipped, because it would only produce an empty tag element.
        etree.SubElement(tags, _qname('tag')).text = nickname.text

    # Change the nickname.
    if nickname.text is None or fix_nickname:
        nickname.text = u"%s-%s" % (project_name, bug.get('id'))


def _fix_description(bug, first_comment_text):
    """Repair an empty or overlong description of `bug`.

    `first_comment_text` is the (already normalized) text element of the
    first comment, or None if the bug has no comment text element.

    :raises NotImplementedError: for an overlong description that cannot
        be resolved by pointing at the first comment.
    """
    description = bug.find(_qname('description'))
    if description is None:
        # Nothing to repair: the empty-description branch below removes
        # the element anyway when the first comment has text.
        return
    norm_text(description)
    bug_id = bug.get('id')

    if len(description.text) == 0:
        problem("Bug %s has no description." % bug_id)
        # Try and get the description from the first comment.
        if first_comment_text is None:
            problem_detail("No comments!")
            problem_resolution("Setting description to '-'.")
            description.text = u'-'
        elif len(first_comment_text.text) == 0:
            problem_detail("First comment has no text!")
            problem_resolution("Setting description to '-'.")
            description.text = u'-'
        else:
            problem_detail("First comment has text.")
            problem_resolution("Removing description.")
            # The spec appears to require a description, but the
            # importer treats it as optional, so the empty element can
            # simply be dropped.  (The original comment said "optional"
            # twice; "required" is the evident intent.)
            bug.remove(description)
        problem_resolved()
    elif len(description.text) > MAX_TEXT_LENGTH:
        problem(
            "Bug %s's description is too long (%d chars)." % (
                bug_id, len(description.text)))
        # Compare the description to the first comment. If it's the
        # same, we don't need the full description.
        if first_comment_text is None:
            problem_detail("No comments!")
            problem_resolution("Adding comment.")
            raise NotImplementedError("Add a comment.")
        elif description.text == first_comment_text.text:
            problem_detail('Description is same as first comment.')
            problem_resolution('Trimming description.')
            # It's safe to point the user to an attachment here, even
            # though it has not yet been created. It will be created
            # later because the first comment is also too long.
            description.text = truncate(
                description.text, 'see "Full description" attachment')
        else:
            problem_resolution("Truncating description.")
            raise NotImplementedError("Fix overlong description.")
        problem_resolved()


def _fix_first_comment(bug, project_name, first_comment, first_comment_text):
    """Repair an empty or overlong first comment of `bug`.

    An overlong text is preserved in full as a base64-encoded attachment
    on the comment before the comment text itself is truncated.
    """
    bug_id = bug.get('id')
    if len(first_comment_text.text) == 0:
        problem("Bug %s's first comment has no text." % bug_id)
        problem_resolution("Setting comment text to '-'.")
        first_comment_text.text = u'-'
        problem_resolved()
    elif len(first_comment_text.text) > MAX_TEXT_LENGTH:
        problem(
            "Bug %s's first comment is too long (%d chars)." % (
                bug_id, len(first_comment_text.text)))
        # Save the original text as an attachment.
        problem_resolution('Adding attachment.')
        attachment = etree.SubElement(first_comment, _qname('attachment'))
        etree.SubElement(attachment, _qname('filename')).text = (
            u"%s-bug-%s-full-description.txt" % (project_name, bug_id))
        etree.SubElement(attachment, _qname('title')).text = (
            u"Full description (text/plain, utf-8)")
        etree.SubElement(attachment, _qname('mimetype')).text = (
            u"text/plain")
        # standard_b64encode() returns a byte string; decode it so the
        # element text is plain text on every Python version.
        etree.SubElement(attachment, _qname('contents')).text = (
            standard_b64encode(
                first_comment_text.text.encode('utf-8')).decode('ascii'))
        # Trim the comment text.
        problem_resolution('Trimming comment text.')
        first_comment_text.text = truncate(
            first_comment_text.text,
            'see "Full description" attachment')
        problem_resolved()


def massage(root, project_name, fix_nickname, tag_nickname):
    """Fix problems in the bug import XML tree.

    This includes:

    - Adding a tags element if one does not exist,

    - Fixing up the bug nickname, adding the existing nickname as a tag,

    - Fixing up the description, including truncating it if it's too long,

    - Fixing up the first comment, including truncating it if it's too long,

    - Normalizing whitespace.

    :param root: The root element of the bug import XML; modified in
        place.
    :param project_name: Project name, used for nicknames and attachment
        file names.
    :param fix_nickname: If true, always reset each nickname to
        "<project_name>-<bug id>"; otherwise only missing ones are set.
    :param tag_nickname: If true, add each bug's existing nickname as a
        tag.
    :raises NotImplementedError: for overlong descriptions that differ
        from the first comment, or that have no first comment to fall
        back on.
    """
    # Scan the tree, fixing up issues.
    for bug in root.findall(_qname('bug')):
        _fix_nickname(bug, project_name, fix_nickname, tag_nickname)

        # Get the first comment and its text. A bug without any comment
        # must not crash here; the checks below report it instead.
        first_comment = bug.find(_qname('comment'))
        if first_comment is None:
            first_comment_text = None
        else:
            first_comment_text = first_comment.find(_qname('text'))
        norm_text(first_comment_text)

        _fix_description(bug, first_comment_text)

        if first_comment_text is not None:
            _fix_first_comment(
                bug, project_name, first_comment, first_comment_text)


def main(arguments):
    """Run the filter: parse stdin, massage the tree, write it to stdout.

    :param arguments: Command-line arguments, without the program name.
    :return: The process exit code (0 on success).
    """
    # optparse.OptionParser uses lower-case for usage and help text by
    # default. This is distressing, so it is corrected for below.
    usage = "Usage: %prog [options]"
    description = """
        This acts as a filter: pipe bug import XML into stdin and capture
        stdout. By default it will ensure that bug descriptions and the first
        comment are correct. If either exceeds 50,000 characters it is
        truncated and an attachment is created to hold the original.
        """
    parser = OptionParser(
        usage=usage,
        description=description.strip(),
        add_help_option=False)
    parser.add_option(
        "-p", "--project", dest="project_name", metavar="NAME",
        help="The project to which this import data refers.")
    parser.add_option(
        "--fix-nickname", action="store_true", dest="fix_nickname",
        help="Normalize the nickname to ${project_name}-${bug-id}.")
    parser.add_option(
        "--tag-nickname", action="store_true", dest="tag_nickname",
        help="Add the original bug nickname as a tag.")
    parser.add_option(
        "-h", "--help", action="help",
        help="Show this help message and exit.")
    parser.set_defaults(
        project_name=None,
        fix_nickname=False,
        tag_nickname=False)

    options, args = parser.parse_args(arguments)
    if len(args) != 0:
        parser.error("Positional arguments are not recognized.")
    if options.project_name is None:
        parser.error("A project name must be specified.")

    # XML is parsed from and serialized to bytes. Use the underlying
    # binary streams where they exist, and the plain streams otherwise.
    stdin = getattr(sys.stdin, 'buffer', sys.stdin)
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)

    tree = etree.parse(stdin)
    massage(
        root=tree.getroot(),
        project_name=options.project_name,
        fix_nickname=options.fix_nickname,
        tag_nickname=options.tag_nickname)
    tree.write(
        stdout, encoding='utf-8',
        pretty_print=True, xml_declaration=True)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
_______________________________________________ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : [email protected] Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp

