The branch, epub/master, has been updated. - Log -----------------------------------------------------------------
commit ceabe988b16ac69754f4da78880cd02a2dfcbb53 Author: Josh Hieronymus <[email protected]> Date: Fri Sep 13 01:46:20 2013 -0400 Refactor EPUB metadata extraction functions. The EPUB metadata extraction functions were repetitive, so moving common elements to a separate function makes it easier to add new metadata extraction functions and modify existing ones. diff --git a/lib/scripts/epub/epub_xhtml_utilities.py b/lib/scripts/epub/epub_xhtml_utilities.py index 3252e4a..f8f474c 100644 --- a/lib/scripts/epub/epub_xhtml_utilities.py +++ b/lib/scripts/epub/epub_xhtml_utilities.py @@ -14,31 +14,41 @@ import xml.etree.ElementTree as ET # XML namespace used in the exported XHTML file _XHTML_XMLNS = "http://www.w3.org/1999/xhtml" # metadata values to be used when the document's metadata cannot be extracted -_DEFAULT_TITLE = "A LyX-Created EPUB Book" -_DEFAULT_LANGUAGE = "en" -_DEFAULT_IDENTIFIER = "123456789X" -_DEFAULT_IDENTIFIER_SCHEME = "ISBN" -_DEFAULT_AUTHOR = "LyX User" -_DEFAULT_AUTHOR_FILE_AS = "User, LyX" - -def extract_attribute(xhtml_root, attribute_xhtml_class): +_DEFAULT_METADATA_VALUES = { + "TITLE": "A LyX-Created EPUB Book", + "LANGUAGE": "en", + "IDENTIFIER": "123456789X", + "IDENTIFIER_SCHEME": "ISBN", + "AUTHOR": "LyX User", + "AUTHOR_FILE_AS": "User, Lyx"} +# classes of XHTML elements containing metadata attributes +_XHTML_CLASSES = { + "TITLE": "epub-title", + "LANGUAGE": "epub-language", + "IDENTIFIER": "epub-identifier", + "IDENTIFIER_SCHEME": "epub-identifier-scheme", + "AUTHOR": "epub-author-reading-order", + "AUTHOR_FILE_AS": "epub-author-file-as-order"} + +def extract_attribute(xhtml_root, attribute): """Extract an EPUB metadata attribute from an XHTML file and return it. Keyword Arguments: xhtml_root -- an ElementTree Element representing the root element of a LyX document's exported XHTML file - attribute_xhtml_class -- the value of the class attribute of the XHTML element - containing the attribute + attribute -- a string representing the EPUB metadata attribute to be extracted + from the XHTML file """ + attribute_xhtml_class = _XHTML_CLASSES[attribute] attribute_container_xpath_schema = ".//{{{0}}}div[@class='epub-metadata {1}']/{{{0}}}a" attribute_container_xpath = attribute_container_xpath_schema.format(_XHTML_XMLNS, attribute_xhtml_class) attribute_container = xhtml_root.find(attribute_container_xpath) if attribute_container is not None: return attribute_container.tail - return None + return _DEFAULT_METADATA_VALUES[attribute] def extract_title(xhtml_root): """Extract an EPUB title from an XHTML file and return it. @@ -48,11 +58,7 @@ def extract_title(xhtml_root): document's exported XHTML file """ - title_xhtml_class = "epub-title" - title = extract_attribute(xhtml_root, title_xhtml_class) - if title is None: - title = _DEFAULT_TITLE - + title = extract_attribute(xhtml_root, "TITLE") return title def extract_language(xhtml_root): @@ -63,11 +69,7 @@ def extract_language(xhtml_root): document's exported XHTML file """ - language_xhtml_class = "epub-language" - language = extract_attribute(xhtml_root, language_xhtml_class) - if language is None: - language = _DEFAULT_LANGUAGE - + language = extract_attribute(xhtml_root, "LANGUAGE") return language def extract_identifier(xhtml_root): @@ -78,11 +80,7 @@ def extract_identifier(xhtml_root): document's exported XHTML file """ - identifier_xhtml_class = "epub-identifier" - identifier = extract_attribute(xhtml_root, identifier_xhtml_class) - if identifier is None: - identifier = _DEFAULT_IDENTIFIER - + identifier = extract_attribute(xhtml_root, "IDENTIFIER") return identifier def extract_identifier_scheme(xhtml_root): @@ -93,11 +91,7 @@ def extract_identifier_scheme(xhtml_root): document's exported XHTML file """ - identifier_scheme_xhtml_class = "epub-identifier-scheme" - identifier_scheme = extract_attribute(xhtml_root, identifier_scheme_xhtml_class) - if identifier_scheme is None: - identifier_scheme = _DEFAULT_IDENTIFIER_SCHEME - + identifier_scheme = extract_attribute(xhtml_root, "IDENTIFIER_SCHEME") return identifier_scheme def extract_author(xhtml_root): @@ -108,11 +102,7 @@ def extract_author(xhtml_root): document's exported XHTML file """ - author_xhtml_class = "epub-author-reading-order" - author = extract_attribute(xhtml_root, author_xhtml_class) - if author is None: - author = _DEFAULT_AUTHOR - + author = extract_attribute(xhtml_root, "AUTHOR") return author def extract_author_file_as(xhtml_root): @@ -123,9 +113,5 @@ def extract_author_file_as(xhtml_root): document's exported XHTML file """ - author_file_as_xhtml_class = "epub-author-file-as-order" - author_file_as = extract_attribute(xhtml_root, author_file_as_xhtml_class) - if author_file_as is None: - author_file_as = _DEFAULT_AUTHOR_FILE_AS - + author_file_as = extract_attribute(xhtml_root, "AUTHOR_FILE_AS") return author_file_as ----------------------------------------------------------------------- Summary of changes: lib/scripts/epub/epub_xhtml_utilities.py | 68 ++++++++++++------------------ 1 files changed, 27 insertions(+), 41 deletions(-) hooks/post-receive -- Repositories for GSOC work
