[LyX GSoC/epub/master] Refactor EPUB metadata extraction functions.

Josh Hieronymus Fri, 13 Sep 2013 12:34:08 -0700

The branch, epub/master, has been updated.

- Log -----------------------------------------------------------------


commit ceabe988b16ac69754f4da78880cd02a2dfcbb53
Author: Josh Hieronymus <[email protected]>
Date:   Fri Sep 13 01:46:20 2013 -0400

    Refactor EPUB metadata extraction functions.
    
    The EPUB metadata extraction functions were repetitive, so moving
    common elements to a separate function makes it easier to add new
    metadata extraction functions and modify existing ones.

diff --git a/lib/scripts/epub/epub_xhtml_utilities.py b/lib/scripts/epub/epub_xhtml_utilities.py
index 3252e4a..f8f474c 100644
--- a/lib/scripts/epub/epub_xhtml_utilities.py
+++ b/lib/scripts/epub/epub_xhtml_utilities.py
@@ -14,31 +14,41 @@ import xml.etree.ElementTree as ET
 # XML namespace used in the exported XHTML file
 _XHTML_XMLNS = "http://www.w3.org/1999/xhtml";
 # metadata values to be used when the document's metadata cannot be extracted
-_DEFAULT_TITLE = "A LyX-Created EPUB Book"
-_DEFAULT_LANGUAGE = "en"
-_DEFAULT_IDENTIFIER = "123456789X"
-_DEFAULT_IDENTIFIER_SCHEME = "ISBN"
-_DEFAULT_AUTHOR = "LyX User"
-_DEFAULT_AUTHOR_FILE_AS = "User, LyX"
-
-def extract_attribute(xhtml_root, attribute_xhtml_class):
+_DEFAULT_METADATA_VALUES = {
+        "TITLE": "A LyX-Created EPUB Book",
+        "LANGUAGE": "en",
+        "IDENTIFIER": "123456789X",
+        "IDENTIFIER_SCHEME": "ISBN",
+        "AUTHOR": "LyX User",
+        "AUTHOR_FILE_AS": "User, Lyx"}
+# classes of XHTML elements containing metadata attributes 
+_XHTML_CLASSES = {
+        "TITLE": "epub-title",
+        "LANGUAGE": "epub-language",
+        "IDENTIFIER": "epub-identifier",
+        "IDENTIFIER_SCHEME": "epub-identifier-scheme",
+        "AUTHOR": "epub-author-reading-order",
+        "AUTHOR_FILE_AS": "epub-author-file-as-order"}
+
+def extract_attribute(xhtml_root, attribute):
     """Extract an EPUB metadata attribute from an XHTML file and return it.
     
     Keyword Arguments:
     xhtml_root -- an ElementTree Element representing the root element of a LyX
         document's exported XHTML file
     
-    attribute_xhtml_class -- the value of the class attribute of the XHTML element
-        containing the attribute
+    attribute -- a string representing the EPUB metadata attribute to be extracted
+        from the XHTML file
 
     """
+    attribute_xhtml_class = _XHTML_CLASSES[attribute]
     attribute_container_xpath_schema = ".//{{{0}}}div[@class='epub-metadata {1}']/{{{0}}}a"
     attribute_container_xpath = attribute_container_xpath_schema.format(_XHTML_XMLNS, attribute_xhtml_class)
     attribute_container = xhtml_root.find(attribute_container_xpath)
     if attribute_container is not None:
         return attribute_container.tail
     
-    return None
+    return _DEFAULT_METADATA_VALUES[attribute]
 
 def extract_title(xhtml_root):
     """Extract an EPUB title from an XHTML file and return it.
@@ -48,11 +58,7 @@ def extract_title(xhtml_root):
         document's exported XHTML file
 
     """
-    title_xhtml_class = "epub-title"
-    title = extract_attribute(xhtml_root, title_xhtml_class)
-    if title is None:
-        title = _DEFAULT_TITLE
-    
+    title = extract_attribute(xhtml_root, "TITLE")  
     return title
 
 def extract_language(xhtml_root):
@@ -63,11 +69,7 @@ def extract_language(xhtml_root):
         document's exported XHTML file
 
     """
-    language_xhtml_class = "epub-language"
-    language = extract_attribute(xhtml_root, language_xhtml_class)
-    if language is None:
-        language = _DEFAULT_LANGUAGE
-    
+    language = extract_attribute(xhtml_root, "LANGUAGE")
     return language
 
 def extract_identifier(xhtml_root):
@@ -78,11 +80,7 @@ def extract_identifier(xhtml_root):
         document's exported XHTML file
 
     """
-    identifier_xhtml_class = "epub-identifier"
-    identifier = extract_attribute(xhtml_root, identifier_xhtml_class)
-    if identifier is None:
-        identifier = _DEFAULT_IDENTIFIER
-    
+    identifier = extract_attribute(xhtml_root, "IDENTIFIER")
     return identifier
 
 def extract_identifier_scheme(xhtml_root):
@@ -93,11 +91,7 @@ def extract_identifier_scheme(xhtml_root):
         document's exported XHTML file
 
     """
-    identifier_scheme_xhtml_class = "epub-identifier-scheme"
-    identifier_scheme = extract_attribute(xhtml_root, identifier_scheme_xhtml_class)
-    if identifier_scheme is None:
-        identifier_scheme = _DEFAULT_IDENTIFIER_SCHEME
-    
+    identifier_scheme = extract_attribute(xhtml_root, "IDENTIFIER_SCHEME")
     return identifier_scheme
 
 def extract_author(xhtml_root):
@@ -108,11 +102,7 @@ def extract_author(xhtml_root):
         document's exported XHTML file
 
     """
-    author_xhtml_class = "epub-author-reading-order"
-    author = extract_attribute(xhtml_root, author_xhtml_class)
-    if author is None:
-        author = _DEFAULT_AUTHOR
-    
+    author = extract_attribute(xhtml_root, "AUTHOR")
     return author
 
 def extract_author_file_as(xhtml_root):
@@ -123,9 +113,5 @@ def extract_author_file_as(xhtml_root):
         document's exported XHTML file
 
     """
-    author_file_as_xhtml_class = "epub-author-file-as-order"
-    author_file_as = extract_attribute(xhtml_root, author_file_as_xhtml_class)
-    if author_file_as is None:
-        author_file_as = _DEFAULT_AUTHOR_FILE_AS
-    
+    author_file_as = extract_attribute(xhtml_root, "AUTHOR_FILE_AS")
     return author_file_as

-----------------------------------------------------------------------

Summary of changes:
 lib/scripts/epub/epub_xhtml_utilities.py |   68 ++++++++++++------------------
 1 files changed, 27 insertions(+), 41 deletions(-)


hooks/post-receive
-- 
Repositories for GSOC work

[LyX GSoC/epub/master] Refactor EPUB metadata extraction functions.

Reply via email to