Thought I'd help out a bit with the documentation effort by writing a tool to import the current documentation.
The facts:
* it's written in JavaScript (Rhino) to lower the hacking barrier
* it imports the xdocs starting from the source tree. It translates them to HTML with an XSL and cleans them up with the htmlcleaner (so that they look as if they had been edited through the daisy-wiki)
* it imports the images
* while creating the documents and images, a mapping between original filenames and daisy document IDs is kept, and in a second pass the links in all documents are translated.

What still needs to be done, and known issues:
* the current XSL just contains the bare minimum to get something done (I focused my efforts on import.js); someone needs to systematically look at all the tags in the document-v10 DTD to see if they're handled correctly (e.g. <dl> is still to do).
* testing/verifying of the results
* only document-v10 based documents are supported; if necessary, support for FAQs and other document types could be added (= mainly XSL work)
* Daisy doesn't have a <code>-like tag, so we need to decide what to do with it. Daisy doesn't have this because the Mozilla/IE editor APIs don't support the creation of this type of tag.

How to use:
* save the two attached files somewhere
* customize the configuration variables at the top of import.js
* download/install a daisy distro (version 1.3-M2)
* run with "$DAISY_HOME/bin/daisy-js import.js"

-- Bruno Dumon http://outerthought.org/ Outerthought - Open Source, Java & XML Competence Support Center [EMAIL PROTECTED] [EMAIL PROTECTED]
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <!--
    Converts a document-v10 xdoc into the plain HTML that is fed to the
    HTML cleaner. Only the content of document/body is emitted; elements
    without a dedicated rule below are copied through unchanged.
  -->

  <!-- Entry point: only the body of the document is processed. -->
  <xsl:template match="/">
    <xsl:apply-templates select="document/body"/>
  </xsl:template>

  <!-- The body becomes the html/body skeleton. -->
  <xsl:template match="document/body">
    <html>
      <body>
        <xsl:apply-templates/>
      </body>
    </html>
  </xsl:template>

  <!-- Sections s1..s4 become headings h1..h4 (title attribute as heading text),
       followed by the section content. -->
  <xsl:template match="s1|s2|s3|s4">
    <xsl:element name="h{substring-after(local-name(), 's')}">
      <xsl:value-of select="@title"/>
    </xsl:element>
    <xsl:apply-templates/>
  </xsl:template>

  <!-- Simple lists become unordered lists. -->
  <xsl:template match="sl">
    <ul>
      <xsl:apply-templates/>
    </ul>
  </xsl:template>

  <!-- All four link flavours become a plain anchor, keeping their attributes. -->
  <xsl:template match="link|connect|jump|fork">
    <a>
      <xsl:copy-of select="@*"/>
      <xsl:apply-templates/>
    </a>
  </xsl:template>

  <!-- Figures become images, keeping their attributes. -->
  <xsl:template match="figure">
    <img>
      <xsl:copy-of select="@*"/>
      <xsl:apply-templates/>
    </img>
  </xsl:template>

  <!-- Source listings become preformatted text. -->
  <xsl:template match="source">
    <pre><xsl:apply-templates/></pre>
  </xsl:template>

  <!-- note and fixme become paragraphs whose class is the element name. -->
  <xsl:template match="note|fixme">
    <p class="{local-name()}">
      <xsl:apply-templates/>
    </p>
  </xsl:template>

  <!-- Identity rule: everything else is copied as-is. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>
importPackage(java.io);
importPackage(Packages.org.outerj.daisy.repository);
importClass(Packages.org.outerj.daisy.repository.clientimpl.RemoteRepositoryManager);

//
// Imports xdoc documents and images into a Daisy repository.
//
// Pass 1: every image below imagesRoot and every supported xdoc below docRoot
//         is stored as a Daisy document; the original file name / URI is
//         remembered together with the new Daisy document ID.
// Pass 2: links and image references in the created documents are rewritten
//         to "daisy:<id>" links using those mappings.
//

// Configuration
var docRoot = "/home/bruno/oss/cocoon-2.1.7/src/documentation/xdocs/";
var imagesRoot = "/home/bruno/oss/cocoon-2.1.7/src/documentation/images/";
var xsl = "document-to-daisyhtml.xsl";
var htmlcleanerXml = "/home/bruno/projects/daisy/trunk/daisy/applications/daisywiki/frontend/src/cocoon/webapp/daisy/resources/conf/htmlcleaner.xml";
var repositoryURL = "http://localhost:9263";
var repoUser = "testuser"; // note: must be a user with Administrator rights
var repoPwd = "testuser";
var collection = "coolsite"; // collection in which to put the documents

// connect to repository
var repositoryManager = new RemoteRepositoryManager(repositoryURL, new Credentials(repoUser, repoPwd));
var repository = repositoryManager.getRepository(new Credentials(repoUser, repoPwd));
// setActiveRoleIds is given a Java long[] holding the single role id 1
// (presumably the Administrator role -- TODO confirm against the repository).
var activeRoleIds = java.lang.reflect.Array.newInstance(java.lang.Long.TYPE, 1);
activeRoleIds[0] = 1;
repository.setActiveRoleIds(activeRoleIds);

// build stylesheet template
var stylesheetSource = new Packages.javax.xml.transform.stream.StreamSource(xsl);
var transformerFactory = Packages.javax.xml.transform.TransformerFactory.newInstance();
var stylesheetTemplate = transformerFactory.newTemplates(stylesheetSource);

// build html cleaner template
var htmlcleanerconf = new java.io.File(htmlcleanerXml);
if (!htmlcleanerconf.exists()) {
    print("HTML Cleaner configuration file not found at: " + htmlcleanerconf.getAbsolutePath());
    quit();
}
var htmlCleanerFactory = new Packages.org.outerj.daisy.htmlcleaner.HtmlCleanerFactory();
var htmlCleanerTemplate = htmlCleanerFactory.buildTemplate(new Packages.org.xml.sax.InputSource(htmlcleanerconf.getAbsolutePath()));

// mapping of daisy ids and files
var docids = []; // list of created document ids (redundant)
var map = new Object(); // mapping of document file URI (with .html extension instead of .xml) to created daisy document ID
var imagesMap = new Object(); // mapping of image file name to created daisy document ID

// fetch collection
var daisyCollection = repository.getCollectionManager().getCollection(collection, false);

// begin!
importImages();
importXdocsRecursive(new File(docRoot));
postProcessLinks();

// Recursively processes a directory containing xdoc documents.
//   dir: java.io.File of the directory to walk.
// Every "*.xml" file except book.xml is parsed; documents with the
// document-v10 public id are transformed, stored in the repository and
// registered in 'map' and 'docids'. Everything else is reported as skipped.
function importXdocsRecursive(dir) {
    var files = dir.listFiles();
    // listFiles() returns null when 'dir' is not a readable directory
    if (files == null) {
        print("SKIPPED directory because it could not be listed: " + dir.getAbsolutePath());
        return;
    }

    for (var i = 0; i < files.length; i++) {
        var file = files[i];
        if (file.getName().endsWith(".xml") && file.getName() != "book.xml") {
            var document = parse(file.getAbsolutePath());
            print("parsed " + file.getAbsolutePath());

            // the title must be taken from the xdoc, before it is transformed to HTML
            var title = extractTitle(document, file.getName());
            var doctype = document.getDoctype();
            var usedoc = false;

            if (doctype == null) {
                print("SKIPPED file because it has no doctype: " + file.getAbsolutePath());
            } else if (doctype.getPublicId() == "-//APACHE//DTD Documentation V1.0//EN") {
                document = transformToDaisyHtml(document);
                usedoc = true;
            } else {
                print("SKIPPED file because it has an unsupported doctype: " + file.getAbsolutePath());
            }

            if (usedoc) {
                handleLinks(document, file);
                var serializedDoc = serializeDoc(document);

                // create doc in daisy
                var daisyDoc = repository.createDocument(title, "SimpleDocument");
                daisyDoc.setPart("SimpleDocumentContent", "text/xml", serializedDoc);
                daisyDoc.addToCollection(daisyCollection);
                daisyDoc.save();

                // links in other documents refer to the .html name of this file
                map[changeExtension(file.toURI().toString())] = daisyDoc.getId();
                docids.push(daisyDoc.getId());
            }
        } else if (file.isDirectory()) {
            importXdocsRecursive(file);
        } else {
            print("SKIPPED file " + file.getAbsolutePath());
        }
    }
}

// change .xml to .html extension
//   uri: java.lang.String; returned unchanged when it does not end in ".xml".
function changeExtension(uri) {
    if (uri.endsWith(".xml")) {
        return uri.substring(0, uri.length() - 4) + ".html";
    } else {
        return uri;
    }
}

// parses an xml file or stream
//   data: whatever org.xml.sax.InputSource accepts in its constructor; the
//         callers in this script pass a file path or an input stream.
// Returns the parsed DOM document.
function parse(data) {
    // directly construct xerces parser since this allows to set some useful options, such as ignoring the dtd
    var parser = new Packages.org.apache.xerces.parsers.DOMParser();
    parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
    parser.setFeature("http://apache.org/xml/features/create-cdata-nodes", false);
    parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
    parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    parser.parse(new Packages.org.xml.sax.InputSource(data));
    return parser.getDocument();
}

// extracts the document/header/title
//   document:     DOM document of the xdoc.
//   defaultTitle: returned when there is no <title> element or when it has
//                 no direct text content.
// Only the direct text-node children of the first <title> element are
// concatenated; text inside nested elements is not included.
function extractTitle(document, defaultTitle) {
    var titles = document.getElementsByTagName("title");
    if (titles.getLength() < 1)
        return defaultTitle;

    // hooray for dom!
    var titleString = "";
    var titleEl = titles.item(0);
    var children = titleEl.childNodes;
    for (var i = 0; i < children.getLength(); i++) {
        var child = children.item(i);
        if (child.nodeType == 3) // 3 = text node type
            titleString += child.getData(); // take the i-th child, not always the first one
    }

    // do not hand an empty name to the repository
    if (titleString.length == 0)
        return defaultTitle;
    return titleString;
}

// translates an xdoc document to daisy html using xsl
//   document: DOM document of the xdoc.
// Returns the root node of the DOM tree produced by the stylesheet.
function transformToDaisyHtml(document) {
    var transformer = stylesheetTemplate.newTransformer();
    var source = new Packages.javax.xml.transform.dom.DOMSource(document);
    var result = new Packages.javax.xml.transform.dom.DOMResult();
    transformer.transform(source, result);
    return result.getNode();
}

// absolutizes relative URLs in the file
//   document: DOM tree of the transformed (HTML) document, modified in place.
//   file:     java.io.File the document was read from; its URI is the base
//             against which every <a href> is resolved.
// Hrefs that cannot be resolved are reported and left untouched.
function handleLinks(document, file) {
    var docURI = file.toURI();
    var links = document.getElementsByTagName("a");
    for (var i = 0; i < links.getLength(); i++) {
        var hrefAttr = links.item(i).getAttributeNodeNS(null, "href");
        if (hrefAttr != null) {
            try {
                var absURI = docURI.resolve(hrefAttr.getValue());
                hrefAttr.setValue(absURI.toString());
            } catch (e) {
                print("error resolving uri, skipping: " + hrefAttr.getValue());
            }
        }
    }
}

// serialize dom tree and pull it through HTML cleaner to get exact HTML as it would have been saved in the daisy wiki
//   document: DOM node to serialize.
// Returns the result of the HTML cleaner's cleanToByteArray.
function serializeDoc(document) {
    var transformer = transformerFactory.newTransformer(); // identity transform
    var source = new Packages.javax.xml.transform.dom.DOMSource(document);
    var writer = new java.io.StringWriter();
    var result = new Packages.javax.xml.transform.stream.StreamResult(writer);
    transformer.transform(source, result);
    var xml = writer.toString();
    return htmlCleanerTemplate.newHtmlCleaner().cleanToByteArray(xml);
}

// for all created documents, translates links to other created documents
// A document is only saved again when at least one link was translated.
function postProcessLinks() {
    print("============================================================================");
    print("Will now translate links.");
    print("============================================================================");
    for (var i = 0; i < docids.length; i++) {
        print("Working on doc " + docids[i] + " (" + i + "/" + docids.length + ")");
        var daisyDoc = repository.getDocument(docids[i], true);
        var xmldoc = parse(daisyDoc.getPart("SimpleDocumentContent").getDataStream());
        var didSomething = translateLinks(xmldoc);
        if (didSomething) {
            var serializedDoc = serializeDoc(xmldoc);
            daisyDoc.setPart("SimpleDocumentContent", "text/xml", serializedDoc);
            daisyDoc.save();
        }
    }
}

// Rewrites, in place, the links of one stored document to "daisy:<id>" links.
//   document: DOM tree of the stored document.
// <a href> values are looked up in 'map'; <img src> values starting with
// "images/" are looked up (without that prefix) in 'imagesMap'.
// Returns true when at least one attribute was changed.
function translateLinks(document) {
    var didSomething = false;
    var id;

    // normal links
    var links = document.getElementsByTagName("a");
    for (var i = 0; i < links.getLength(); i++) {
        var hrefAttr = links.item(i).getAttributeNodeNS(null, "href");
        if (hrefAttr != null) {
            id = map[hrefAttr.getValue()];
            if (id != null) {
                hrefAttr.setValue("daisy:" + id);
                didSomething = true;
                print("translated link");
            }
        }
    }

    // images
    var images = document.getElementsByTagName("img");
    for (var j = 0; j < images.getLength(); j++) {
        var srcAttr = images.item(j).getAttributeNodeNS(null, "src");
        if (srcAttr != null) {
            var src = srcAttr.getValue();
            if (src.startsWith("images/")) {
                src = src.substring("images/".length);
                print ("looking up image " + src);
                id = imagesMap[src];
                if (id != null) {
                    srcAttr.setValue("daisy:" + id);
                    didSomething = true;
                    print("translated image link");
                }
            }
        }
    }

    return didSomething;
}

// imports the images
// Every .jpg, .gif and .png file directly inside imagesRoot is stored as an
// "Image" document and registered in 'imagesMap' under its file name.
// Sub-directories are not visited.
function importImages() {
    print("====================================================");
    print("Importing images");
    print("====================================================");
    var imagesDir = new File(imagesRoot);
    var files = imagesDir.listFiles();
    // listFiles() returns null when imagesRoot is not a readable directory
    if (files == null) {
        print("Images directory could not be listed, no images imported: " + imagesDir.getAbsolutePath());
        return;
    }

    for (var i = 0; i < files.length; i++) {
        var file = files[i];
        var fileName = file.getName();
        if (fileName.endsWith(".jpg") || fileName.endsWith(".gif") || fileName.endsWith(".png")) {
            print("Importing image " + file.getName());
            // document name = file name without its 4-character extension
            var docName = fileName.substring(0, fileName.length() - 4);
            var daisyDoc = repository.createDocument(docName, "Image");

            var mimeType;
            if (fileName.endsWith(".jpg"))
                mimeType = "image/jpeg";
            else if (fileName.endsWith(".png"))
                mimeType = "image/png";
            else if (fileName.endsWith(".gif"))
                mimeType = "image/gif";

            daisyDoc.setPart("ImageData", mimeType, new FilePartDataSource(file));
            daisyDoc.setPartFileName("ImageData", file.getName());
            daisyDoc.addToCollection(daisyCollection);
            daisyDoc.save();
            imagesMap[file.getName()] = daisyDoc.getId();
            print("stored image " + file.getName());
        }
    }
}
