Gerben, Maybe this is of some use. The Lua code in the file checks the xml and returns info when something is wrong. I am using it for example in my takenotes module as: <notes file="events/na-30416155-5-48v.xml" checkxml="yes"/> And this is a sketch of how it is embedded in that module: % Prerun an xml check. \doifinset{\xmlatt{#1}{checkxml}}{yes,on,true}% {\xmlcommand{#1}{.}{xmlcommon:checkxml}} % Execute this code if no error was found. etc. dr. Hans van der Meer |
\startluacode

-- Define our namespace as hvdm.
hvdm = hvdm or {}

-- Cancel every adjacent <name></name> pair in a string of bare tags and
-- return whatever cannot be cancelled (empty string for a well-nested tree).
-- A single pass with a stack yields the same remainder as repeatedly deleting
-- adjacent pairs, but compares tags as plain strings, so names containing
-- pattern-magic characters such as '-' are handled correctly.
local function reducetags(xml)
    local stack, top = {}, 0
    local pos, len = 1, #xml
    while pos <= len do
        local first, last = string.find(xml, "<[^>]*>", pos)
        if not first then
            -- Trailing text that is not a complete tag stays in the remainder.
            top = top + 1
            stack[top] = string.sub(xml, pos)
            break
        end
        if first > pos then
            -- Stray text between two tags stays in the remainder as well.
            top = top + 1
            stack[top] = string.sub(xml, pos, first - 1)
        end
        local tag = string.sub(xml, first, last)
        -- Only a real closing tag </name> may cancel the tag on top of the stack.
        local name = string.match(tag, "^</([^/>][^>]*)>$")
        if name and top > 0 and stack[top] == "<" .. name .. ">" then
            stack[top] = nil
            top = top - 1
        else
            top = top + 1
            stack[top] = tag
        end
        pos = last + 1
    end
    return table.concat(stack)
end

-- Function checks the correctness of an XML tree.
--   xml     : string holding the XML text (nil is treated as empty).
--   linesep : separator placed between the reported problems (default "\n").
-- Returns "" when no problem was found (an empty input counts as correct),
-- otherwise "XML tree is not correct" followed by one " >>> ..." entry per
-- problem, each entry terminated by linesep.
hvdm.checkXMLtree = function (xml, linesep)
    xml = xml or ""
    linesep = linesep or "\n"

    -- Empty files do not need processing.
    if xml == "" then return "" end

    local problems = {}
    local function report(message)
        problems[#problems + 1] = " >>> " .. message
    end

    -- Step one is remove leading characters before first node.
    local firstnode = string.find(xml, "<", 1, true)
    if firstnode and firstnode > 1 then
        xml = string.sub(xml, firstnode)
    end

    -- Strip all comments <!-- -->. The lazy '.-' stops at the first '-->'.
    xml = string.gsub(xml, "<!%-%-.-%-%->", "")
    if string.find(xml, "<!--", 1, true) then
        report("Open comment <!-- Fix this first")
    end

    -- The <?xml ... ?> declaration must be present. All processing
    -- instructions are then stripped one by one with a lazy match, so that
    -- nodes standing between two instructions are not swallowed.
    if not string.find(xml, "<%?xml.-%?>") then
        report("missing or incorrect <?xml ?>")
    end
    xml = string.gsub(xml, "<%?.-%?>", "")

    -- Strip DOCTYPE nodes, first those with an internal subset [ ... ].
    xml = string.gsub(xml, "<!DOCTYPE[^>%[]*%b[]%s*>", "")
    xml = string.gsub(xml, "<!DOCTYPE[^>]*>", "")

    -- Strip everything between > and <.
    xml = string.gsub(xml, ">[^<]*<", "><")

    -- Spot all cases of '< node', etc. which have bad spaces.
    for node in string.gmatch(xml, "</?%s+[^>]*") do
        report("erroneous whitespace in " .. node .. ">")
    end
    for node in string.gmatch(xml, "<[^>]*%s+/?>") do
        report("erroneous whitespace in " .. node)
    end

    -- Remove the bad spaces so the nodes can still be paired below.
    xml = string.gsub(xml, "<%s+", "<")
    xml = string.gsub(xml, "</%s+", "</")
    xml = string.gsub(xml, "%s+>", ">")
    xml = string.gsub(xml, "%s+/>", "/>")

    -- Strip all <xx/> nodes, must be done before stripping attributes.
    xml = string.gsub(xml, "<[^>]*/>", "")

    -- Strip attributes: keep only the name of every node. Done with a single
    -- substitution; it terminates regardless of what follows the last node.
    xml = string.gsub(xml, "<([^%s<>]+)%s[^>]*>", "<%1>")

    -- Strip all remaining whitespace.
    xml = string.gsub(xml, "%s+", "")

    -- At this point we have the bare <node> tree.
    -- The xml is correct when nothing is left after cancelling all pairs.
    local leftover = reducetags(xml)
    if #leftover > 0 then
        report(leftover)
    end

    if #problems > 0 then
        return "XML tree is not correct" .. linesep
            .. table.concat(problems, linesep) .. linesep
    end
    return ""
end -- end of checkXMLtree

-- Function checks the correctness of an XML tree in given file.
-- Returns "" when the file is correct (an empty tree has no errors),
-- otherwise a message naming the file; problems are separated by "\crlf ".
-- Relies on hvdm.filesuffix, which is defined outside this code.
hvdm.checkXMLfile = function (filename)
    -- Filename must have suffix .xml.
    if hvdm.filesuffix(filename) ~= "xml" then
        return "Filename of " .. filename .. " missing suffix .xml"
    end

    -- Open the file for reading; report a file that cannot be opened instead
    -- of silently returning nil.
    local inputfile = io.open(filename, "r")
    if not inputfile then
        return "XML check of file " .. filename .. " : cannot open file\\crlf "
    end

    -- Read contents into a string then close the file.
    -- The "*a" spelling is understood by Lua 5.1 up to 5.4.
    local xml = inputfile:read("*a")
    inputfile:close()

    -- Execute the test.
    local outcome = hvdm.checkXMLtree(xml, "\\crlf ")
    if outcome == "" then
        return ""
    end
    return "XML check of file " .. filename .. " : " .. outcome
end -- end of checkXMLfile

-- Function checks the correctness of the XML trees in given directory.
-- Returns the concatenated outcomes of hvdm.checkXMLfile for every *.xml
-- file listed by 'ls'; the result is "" when nothing was listed or when all
-- files are correct.
hvdm.checkXMLfiles = function (directoryname)
    -- Read the directory listing straight from the command, no temp file.
    -- NOTE: directoryname is handed to the shell unquoted, as before, so that
    -- '~' and environment variables keep expanding; names with spaces or
    -- shell metacharacters are not supported.
    local listing = io.popen("ls -1 " .. directoryname .. "/*" .. ".xml")
    local outcomes = {}
    if listing then
        -- Check one by one the xml files from the listing.
        for name in listing:lines() do
            outcomes[#outcomes + 1] = hvdm.checkXMLfile(name)
        end
        listing:close()
    end
    -- Return combined outcome.
    return table.concat(outcomes)
end -- end of checkXMLfiles

\stopluacode
|
___________________________________________________________________________________ If your question is of interest to others as well, please add an entry to the Wiki! maillist : ntg-context@ntg.nl / http://www.ntg.nl/mailman/listinfo/ntg-context webpage : http://www.pragma-ade.nl / http://context.aanhet.net archive : https://bitbucket.org/phg/context-mirror/commits/ wiki : http://contextgarden.net ___________________________________________________________________________________