On 8/9/2023 12:10 PM, denis.ma...@unibe.ch wrote:
Keith, you can also check hyphenations using a script:

-- check-hyphens.lua
--[[
     analyze hyphenations based on a ConTeXt log file
     enable hyphenation tracking in the ConTeXt file with
     \enabletrackers[hyphenation.applied]
     then run this script with
     lua check-hyphens.lua input_file whitelist.ending
     for the input_file we assume .log, so no need to add this
     for the whitelist a file ending has to be supplied
     the whitelist is optional
]]

-- local lines = string.splitlines(io.loaddata("oeps.tex")or "") or { }

-- local pprint = require('pprint')

--- Entry point of the hyphenation check.
-- Reads `input_file .. ".log"`, keeps the lines selected by
-- getHyphenationLines, reduces each one with cleanLines, removes every
-- entry that also appears in the whitelist and passes the remaining
-- entries to saveResultsToFile together with the name 'check-hyphens.log'.
-- @param input_file      log file name without the ".log" ending (required)
-- @param whitelist_file  optional whitelist file name, including its ending
function main (input_file, whitelist_file)
     -- Without this guard a missing argument surfaces as the cryptic
     -- "attempt to concatenate a nil value" from the line below.
     if input_file == nil then
         error("usage: lua check-hyphens.lua input_file [whitelist.ending]", 0)
     end
     local lines = lines_from(input_file .. ".log")
     -- The whitelist is optional: no file name means nothing is filtered out.
     local whitelist = {}
     if whitelist_file ~= nil then
         whitelist = lines_from(whitelist_file)
     end
     -- Keep each opening parenthesis on the same line as the function name:
     -- Lua 5.1 rejects a call whose "(" starts a new line as ambiguous syntax.
     local hyphenationLines = getHyphenationLines(lines)
     local filteredWordlist = filterHyphenationsWordlist(
         cleanLines(hyphenationLines),
         whitelist)
     saveResultsToFile(filteredWordlist, 'check-hyphens.log')
end

-- see if the file exists

-- http://lua-users.org/wiki/FileInputOutput

-- see if the file exists
--- Report whether `file` can be opened for reading.
-- @param file  path of the file to probe
-- @return true when io.open succeeded, false otherwise
function file_exists(file)
     local handle = io.open(file, "rb")
     if handle == nil then
         return false
     end
     -- The handle was only needed for the probe, so release it right away.
     handle:close()
     return true
end
--- Read a file into a list of its lines.
-- @param file  path of the file to read
-- @return a table with one entry per line, in file order; an empty table
--         when file_exists reports that the file cannot be opened
function lines_from(file)
     local collected = {}
     if file_exists(file) then
         for text in io.lines(file) do
             table.insert(collected, text)
         end
     end
     return collected
end

-- String testing
--- Tell whether `str` begins with the text `start`.
-- @param str    string to inspect
-- @param start  prefix to look for
-- @return true when the first #start characters of str equal start
function starts_with(str, start)
     local prefix_length = #start
     local head = str:sub(1, prefix_length)
     return head == start
end

-- get relevant lines
--- Select the log lines that carry a hyphenated word.
-- A line is kept when it starts with "hyphenated" and contains neither
-- "start hyphenated words" nor "stop hyphenated words".
-- @param lines  list of log lines
-- @return a new list with the selected lines, in their original order
function getHyphenationLines(lines)
     local lines_with_hyphenations = {}
     -- ipairs walks the list from 1 upwards; pairs gives no order guarantee,
     -- and the order of the result ends up in the report.
     for _, line in ipairs(lines) do
         if starts_with(line, "hyphenated")
             -- plain search (4th argument): the markers are literal text
             and not string.find(line, "start hyphenated words", 1, true)
             and not string.find(line, "stop hyphenated words", 1, true)
         then
             lines_with_hyphenations[#lines_with_hyphenations + 1] = line
         end
     end
     return lines_with_hyphenations
end

-- String cleaning
-- wrapper functions

--- Apply cleanLine to every entry of a list.
-- @param xs  list of log lines
-- @return a new list holding cleanLine(x) for each x, in the same order
function cleanLines (xs)
     local cleanedLines = {}
     -- ipairs keeps the entries in list order; pairs does not promise any
     -- particular traversal order.
     for _, line in ipairs(xs) do
         cleanedLines[#cleanedLines + 1] = cleanLine(line)
     end
     return cleanedLines
end

--- Reduce one log line to its word.
-- Runs getWord on the line and then removeTrailingPunctuation on the result.
-- @param x  a single log line
-- @return the value returned by removeTrailingPunctuation
function cleanLine (x)
     local word = getWord(x)
     return removeTrailingPunctuation(word)
end

-- 1. Start reading at colon
--- Return the part of a log line from character 26 to its end.
-- NOTE(review): column 26 is presumably where the word begins after the
-- tracker prefix of the log line -- confirm against an actual log file.
-- @param x  a single log line
-- @return the substring starting at character 26 ("" for shorter lines)
function getWord(x)
     -- in practice we read from character 26 onwards
     local first_kept_column = 26
     return string.sub(x, first_kept_column)
end

-- 2. Remove trailing punctuation
--- Strip one trailing comma from a word.
-- Only the last character is examined, so a comma elsewhere in the string
-- no longer causes an unrelated final character to be cut off.
-- @param x  the word to clean
-- @return x without its final comma, or x unchanged when it does not end
--         in a comma
function removeTrailingPunctuation (x)
     if x:sub(-1) == ',' then
         return x:sub(1, -2)
     end
     return x
end

-- test if word is in second list
--- Look for a value in a list.
-- @param x     value to search for
-- @param list  list to search
-- @return true when some entry equals x, nil otherwise
function inList (x, list)
     local found = nil
     for _, candidate in ipairs(list) do
         if candidate == x then
             found = true
             break
         end
     end
     return found
end

-- Filter hyphenated words based on second list (whitelist)
--- Drop every word that appears in the whitelist.
-- @param xs    list of words to filter
-- @param list  whitelist; nil is treated as an empty whitelist
-- @return a new list with the words of xs that are not whitelisted, in
--         their original order (duplicates are kept)
function filterHyphenationsWordlist (xs, list)
     -- Index the whitelist once so that each word costs a single table
     -- lookup instead of a scan over the whole whitelist.
     local whitelisted = {}
     for _, word in ipairs(list or {}) do
         whitelisted[word] = true
     end
     local result = {}
     for _, word in ipairs(xs) do
         if not whitelisted[word] then
             result[#result + 1] = word
         end
     end
     return result
end

--- Write a list of words to a file, one word per line.
-- @param results      list of words to write
-- @param output_file  path of the file to create or overwrite; when it is
--                     nil the name "check_hyphens.log" is used
-- Raises an error carrying the message from io.open when the file cannot
-- be opened for writing.
function saveResultsToFile(results, output_file)
     -- Honour the caller's file name; the fallback is the name that used to
     -- be hard-coded here.
     local path = output_file or "check_hyphens.log"
     local handle = assert(io.open(path, "w"))
     -- Write through the handle itself so that the process-wide default
     -- output (io.output) is left untouched.
     for _, word in ipairs(results) do
         handle:write(word, '\n')
     end
     handle:close()
end

-- Run: the first command-line argument is the input file, the second
-- (optional) one is the whitelist file
local input_file, whitelist_file = arg[1], arg[2]
main(input_file, whitelist_file)
OK, a little Lua lesson, if you don't mind.

---- xxx.tex ----

\enabletrackers[hyphenation.applied]

\starttext
    \input tufte
\stoptext

---- xxx.tmp ----

re-fine

---- xxx.lua ----

-- Print the hyphenated words of a log file that are not whitelisted.
-- The text between "start hyphenated words" and "stop hyphenated words" is
-- scanned for "<number> : <word>" entries; each entry whose word is not a
-- line of the whitelist file is printed as number and word.
-- Returns without output when no log name is given, when the log cannot be
-- loaded or is empty, or when the markers are not found.
-- Uses io.loaddata, string.splitlines and table.tohash, which are not part
-- of standard Lua (presumably supplied by the mtxrun environment).
local function check(logname,whitename)
    local logtext = logname and io.loaddata(logname)
    if not logtext or logtext == "" then
        return
    end
    local section = string.match(logtext,"start hyphenated words(.-)stop hyphenated words")
    if not section then
        return
    end
    -- a missing or unreadable whitelist behaves like an empty one
    local whitetext = ""
    if whitename then
        whitetext = io.loaddata(whitename) or ""
    end
    local allowed = table.tohash(string.splitlines(whitetext))
    for number, word in string.gmatch(section,"(%d+) *: (%S+)") do
        -- whitelisted words are fine; only report the others
        if not allowed[word] then
            print(number,word)
        end
    end
end

-- first file argument: the log, second (optional) one: the whitelist
local files = environment.files
check(files[1],files[2])

-- print("TEST 1")
-- check("xxx.log")
-- print("TEST 2")
-- check("xxx.log","xxx.tmp")

-------------------

>mtxrun --script xxx xxx.log
1       dis-tinguish
1       harmo-nize
1       re-fine

>mtxrun --script xxx xxx.log xxx.tmp
1       dis-tinguish
1       harmo-nize

That said, I wonder if we should add the filename, just in case one includes 20 files; a whitelist could also be an option to the tracker.

Now the good news is that the tracker is actually already a bit more clever. After a run you will see

  xxx-hyphenation-new.lua

that has the hyphenated words (not the numbers)

and you can make a whitelist

  xxx-hyphenation-old.lua

in which case you only get the new ones.

Hans

-----------------------------------------------------------------
                                          Hans Hagen | PRAGMA ADE
              Ridderstraat 27 | 8061 GH Hasselt | The Netherlands
       tel: 038 477 53 69 | www.pragma-ade.nl | www.pragma-pod.nl
-----------------------------------------------------------------

___________________________________________________________________________________
If your question is of interest to others as well, please add an entry to the 
Wiki!

maillist : ntg-context@ntg.nl / https://www.ntg.nl/mailman/listinfo/ntg-context
webpage  : https://www.pragma-ade.nl / http://context.aanhet.net
archive  : https://bitbucket.org/phg/context-mirror/commits/
wiki     : https://contextgarden.net
___________________________________________________________________________________

Reply via email to