Hi Hans,

On Sun, 2025-04-13 at 22:56 +0200, Hans Hagen wrote:
> \directlua {
>      local n = \fontid\font
>      local f = font.getfont(n)
>      local c = f.characters[65]
>      if c then
>          local u = c.tounicode
>          c.tounicode = "0042"
>          font.addcharacters(n, { characters = { [65] = c } })
>          f = font.getfont(n)
>          c = f.characters[65]
>          print(u,c.tounicode)
>      end
> }
>
> \def\luaglyphtounicode#1#2%
>    {\directlua {
>      local n = \fontid\font
>      local c = font.getfont(n).characters[\number#1]
>      if c then
>          c.tounicode = "#2"
>          font.addcharacters(n, { characters = { [\number#1] = c } })
>      end
>    }}
>
> % This needs to be done before the font is used!
>
> \luaglyphtounicode{66}{0043}
>
> AB % BC
>
> \bye

This code doesn't seem to work for me; when running "pdftotext" I get
"AB", not "BC". I get the same results with both the LuaTeX in TL25 and
with a self-built version from the latest sources at
gitlab.lisn.upsaclay.fr/texlive/luatex.

Actually, I can't get "tounicode" to work with TFM fonts at all.
Compiling this file with Plain LuaTeX

    \catcode`\%=12
    \directlua{
        pdf.setgentounicode(1)

        --[[ define_font(name, filename): locate filename through kpse, build a
             character table for slots 0x00 to 0xFF in which every slot is a
             dummy 1em box whose tounicode entry spells "it works!", register
             the font with font.define and store the resulting font id in the
             control sequence called name.
             Only block comments are used here: inside directlua the line ends
             are turned into spaces, so a line comment would swallow the rest
             of the chunk. ]]
        local function define_font(name, filename)
            local basename, extension = filename:match("^(.+)%.(.*)$")
            --[[ kpse file type used to search for the font file ]]
            local filetype = ({
                otf = "opentype fonts",
                pfb = "type1 fonts",
                tfm = "tfm",
                ttf = "truetype fonts",
            })[extension]
            --[[ value passed as the format field; stays nil for tfm ]]
            local fonttype = ({
                otf = "opentype",
                pfb = "type1",
                tfm = nil,
                ttf = "truetype",
            })[extension]

            --[[ tfm fonts are named after the file, all others after name ]]
            local font_name
            if extension == "tfm" then
                font_name = basename
            else
                font_name = name
            end

            local encodingbytes
            if extension == "pfb" then
                encodingbytes = 2
            else
                encodingbytes = nil
            end

            local path = kpse.find_file(filename, filetype)

            --[[ local on purpose: a bare assignment would create a global
                 that is shared and overwritten by every call ]]
            local chars = {}
            for i = 0x0, 0xFF do
                chars[i] = {
                    index = i,
                    width = tex.sp("1em"),
                    height = tex.sp("1em"),
                    depth = 0,
                    tounicode = { utf8.codepoint("it works!", 1, -1) },
                }
            end

            local id = font.define {
                name = font_name,
                filename = path,
                type = "real",
                format = fonttype,
                characters = chars,
                tounicode = 1,
                encodingbytes = encodingbytes,
            }
            token.set_char(name, id)
        end

        define_font("otf", "texgyrechorus-mediumitalic.otf")
        define_font("ttf", "NotoSans-Black.ttf")
        define_font("tfm", "ClearSans-Medium-tlf-ot1.tfm")
        define_font("typeone", "pcrro8a.pfb")
    }

    \nopagenumbers
    Initial: {a \par}
    OpenType: {\setfontid\otf a \par}
    TrueType: {\setfontid\ttf a \par}
    TeX Font Metrics: {\setfontid\tfm a \par}
    Type 1: {\setfontid\typeone a \par}

    \bye

gives the following output with "pdftotext":

    Initial: a
    OpenType: it works!
    TrueType: it works!
    TeX Font Metrics: a
    Type 1: it works!

Or if you prefer a ConTeXt example, this file

    % engine=luatex
    \start
        \definedfont[file:ClearSans-Medium-tlf-ot1.tfm]
        \startluacode
            -- Reproducer: redefine the current TFM font with a custom
            -- tounicode value on every character and expose it as a font id.
            -- Ask the PDF backend to generate ToUnicode data.
            pdf.setgentounicode(1)
            -- Table of the font that is selected at this point.
            local current = font.fonts[font.current()]
            -- Use the bare TFM base name as the font name.
            current.name = "ClearSans-Medium-tlf-ot1"
            -- Font-level tounicode flag (presumably required for the
            -- per-character values to be honoured; that is what is being tested).
            current.tounicode = 1
            -- Every character gets the same value: the code points of "it works!".
            for _, char in pairs(current.characters) do
                char.tounicode = { utf8.codepoint("it works!", 1, -1) }
            end
            -- Register the modified table as a new font and keep its id.
            local id = font.define(current)
            -- Store the id globally in the control sequence named tfm so it can
            -- still be used with setfontid after the enclosing group has ended.
            token.set_char("tfm", id, "global")
        \stopluacode
    \stop

    \setuppagenumbering[state=stop]

    \starttext
        x{\setfontid\tfm a \par}
    \stoptext

gives the following output with "pdftotext":

    xa

Thanks,
-- Max

Reply via email to