Hi,

I recently converted the Turkish hyphenation pattern file "trhyph.tex" to Unicode. The file is attached. This pattern set includes only the characters used in the current, modern Turkish alphabet ( http://en.wikipedia.org/wiki/Turkish_alphabet ). The pattern file at http://www.ctan.org/get/language/hyph-utf8/tex/generic/hyph-utf8/patterns/hyph-tr.tex also has patterns for characters that are not used in current Turkish. Those older characters may still be useful to people who deal with historical texts, but not really to the present-day Turkish writer.

I hope this file can be turned into one which may be included in the hyph-utf8 package. Let me know if I can help in any way.

Best,

Ekin


% A mechanically generated Turkish Hyphenation table for TeX,
% using the University of Washington diacritical coding
% developed by P. A. MacKay for the Ottoman Texts Project.
% Slightly modified by H. Turgut Uyar.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% June 24, 2008
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Converted to unicode by Sukru Ekin Kocabas for use in XeTeX environment.
% Released in public domain, no rights reserved.
% Mappings can be found at http://unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT
% This page also looks good:
% http://www.microsoft.com/globaldev/reference/sbcs/1254.mspx
% the following characters were modified
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The first column is ISO 8859-9 Code number, the second column is UNICODE
% These are characters specific to Turkish
% 0xFD  0x0131  #       LATIN SMALL LETTER DOTLESS I
% 0xF6  0x00F6  #       LATIN SMALL LETTER O WITH DIAERESIS
% 0xFC  0x00FC  #       LATIN SMALL LETTER U WITH DIAERESIS
% 0xE7  0x00E7  #       LATIN SMALL LETTER C WITH CEDILLA
% 0xF0  0x011F  #       LATIN SMALL LETTER G WITH BREVE
% 0xFE  0x015F  #       LATIN SMALL LETTER S WITH CEDILLA
% 0xDD  0x0130  #       LATIN CAPITAL LETTER I WITH DOT ABOVE
% 0xD6  0x00D6  #       LATIN CAPITAL LETTER O WITH DIAERESIS
% 0xDC  0x00DC  #       LATIN CAPITAL LETTER U WITH DIAERESIS
% 0xC7  0x00C7  #       LATIN CAPITAL LETTER C WITH CEDILLA
% 0xD0  0x011E  #       LATIN CAPITAL LETTER G WITH BREVE
% 0xDE  0x015E  #       LATIN CAPITAL LETTER S WITH CEDILLA
% The upper-case/lower-case mapping of the following two characters needs to be
% modified for Turkish
% 0x49  0x0049  #       LATIN CAPITAL LETTER I
% 0x69  0x0069  #       LATIN SMALL LETTER I
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% As argued in the TeX book by Knuth pp. 452--454, TeX converts
% all characters which are letters into lower case before hyphenation
% This implies that the above characters should have \catcode 11
% Also, the conversion to go from lower to capital case should be defined
% First let's define them all as valid characters
% Give every non-ASCII Turkish letter category code 11 (letter).
% One line per letter: lowercase code point first, then its capital.
% These assignments carry no \global prefix.
\catcode"0131=11 \catcode"0130=11 % dotless i, capital I with dot above
\catcode"00F6=11 \catcode"00D6=11 % o with diaeresis
\catcode"00FC=11 \catcode"00DC=11 % u with diaeresis
\catcode"00E7=11 \catcode"00C7=11 % c with cedilla
\catcode"011F=11 \catcode"011E=11 % g with breve
\catcode"015F=11 \catcode"015E=11 % s with cedilla
% Alright. Let's now define the lowercase versions of all characters
% Lowercase table. Each line handles one letter pair: the lowercase letter
% maps to itself and its capital maps to that lowercase letter.
% Turkish pairs dotless i with ASCII I, and ASCII i with dotted capital I.
% NOTE(review): every assignment is \global and two of them change the ASCII
% letters I (U+0049) and i (U+0069); the settings are therefore not confined
% to any enclosing group and also affect text in other languages -- confirm
% that this is intended.
\global\lccode"0131="0131 \global\lccode"0049="0131 % dotless i, I -> dotless i
\global\lccode"0069="0069 \global\lccode"0130="0069 % i, dotted I  -> i
\global\lccode"00F6="00F6 \global\lccode"00D6="00F6 % o with diaeresis
\global\lccode"00FC="00FC \global\lccode"00DC="00FC % u with diaeresis
\global\lccode"00E7="00E7 \global\lccode"00C7="00E7 % c with cedilla
\global\lccode"011F="011F \global\lccode"011E="011F % g with breve
\global\lccode"015F="015F \global\lccode"015E="015F % s with cedilla
% Now let's define the uppercase ones.
% Uppercase table. Each line handles one letter pair: the lowercase letter
% maps to its capital and the capital maps to itself.
% Turkish pairs dotless i with ASCII I, and ASCII i with dotted capital I.
% NOTE(review): every assignment is \global and two of them change the ASCII
% letters I (U+0049) and i (U+0069); the settings are therefore not confined
% to any enclosing group and also affect text in other languages -- confirm
% that this is intended.
\global\uccode"0131="0049 \global\uccode"0049="0049 % dotless i -> I
\global\uccode"0069="0130 \global\uccode"0130="0130 % i -> dotted I
\global\uccode"00F6="00D6 \global\uccode"00D6="00D6 % o with diaeresis
\global\uccode"00FC="00DC \global\uccode"00DC="00DC % u with diaeresis
\global\uccode"00E7="00C7 \global\uccode"00C7="00C7 % c with cedilla
\global\uccode"011F="011E \global\uccode"011E="011E % g with breve
\global\uccode"015F="015E \global\uccode"015E="015E % s with cedilla

% Converted to ISO-8859-9 from T1 encoding of TeX by the 'replace all' command
% of a text editor
% Then, converted to UTF-8 by the following command in Linux
% iconv --from-code=ISO-8859-9 --to-code=UTF-8 ./oldfile > ./newfile

% Banner, first part: names the pattern set with its version and date.
% \message prints its argument on the terminal and in the log file.
\message{Turkish Hyphenation Patterns `trhyph' Version 2b <June 24, 2008>}
% Banner, second part: states the engines this Unicode version is meant for.
\message{Prepared for TeX systems using UNICODE, like XeTeX.}
% Turkish hyphenation patterns, loaded inside a group.
% Patterns are separated by white space, so several are written per line.
% In a pattern, a digit between two letters rates a break at that position:
% odd values permit it, even values forbid it, and where several patterns
% match the same position the largest value wins. A `.' marks a word edge.
\begingroup

\patterns{%
% Single vowels: 2 before the vowel, 1 after it.
2a1 2e1 2ı1 2i1 2o1 2ö1 2u1 2ü1
% Single consonants: 1 on both sides.
1b1 1c1 1ç1 1d1 1f1 1g1 1ğ1 1h1 1j1 1k1 1l1 1m1 1n1 1p1 1r1 1s1 1ş1 1t1 1v1 1y1 1z1
% Word-final `ecek'.
2e2cek.
% Every ordered pair of consonants: 2 in front of the pair.
% One row per first consonant.
2bb 2bc 2bç 2bd 2bf 2bg 2bğ 2bh 2bj 2bk 2bl 2bm 2bn 2bp 2br 2bs 2bş 2bt 2bv 2by 2bz
2cb 2cc 2cç 2cd 2cf 2cg 2cğ 2ch 2cj 2ck 2cl 2cm 2cn 2cp 2cr 2cs 2cş 2ct 2cv 2cy 2cz
2çb 2çc 2çç 2çd 2çf 2çg 2çğ 2çh 2çj 2çk 2çl 2çm 2çn 2çp 2çr 2çs 2çş 2çt 2çv 2çy 2çz
2db 2dc 2dç 2dd 2df 2dg 2dğ 2dh 2dj 2dk 2dl 2dm 2dn 2dp 2dr 2ds 2dş 2dt 2dv 2dy 2dz
2fb 2fc 2fç 2fd 2ff 2fg 2fğ 2fh 2fj 2fk 2fl 2fm 2fn 2fp 2fr 2fs 2fş 2ft 2fv 2fy 2fz
2gb 2gc 2gç 2gd 2gf 2gg 2gğ 2gh 2gj 2gk 2gl 2gm 2gn 2gp 2gr 2gs 2gş 2gt 2gv 2gy 2gz
2ğb 2ğc 2ğç 2ğd 2ğf 2ğg 2ğğ 2ğh 2ğj 2ğk 2ğl 2ğm 2ğn 2ğp 2ğr 2ğs 2ğş 2ğt 2ğv 2ğy 2ğz
2hb 2hc 2hç 2hd 2hf 2hg 2hğ 2hh 2hj 2hk 2hl 2hm 2hn 2hp 2hr 2hs 2hş 2ht 2hv 2hy 2hz
2jb 2jc 2jç 2jd 2jf 2jg 2jğ 2jh 2jj 2jk 2jl 2jm 2jn 2jp 2jr 2js 2jş 2jt 2jv 2jy 2jz
2kb 2kc 2kç 2kd 2kf 2kg 2kğ 2kh 2kj 2kk 2kl 2km 2kn 2kp 2kr 2ks 2kş 2kt 2kv 2ky 2kz
2lb 2lc 2lç 2ld 2lf 2lg 2lğ 2lh 2lj 2lk 2ll 2lm 2ln 2lp 2lr 2ls 2lş 2lt 2lv 2ly 2lz
2mb 2mc 2mç 2md 2mf 2mg 2mğ 2mh 2mj 2mk 2ml 2mm 2mn 2mp 2mr 2ms 2mş 2mt 2mv 2my 2mz
2nb 2nc 2nç 2nd 2nf 2ng 2nğ 2nh 2nj 2nk 2nl 2nm 2nn 2np 2nr 2ns 2nş 2nt 2nv 2ny 2nz
2pb 2pc 2pç 2pd 2pf 2pg 2pğ 2ph 2pj 2pk 2pl 2pm 2pn 2pp 2pr 2ps 2pş 2pt 2pv 2py 2pz
2rb 2rc 2rç 2rd 2rf 2rg 2rğ 2rh 2rj 2rk 2rl 2rm 2rn 2rp 2rr 2rs 2rş 2rt 2rv 2ry 2rz
2sb 2sc 2sç 2sd 2sf 2sg 2sğ 2sh 2sj 2sk 2sl 2sm 2sn 2sp 2sr 2ss 2sş 2st 2sv 2sy 2sz
2şb 2şc 2şç 2şd 2şf 2şg 2şğ 2şh 2şj 2şk 2şl 2şm 2şn 2şp 2şr 2şs 2şş 2şt 2şv 2şy 2şz
2tb 2tc 2tç 2td 2tf 2tg 2tğ 2th 2tj 2tk 2tl 2tm 2tn 2tp 2tr 2ts 2tş 2tt 2tv 2ty 2tz
2vb 2vc 2vç 2vd 2vf 2vg 2vğ 2vh 2vj 2vk 2vl 2vm 2vn 2vp 2vr 2vs 2vş 2vt 2vv 2vy 2vz
2yb 2yc 2yç 2yd 2yf 2yg 2yğ 2yh 2yj 2yk 2yl 2ym 2yn 2yp 2yr 2ys 2yş 2yt 2yv 2yy 2yz
2zb 2zc 2zç 2zd 2zf 2zg 2zğ 2zh 2zj 2zk 2zl 2zm 2zn 2zp 2zr 2zs 2zş 2zt 2zv 2zy 2zz
% Every ordered pair of vowels: 3 between the two vowels, 2 after the second.
% One row per first vowel.
a3a2 a3e2 a3ı2 a3i2 a3o2 a3ö2 a3u2 a3ü2
e3a2 e3e2 e3ı2 e3i2 e3o2 e3ö2 e3u2 e3ü2
ı3a2 ı3e2 ı3ı2 ı3i2 ı3o2 ı3ö2 ı3u2 ı3ü2
i3a2 i3e2 i3ı2 i3i2 i3o2 i3ö2 i3u2 i3ü2
o3a2 o3e2 o3ı2 o3i2 o3o2 o3ö2 o3u2 o3ü2
ö3a2 ö3e2 ö3ı2 ö3i2 ö3o2 ö3ö2 ö3u2 ö3ü2
u3a2 u3e2 u3ı2 u3i2 u3o2 u3ö2 u3u2 u3ü2
ü3a2 ü3e2 ü3ı2 ü3i2 ü3o2 ü3ö2 ü3u2 ü3ü2
% Two longer patterns; presumably exceptions for specific words -- their
% purpose is not stated in this file.
tu4r4k m1t4rak
}
\endgroup
% Stop reading here; anything after this line is never seen by TeX.
\endinput

Reply via email to