You can download those files from http://jsoftware.com/jwiki/JuneKim


07. 2. 13, June Kim <[EMAIL PROTECTED]>이(가) 작성:
There are many rough corners to refine, but here's the first working version.

First you need to create unitab.ijf file.

=======================================
require 'regex jfiles'
NB. Raw text of the Unicode East Asian Width data file.
t=: 1!:1 <'temp\box\EastAsianWidth.txt'
NB. Regex matches for single 4-hex-digit code points, and for "lo..hi" ranges of
NB. up to 4 hex digits each; both must be followed by ";" and a width class.
point=:'^([0-9A-F]{4});(Na|N|H|A|W|F)' rxmatches t
range=:'([0-9A-F]{1,4})\.\.([0-9A-F]{1,4});(Na|N|H|A|W|F)' rxmatches t

NB. Matched substrings per match, with the first column of each row dropped
NB. (presumably the whole-match column, leaving only the captured groups).
pod=: }."1 point rxfrom t
rad=: }."1 range rxfrom t
NB. Width classes; a class is encoded as its index in this list (N=0 ... F=4, W=5).
widthcode=:;: 'N Na H A F W'
towc=: widthcode&i. NB. towidthcode

NB. dfh: upper-case hexadecimal digit string -> integer.
dfh=: 16&#. @ ('0123456789ABCDEF'&i.)
NB. po: one row per single code point: (boxed code point) , (boxed width code).
po=:(dfh each {."1 pod),. <"0 towc"0 {:"1 pod
NB. ra: one row per range: (boxed lo,hi pair) , (boxed width code).
ra=:(,&.>/"1 dfh each 2&{."1 rad),. <"0 towc"0 {:"1 rad
NB. poa: the single code points opened into a plain integer list (amend indices).
poa=:>{."1 po

NB. fill: x is (lo,hi) ; code.  Returns y with every index lo..hi (inclusive)
NB. set to code; all other items of y are unchanged.
fill=: 4 : 0
       'span code'=. x
       idx=. ({. span) + i. >: -~/ span    NB. lo + 0..(hi-lo)
       ({. code) idx} y
)

NB. Width-code table with one entry per code point 0..65535.
tab=:65536$0 NB. missing is N
NB. Store the width code of every single code point listed in po.
tab=:(> {:"1 po) poa} tab
NB. Apply each range row of ra to its own copy of tab, then merge the copies
NB. item-wise with max (this builds a (#ra) by 65536 intermediate table).
tab=:>./ ra fill"1 tab

NB. Persist the table as a component of the jfile temp\box\unitab.
jcreate 'temp\box\unitab'
(<tab) jappend 'temp\box\unitab'
==================================


Then you can use unitab from now on.

Save the following code in diswid.ijs

==================================
require 'jfiles'
NB. Width-code table read back from component 0 of the unitab jfile.
tab=: > jread 'temp\box\unitab';0
NB. diswidr: display width (1 or 2) of each character.  Each rank-1 row is
NB. converted to code points, looked up in tab, and codes of 4 or more give width 2.
diswidr=: >:@:(4&<:)@:({&tab)@:(3&u:@:ucp"1)  NB.for rank 1
NB. diswid: same as diswidr, except that an empty list gives 0.
diswid=: diswidr`0: @. (-:&'')
===================================

Finally, we can have a verb to change a box into unicode, with
visually correct alignment. I've interspersed test codes(assert). You
may delete them. I double-checked all the tests run ok on my machine.
(Used DejaVu Sans Mono on Windows XP SP2, Korean version)

Note: Box lines are A(Ambiguous) width in unicode and I took them as
"narrow" for simplicity.

====================================
NB. Based on Roger's work at
NB. http://www.jsoftware.com/jwiki/Essays/Boxed_Array_Display

require 'strings'
load 'temp\box\diswid.ijs'


unibox=: 3 : 0  NB. by Chris Burke
   NB. Replace the 11 bytes at a. indices 16..26 found in y by the Unicode
   NB. line-drawing characters listed below, and return the result as UTF-8.
   NB. If y contains none of those bytes it is returned unchanged.
   asciibox=. a. {~ 16 + i. 11
   if. -. 1 e. y e. asciibox do. y return. end.
   uni=. 4 u: 9484 9516 9488 9500 9532 9508 9492 9524 9496 9474 9472
   wide=. ucp y
   hit=. wide e. asciibox            NB. positions to replace, after conversion
   repl=. uni {~ asciibox i. hit # wide
   utf8 repl (I. hit)} wide
)


NB. boxed: 1 if the argument is a boxed array (datatype code 32).
boxed  =: 32 = 3!:0
NB. mt: 1 if the argument has a zero-length axis.
mt     =: 0 e. $
NB. boxc: the session's current box-drawing characters.
boxc   =: 9!:6 ''
NB. ub: box-drawing bytes -> Unicode line-drawing code points.
NB. The archived post had this definition mangled by e-mail address
NB. obfuscation; ucp@unibox is the reconstruction that makes the asserts below
NB. (which compare against ucp '...' strings) meaningful.
ub     =: ucp@unibox
NB. boxcu: boxc in Unicode.  Assumes boxc holds the bytes unibox translates
NB. (a. indices 16..26), giving  0 ┌  1 ┬  2 ┐  3 ├  4 ┼  5 ┤  6 └  7 ┴  8 ┘  9 │  10 ─
boxcu  =: ub boxc

NB. Corner pairs and (tee, horizontal) pairs for the top and bottom borders.
tcorn  =: 2  0{boxcu
tint   =: 1 10{boxcu
bcorn  =: 8  6{boxcu
bint   =: 7 10{boxcu

NB. todisplay: join a list of boxed lines into one string, separated by LF.
todisplay=: >@:((([ , LF , ])&.>)/)


NB. displaywidth: largest display width (sum of diswid per row) over all boxes of y.
displaywidth=: >./@:(>./@:(+/"1@:diswid)&>)

NB. toplines: prepend to the boxed lines y one boxed rule made of the last
NB. box-drawing character, as long as the widest line is wide on screen.
toplines=: 3 : 0
       rule=. (displaywidth y) $ ub _1{boxc
       (<rule) , y
)

NB. one wide character needs a two-column rule
te=: ucp each '──';'한'
assert te-:toplines <ucp '한'

NB. the rule follows the widest line
te=: ucp each '───';'한';'abc'
assert te-:toplines ucp each '한';'abc'


NB. leftlines: prefix every boxed line of y with the second-to-last character of boxcu.
leftlines=: 3 : 0
       bar=. < _2 { boxcu
       ,&.>/"1 bar ,. y
)
le=: ucp each '│한';'│abc';'│고'
assert le-:leftlines ucp each '한';'abc';'고'

NB. topleftpoint: replace the first character of the first boxed line of y
NB. with the box-drawing character at index 4 of boxcu; other lines are unchanged.
NB. (An earlier tacit definition, which used the non-Unicode boxc and was
NB. overwritten straight away by this one, has been removed as dead code.)
topleftpoint=: 3 : 0
       p=. {.4 { boxcu
       f=.>{.y
       (<p 0}f),}.y
)
tl=: ucp each '┼──';'│bc';'│고'
assert tl-:topleftpoint ucp each '───';'│bc';'│고'

NB. topleft: add a top rule, then a left bar on every line, then put the
NB. crossing character in the top-left corner.
topleft=: [: topleftpoint [: leftlines toplines

e=: <@dtb;._2 ucp 0 : 0
┼───
│한
│abc
)
assert e-:topleft ucp each '한';'abc'

NB. take: fit the boxed lines y into a cell of x = (height, display width).
NB. Missing lines become empty boxes; each line is then taken to the item count
NB. that makes its display width (per diswid) equal to the requested width,
NB. with empty lines replaced by a blank first.
take=: 4 : 0 NB. x is display size (height,dwidth)
       'height width'=. x
       lines=. height {. y
       len=. # + width - [: +/ diswid     NB. item count giving the wanted display width
       (len {. ' '"_^:mt) each lines
)


e=: <@('. '&charsub)@dtb;._2 ucp 0 : 0
한.
...
...
)
assert 2 3 3 -: ,$&> e
assert e-:(3 3) take <ucp '한'

e=: <@('. '&charsub)@dtb;._2 ucp 0 : 0
한a..
b....
한한.
.....
)
assert 4 5 3 5 -: ,$&> e
assert e-:(4 5) take ucp each '한a';'b';'한한'


NB. Test fixtures: each bN is a list of boxed Unicode lines (trailing blanks removed).
b1=: <@dtb;._2 ucp 0 : 0
de
ab
)

b2=: <@dtb;._2 ucp 0 : 0
def
abc
g한
jkl
)
b3=: <@dtb;._2 ucp 0 : 0
mno
)

NB. ball: a two-cell row holding b1 and b2.
ball=:b1;<b2
NB. (The first definition of inside, copied from the character-matrix version
NB. and overwritten further down, has been removed as dead code.)
NB. todisplay ,&.>/"1 |:> topleft each (<4 3) (take&.>) ball

e=:  <@('. '&charsub)@dtb;._2 ucp 0 : 0
de.│def.
ab.│abc.
...│g한.
...│jkl.
───┼────
def│mno.
abc│....
g한│....
jkl│....
...│....
)

NB. ball2: a 2 by 2 table of cells.
ball2=:ball,: b2;<b3
NB. inside: y is a table of cells, each a list of boxed lines already sized by take.
NB. Every cell gets a top rule and left bars (topleft), the cells of each row are
NB. joined line by line, the rows are razed into one list of lines, and finally
NB. the first line and the first character of every line are dropped.
inside=:[: 1&}. each [: 1&}. [: ; (,&.>&.>/"1) @: (topleft each)
assert e-: inside ({4 5;3 4) (take&.>) ball2

NB. edge: x is a list of cell sizes, y is a (junction, line) character pair.
NB. Gives, for each cell, one junction followed by size-many line characters,
NB. with the very first junction dropped.
NB. (Restored from the form mangled by e-mail address obfuscation in the archive;
NB. this reading produces exactly the border strings asserted below.)
edge   =: ,@(1&,.)@[ }.@# +:@#@[ $ ]
NB. right: x is (row heights ; column widths), y is a list of boxed lines.
NB. Appends the right border character for each line.
right  =: edge&(5 9{boxcu)@>@(0&{)@[ ,~&.>"1 ]

e=: <@dtb;._2 ucp 0 : 0
aaaaa│
aaaaa┤
aaaaa│
aaaaa│
)
assert e-:(1 2;2 2) right  4$<ucp 'aaaaa'

NB. left: x is (row heights ; column widths), y is a list of boxed lines.
NB. Prefixes each line with the left border character built by edge from the
NB. row heights and the characters at indices 3 and 9 of boxcu.
left   =: (edge )&(3 9{boxcu)@>@(0&{)@[ ,&.>"1 ]
e=: <@dtb;._2 ucp 0 : 0
│aaaaa
├aaaaa
│aaaaa
│aaaaa
)
assert e-:(1 2;2 2) left  4$<ucp 'aaaaa'

NB. bot: x is (row heights ; column widths), y is a list of boxed lines.
NB. Builds the bottom border from the column widths (edge with bint), prepends
NB. the two bottom corners (bcorn), rotates by one so the corners end up at the
NB. two ends, boxes the line and appends it after y.
bot    =: ([: < 1&|.@(bcorn&,)@(edge&bint)@>@(1&{)@[) ,"1~ ]
e=: <@dtb;._2 ucp 0 : 0
 aaaaa
 aaaaa
 aaaaa
 aaaaa
└──┴──┘
)
assert e-:(1 2;2 2) bot 4$<ucp ' aaaaa'

NB. top: x is (row heights ; column widths), y is a list of boxed lines.
NB. Builds the top border from the column widths (edge with tint), prepends the
NB. two top corners (tcorn), rotates by one so the corners end up at the two
NB. ends, boxes the line and places it before y.
top    =: ([:< 1&|.@(tcorn&,)@(edge&tint)@>@(1&{)@[) ,"1  ]
e=: <@dtb;._2 ucp 0 : 0
┌──┬──┐
 aaaaa
 aaaaa
 aaaaa
 aaaaa
)
assert e-:(1 2;2 2) top 4$<ucp ' aaaaa'

NB. perim: draw the full perimeter around the lines y.
NB. x perim y  is  x top x bot x left x right y, with x = (row heights ; column widths).
perim  =: [ top [ bot [ left right

e=: <@dtb;._2 ucp 0 : 0
┌──┬──┐
│aaaaa│
├aaaaa┤
│aaaaa│
│aaaaa│
└──┴──┘
)
assert e-:(1 2;2 2) perim 4$<ucp 'aaaaa'

NB. frame: x is (row heights ; column widths), y is a table (or list) of cells,
NB. each cell a list of boxed lines.  A list y is first made a one-row table.
NB. Every cell is sized with take using the matching (height,width) pair from
NB. the catalogue of x, the cells are merged by inside, and perim draws the border.
NB. (The archived post had this definition wrapped over two lines and partly
NB. replaced by e-mail address obfuscation; restored to a single line.)
frame=:[ perim {@[ inside@:(take&.>)"2 ,:^:(1 = #@$)@]

e=: <@dtb;._2 ucp 0 : 0
┌───┬────┐
│de │def │
│ab │abc │
│   │g한 │
│   │jkl │
├───┼────┤
│def│mno │
│abc│    │
│g한│    │
│jkl│    │
│   │    │
└───┴────┘
)
assert e-:(4 5;3 4) frame ball2

NB. sh: reshape y into a table: all leading axes collapsed into rows, the last
NB. axis kept as columns (a 1 is prepended to the shape before splitting it).
sh =: (*/@}: , {:)@(1&,)@$ $ ,

NB. matsize: y is a list of boxed lines.  Returns (number of lines , largest
NB. display width of a line), the width being the sum of diswid over the line.
NB. (The earlier definition matsize=:$&> was overwritten immediately by this
NB. one and has been removed as dead code.)
matsize=: 3 : 0
       r=. #y
       c=.     >./ ([: +/ diswid)&> y
       r,c
)

assert 3 4-:matsize ucp each '한글';'abc';'a'
NB. rc: from a table of cells, compute (row heights ; column widths) as the
NB. maxima of the matsize results of the cells.
rc     =: (>./@sh&.>) @: (,.@|:"2@:(0&{"1);1&{"1) @: (matsize&>)
ball3=:    ball2 ,. ball
assert (4 4;3 3 3)-:rc ball3

NB. bl: y is the list of block sizes produced by rows (largest first).  For each
NB. line index k in 0..n-1 (n = first item of y, or 1 if y is empty) count how
NB. many block sizes divide k, then shift left by one with a trailing 0.  The
NB. result is the number of blank lines that follow each line.
NB. (Restored from the form mangled by e-mail address obfuscation.)
bl     =: }.@(,&0)@(+/)@(0&=)@(|/ i.@{.@(,&1))
NB. mask: y is a blank-line count per line.  Gives indices into (blank , lines):
NB. each 1-based line number once, followed by as many 0s as blank lines follow it.
NB. (Restored from the form mangled by e-mail address obfuscation.)
mask   =: 1&,. #&, ,.&0@>:@i.@#


NB. shline: flatten an array of any rank into a list of its items (connected).
shline=: (*/@(1 , $)) $ ,  NB. into 2-dimension (connected)
assert (4$<'aa')-: shline <"1] 2 2 2 $ 'a'

NB. rows: suffix products of the shape of y, i.e. the number of lines in the
NB. whole array, in each item, in each item of an item, and so on.
NB. (Restored from the form mangled by e-mail address obfuscation; the suffix
NB. scan is the reading that satisfies the assert below.)
rows   =: */\.@$
assert  12 6 3-: rows <"1]2 2 3 3$'a'

NB. mat: y is an array of boxed lines of any rank.  Returns one flat list of
NB. boxed lines in which the items along each higher axis are separated by
NB. empty boxes (one more blank line per additional axis).
NB. (Restored from the form mangled by e-mail address obfuscation: the indices
NB. come from mask applied to bl applied to rows.)
mat    =: mask@bl@rows { a: , shline
e=: <@dtb;._2 ucp 0 : 0
aaa
aaa
aaa

aaa
aaa
aaa


aaa
aaa
aaa

aaa
aaa
aaa
)
assert e-:mat <"1]2 2 3 3$'a'

NB. thorn1: display form of y as an array of boxed Unicode lines.
NB. Open arrays are formatted with ": and each row is converted with ucp and
NB. boxed; boxed arrays are handled by thbox.
thorn1 =: (<@ucp"1@":)`thbox @. boxed
NB. thbox: display form of a boxed array.  Every cell is rendered recursively
NB. with thorn1 and flattened to a list of lines with mat; rc then measures the
NB. cells and frame lays them out inside the box border.
NB. (Restored from the form mangled by e-mail address obfuscation.)
thbox  =: (rc frame ]) @: (mat@thorn1&.>)

fulltest=: 2 3 $ (i.2 3) ; 'ab한c' ; (i.4 1) ; (<2 2$'ussr') ; 12 ; <+&.>i.2 2 3
e=: <@dtb;._2 ucp 0 : 0
┌─────┬─────┬─────────┐
│0 1 2│ab한c│0        │
│3 4 5│     │1        │
│     │     │2        │
│     │     │3        │
├─────┼─────┼─────────┤
│┌──┐ │12   │┌─┬──┬──┐│
││us│ │     ││0│1 │2 ││
││sr│ │     │├─┼──┼──┤│
│└──┘ │     ││3│4 │5 ││
│     │     │└─┴──┴──┘│
│     │     │         │
│     │     │┌─┬──┬──┐│
│     │     ││6│7 │8 ││
│     │     │├─┼──┼──┤│
│     │     ││9│10│11││
│     │     │└─┴──┴──┘│
└─────┴─────┴─────────┘
)
assert e-:thbox fulltest
==============================



2007/2/13, June Kim <[EMAIL PROTECTED]>:
> I'm working on the code.
>
> In the mean time, here is the code for calculating display width:
>
> First you need to save the text file at
> http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
>
> ===============================================
> require 'regex jfiles'
> t=: 1!:1 <'EastAsianWidth.txt'
> point=:'^([0-9A-F]{4});(Na|N|H|A|W|F)' rxmatches t
> range=:'([0-9A-F]{1,4})\.\.([0-9A-F]{1,4});(Na|N|H|A|W|F)' rxmatches t
> jcreate 'unidatapoint'
> (< }."1 point rxfrom t) jappend 'unidatapoint'
> jcreate 'unidatarange'
> (< }."1 range rxfrom t) jappend 'unidatarange'
> ===============================================
>
> Now you have unidatapoint.ijf and unidatarange.ijf and are able to use them.
>
> ===============================================
> require 'jfiles'
>
> NB. N  : half
> NB. Na : half
> NB. H  : half
> NB. A  : half
> NB. F  : full
> NB. W  : full
>
> widthcode=:;: 'N Na H A F W'
> pod=:>jread 'unidatapoint';0
> rad=:>jread 'unidatarange';0
>
> towc=: widthcode&i. NB. towidthcode
>
> dfh=. 16&#. @ ('0123456789ABCDEF'&i.)
> po=:(dfh each {."1 pod),. <"0 towc"0 {:"1 pod
> ra=:(,&.>/"1 dfh each 2&{."1 rad),. <"0 towc"0 {:"1 rad
> poa=:>{."1 po
>
> fill=: 4 : 0
>        'r c'=.x
>        r=. ({.r)+ i. >: -~/ r
>        ({.c) r}y
> )
>
> tab=:65536$0 NB. missing is N
> tab=:(> {:"1 po) poa} tab
> tab=:>./ ra fill"1 tab
>
> diswid=: [: >: [: 4&<: [: {&tab 3&u:@ucp  NB.for rank 1
> ================================================
> For performance improvement, you could save tab using jfile and use
> it. Also, you could use more compact representation(using 3 bits to
> represent each character and compress the data).
>
> Usage Example:
>   diswid '한글ab!─'
> 2 2 1 1 1 1
>   (,:~ ((ucp'-') $~ +/@diswid)) ucp '한글ab!-'  NB. properly showing
> the top line in fixed-pitch font
> --------
> 한글ab!-
>
>
>
> 2007/2/13, Eric Iverson <[EMAIL PROTECTED]>:
> > The problem of proper display of boxed unicode data is an interesting
> > one. The first step to getting this fixed is for someone to provide a
> > working J model that takes an arbitrary boxed argument and produces the
> > character stream that properly displays it. If we had such a model we
> > might consider incorporating it into the JE.
> >
> > ----- Original Message -----
> > From: "June Kim" <[EMAIL PROTECTED]>
> > To: "General forum" <[email protected]>
> > Sent: Sunday, February 11, 2007 5:11 AM
> > Subject: Re: [Jgeneral] wd 'set ...' with box draw characters
> >
> >
> > > 2007/2/11, Chris Burke <[EMAIL PROTECTED]>:
> > >> June Kim wrote:
> > > [snip]
> > >> > Second, the box is broken with different width characters(that is,
> > >> > when the length of bytes of the encoding, and the width of the
> > >> > characters on display don't match). What is the usual way of
> > >> > solving
> > >> > it in other programming languages? There is a unicode standard for
> > >> > character widths. http://unicode.org/reports/tr11/
> > >> >
> > >> > Python implements that standard(along with others) in unicodedata
> > >> > module.
> > >> >
> > >> >>>> unicodedata.east_asian_width(u'한')
> > >> > 'W'
> > >> >>>> unicodedata.east_asian_width(u'a')
> > >> > 'Na'
> > >> >
> > >> > (u specifies the following string is unicode. east_asian_width
> > >> > returns
> > >> > the width of the character, not only for east asian characters but
> > >> > all
> > >> > unicode characters; it's got a narrow name due to its history)
> > >> >
> > > [snip]
> > >>
> > >> If you are having problems with display, it is because of the font,
> > >> not
> > >> because we are not using unicode.
> > > [snip]
> > >
> > > When a string is boxed and the string includes characters whose
> > > display width differs from their byte length, then the box is broken in
> > > J. It is not because of the font. It is because J makes an assumption
> > > that every character's width is the same as its byte length, which is
> > > obviously false in many writing+encoding systems, including East
> > > Asian ones. We can definitely say J's box display isn't
> > > internationalized yet.
> > >
> > > For example, 54620 (in unicode code point) is a Korean character,
> > > which is pronounced as "han". Its width is "Wide" (twice as wide as
> > > Latin letters)
> > >
> > >   han=.4 u: 54620
> > >   <han
> > > +---+
> > > |한|
> > > +---+
> > >   <8 u: han
> > > +---+
> > > |한|
> > > +---+
> > >
> > > Since J counts the byte length for determining character's width, and
> > > the byte length for han is 3 in UTF-8( 3-: #8 u: han ), the box's
> > > horizontal character '-'(of which width is "Narrow") is printed three
> > > times, and on the display the box is broken.
> > >
> >
> >
> > 
--------------------------------------------------------------------------------
> >
> >
> > > ----------------------------------------------------------------------
> > > For information about J forums see http://www.jsoftware.com/forums.htm
> >
> > ----------------------------------------------------------------------
> > For information about J forums see http://www.jsoftware.com/forums.htm
> >
>

----------------------------------------------------------------------
For information about J forums see http://www.jsoftware.com/forums.htm

Reply via email to