There are still many rough edges to smooth out, but here's the first working version.

First you need to create the unitab.ijf file.

=======================================
require 'regex jfiles'
NB. Read the East Asian Width data file as one character string.
t=: 1!:1 <'temp\box\EastAsianWidth.txt'
NB. Single code point entries: four hex digits, ';', then a width class.
point=:'^([0-9A-F]{4});(Na|N|H|A|W|F)' rxmatches t
NB. Range entries "first..last;class" with 1 to 4 hex digits on each side.
range=:'([0-9A-F]{1,4})\.\.([0-9A-F]{1,4});(Na|N|H|A|W|F)' rxmatches t

NB. Matched text with the first column dropped (presumably the whole-match
NB. column, leaving only the parenthesised fields - confirm against regex docs).
pod=: }."1 point rxfrom t
rad=: }."1 range rxfrom t
NB. Width classes; the position of a class in this list is its numeric code.
widthcode=:;: 'N Na H A F W'
towc=: widthcode&i. NB. towidthcode

NB. dfh: upper-case hex digit string to integer.
dfh=: 16&#. @ ('0123456789ABCDEF'&i.)
NB. po: one row per entry: boxed code point , boxed width code.
po=:(dfh each {."1 pod),. <"0 towc"0 {:"1 pod
NB. ra: one row per entry: boxed (first,last) code points , boxed width code.
ra=:(,&.>/"1 dfh each 2&{."1 rad),. <"0 towc"0 {:"1 rad
NB. poa: the code points of po opened into a plain list.
poa=:>{."1 po

NB. fill: store one width code over an inclusive range of table positions.
NB.   x  two boxes: (first,last index) ; code
NB.   y  table to amend
NB. Result is y with the code written at first, first+1, ... , last.
fill=: 4 : 0
  'bounds code'=. x
  span=. >: -~/ bounds           NB. number of positions, both ends included
  ({. code) (({. bounds) + i. span)} y
)

NB. Width-code table with 65536 entries, indexed by code point.
tab=:65536$0 NB. missing is N
NB. Store the width code of every single code point entry.
tab=:(> {:"1 po) poa} tab
NB. Apply each range row of ra to its own copy of tab, then merge the copies
NB. with an item-wise maximum.
tab=:>./ ra fill"1 tab

NB. Create the component file and append the boxed table to it.
jcreate 'temp\box\unitab'
(<tab) jappend 'temp\box\unitab'
==================================


Then you can use unitab from now on.

Save the following code in diswid.ijs

==================================
require 'jfiles'
NB. Load the width-code table from component 0 of the unitab file.
tab=:>jread 'temp\box\unitab';0
NB. diswidr: display width per character: 2 where the table entry is 4 or
NB. more, otherwise 1.
diswidr=: [: >: [: 4&<: [: {&tab 3&u:@:ucp"1  NB.for rank 1
NB. diswid: as diswidr, except that an argument matching '' gives 0.
diswid=: diswidr`0:@.(''&-:)
===================================

Finally, we can have a verb that renders a boxed array as Unicode text with
visually correct alignment. I've interspersed test code (the assert lines); you
may delete it. I double-checked that all the tests run OK on my machine.
(Used DejaVu Sans Mono on Windows XP SP2, Korean version)

Note: Box-drawing characters have A (Ambiguous) width in Unicode, and I treat
them as "narrow" for simplicity.

====================================
NB. Based on Roger's work at
NB. http://www.jsoftware.com/jwiki/Essays/Boxed_Array_Display

require 'strings'
load 'temp\box\diswid.ijs'


NB. unibox: replace the characters with codes 16..26 in text y by Unicode
NB. box-drawing characters and pass the result through utf8.
NB. y is returned unchanged when it contains none of those characters.
unibox=: 3 : 0  NB. by Chris Burke
   fm=. (16+i.11) { a.
   msk=. y e. fm
   if. -. 1 e. msk do. y return. end.
   NB. replacement code points, in the same order as fm
   to=. 4 u: 9484 9516 9488 9500 9532 9508 9492 9524 9496 9474 9472
   y=. ucp y
   NB. recompute the mask against y as converted by ucp
   msk=. y e. fm
   un=. to {~ fm i. msk#y
   utf8 un (I.msk) } y
)


NB. boxed: 1 if the datatype code of y (3!:0) is 32, i.e. y is boxed.
boxed  =: 32 = 3!:0
NB. mt: 1 if the shape of y contains a 0, i.e. y is empty.
mt     =: 0 e. $
NB. boxc: result of 9!:6 '' (the session's box-drawing characters).
boxc   =: 9!:6 ''
NB. ub: convert box-drawing characters to Unicode characters.
NB. NOTE(review): the mail archive replaced the original right-hand side by
NB. "[EMAIL PROTECTED]". ucp@unibox is a reconstruction: unibox yields utf8
NB. text and the asserts in this script compare against ucp text - confirm.
ub=: ucp@unibox
NB. boxcu: the session box-drawing characters passed through ub.
boxcu=:ub boxc

NB. Character pairs selected from boxcu by position. top and bot combine a
NB. corner pair with an intersection pair to build a border line.
NB. Going by the replacement order in unibox these are presumably
NB. (top-right,top-left), (top tee,horizontal), (bottom-right,bottom-left)
NB. and (bottom tee,horizontal) - confirm against the session's 9!:6 setting.
tcorn  =: 2  0{boxcu
tint   =: 1 10{boxcu
bcorn  =: 8  6{boxcu
bint   =: 7 10{boxcu

NB. todisplay: join a list of boxed lines into one string with LF between
NB. consecutive lines.
todisplay=:[:>(([ ,LF , ])&.>)/


NB. displaywidth: largest display width (sum of diswid per line) over the
NB. boxed contents of y.
displaywidth=:[:>./ ([:>./ +/"1@:diswid) &>

NB. toplines: prepend to the boxed lines y one boxed line made of the last
NB. box-drawing character, repeated to the display width of the widest line.
toplines=: 3 : 0
        (< (displaywidth y) $ ub _1{boxc),y
)

NB. test: one wide character needs two line characters
te=: ucp each '──';'한'
assert te-:toplines <ucp '한'

NB. test: the widest line decides the length of the top line
te=: ucp each '───';'한';'abc'
assert te-:toplines ucp each '한';'abc'


NB. leftlines: prefix every boxed line of y with the second-to-last
NB. character of boxcu (the vertical line in the test below).
leftlines=: 3 : 0
        ,&.>/"1 (<_2{ boxcu),. y
)
NB. test: each line gains one leading vertical bar
le=: ucp each '│한';'│abc';'│고'
assert le-:leftlines ucp each '한';'abc';'고'

NB. topleftpoint: replace the first character of the first boxed line of y
NB. by item 4 of boxcu (the crossing character); other lines are unchanged.
NB. y is a list of boxed lines whose first line is not empty.
NB. The tacit definition that used to precede this one was overwritten
NB. immediately and never used, so it has been removed.
topleftpoint=: 3 : 0
        corner=. {.4 { boxcu
        first=. >{.y
        (<corner 0}first),}.y
)
NB. test: only the first character of the first line is replaced
tl=: ucp each '┼──';'│bc';'│고'
assert tl-:topleftpoint ucp each '───';'│bc';'│고'

NB. topleft: add a top line, then a left line, then put the crossing
NB. character in the top-left position of a list of boxed lines.
topleft=: topleftpoint @ leftlines @ toplines

NB. test: expected text is read line by line, trailing blanks removed
e=: <@dtb;._2 ucp 0 : 0
┼───
│한
│abc
)
assert e-:topleft ucp each '한';'abc'

NB. take: pad a list of boxed lines y to a given display size.
NB.   x  height , display width
NB. The list is taken to the given height, and each line is extended so
NB. that its display width (per diswid) reaches the given width.
take=: 4 : 0 NB. x is display size (height,dwidth)
        'r c'=.x
        NB. take r lines
        l=. r{.y
        NB. new length = current length + missing display width; an empty
        NB. line is first replaced by ' ' (see mt)
        ((#+(c- [:+/ diswid)) {. ' '"_^:mt)   each l
)


NB. test data: '.' in the expected text stands for a blank
e=: <@('. '&charsub)@dtb;._2 ucp 0 : 0
한.
...
...
)
NB. a line holding a wide character has one character fewer
assert 2 3 3 -: ,$&> e
assert e-:(3 3) take <ucp '한'

e=: <@('. '&charsub)@dtb;._2 ucp 0 : 0
한a..
b....
한한.
.....
)
assert 4 5 3 5 -: ,$&> e
assert e-:(4 5) take ucp each '한a';'b';'한한'


NB. b1, b2, b3: sample cells for the tests, each a list of boxed lines.
b1=: <@dtb;._2 ucp 0 : 0
de
ab
)

NB. b2 contains one wide character in its third line.
b2=: <@dtb;._2 ucp 0 : 0
def
abc
g한
jkl
)
NB. b3 is a single line.
b3=: <@dtb;._2 ucp 0 : 0
mno
)

NB. ball: a list of two sample cells.
ball=:b1;<b2
NB. NOTE(review): this definition of inside is replaced further down before
NB. any use that is visible in this script.
inside =: 1 1&}. @: ; @: (,.&.>/"1) @: (topleft&.>)
NB. todisplay ,&.>/"1 |:> topleft each (<4 3) (take&.>) ball

NB. expected interior of a 2 by 2 table; '.' stands for a blank
e=:  <@('. '&charsub)@dtb;._2 ucp 0 : 0
de.│def.
ab.│abc.
...│g한.
...│jkl.
───┼────
def│mno.
abc│....
g한│....
jkl│....
...│....
)

NB. ball2: a 2 by 2 table of sample cells.
ball2=:ball,: b2;<b3
NB. inside: give every cell a top and a left line (topleft), join the cells
NB. of each table row line by line, run the rows together, then drop the
NB. first line and the first character of every remaining line.
inside=:[: 1&}. each [: 1&}. [: ; (,&.>&.>/"1) @: (topleft each)
assert e-: inside ({4 5;3 4) (take&.>) ball2

NB. edge: characters for one side of the frame.
NB.   x  list of cell sizes (row heights or column widths)
NB.   y  two characters: junction , line
NB. Every cell contributes one junction followed by (size) line characters;
NB. the leading junction is then dropped, so junctions appear only between
NB. cells and the result has (+/x) + (#x) - 1 characters.
NB. The mail archive had replaced parts of this line by "[EMAIL PROTECTED]";
NB. restored from the essay this script is based on and consistent with the
NB. asserts for right, left, top and bot.
edge   =: ,@(1&,.)@[ }.@# +:@#@[ $ ]
NB. right: append the right border to the boxed lines y.
NB.   x  two boxes; item 0 is opened and passed to edge with items 5 9 of
NB.      boxcu, giving one border character per line
right  =: edge&(5 9{boxcu)@>@(0&{)@[ ,~&.>"1 ]

NB. test: item 0 of x is 1 2, so the junction character is on the second line
e=: <@dtb;._2 ucp 0 : 0
aaaaa│
aaaaa┤
aaaaa│
aaaaa│
)
assert e-:(1 2;2 2) right  4$<ucp 'aaaaa'

NB. left: prefix the boxed lines y with the left border; same arguments as
NB. right, using items 3 9 of boxcu.
left   =: (edge )&(3 9{boxcu)@>@(0&{)@[ ,&.>"1 ]
NB. test: mirror image of the test for right
e=: <@dtb;._2 ucp 0 : 0
│aaaaa
├aaaaa
│aaaaa
│aaaaa
)
assert e-:(1 2;2 2) left  4$<ucp 'aaaaa'

NB. bot: append the bottom border line to the boxed lines y.
NB.   x  two boxes; item 1 is opened and passed to edge with bint, the
NB.      bcorn pair is put in front and the result rotated by one so that
NB.      one corner ends up at each end
bot    =: ([: < 1&|.@(bcorn&,)@(edge&bint)@>@(1&{)@[) ,"1~ ]
NB. NOTE(review): the test input below starts with a blank (' aaaaa') while
NB. the expected lines as shown here do not; leading blanks may have been
NB. lost when the message was archived - confirm.
e=: <@dtb;._2 ucp 0 : 0
aaaaa
aaaaa
aaaaa
aaaaa
└──┴──┘
)
assert e-:(1 2;2 2) bot 4$<ucp ' aaaaa'

NB. top: put the top border line in front of the boxed lines y; built like
NB. bot, but from tcorn and tint.
top    =: ([:< 1&|.@(tcorn&,)@(edge&tint)@>@(1&{)@[) ,"1  ]
NB. NOTE(review): the test input below starts with a blank (' aaaaa') while
NB. the expected lines as shown here do not; leading blanks may have been
NB. lost when the message was archived - confirm.
e=: <@dtb;._2 ucp 0 : 0
┌──┬──┐
aaaaa
aaaaa
aaaaa
aaaaa
)
assert e-:(1 2;2 2) top 4$<ucp ' aaaaa'

NB. perim: add the whole border: right first, then left, bot and top, each
NB. called with the same left argument x.
perim  =: [ top [ bot [ left right

NB. test: all four sides around four lines of text
e=: <@dtb;._2 ucp 0 : 0
┌──┬──┐
│aaaaa│
├aaaaa┤
│aaaaa│
│aaaaa│
└──┴──┘
)
assert e-:(1 2;2 2) perim 4$<ucp 'aaaaa'

NB. frame: draw a table of cells with interior lines and a border.
NB.   x  two boxes: row heights ; column widths (display widths)
NB.   y  list or table of cells, each cell a list of boxed lines
NB. A list y is first turned into a one-row table. The catalogue {x gives
NB. the (height,width) pair of every cell; each cell is padded to its size
NB. with take, the cells are joined by inside, and perim adds the border.
NB. The mail archive had replaced two tokens by "[EMAIL PROTECTED]" and had
NB. wrapped the definition over two lines; restored from the essay this
NB. script is based on.
frame=:[ perim {@[ inside@:(take&.>)"2 ,:^:(1 = #@$)@]

NB. test: complete frame around the 2 by 2 sample table
e=: <@dtb;._2 ucp 0 : 0
┌───┬────┐
│de │def │
│ab │abc │
│   │g한 │
│   │jkl │
├───┼────┤
│def│mno │
│abc│    │
│g한│    │
│jkl│    │
│   │    │
└───┴────┘
)
assert e-:(4 5;3 4) frame ball2

NB. sh: reshape y into a table: the product of all leading dimensions by
NB. the last dimension (a 1 is put in front of the shape first).
sh =: (*/@}: , {:)@(1&,)@$ $ ,

NB. matsize: display size of a list of boxed lines y.
NB. Result is (number of lines) , (largest display width per diswid).
NB. The former first definition ($&>, a plain character count) was replaced
NB. at once by this one and has been removed. A 0 is included in the
NB. maximum so that an empty list gives width 0 rather than negative
NB. infinity; this does not change the result for non-empty lists.
matsize=: 3 : 0
        r=. #y
        c=.     >./ 0 , ([: +/ diswid)&> y
        r,c
)

NB. test for matsize: two wide characters give a display width of 4
assert 3 4-:matsize ucp each '한글';'abc';'a'
NB. rc: row heights ; column widths for a table of cells: matsize of every
NB. cell, then the maximum height per row and the maximum width per column.
rc     =: (>./@sh&.>) @: (,.@|:"2@:(0&{"1);1&{"1) @: (matsize&>)
NB. ball3: a 2 by 3 sample table.
ball3=:    ball2 ,. ball
assert (4 4;3 3 3)-:rc ball3

NB. bl: number of blank lines after each line of a formatted array.
NB.   y  result of rows (suffix products of the shape)
NB. A line index that is a multiple of k of the block sizes counts k; the
NB. count list is shifted left by one so it refers to the line before.
NB. mask: turn the counts from bl into indices: line k (counting from 1)
NB. is followed by as many 0s as there are blank lines after it.
NB. The mail archive had replaced tokens in both lines by
NB. "[EMAIL PROTECTED]"; restored from the essay this script is based on.
bl     =: }.@(,&0)@(+/)@(0&=)@(|/ i.@{.@(,&1))
mask   =: 1&,. #&, ,.&0@>:@i.@#


NB. shline: ravel an array of boxed lines into a single list.
shline=:*/@(1&,)@$ $ ,  NB. into 2-dimension (connected)
assert (4$<'aa')-: shline <"1] 2 2 2 $ 'a'

NB. rows: suffix products of the shape of y: for every axis, the number of
NB. lines in one block along that axis.
NB. The mail archive had replaced part of this line by "[EMAIL PROTECTED]";
NB. */\.@$ is the form that satisfies the assert that follows.
rows   =: */\.@$
assert  12 6 3-: rows <"1]2 2 3 3$'a'

NB. mat: flatten an array of boxed lines into one list of boxed lines, with
NB. empty boxes (a:) inserted as the blank lines between blocks.
NB. mask@bl@rows gives the indices; index 0 selects the a: put in front of
NB. the list made by shline.
NB. The mail archive had replaced "mask@bl" by "[EMAIL PROTECTED]"; restored
NB. from the essay this script is based on.
mat    =: mask@bl@rows { a: , shline
NB. test: a rank-4 array shows one blank line between tables and two
NB. between the outer blocks
e=: <@dtb;._2 ucp 0 : 0
aaa
aaa
aaa

aaa
aaa
aaa


aaa
aaa
aaa

aaa
aaa
aaa
)
assert e-:mat <"1]2 2 3 3$'a'

NB. thorn1: format y as an array of boxed lines: an unboxed y is formatted
NB. with ": and every row converted with ucp; a boxed y goes to thbox.
NB. thbox: format a boxed array: every cell is formatted with thorn1 and
NB. flattened with mat, the cell sizes are measured with rc, and frame
NB. draws the table.
NB. The mail archive had replaced "mat@thorn1" by "[EMAIL PROTECTED]";
NB. restored from the essay this script is based on.
thorn1 =: (<@ucp"1@":)`thbox @. boxed
thbox  =: (rc frame ]) @: (mat@thorn1&.>)

NB. fulltest: a 2 by 3 boxed table with numbers, text holding a wide
NB. character, a nested box and a rank-3 boxed array.
fulltest=: 2 3 $ (i.2 3) ; 'ab한c' ; (i.4 1) ; (<2 2$'ussr') ; 12 ; <+&.>i.2 2 3
NB. expected display, one boxed line per text line
e=: <@dtb;._2 ucp 0 : 0
┌─────┬─────┬─────────┐
│0 1 2│ab한c│0        │
│3 4 5│     │1        │
│     │     │2        │
│     │     │3        │
├─────┼─────┼─────────┤
│┌──┐ │12   │┌─┬──┬──┐│
││us│ │     ││0│1 │2 ││
││sr│ │     │├─┼──┼──┤│
│└──┘ │     ││3│4 │5 ││
│     │     │└─┴──┴──┘│
│     │     │         │
│     │     │┌─┬──┬──┐│
│     │     ││6│7 │8 ││
│     │     │├─┼──┼──┤│
│     │     ││9│10│11││
│     │     │└─┴──┴──┘│
└─────┴─────┴─────────┘
)
assert e-:thbox fulltest
==============================



2007/2/13, June Kim <[EMAIL PROTECTED]>:
I'm working on the code.

In the mean time, here is the code for calculating display width:

First you need to save the text file at
http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt

===============================================
require 'regex jfiles'
NB. Read the East Asian Width data file as one character string.
t=: 1!:1 <'EastAsianWidth.txt'
NB. Single code point entries (four hex digits) and range entries
NB. (first..last), each followed by ';' and a width class.
point=:'^([0-9A-F]{4});(Na|N|H|A|W|F)' rxmatches t
range=:'([0-9A-F]{1,4})\.\.([0-9A-F]{1,4});(Na|N|H|A|W|F)' rxmatches t
NB. Store the matched text of each kind, first column dropped, in its own
NB. new component file.
jcreate 'unidatapoint'
(< }."1 point rxfrom t) jappend 'unidatapoint'
jcreate 'unidatarange'
(< }."1 range rxfrom t) jappend 'unidatarange'
===============================================

Now you have unidatapoint.ijf and unidatarange.ijf and are able to use them.

===============================================
require 'jfiles'

NB. N  : half
NB. Na : half
NB. H  : half
NB. A  : half
NB. F  : full
NB. W  : full

NB. Width classes; the position of a class in this list is its numeric code.
widthcode=:;: 'N Na H A F W'
NB. Matched fields read back from component 0 of the two data files.
pod=:>jread 'unidatapoint';0
rad=:>jread 'unidatarange';0

towc=: widthcode&i. NB. towidthcode

NB. dfh: upper-case hex digit string to integer. Assigned with =. here,
NB. unlike the other names in this script.
dfh=. 16&#. @ ('0123456789ABCDEF'&i.)
NB. po: one row per entry: boxed code point , boxed width code.
po=:(dfh each {."1 pod),. <"0 towc"0 {:"1 pod
NB. ra: one row per entry: boxed (first,last) code points , boxed width code.
ra=:(,&.>/"1 dfh each 2&{."1 rad),. <"0 towc"0 {:"1 rad
NB. poa: the code points of po opened into a plain list.
poa=:>{."1 po

NB. fill: store one width code over an inclusive range of table positions.
NB.   x  two boxes: (first,last index) ; code
NB.   y  table to amend
NB. Result is y with the code written at first, first+1, ... , last.
fill=: 4 : 0
  'bounds code'=. x
  span=. >: -~/ bounds           NB. number of positions, both ends included
  ({. code) (({. bounds) + i. span)} y
)

NB. Width-code table with 65536 entries, indexed by code point.
tab=:65536$0 NB. missing is N
NB. Store the width code of every single code point entry.
tab=:(> {:"1 po) poa} tab
NB. Apply each range row of ra to its own copy of tab, then merge the copies
NB. with an item-wise maximum.
tab=:>./ ra fill"1 tab

NB. diswid: display width per character: 2 where the table code is 4 or
NB. more (F or W in widthcode), otherwise 1.
diswid=: [: >: [: 4&<: [: {&tab 3&u:@ucp  NB.for rank 1
================================================
For better performance, you could save tab with jfiles and load it from
there. You could also use a more compact representation (using 3 bits to
represent each character, and compressing the data).

Usage Example:
  diswid '한글ab!─'
2 2 1 1 1 1
  (,:~ ((ucp'-') $~ +/@diswid)) ucp '한글ab!-'  NB. properly showing
the top line in fixed-pitch font
--------
한글ab!-



2007/2/13, Eric Iverson <[EMAIL PROTECTED]>:
> The problem of proper display of boxed unicode data is an interesting
> one. The first step to getting this fixed is for someone to provide a
> working J model that takes an arbitrary boxed argument and produces the
> character stream that properly displays it. If we had such a model we
> might consider incorporating it into the JE.
>
> ----- Original Message -----
> From: "June Kim" <[EMAIL PROTECTED]>
> To: "General forum" <[email protected]>
> Sent: Sunday, February 11, 2007 5:11 AM
> Subject: Re: [Jgeneral] wd 'set ...' with box draw characters
>
>
> > 2007/2/11, Chris Burke <[EMAIL PROTECTED]>:
> >> June Kim wrote:
> > [snip]
> >> > Second, the box is broken with different width characters(that is,
> >> > when the length of bytes of the encoding, and the width of the
> >> > characters on display don't match). What is the usual way of
> >> > solving
> >> > it in other programming languages? There is a unicode standard for
> >> > character widths. http://unicode.org/reports/tr11/
> >> >
> >> > Python implements that standard(along with others) in unicodedata
> >> > module.
> >> >
> >> >>>> unicodedata.east_asian_width(u'한')
> >> > 'W'
> >> >>>> unicodedata.east_asian_width(u'a')
> >> > 'Na'
> >> >
> >> > (u specifies the following string is unicode. east_asian_width
> >> > returns
> >> > the width of the character, not only for east asian characters but
> >> > all
> >> > unicode characters; it's got a narrow name due to its history)
> >> >
> > [snip]
> >>
> >> If you are having problems with display, it is because of the font,
> >> not
> >> because we are not using unicode.
> > [snip]
> >
> > When a string is boxed and the string includes characters whose
> > display width differs from their byte length, then the box is broken in J. It
> > is not because of the font. It is because J makes an assumption that
> > every character's width is the same as its byte length, which is
> > obviously false in many writing+encoding systems, including East
> > Asian ones. We can definitely say J's box display isn't internationalized
> > yet.
> >
> > For example, 54620 (as a Unicode code point) is a Korean character,
> > which is pronounced "han". Its width is "Wide" (twice as wide as Latin
> > letters)
> >
> >   han=.4 u: 54620
> >   <han
> > +---+
> > |한|
> > +---+
> >   <8 u: han
> > +---+
> > |한|
> > +---+
> >
> > Since J counts the byte length for determining character's width, and
> > the byte length for han is 3 in UTF-8( 3-: #8 u: han ), the box's
> > horizontal character '-'(of which width is "Narrow") is printed three
> > times, and on the display the box is broken.
> >
>
>
> 
--------------------------------------------------------------------------------
>
>
> > ----------------------------------------------------------------------
> > For information about J forums see http://www.jsoftware.com/forums.htm
>
> ----------------------------------------------------------------------
> For information about J forums see http://www.jsoftware.com/forums.htm
>

----------------------------------------------------------------------
For information about J forums see http://www.jsoftware.com/forums.htm

Reply via email to