June Kim wrote:
How do you convert between character encodings in J? Has anyone tried
using iconv library from J?

The following is my quick-and-dirty Python code using iconv.
(You need libiconv2.dll, libcharset1.dll, and libintl3.dll
which you can obtain from the binaries zip file and the dependencies
zip file from http://gnuwin32.sourceforge.net/packages/libiconv.htm)
Could someone help me translate the code into J? I am not an expert at
using 'cd' in J.

======

from ctypes import *

# Symbolic names for the errno values this script recognises.
# 0 maps to the empty string, which callers treat as "no error".
errno_codes={
    0:'',
    7:'E2BIG',
    22:'EINVAL',
    42:'EILSEQ',
}

# The same two-character sample text, spelled out as raw bytes in each
# encoding; test() checks that each converts to the other.
KOREAN_CP949=(
    '\xc7\xd1'      # first character
    '\xb1\xdb'      # second character
)
KOREAN_UTF8=(
    '\xed\x95\x9c'  # first character
    '\xea\xb8\x80'  # second character
)

# The C runtime is loaded only to read errno: _errno() is declared to return
# a pointer to an int, which get_errno() dereferences.
libc=cdll.msvcrt
libc._errno.restype=POINTER(c_int)

# libiconv2.dll exports its entry points with a "lib" prefix.
libiconv=cdll.libiconv2
iconv_open=libiconv.libiconv_open
iconv_close=libiconv.libiconv_close
iconv_=libiconv.libiconv

# Declare the C prototypes. Without them ctypes treats every argument and
# return value as a C int, which truncates the iconv_t handle (a pointer) and
# the size_t result wherever those are wider than int (64-bit builds).
#   iconv_t iconv_open(const char *tocode, const char *fromcode);
#   size_t  iconv(iconv_t cd, char **inbuf, size_t *inbytesleft,
#                 char **outbuf, size_t *outbytesleft);
#   int     iconv_close(iconv_t cd);
iconv_open.restype=c_void_p
iconv_open.argtypes=[c_char_p,c_char_p]
iconv_close.restype=c_int
iconv_close.argtypes=[c_void_p]
iconv_.restype=c_size_t
# The pointer arguments are declared as plain void* so that byref() of any
# ctypes object is accepted, whichever integer type the caller used for the
# byte counters.
iconv_.argtypes=[c_void_p,c_void_p,c_void_p,c_void_p,c_void_p]

def get_errno():
    """Return the integer currently stored behind msvcrt's _errno() pointer."""
    errno_ptr=libc._errno()
    return errno_ptr.contents.value

def errno_str(errno):
    """Return the symbolic name for the integer `errno`.

    Codes listed in errno_codes map to their name ('' for 0, meaning no
    error). Any other code yields the placeholder 'errno <n>' instead of
    raising KeyError, so an unexpected value is still reported as an error
    rather than crashing the lookup.
    """
    return errno_codes.get(errno,'errno %d'%errno)

def iconv(s,fromcode,tocode):
    """Convert byte string `s` from encoding `fromcode` to `tocode` via libiconv.

    s        -- byte string to convert
    fromcode -- libiconv name of the encoding `s` is in, e.g. 'CP949'
    tocode   -- libiconv name of the encoding to produce, e.g. 'UTF-8'

    Returns the converted bytes, including any embedded NUL bytes.
    Raises OSError carrying the C errno when the conversion descriptor cannot
    be opened or when the conversion itself fails.
    """
    cd=iconv_open(tocode,fromcode)
    # Wrap the handle so it is always passed pointer-sized, and so the
    # (iconv_t)-1 failure check works whether ctypes returned it as a signed
    # int or as an unsigned pointer value.
    # NOTE(review): on 64-bit builds iconv_open.restype must be a pointer
    # type, otherwise the handle is already truncated here -- confirm.
    handle=c_void_p(cd)
    if handle.value==c_void_p(-1).value:
        raise OSError(get_errno(),
                      'iconv_open failed for %s -> %s'%(fromcode,tocode))

    try:
        insize=len(s)
        # Output allowance of 6 bytes per input byte, plus one spare byte.
        outsize=insize*6+1
        mybuf=create_string_buffer(outsize)
        outbuf=cast(mybuf,c_char_p)
        inbuf=c_char_p(s)

        # The C API takes size_t counters; c_long is narrower on some ABIs.
        inbytesleft=c_size_t(insize)
        outbytesleft=c_size_t(outsize)

        res=iconv_(handle,byref(inbuf),byref(inbytesleft),
                   byref(outbuf),byref(outbytesleft))
        # Read errno straight away, before any other C call can overwrite it.
        errno=get_errno()
    finally:
        # Release the descriptor even if something above raised.
        iconv_close(handle)

    # iconv() reports failure through its return value, (size_t)-1; errno is
    # only meaningful in that case and may hold a stale value otherwise.
    if c_size_t(res).value==c_size_t(-1).value:
        try:
            name=errno_str(errno)
        except KeyError:
            # errno_str may not know every code.
            name=''
        raise OSError(errno,'iconv failed: %s'%(name or 'errno %d'%errno))

    # Slice by the number of bytes actually written; .value would stop at the
    # first NUL byte and so truncate output such as UTF-16.
    return mybuf.raw[:outsize-outbytesleft.value]


def test():
    """Check that the sample text converts correctly in both directions."""
    cases=(
        (KOREAN_CP949,'CP949','UTF-8',KOREAN_UTF8),
        (KOREAN_UTF8,'UTF-8','CP949',KOREAN_CP949),
    )
    for source,fromcode,tocode,expected in cases:
        assert iconv(source,fromcode,tocode)==expected

# Run the self-test only when executed as a script, not when imported.
if __name__ == "__main__":
    test()

The signatures should look like this:
NB. J bindings for the three libiconv entry points. Each declaration string
NB. names the exported procedure followed by its result and argument types;
NB. iconvapi is defined elsewhere (not shown here) and presumably supplies
NB. the library name and builds the cd call -- confirm against its definition.
NB.   libiconv_open  : integer result, two pointer-to-char arguments
NB.   libiconv       : integer result, one integer and four pointer-to-integer arguments
NB.   libiconv_close : integer result, one integer argument
iconv_open=: ' libiconv_open +x *c *c' iconvapi
iconv_iconv=: ' libiconv + x x *x *x *x *x' iconvapi
iconv_close=: ' libiconv_close + x x' iconvapi

e.g., this converts the wide-character string y to encoding x (beware of line wrapping):
NB. Open a descriptor converting to encoding x from UTF-16 or UTF-32 (UTF-32
NB. when NWCHAR_T is 4), suffixed LE or BE according to bigendian; _1 means failure.
assert. _1~: uconv=. 0{:: iconv_open x;((4=NWCHAR_T){::'UTF-16';'UTF-32'), bigendian{::'LE';'BE'
NB. Output buffer of n3 = 4 * (number of items in y) copies of CNB.
ct=. (n3=. 4*#y)#CNB
NB. Convert: arguments are the descriptor, iad 'y', the input size NWCHAR_T*#y,
NB. iad 'ct', and the output size n3. iad presumably yields the address of the
NB. named noun -- confirm against its definition.
assert. _1~: 0{:: urc=. iconv_iconv uconv;(,iad 'y');(,NWCHAR_T*#y);(,iad 'ct');(,n3)
iconv_close <uconv
NB. Keep the first n3 - (last item of urc) bytes of ct; that last item is the
NB. updated final argument, i.e. the output bytes still unused.
r=. (n3-{._1{::urc){.ct

You should be able to start with the above example. BTW, the file names of the DLLs that I use are
  libiconv-2.dll
  libcharset-1.dll

regards,
----------------------------------------------------------------------
For information about J forums see http://www.jsoftware.com/forums.htm

Reply via email to