June Kim wrote:
How do you convert between character encodings in J? Has anyone tried
using iconv library from J?
Following is my quick and dirty version of python code using iconv.
(You need libiconv2.dll, libcharset1.dll, and libintl3.dll,
which you can obtain from the binaries zip file and the dependencies
zip file at http://gnuwin32.sourceforge.net/packages/libiconv.htm)
Could someone help me translate the code into J? I am not an expert at
using 'cd' in J.
======
from ctypes import *
# Symbolic names for the C errno values this script recognises, keyed by
# number.  0 maps to the empty string, so a falsy name means "no error".
errno_codes = {
    0: '',
    7: 'E2BIG',
    22: 'EINVAL',
    42: 'EILSEQ',
}

# The Korean word "Hangul" encoded as CP949 and as UTF-8; used as test data.
KOREAN_CP949 = '\xc7\xd1\xb1\xdb'
KOREAN_UTF8 = '\xed\x95\x9c\xea\xb8\x80'
# Microsoft C runtime.  _errno() hands back a pointer to the errno variable,
# so its result type is declared as a pointer to int.
libc = cdll.msvcrt
libc._errno.restype = POINTER(c_int)

# libiconv entry points, bound under shorter names.  The conversion function
# gets a trailing underscore because `iconv` is the name of the Python wrapper.
libiconv = cdll.libiconv2
iconv_open, iconv_close, iconv_ = (
    libiconv.libiconv_open,
    libiconv.libiconv_close,
    libiconv.libiconv,
)
def get_errno():
    """Return the integer currently stored in the C runtime's errno."""
    # libc._errno() yields a pointer to int; dereference it to read the value.
    errno_ptr = libc._errno()
    return errno_ptr.contents.value
def errno_str(errno):
    """Return the symbolic name for the C errno value *errno*.

    Values listed in ``errno_codes`` map to their name; 0 maps to the empty
    string, so callers can use the result's truthiness as an "is this an
    error" test.  Any other value yields ``'errno <n>'`` instead of raising
    KeyError, because the table names only a handful of codes and a C call
    can leave other values in errno.
    """
    return errno_codes.get(errno, 'errno %d' % errno)
def _iconv_error(err):
    """Return an OSError describing a failed libiconv call with C errno *err*."""
    try:
        name = errno_str(err)
    except KeyError:
        # errno_str has no name for this code; the number still identifies it.
        name = ''
    return OSError(err, name or 'iconv failed')


def iconv(s, fromcode, tocode):
    """Convert the byte string *s* from encoding *fromcode* to *tocode*.

    All three arguments must be byte strings: no argtypes are declared for
    the libiconv functions, and ctypes would pass a Python 3 ``str`` as a
    wide-character pointer rather than as ``char *``.

    Returns the converted bytes.  The result is sliced by the number of bytes
    libiconv actually wrote, so output that contains NUL bytes (UTF-16,
    UTF-32, ...) is returned whole instead of being cut at the first NUL.

    Raises OSError carrying the C errno if the conversion descriptor cannot
    be opened or the conversion fails.
    """
    # No restype is declared for the libiconv functions, so ctypes returns a
    # C int and both (iconv_t)-1 and (size_t)-1 arrive here as -1.
    # NOTE(review): for the same reason the descriptor is only int-sized;
    # confirm this is not run against a 64-bit libiconv without prototypes.
    cd = iconv_open(tocode, fromcode)
    if cd == -1:
        raise _iconv_error(get_errno())
    try:
        insize = len(s)
        # Generous worst-case expansion factor, plus one spare byte.
        outsize = insize * 6 + 1
        mybuf = create_string_buffer(outsize)
        # iconv advances both pointers and decrements both counters in place.
        outbuf = cast(mybuf, c_char_p)
        inbuf = c_char_p(s)
        # The counters are size_t in the C prototype, not long.
        inbytesleft = c_size_t(insize)
        outbytesleft = c_size_t(outsize)
        res = iconv_(cd, byref(inbuf), byref(inbytesleft),
                     byref(outbuf), byref(outbytesleft))
        # errno is only meaningful when the call reports failure; read it
        # immediately, before any other C call can overwrite it.
        if res == -1:
            raise _iconv_error(get_errno())
        # A NULL input asks iconv to emit whatever is needed to return the
        # output to its initial shift state (matters for stateful encodings).
        res = iconv_(cd, None, None, byref(outbuf), byref(outbytesleft))
        if res == -1:
            raise _iconv_error(get_errno())
    finally:
        # Release the descriptor on every path, including errors.
        iconv_close(cd)
    return mybuf.raw[:outsize - outbytesleft.value]
def test():
    """Check that the Korean sample converts between CP949 and UTF-8 both ways."""
    as_utf8 = iconv(KOREAN_CP949, 'CP949', 'UTF-8')
    assert as_utf8 == KOREAN_UTF8
    as_cp949 = iconv(KOREAN_UTF8, 'UTF-8', 'CP949')
    assert as_cp949 == KOREAN_CP949
# Run the self-test only when executed as a script, not when imported.
if __name__ == '__main__':
    test()
The signatures should look like this:
NB. Foreign-function declarations for the three libiconv entry points.
NB. Each string gives the procedure name, then the result type followed by
NB. the argument types: x is an integer, *c a pointer to characters, *x a
NB. pointer to integers.  '+' selects the alternate (cdecl) calling convention.
NB. NOTE(review): iconvapi is defined elsewhere - presumably it prefixes the
NB. DLL file name and binds the declaration to cd; confirm.
iconv_open=: ' libiconv_open +x *c *c' iconvapi
iconv_iconv=: ' libiconv + x x *x *x *x *x' iconvapi
iconv_close=: ' libiconv_close + x x' iconvapi
E.g., this converts the wide-character string y to encoding x (beware of line wrapping):
NB. Open a conversion descriptor: target encoding x, source 'UTF-16' or
NB. 'UTF-32' (the latter when NWCHAR_T is 4) with 'LE' or 'BE' appended as
NB. selected by bigendian.  The first box of the call result is the return
NB. value; _1 means the open failed.
NB. NOTE(review): NWCHAR_T and bigendian are defined elsewhere - presumably
NB. the wide-character size in bytes and a byte-order flag; confirm.
assert. _1~: uconv=. 0{:: iconv_open x;((4=NWCHAR_T){::'UTF-16';'UTF-32'),
bigendian{::'LE';'BE'
NB. Output buffer ct: n3 = 4 * (#y) copies of CNB.
NB. NOTE(review): CNB is defined elsewhere - presumably a one-character filler; confirm.
ct=. (n3=. 4*#y)#CNB
NB. Convert: pass the descriptor, the address of y with its byte count
NB. (NWCHAR_T*#y), and the address of ct with its capacity n3.  urc holds the
NB. boxed call result; a first box of _1 means the conversion failed.
NB. NOTE(review): iad is defined elsewhere - presumably it returns the memory
NB. address of the named noun; confirm.
assert. _1~: 0{:: urc=. iconv_iconv uconv;(,iad 'y');(,NWCHAR_T*#y);(,iad
'ct');(,n3)
NB. Release the descriptor.
iconv_close <uconv
NB. Keep only the part of ct that was filled: n3 minus the first item of the
NB. last box of urc (presumably the updated count of unused output bytes).
r=. (n3-{._1{::urc){.ct
You should be able to start from the above example. BTW, the file names of the
DLLs that I use are:
libiconv-2.dll
libcharset-1.dll
regards,
----------------------------------------------------------------------
For information about J forums see http://www.jsoftware.com/forums.htm