Arabic complex morphology! I have written a small function which forms a word
given the MEZAN AL SARFEE (the morphological pattern) and its root,
by calling something like
ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x623, AE_MORPH_NEXT, 0x652, AE_MORPH_DIAC_FATHA, AE_MORPH_REST, AE_MORPH_END);
where the 0xXXX values are Unicode letters (Ahrof al ziadaa, the added letters)
and the AE_MORPH_XXX values are directives such as "get the next letter", "get the previous letter" or "get all but the last letters", each with or without diacritics.
#define _GNU_SOURCE /* 1st to get rid of non-GNU, 2nd to have strndup and alloca */
#include <assert.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <glib.h>
/* Number of bytes the code point `ucs` occupies when encoded as UTF-8
 * (g_unichar_to_utf8() with a NULL buffer only computes the length). */
#define g_unichar_len(ucs) g_unichar_to_utf8((ucs),NULL)
/* Byte length of the UTF-8 character starting at `p`.
 * Note: `p` is expanded twice, so it must not have side effects. */
#define g_utf8_len(p) (g_utf8_next_char(p)-(p))
/*
 * Bit flags describing the combining marks attached to a base letter.
 * Bits 1..8 map, in order, onto the consecutive code points U+064B..U+0652;
 * bit 0 stands for any other combining mark.
 */
enum AE_DIACRITICS {
    AE_DIAC_OTHER = 1 << 0, /* any combining mark outside U+064B..U+0652 */
    AE_FATHATAN   = 1 << 1, /* U+064B */
    AE_DAMMATAN   = 1 << 2, /* U+064C */
    AE_KASRATAN   = 1 << 3, /* U+064D */
    AE_FATHA      = 1 << 4, /* U+064E */
    AE_DAMMA      = 1 << 5, /* U+064F */
    AE_KASRA      = 1 << 6, /* U+0650 */
    AE_SHADDA     = 1 << 7, /* U+0651 */
    AE_SUKUN      = 1 << 8  /* U+0652 */
};
/*
 * Read one grapheme cluster: a base character plus the combining marks that
 * follow it.
 *
 * t      - NUL-terminated UTF-8 text, positioned at the start of a character.
 * c      - out: the first code point of the cluster (0 when the text is empty).
 * result - out: OR of AE_DIACRITICS flags for the marks following the base.
 *
 * Returns a pointer just past the cluster, i.e. to the next non-mark
 * character or to the terminating NUL.  For an empty string the input
 * pointer is returned unchanged.
 */
gchar *ae_diacritics(const gchar *t, gunichar *c, gint *result) {
    gchar *txt = (gchar *)t; /* callers expect a writable cursor back */
    gunichar ch;
    gint flags = 0;

    if (*txt == '\0') {
        /* g_utf8_find_next_char() with a NULL end always advances at least
         * one byte, so stepping from the terminator would leave the buffer. */
        *c = 0;
        *result = 0;
        return txt;
    }

    *c = g_utf8_get_char(txt);
    txt = g_utf8_find_next_char(txt, NULL);

    while (*txt) {
        ch = g_utf8_get_char(txt);
        if (g_unichar_break_type(ch) != G_UNICODE_BREAK_COMBINING_MARK)
            break;
        if (ch >= 0x064B && ch <= 0x0652)
            flags |= 1 << (ch - 0x064B + 1); /* tanween, vowels, shadda, sukun */
        else
            flags |= AE_DIAC_OTHER;
        txt = g_utf8_find_next_char(txt, NULL);
    }

    *result = flags;
    return txt;
}
/*
 * Step backwards from `t` to the start of the preceding grapheme cluster.
 *
 * Moves one character back, then keeps moving back while the character
 * under the cursor is a combining mark.  The code point finally reached is
 * stored in *c and its address is returned.
 *
 * NOTE(review): there is no lower bound - the caller must guarantee that a
 * non-mark character exists before `t` inside the same buffer.
 */
gchar *ae_back_gc(const gchar *t, gunichar *c) {
    gchar *pos = (gchar *)t;
    gunichar cur;

    do {
        pos = g_utf8_prev_char(pos);
        cur = g_utf8_get_char(pos);
    } while (*pos && g_unichar_break_type(cur) == G_UNICODE_BREAK_COMBINING_MARK);

    *c = cur;
    return pos;
}
/*
 * Convenience wrapper around ae_diacritics(): returns only the AE_DIACRITICS
 * flag set of the grapheme cluster that starts at `txt`.
 */
int ae_get_diacritics(gchar *txt) {
    gunichar base_char; /* required by ae_diacritics(), not needed here */
    int flags;

    (void)ae_diacritics(txt, &base_char, &flags);
    return flags;
}
/*
 * Count the characters of `t` that are not combining marks, i.e. the number
 * of base letters in the NUL-terminated UTF-8 string.
 */
gint ae_gc_len(const gchar *t) {
    const gchar *cursor;
    gint count = 0;

    for (cursor = t; *cursor; cursor = g_utf8_find_next_char(cursor, NULL)) {
        gunichar cur = g_utf8_get_char(cursor);
        if (g_unichar_break_type(cur) != G_UNICODE_BREAK_COMBINING_MARK)
            count += 1;
    }
    return count;
}
/*
 * Expand the stress (shadda) on the last letter, if any.
 *
 * When the final grapheme cluster of `txt` carries a shadda (U+0651), the
 * result is: everything up to and including the last base letter, a fatha
 * (U+064E), the base letter again, then the cluster's remaining marks with
 * the shadda removed.  Otherwise the result is a plain copy of `txt`.
 *
 * txt - NUL-terminated UTF-8 string.
 * Returns a newly allocated string the caller must free().  The process is
 * aborted if the expanded string cannot be allocated.
 */
gchar *ae_last_stress_expand(const gchar *txt) {
    enum { SHADDA_LEN = 2 }; /* U+0651 occupies two bytes in UTF-8 */
    gunichar ch;
    gint diac = 0;
    gint ch_len;
    size_t l = strlen(txt);
    gchar *last, *cluster_end, *marks, *sh, *str, *ptr;
    ptrdiff_t head_len, before, after;

    /* Nothing to step back over: ae_back_gc() would read before the buffer. */
    if (l == 0)
        return strndup(txt, l);

    last = ae_back_gc(txt + l, &ch);
    cluster_end = ae_diacritics(last, &ch, &diac);
    if (!(diac & AE_SHADDA))
        return strndup(txt, l);

    ch_len = g_unichar_len(ch);
    marks = last + ch_len;  /* first combining mark of the last cluster */
    head_len = marks - txt; /* prefix including the last base letter */

    /* Allocation is done outside assert() so it survives NDEBUG builds. */
    str = malloc(l + ch_len + 1);
    if (str == NULL)
        abort();

    memcpy(str, txt, (size_t)head_len);
    ptr = str + head_len;
    ptr += g_unichar_to_utf8(0x64E, ptr);
    ptr += g_unichar_to_utf8(ch, ptr);

    /* add the remaining diacritics, without the shadda */
    sh = g_utf8_strchr(marks, -1, 0x0651);
    before = sh - marks;
    if (before > 0) {
        memcpy(ptr, marks, (size_t)before);
        ptr += before;
    }
    after = cluster_end - (sh + SHADDA_LEN);
    if (after > 0) {
        memcpy(ptr, sh + SHADDA_LEN, (size_t)after);
        ptr += after;
    }
    *ptr = 0;
    return str;
}
/* The next (disabled) function expands the last stress even if the stress is
   not on the last character. */
/*
gchar *ae_last_stress_expand(const gchar *txt) {
gunichar ch;
gint ch_len,gc_len,l,tmp;
int diac=0;
gchar *r,*s , *sh, *ptr;
l=strlen(txt);
sh=g_utf8_strrchr(txt,l,0x651);
if (sh) {
ptr=ae_back_gc(sh,&ch);
gc_len=ae_diacritics(ptr,&ch, &diac)-ptr;
ch_len=g_unichar_len(ch);
assert((s=malloc(l+ch_len+1))!=NULL);
memcpy(s, txt, l);
r=s; s+=ptr-txt+ch_len;
s+=g_unichar_to_utf8(0x64E,s);
s+=g_unichar_to_utf8(ch,s);
if (sh-ptr-ch_len>0) {
memcpy(s, ptr+ch_len, sh-ptr-ch_len);
s+=sh-ptr-ch_len;
}
if (txt+l-sh-2>0) {
memcpy(s, sh+2, txt+l-sh-2);
s+=txt+l-sh-2;
}
*s=0;
return r;
} else {
return strndup(txt,l);
}
}
*/
/*
 * Expand every shadda in `txt`.
 *
 * Each grapheme cluster that carries a shadda (U+0651) is rewritten as:
 * base letter, sukun (U+0652), base letter again, then the cluster's other
 * marks with the shadda removed.  Clusters without a shadda are copied
 * unchanged.
 *
 * txt - NUL-terminated UTF-8 string.
 * Returns a newly allocated string the caller must free().  The process is
 * aborted when memory cannot be obtained.
 */
gchar *ae_destress(const gchar *txt) {
    enum { SHADDA_LEN = 2 }; /* U+0651 occupies two bytes in UTF-8 */
    gunichar ch;
    gint ch_len, gc_len; /* length of char and grapheme cluster */
    gint diac = 0;
    size_t cap = strlen(txt) + 1;
    gchar *str = strdup(txt);
    gchar *ptr = str, *sh, *grown;
    ptrdiff_t used, before, after;

    if (str == NULL)
        abort();

    while (*txt) {
        gc_len = ae_diacritics(txt, &ch, &diac) - txt;
        if (diac & AE_SHADDA) {
            ch_len = g_unichar_len(ch);

            /* The expansion is ch_len bytes longer than the cluster.  Grow
             * outside assert() so the call survives NDEBUG builds. */
            used = ptr - str;
            cap += ch_len;
            grown = realloc(str, cap);
            if (grown == NULL) {
                free(str);
                abort();
            }
            str = grown;
            ptr = str + used;

            ptr += g_unichar_to_utf8(ch, ptr);
            ptr += g_unichar_to_utf8(0x0652, ptr); /* sukun */
            ptr += g_unichar_to_utf8(ch, ptr);

            /* add the remaining diacritics, without the shadda */
            sh = g_utf8_strchr(txt, -1, 0x0651);
            before = sh - txt - ch_len;
            if (before > 0) {
                memcpy(ptr, txt + ch_len, (size_t)before);
                ptr += before;
            }
            after = txt + gc_len - sh - SHADDA_LEN;
            if (after > 0) {
                memcpy(ptr, sh + SHADDA_LEN, (size_t)after);
                ptr += after;
            }
        } else {
            memcpy(ptr, txt, (size_t)gc_len);
            ptr += gc_len;
        }
        txt += gc_len;
    }
    *ptr = 0;
    return str;
}
/*
 * Backup variant of ae_destress().
 *
 * Performs the same shadda expansion (letter, sukun, letter, remaining
 * marks), but copies the base letter's bytes straight from the input instead
 * of re-encoding the decoded code point.
 *
 * txt - NUL-terminated UTF-8 string.
 * Returns a newly allocated string the caller must free().  The process is
 * aborted when memory cannot be obtained.
 */
gchar *ae_destress_working(const gchar *txt) {
    enum { SHADDA_LEN = 2 }; /* U+0651 occupies two bytes in UTF-8 */
    gunichar ch;
    gint ch_len, gc_len; /* length of char and grapheme cluster */
    gint diac = 0;
    size_t cap = strlen(txt) + 1;
    gchar *str = strdup(txt);
    gchar *ptr = str, *sh, *grown;
    ptrdiff_t used, before, after;

    if (str == NULL)
        abort();

    while (*txt) {
        ch_len = g_utf8_find_next_char(txt, NULL) - txt;
        gc_len = ae_diacritics(txt, &ch, &diac) - txt;
        if (diac & AE_SHADDA) {
            /* Grow outside assert() so the call survives NDEBUG builds. */
            used = ptr - str;
            cap += ch_len;
            grown = realloc(str, cap);
            if (grown == NULL) {
                free(str);
                abort();
            }
            str = grown;
            ptr = str + used;

            memcpy(ptr, txt, (size_t)ch_len);
            ptr += ch_len;
            *ptr++ = '\331'; /* sukun, U+0652 */
            *ptr++ = '\222';
            memcpy(ptr, txt, (size_t)ch_len);
            ptr += ch_len;

            /* add the remaining diacritics, without the shadda */
            sh = g_utf8_strchr(txt, -1, 0x0651);
            before = sh - txt - ch_len;
            if (before > 0) {
                memcpy(ptr, txt + ch_len, (size_t)before);
                ptr += before;
            }
            after = txt + gc_len - sh - SHADDA_LEN;
            if (after > 0) {
                memcpy(ptr, sh + SHADDA_LEN, (size_t)after);
                ptr += after;
            }
        } else {
            memcpy(ptr, txt, (size_t)gc_len);
            ptr += gc_len;
        }
        txt += gc_len;
    }
    *ptr = 0;
    return str;
}
/*
 * Collapse doubled letters back into a shadda, in place.
 *
 * Whenever a grapheme cluster carries a sukun and the very next character is
 * the same base letter, the pair is rewritten as: base letter, shadda
 * (U+0651), then the marks of the second letter.  Any marks on the first
 * letter are dropped.  All other clusters are moved down unchanged.
 *
 * txt - writable NUL-terminated UTF-8 string; it is modified in place and
 *       never grows.
 * Returns txt.
 */
gchar *ae_restress_(gchar *txt) {
gunichar ch;
gint ch_len,gc_len; /* length of char and grapheme cluster */
int diac=0;
/* str: start of the buffer (return value); ptr: write cursor; txt: read cursor */
gchar *str=txt,*ptr=txt,*nxt;
while(*txt) {
ch_len=g_utf8_len(txt);
gc_len=ae_diacritics(txt,&ch, &diac)-txt;
/* letter with sukun, immediately followed by the same letter? */
if ((diac&AE_SUKUN) && (g_utf8_get_char(txt+gc_len)==ch) ) {
if (ptr!=txt) memmove(ptr, txt, ch_len); /* copy the base character alone */
txt+=gc_len;
ptr+=ch_len;
/* write the shadda (U+0651); safe because the sukun being replaced has the same encoded length */
*ptr++='\331'; *ptr++='\221';
/* txt now sits on the second copy of the letter: measure its cluster */
nxt=ae_diacritics(txt,&ch, &diac);
gc_len=nxt-txt;
if (ptr!=txt+ch_len && gc_len-ch_len) memmove(ptr,
txt+ch_len, gc_len-ch_len); /* copy the second letter's marks after the shadda */
txt=nxt;
ptr+=gc_len-ch_len;
} else {
/* ordinary cluster: shift it down to the write cursor if needed */
if (ptr!=txt) memmove(ptr, txt, gc_len);
txt+=gc_len;
ptr+=gc_len;
}
}
*ptr=0;
return str;
}
/* TODO: rewrite destress and restress using recursion and benchmark them */
/*
 * Directives understood by ae_morph().  Positive values in the argument list
 * are literal code points; these zero/negative values are commands.
 */
enum AE_MORPH_MODES {
    AE_MORPH_END        =   0, /* terminates the argument list */
    AE_MORPH_DIAC_ON    =  -1, /* copy root letters with their own marks */
    AE_MORPH_DIAC_OFF   =  -2, /* copy root letters without marks */
    AE_MORPH_DIAC_KASRA =  -3, /* copy root letters, forcing a kasra */
    AE_MORPH_DIAC_DAMMA =  -4, /* copy root letters, forcing a damma */
    AE_MORPH_DIAC_FATHA =  -5, /* copy root letters, forcing a fatha */
    AE_MORPH_SKIP       =  -6, /* advance past one root letter, emit nothing */
    AE_MORPH_NEXT       =  -7, /* emit the next root letter and advance */
    AE_MORPH_GET_NEXT   =  -8, /* emit the next root letter, do not advance */
    /* not enabled: AE_MORPH_BACK=-6, AE_MORPH_PREV=-7, AE_MORPH_GET_PREV=-8 */
    AE_MORPH_REST       =  -9, /* emit all remaining root letters */
    AE_MORPH_REST_1     = -10, /* rest but 1 */
    AE_MORPH_REST_2     = -11, /* rest but 2 */
    AE_MORPH_MAX        = -12
};
/* Encode a count `n` as a directive value below AE_MORPH_MAX ... */
#define AE_MORPH_NUM(n) (AE_MORPH_MAX-(n))
/* ... and recover the count from such an encoded value `v`. */
#define AE_MORPH_GET_NUM(v) (AE_MORPH_NUM(0)-(v))
/* TODO: size is not used!! */
gchar *ae_morph(gchar *root,gchar *buff,gint size,...) {
gchar *ptr=buff,*txt=root,*s1,*s2,s3;
int op,l,ll,diac,diac_st=1; /* 0:no diac,1:diac on*/
gunichar ch;
va_list ap;
va_start(ap, size);
while((op = va_arg(ap, gunichar))!=AE_MORPH_END) {
/* printf("<%d>\n",op); */
if (op>0) {
ae_diacritics(txt,&ch, &diac);
/* if (op==0x671 && (ch==0x671 || ch==0x622 ||
ch==0x623 || ch==0x625 || 0x672 || 0x673) ) continue; */
ptr+=g_unichar_to_utf8(op,ptr);
} else switch(op) {
case AE_MORPH_DIAC_ON: diac_st=1; break;
case AE_MORPH_DIAC_OFF: diac_st=0; break;
case AE_MORPH_DIAC_FATHA:
case AE_MORPH_DIAC_DAMMA:
case AE_MORPH_DIAC_KASRA:
diac_st=op; break;
case AE_MORPH_SKIP:
txt=ae_diacritics(txt,&ch, &diac);
break;
case AE_MORPH_NEXT:
case AE_MORPH_GET_NEXT:
if (diac_st==1) {
l=ae_diacritics(txt,&ch, &diac)-txt;
memcpy(ptr,txt,l);
ptr+=l;
if (op==AE_MORPH_NEXT) txt+=l;
} else {
ll=ae_diacritics(txt,&ch, &diac)-txt;
l=g_unichar_to_utf8(ch,NULL);
memcpy(ptr,txt,l);
ptr+=l;
if (op==AE_MORPH_NEXT) txt+=ll;
if (diac_st) ptr+= g_unichar_to_utf8( 0x64E +
diac_st - AE_MORPH_DIAC_FATHA , ptr);
}
break;
case AE_MORPH_REST_1:
case AE_MORPH_REST_2:
case AE_MORPH_REST:
if (diac_st==1) {
*ptr=0;
l=strlen(txt);
strcpy(ptr,txt);
txt+=l;
ptr+=l;
} else {
while(*txt) {
txt=ae_diacritics(txt,&ch, &diac);
ptr+=g_unichar_to_utf8(ch,ptr);
if (diac_st) ptr+= g_unichar_to_utf8(
0x64E + diac_st - AE_MORPH_DIAC_FATHA , ptr);
}
}
if (op!=AE_MORPH_REST) {
txt=ae_back_gc(txt,&ch);
*(ptr=ae_back_gc(ptr,&ch))=0;
}
if (op==AE_MORPH_REST_2) {
txt=ae_back_gc(txt,&ch);
*(ptr=ae_back_gc(ptr,&ch))=0;
}
break;
default:
/* TODO: */
printf("not implemented-d\n");
break;
} /* END switch */
/* printf(":\n"); */
} /* END WHILE */
*ptr=0;
va_end(ap);
return buff;
}
/*
 * ae_filter_: convert its input, in place, to spell-format (mainly dealing
 * with alef and hamza).
 *
 * The marker letters U+0672 ("alef whose origin is waw") and U+0673 ("alef
 * whose origin is ya") are replaced:
 *   - by waw (U+0648) / ya (U+064A) when the letter's marks are anything
 *     other than exactly a fatha, or when the previous base letter was a
 *     plain alef (U+0627);
 *   - by a plain alef (U+0627) otherwise.
 * Everything else is left untouched.  Returns txt.
 *
 * Rules noted by the original author (translated from a mis-encoded Arabic
 * comment, so the wording is approximate; only the waw/ya restoration is
 * implemented in the code below):
 *   + a soft alef carries no vowel marks;
 *   + if it is vocalized with anything but fatha (shadda included) it
 *     returns to its origin, waw or ya;
 *   + a soft alef followed by a soft alef: the second returns to its origin;
 *   + if its origin is neither waw nor ya, it is a hamza;
 *   + a hamza followed by a soft alef is written as a madda;
 *   + hamzat al-wasl is added to avoid starting with an unvowelled letter.
 */
gchar *ae_filter_(gchar *txt) {
    gchar *const head = txt;
    gchar *out = txt;
    gunichar cur, prev = 0;
    gint base_len, cluster_len, marks_len; /* byte lengths */
    int marks = 0;

    for (; *txt; txt += cluster_len, prev = cur) {
        base_len = g_utf8_len(txt);
        cluster_len = ae_diacritics(txt, &cur, &marks) - txt;
        marks_len = cluster_len - base_len;

        if (cur != 0x672 && cur != 0x673) {
            out += cluster_len;
            continue;
        }

        if (marks != AE_FATHA || prev == 0x627) {
            /* restore the original weak letter: waw for U+0672, ya for
             * U+0673 (same encoded length as the marker it replaces) */
            *out++ = '\331';
            *out++ = (cur == 0x672) ? '\210' : '\212';
            if (out != txt)
                memmove(out, txt + base_len, marks_len);
        } else {
            /* plain alef, U+0627 */
            *out++ = '\330';
            *out++ = '\247';
        }
        out += marks_len;
    }
    return head;
}
/*
* sub_roots_sp: the returned list in almost spell-format
* which needs to be filtered to be in spell-format
*/
GList *sub_roots_sp(const gchar *root_str) {
/* TODO: ضبط عÙ٠اÙÙØ¹Ù */
/* TODO: Ø§ÙØªØ¹Ø§Ù
Ù Ù
ع اÙÙÙ
زات ÙØØ±ÙÙ Ø§ÙØ¹ÙØ©
ÙØ§ÙØ¥Ù
ÙØ§Ø¡ */
GList *ls = NULL;
gchar *base_root, *root,*buff,*str,*s1,*s2,*s3;
gint s,size;
gunichar ch;
gint diac;
root=ae_destress(root_str);
s=strlen(root)+1; size=s+16;
buff=(gchar *)malloc(size);
base_root=strndup(root_str,size-1);
ae_restress_(base_root);
ls = g_list_append (ls, base_root);
/* Ø£ÙØ¹Ù */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x623, AE_MORPH_NEXT, 0x652,
AE_MORPH_DIAC_FATHA, AE_MORPH_REST,
AE_MORPH_END)),size-1));
/* ÙØ§Ø¹Ù */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_NEXT, AE_MORPH_DIAC_FATHA,
0x627, AE_MORPH_REST,
AE_MORPH_END)),size-1));
/* ÙØ¹ÙÙ */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_NEXT, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT,
AE_MORPH_DIAC_ON, 0x651, 0x64E, AE_MORPH_REST,
AE_MORPH_END)),size-1));
/* اÙÙØ¹Ù */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_FATHA, 0x671, 0x646, 0x652,AE_MORPH_REST,
AE_MORPH_END)),size-1));
/* Ø§ÙØ¹ÙÙ */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652,
AE_MORPH_DIAC_FATHA,
AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x651,0x64E,
AE_MORPH_END)),size-1));
/* ØªÙØ¹ÙÙ */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_ON, 0x62A, 0x64E, AE_MORPH_NEXT,
AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,
AE_MORPH_DIAC_ON, 0x651, 0x64E, AE_MORPH_REST,
AE_MORPH_END)),size-1));
/* ØªÙØ§Ø¹Ù */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT, 0x64E,
0x627, AE_MORPH_DIAC_FATHA, AE_MORPH_REST,
AE_MORPH_END)),size-1));
/* Ø§Ø³ØªÙØ¹Ù */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_ON, 0x671, 0x633, 0x652, 0x62A, 0x64E,
AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652 , AE_MORPH_NEXT, 0x64E,
AE_MORPH_DIAC_ON, AE_MORPH_REST,
AE_MORPH_END)),size-1));
/* Ø§ÙØ¹Ùع٠*/
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652,
AE_MORPH_GET_NEXT, 0x64E,
0x648, 0x652, AE_MORPH_NEXT, 0x64E, AE_MORPH_DIAC_ON,
AE_MORPH_REST,
AE_MORPH_END)),size-1));
ae_diacritics(root,&ch, &diac);
/* TODO: ضبط عÙ٠اÙÙØ¹Ù */
/* Ø§ÙØªØ¹Ù */
switch(ch) {
case 0x648: case 0x64A:
/* اتع٠*/
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_SKIP,
0x62A, 0x64E,
AE_MORPH_DIAC_ON, AE_MORPH_REST,
AE_MORPH_END)),size-1));
break;
case 0x630:
/* Ø§ÙØ¯Ø¹Ù */
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_SKIP, 0x62F
, 0x652, 0x62F, 0x64E,
AE_MORPH_DIAC_ON, AE_MORPH_REST,
AE_MORPH_END)),size-1));
case 0x62F: case 0x632:
/* Ø§ÙØ¯Ø¹Ù */
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652,
0x62F, 0x64E,
AE_MORPH_DIAC_ON, AE_MORPH_REST,
AE_MORPH_END)),size-1));
break;
case 0x635: case 0x636: case 0x637: case 0x638:
/* Ø§ÙØ·Ø¹Ù */
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652,
0x637, 0x64E,
AE_MORPH_DIAC_ON, AE_MORPH_REST,
AE_MORPH_END)),size-1));
break;
default:
/* Ø§ÙØªØ¹Ù */
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652,
0x62A, 0x64E,
AE_MORPH_DIAC_FATHA, AE_MORPH_REST,
AE_MORPH_END)),size-1));
}
free(buff);
free(root);
return ls;
}
GList *sub_roots_attach_pronoun(const gchar *root_str) {
GList *ls = NULL;
gchar *base_root, *root, *expanded;
gchar *buff,*str,*s1,*s2,*s3;
gint s,size;
gunichar ch;
gint diac;
root=ae_destress(root_str);
expanded=ae_last_stress_expand(root_str); /* use restress */
s=strlen(root)+1; size=s+16;
buff=(gchar *)malloc(size);
/* ÙÙ ÙØ¹Ù */
base_root=strndup(root_str,size-1);
ae_restress_(base_root);
ls = g_list_append (ls, base_root);
/* ÙØ¹ÙÙ ((تÙ)|(ÙØ§)|(تÙ)|(تÙ
ا)|(تÙ
)|(تÙ)|(تÙÙ)|(Ù))
*/
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, 0x62A, 0x64F,
AE_MORPH_END)),size-1));
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, 0x646, 0x64E, 0x627,
AE_MORPH_END)),size-1));
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, 0x62A, 0x64E,
AE_MORPH_END)),size-1));
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, 0x62A, 0x64F, 0x645,0x64E, 0x627,
AE_MORPH_END)),size-1));
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, 0x62A, 0x64F, 0x645,
AE_MORPH_END)),size-1));
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, 0x62A, 0x650,
AE_MORPH_END)),size-1));
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, 0x62A, 0x64F, 0x646, 0x651, 0x64E,
AE_MORPH_END)),size-1));
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, 0x646, 0x64E,
AE_MORPH_END)),size-1));
/* ÙÙ
ا/ÙÙ
ا/ÙÙ ÙØ¹ÙÙ ( (ا)|(تÙ)|(تا) ) */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x64E, 0x627,
AE_MORPH_END)),size-1));
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x64E, 0x62A, 0x64E, 0x627,
AE_MORPH_END)),size-1));
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x64E, 0x62A, 0x652,
AE_MORPH_END)),size-1));
/* ÙÙ
ÙØ¹ÙÙ (ÙØ§) */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x64f, 0x648, 0x652,0x627,
AE_MORPH_END)),size-1));
/* اÙÙ
ضارع */
/* (Ù|ت)ÙØ¹ÙÙ */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F,
AE_MORPH_END)),size-1));
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F,
AE_MORPH_END)),size-1));
/* (Ø£|Ù)ÙØ¹ÙÙ */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x623, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F,
AE_MORPH_END)),size-1));
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x646, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F,
AE_MORPH_END)),size-1));
/* (Ù|ت)ÙØ¹ÙÙ ( (اÙ)|(ÙÙ) ) */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652,
0x627,0x646,0x650,
AE_MORPH_END)),size-1));
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652,
0x627,0x646,0x650,
AE_MORPH_END)),size-1));
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652,
0x648,0x646,0x64E,
AE_MORPH_END)),size-1));
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652,
0x648,0x646,0x64E,
AE_MORPH_END)),size-1));
/* ØªÙØ¹ÙÙ (ÙÙ) */
ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x650,
0x64A,0x652, 0x646,0x64E,
AE_MORPH_END)),size-1));
/* ((ت|(Ù))ÙØ¹ÙÙ (Ù) */
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652,
0x646,0x64E,
AE_MORPH_END)),size-1));
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652,
AE_MORPH_DIAC_ON,
AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652,
0x646,0x64E,
AE_MORPH_END)),size-1));
/* Ø§ÙØ£Ù
ر */
/* Ø§ÙØ¹Ù ((ا)|(ÙØ§)|(Ù)|(Ù))Ø */
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, 0x671, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, AE_MORPH_END)),size-1));
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, 0x671, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, 0x627, AE_MORPH_END)),size-1));
ls = g_list_append (ls,
strndup(ae_restress_(ae_morph(expanded,buff,size,
AE_MORPH_DIAC_ON, 0x671, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
AE_MORPH_NEXT, 0x652, 0x648, 0x64F,0x627,
AE_MORPH_END)),size-1));
free(buff);
free(expanded);
free(root);
return ls;
}
/* test */
int main() {
gchar str[]="جدÙÙ";
gchar *s;
GList *ls,*l2;
ls=sub_roots_sp("جدÙÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("Ø¯ÙØ±ÙسÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("سÙÙÙÙÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("ÙÙØ¨ÙØÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("ÙÙØ¨ÙÙÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("ÙÙØµÙÙÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("ÙÙØ¨ÙÙÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("ذÙÙÙØ±Ù");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("Ø¶ÙØ±ÙبÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("Ø¯ÙØÙØ±ÙجÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("زÙÙÙØ²ÙÙÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("Ø¨ÙØ¹ÙØ«ÙØ±Ù");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("ÙÙØ¹ÙدÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("ÙÙØ³ÙÙÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("ÙÙÙ²ÙÙÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",
ae_filter_((gchar *)ls->data));
ls=g_list_next(ls);
}
ls=sub_roots_sp("سÙÙ³ÙØ±Ù");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",
ae_filter_((gchar *)ls->data));
ls=g_list_next(ls);
}
ls=sub_roots_sp("ÙÙØ´ÙÙ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("Ø£ÙÙ
ÙØ±Ù");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("Ø¬ÙØ¯ÙÙ");
ls=g_list_first(ls);
printf("--------\n\tØ§Ø³ÙØ§Ø¯ [%s] ÙÙØ¶Ù
ائر\n",(gchar
*)ls->data);
while(ls) {
printf("--------\n\tØ§Ø³ÙØ§Ø¯ [%s] ÙÙØ¶Ù
ائر\n",(gchar
*)ls->data);
l2=sub_roots_attach_pronoun((gchar *)ls->data);
l2=g_list_first(l2);
while(l2) {
printf("[%s]\n",(gchar *)l2->data);
l2=g_list_next(l2);
}
ls=g_list_next(ls);
}
ls=sub_roots_sp("Ø¯ÙØ±ÙسÙ");
ls=g_list_first(ls);
printf("--------\n\tØ§Ø³ÙØ§Ø¯ [%s] ÙÙØ¶Ù
ائر\n",(gchar
*)ls->data);
while(ls) {
printf("--------\n\tØ§Ø³ÙØ§Ø¯ [%s] ÙÙØ¶Ù
ائر\n",(gchar
*)ls->data);
l2=sub_roots_attach_pronoun((gchar *)ls->data);
l2=g_list_first(l2);
while(l2) {
printf("[%s]\n",(gchar *)l2->data);
l2=g_list_next(l2);
}
ls=g_list_next(ls);
}
}
_______________________________________________ Developer mailing list [email protected] http://lists.arabeyes.org/mailman/listinfo/developer

