I'm attaching a diff which adds a -b option to apertium-transfer, so that
it can take the output of lt-proc -b as input (external lexical transfer).
$ echo "L'estació més plujosa del any és l'estiu" | apertium -d .
ca-en-biltrans
^El<det><def><f><sg>/The<det><def><f><sg>$
^estació<n><f><sg>/season<n><sg>/station<n><sg>$
^més<preadv>/more<preadv>$ ^plujós<adj><f><sg>/rainy<adj><sint><f><sg>$
^de<pr>/of<pr>/from<pr>$ ^el<det><def><m><sg>/the<det><def><m><sg>$
^any<n><m><sg>/year<n><sg>$
^ser<vbser><pri><p3><sg>/be<vbser><pri><p3><sg>$
^el<det><def><m><sg>/the<det><def><m><sg>$
^estiu<n><m><sg>/summer<n><sg>$^.<sent>/.<sent>$
The translation part works OK:
$ echo "L'estació més plujosa del any és l'estiu" | apertium-destxt |
apertium -f none -d . ca-en-biltrans
| /home/fran/source/apertium/trunk/apertium/apertium/apertium-transfer
-b apertium-en-ca.ca-en.t1x ca-en.t1x.bin | apertium-retxt
^default<default>{^The<det><def><f><sg>$}$
^default<default>{^season<n><sg>$}$ ^default<default>{^more<preadv>$}$
^default<default>{^rainy<adj><sint><f><sg>$}$
^default<default>{^of<pr>$}$ ^default<default>{^the<det><def><m><sg>$}$
^default<default>{^year<n><sg>$}$
^default<default>{^be<vbser><pri><p3><sg>$}$
^default<default>{^the<det><def><m><sg>$}$
^default<default>{^summer<n><sg>$}$^default<default>{^.<sent>$}$
However, I don't understand why the transfer rules aren't being applied:
every word ends up in its own default chunk. Compare with the normal pipeline:
$ echo "L'estació més plujosa del any és l'estiu" | apertium -d .
ca-en-chunker
^Det_nom_adv_adj<SN><f><sg>{^the<det><def><sg>$ ^rainy<adj><sint><sup>$
^season<n><3>$}$ ^de<PREP>{^of<pr>$}$
^el_nom<SN><m><sg>{^the<det><def><3>$ ^year<n><3>$}$
^verbcj<SV><vbser><pri><p3><sg>{^be<vbser><pri><4><5>$}$
^el_nom<SN><m><sg>{^the<det><def><3>$
^summer<n><3>$}$^punt<sent>{^.<sent>$}$
Any ideas?
Fran
Index: apertium_transfer.cc
===================================================================
--- apertium_transfer.cc (revision 35515)
+++ apertium_transfer.cc (working copy)
@@ -38,6 +38,7 @@
void message(char *progname)
{
cerr << "USAGE: " << basename(progname) << " trules preproc biltrans [input [output]]" << endl;
+ cerr << " " << basename(progname) << " -b trules preproc [input [output]]" << endl;
cerr << " " << basename(progname) << " -n trules preproc [input [output]]" << endl;
cerr << " " << basename(progname) << " -x extended trules preproc biltrans [input [output]]" << endl;
cerr << " " << basename(progname) << " -c trules preproc biltrans [input [output]]" << endl;
@@ -47,6 +48,7 @@
cerr << " biltrans bilingual letter transducer file" << endl;
cerr << " input input file, standard input by default" << endl;
cerr << " output output file, standard output by default" << endl;
+ cerr << " -b input from lexical transfer" << endl;
cerr << " -n don't use bilingual dictionary" << endl;
cerr << " -x bindix extended mode with user dictionary" << endl;
cerr << " -c case-sensitiveness while accessing bilingual dictionary" << endl;
@@ -108,6 +110,7 @@
#if HAVE_GETOPT_LONG
static struct option long_options[] =
{
+ {"from-bilingual", no_argument, 0, 'b'},
{"no-bilingual", no_argument, 0, 'n'},
{"extended", required_argument, 0, 'x'},
{"case-sensitive", no_argument, 0, 'c'},
@@ -117,15 +120,19 @@
{0, 0, 0, 0}
};
- int c=getopt_long(argc, argv, "nx:czth", long_options, &option_index);
+ int c=getopt_long(argc, argv, "nbx:czth", long_options, &option_index);
#else
- int c=getopt(argc, argv, "nx:czth");
+ int c=getopt(argc, argv, "nbx:czth");
#endif
if (c==-1)
break;
switch (c)
{
+ case 'b':
+ t.setPreBilingual(true);
+ break;
+
case 'n':
t.setUseBilingual(false);
break;
@@ -167,7 +174,7 @@
break;
case 5:
- if(t.getUseBilingual() == false)
+ if(t.getUseBilingual() == false || t.getPreBilingual() == true)
{
output = open_output(argv[argc-1]);
input = open_input(argv[argc-2]);
@@ -186,7 +193,7 @@
break;
case 4:
- if(t.getUseBilingual() == false)
+ if(t.getUseBilingual() == false || t.getPreBilingual() == true)
{
input = open_input(argv[argc-1]);
testfile(argv[argc-2]);
@@ -202,7 +209,7 @@
}
break;
case 3:
- if(t.getUseBilingual() == false)
+ if(t.getUseBilingual() == false || t.getPreBilingual() == true)
{
testfile(argv[argc-1]);
testfile(argv[argc-2]);
Index: transfer.h
===================================================================
--- transfer.h (revision 35515)
+++ transfer.h (working copy)
@@ -77,6 +77,7 @@
enum OutputType{lu,chunk};
OutputType defaultAttrs;
+ bool preBilingual;
bool useBilingual;
bool null_flush;
bool internal_null_flush;
@@ -139,6 +140,8 @@
void transfer(FILE *in, FILE *out);
void setUseBilingual(bool value);
bool getUseBilingual(void) const;
+ void setPreBilingual(bool value);
+ bool getPreBilingual(void) const;
void setExtendedDictionary(string const &fstfile);
void setCaseSensitiveness(bool value);
bool getNullFlush(void);
Index: transfer.cc
===================================================================
--- transfer.cc (revision 35515)
+++ transfer.cc (working copy)
@@ -59,6 +59,7 @@
lastrule = NULL;
defaultAttrs = lu;
useBilingual = true;
+ preBilingual = false;
isExtended = false;
null_flush = false;
internal_null_flush = false;
@@ -1779,7 +1780,7 @@
if(tmpword.size() != 0)
{
pair<wstring, int> tr;
- if(useBilingual)
+ if(useBilingual && preBilingual == false)
{
if(isExtended && (*tmpword[0])[0] == L'*')
{
@@ -1798,6 +1799,35 @@
tr = fstp.biltransWithQueue(*tmpword[0], false);
}
}
+ else if(preBilingual)
+ {
+ wstring sl = L"";
+ wstring tl = L"";
+ int seenParen = 0;
+ for(wstring::const_iterator it = tmpword[0]->begin(); it != tmpword[0]->end(); it++)
+ {
+ if(*it == L'/')
+ {
+ seenParen++;
+ continue;
+ }
+ if(seenParen == 0)
+ {
+ sl.push_back(*it);
+ }
+ else if(seenParen == 1)
+ {
+ tl.push_back(*it);
+ }
+ else if(seenParen > 1)
+ {
+ break;
+ }
+ }
+ tmpword[0]->assign(sl);
+ tr = pair<wstring, int>(tl, 1);
+ //wcerr << L"pb: " << *tmpword[0] << L" :: " << sl << L" >> " << tl << endl ;
+ }
else
{
tr = pair<wstring, int>(*tmpword[0], 0);
@@ -1923,10 +1953,38 @@
}
pair<wstring, int> tr;
- if(useBilingual)
+ if(useBilingual && preBilingual == false)
{
tr = fstp.biltransWithQueue(*tmpword[i], false);
}
+ else if(preBilingual)
+ {
+ wstring sl = L"";
+ wstring tl = L"";
+ int seenParen = 0;
+ for(wstring::const_iterator it = tmpword[0]->begin(); it != tmpword[0]->end(); it++)
+ {
+ if(*it == L'/')
+ {
+ seenParen++;
+ continue;
+ }
+ if(seenParen == 0)
+ {
+ sl.push_back(*it);
+ }
+ else if(seenParen == 1)
+ {
+ tl.push_back(*it);
+ }
+ else if(seenParen > 1)
+ {
+ break;
+ }
+ }
+ tmpword[0]->assign(sl);
+ tr = pair<wstring, int>(tl, 1);
+ }
else
{
tr = pair<wstring, int>(*tmpword[i], false);
@@ -1962,6 +2020,7 @@
ms.init(me->getInitial());
}
+/* HERE */
void
Transfer::applyWord(wstring const &word_str)
{
@@ -2004,6 +2063,18 @@
}
void
+Transfer::setPreBilingual(bool value)
+{
+ preBilingual = value;
+}
+
+bool
+Transfer::getPreBilingual(void) const
+{
+ return preBilingual;
+}
+
+void
Transfer::setUseBilingual(bool value)
{
useBilingual = value;
------------------------------------------------------------------------------
Write once. Port to many.
Get the SDK and tools to simplify cross-platform app development. Create
new or port existing apps to sell to consumers worldwide. Explore the
Intel AppUpSM program developer opportunity. appdeveloper.intel.com/join
http://p.sf.net/sfu/intel-appdev
_______________________________________________
Apertium-stuff mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/apertium-stuff