This is an automated email from the git hooks/post-receive script. rene pushed a commit to branch master in repository hunspell.
commit 35ec2cc065ffc738f18a67c12d8ebb078de2da0c Author: Rene Engelhard <[email protected]> Date: Wed Mar 28 16:29:52 2007 +0200 Imported Debian patch 1.1.5-3 --- debian/changelog | 11 + debian/control | 27 +++ debian/hunspell-tools.install | 3 + debian/ispellaff2myspell | 472 ++++++++++++++++++++++++++++++++++++++++++ debian/rules | 6 +- 5 files changed, 518 insertions(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 7a1253c..af95a68 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,14 @@ +hunspell (1.1.5-3) experimental; urgency=low + + * add new hunspell-tools package superseding libmyspell-devs tools + (package not built anymore) + - add ispellaff2myspell from libmyspell-dev here + - install munch and unmunch + - conflict and replace (against) libmyspell-dev + * install src/tools/example.cxx + + -- Rene Engelhard <[email protected]> Wed, 28 Mar 2007 16:29:52 +0200 + hunspell (1.1.5-2) experimental; urgency=low * fix hunspell.hxx diff --git a/debian/control b/debian/control index 745867c..b6f4f7d 100644 --- a/debian/control +++ b/debian/control @@ -78,3 +78,30 @@ Description: spell checker and morphological analyzer (program) This package contains the program with the Ispell-like terminal and pipe interfaces. +Package: hunspell-tools +Architecture: any +Depends: ${shlibs:Depends}, ${perl:Depends} +Conflicts: libmyspell-dev +Replaces: libmyspell-dev +Description: tools for hunspell + Hunspell is a spell checker and morphological analyzer library and program + designed for languages with rich morphology and complex word compounding or + character encoding. It is based on MySpell and features an Ispell-like + terminal interface using Curses library, an Ispell pipe interface and an + OpenOffice.org UNO module. + . + Main features: + - Unicode support (first 65535 Unicode character) + - morphological analysis (in custom item and arrangement style) + - Max. 65535 affix classes and twofold affix stripping (for agglutinative + languages, like Azeri, Basque, Estonian, Finnish, Hungarian, Turkish, etc.) + - Support complex compoundings (for example, Hungarian and German) + - Support language specific algorithms (for example, handling Azeri + and Turkish dotted i, or German sharp s) + - Handling conditional affixes, circumfixes, fogemorphemes, + forbidden words, pseudoroots and homonyms. + . + This package contains a the munch/unmunch tools of hunspell and + ispellaff2myspell for converting ispell affix files for myspell/hunspell + format. + diff --git a/debian/hunspell-tools.install b/debian/hunspell-tools.install new file mode 100644 index 0000000..6032946 --- /dev/null +++ b/debian/hunspell-tools.install @@ -0,0 +1,3 @@ +debian/ispellaff2myspell usr/bin +debian/ispellaff2myspell.1 usr/share/man/man1 +debian/tmp/usr/bin/*munch diff --git a/debian/ispellaff2myspell b/debian/ispellaff2myspell new file mode 100644 index 0000000..6f2e79e --- /dev/null +++ b/debian/ispellaff2myspell @@ -0,0 +1,472 @@ +#!/usr/bin/perl -w +# -*- coding: iso-8859-1 -*- +# $Id: ispellaff2myspell,v 1.29 2005/07/04 12:21:55 agmartin Exp $ +# +# (C) 2002-2005 Agustin Martin Domingo <[email protected]> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + +sub usage { + print "ispellaff2myspell: A program to convert ispell affix tables to myspell format +(C) 2002-2005 Agustin Martin Domingo <agustin.martin\@hispalinux.es> License: GPL + +Usage: + ispellaff2myspell [options] <affixfile> + + Options: + --affixfile=s Affix file + --bylocale Use current locale setup for upper/lowercase + conversion + --charset=s Use specified charset for upper/lowercase + conversion (defaults to latin1) + --debug Print debugging info + --extraflags Allow some non alphabetic flags + --lowercase=s Lowercase string + --myheader=s Header file + --printcomments Print commented lines in output + --replacements=s Replacements file + --split=i Split flags with more that i entries + --uppercase=s Uppercase string + --wordlist=s Still unused + + Currently allowed valued for charset are: latin1, latin2, latin3 + +This script does not create the dict file. Something like + +( echo `cat mydict.words+ | wc -l`; cat mydict.words+ ) > mydict.dict + +should do the work, with mydict.words+ being the ispell munched wordlist + +"; + exit; +} + +sub debugprint { + if ( $debug ){ + print STDERR "@_"; + } +} + +sub shipoutflag{ + my $flag_entries=scalar @flag_array; + + if ( $flag_entries != 0 ){ + if ( $split ){ + while ( @flag_array ){ + my @flag_subarray=splice(@flag_array,0,$split); + my $subflag_entries=scalar @flag_subarray; + if ( scalar @flag_array ){ + print "$myaffix $flagname $flagcombine $subflag_entries S\n"; + } else { + print "$myaffix $flagname $flagcombine $subflag_entries\n"; + } + print join("\n",@flag_subarray); + print "\n\n"; + } + } else { + print "$myaffix $flagname $flagcombine $flag_entries\n"; + print join("\n",@flag_array); + print "\n\n"; + } + } + @flag_array=(); + $flagname=''; + $flagcombine=''; +} + +sub mylc{ + my $inputstring=shift; + my $outputstring; + + if ( $bylocale ){ + { + use locale; + $outputstring = lc $inputstring; + } + } else { + if ( $charset eq "latin0" ){ + $lowercase='a-z���������������������������������'; + $uppercase='A-Z�������������������������������'; + } elsif ( $charset eq "latin1" ){ + $lowercase='a-z������������������������������'; + $uppercase='A-Z������������������������������'; + } elsif ( $charset eq "latin2" ){ + $lowercase='a-z����������������������������������������'; + $uppercase='A-Z����������������������������������������'; + } elsif ( $charset eq "latin3" ){ + $lowercase='a-z������������������������������������'; + $uppercase='A-Z������������������������������������'; +# } elsif ( $charset eq "other_charset" ){ +# die "latin2 still unimplemented"; + } else { + if ( not $lowercase and not $uppercase ){ + die "Unsupported charset [$charset] + +use explicitely --lowercase=string and --uppercase=string +options. Remember that both string must match exactly, but +case changed. +"; + } + } + $outputstring=$inputstring; + eval "\$outputstring=~tr/$uppercase/$lowercase/"; + } + return $outputstring; +} + +sub validate_flag (){ + my $flag = shift; + if ($flag=~m/[a-zA-Z]+/){ + return $flag; + } elsif ( $hasextraflags ){ + foreach ( keys %theextraflags ){ + if ($flag =~ m/^$_/){ + $flag =~ s/^$_//; + return $flag; + } + } + } + return ''; +} + +sub process_replacements{ + my $file = shift; + my @replaces = (); + + open (REPLACE,"< $file") || + die "Error: Could not open replacements file: $file\n"; + while (<REPLACE>){ + next unless m/^REP[\s\t]*\D.*/; + next if m/^REP\s+[0-9]+/; + s/\015\012//; + s/\015//; + chomp; + push @replaces, $_; + } + close REPLACE; + my $number = scalar @replaces; + print "REP $number\n"; + foreach ( @replaces ){ + print $_ . "\n"; + } +} + +# ----------------------------------------------------------- +# Now the progran start, after the functions are defined +# ----------------------------------------------------------- + +use Getopt::Long; + +# Initializing option values +$affixfile = ''; +$bylocale = ''; +$charset = ''; +$debug = ''; +$lowercase = ''; +$myheader = ''; +$printcomments = ''; +$replacements = ''; +$split = ''; +$uppercase = ''; +$wordlist = ''; +$hasextraflags = ''; +@flag_array = (); +%theextraflags = (); +# Initializing root values +$rootremove = "0"; +$rootname = ''; +$addtoroot = ''; +$comment = ''; +# Initializing flag values +$flagname = ''; +$flagcombine = ''; +$inflags = ''; + +GetOptions ('affixfile=s' => \$affixfile, + 'bylocale' => \$bylocale, + 'charset=s' => \$charset, + 'debug' => \$debug, + 'extraflags:s' => sub { + $hasextraflags = 1; + shift; + $theflag = shift; + $theextraflags{$theflag}++ if $theflag}, + 'lowercase=s' => \$lowercase, + 'myheader=s' => \$myheader, + 'printcomments' => \$printcomments, + 'replacements=s'=> \$replacements, + 'split=i' => \$split, + 'uppercase=s' => \$uppercase, + 'wordlist=s' => \$wordlist) or usage; + +if ( not $affixfile ){ + $affixfile=shift or usage; +} + +if ( $charset and ( $lowercase or $uppercase )){ + die "Error: charset and lowercase/uppercase options +are incompatible. Use either charset or lowercase/uppercase options to +specify the patterns +" +} elsif ( not $lowercase and not $uppercase and not $charset ){ + $charset="latin1"; +} + +if ( scalar(keys %theextraflags) == 0 && $hasextraflags ){ + $theextraflags{"\\\\"}++; +} + +debugprint "$affixfile $charset"; + +open (AFFIXFILE,"< $affixfile") || + die "Error: Could not open affix file: $affixfile"; + +if ( $myheader ){ + my $myspell_header=`cat $myheader`; + print $myspell_header . "\n"; +} + +while (<AFFIXFILE>){ + chomp; + if (/^\s*\#.*/){ + debugprint "Ignoring line $.\n"; + print "$_\n" if $printcomments; + } elsif (/^\s*$/){ + debugprint "Ignoring line $.\n"; + } elsif (/^\s*prefixes/){ + debugprint "Prefixes starting in line $.\n"; + $affix="PFX"; + } elsif (/^\s*suffixes/){ + debugprint "Suffixes starting in line $.\n"; + $affix="SFX"; + } elsif (/^[\s\t]*flag.*/){ + next if not $affix; # In case we are still in the preamble + shipoutflag if $inflags; + $inflags="yes"; + s/^[\s\t]*flag[\s\t]*//; + s/[\s\t]*:.*$//; + debugprint "Found flag $_ in line $.\n"; + + if (/\*/){ + s/[\*\s]//g; + $flagcombine="Y"; + debugprint "Flag renamed to $_ with combine=$flagcombine\n"; + } else { + $flagcombine="N"; + } + + if ( $flagname = &validate_flag($_) ){ + $myaffix = $affix; + } else { + $myaffix = "\# $affix"; + $flagname = $_; + print STDERR "Ignoring invalid flag $flagname in line $.\n"; + } + } elsif ( $affix and $inflags ) { + ($rootname,@comments) = split('#',$_); + $comment = '# ' . join('#',@comments); + + $rootname =~ s/\s*//g; + $rootname = mylc $rootname; + ($rootname,$addtoroot) = split('>',$rootname); + + if ( $addtoroot =~ s/^\-//g ){ + ($rootremove,$addtoroot) = split(',',$addtoroot); + $addtoroot = "0" unless $addtoroot; + $addtoroot = "0" if ( $addtoroot eq "-"); + } else { + $rootremove = "0"; + } + $addtoroot =~ s/\\\-/\-/g; # prefix ANTI\- to anti- + + if ( $rootname eq '.' && $rootremove ne "0" ){ + $rootname = $rootremove; + } + + debugprint "$rootname, $addtoroot, $rootremove\n"; + if ( $printcomments ){ + $affix_line=sprintf("%s %s %-5s %-11s %-24s %s", + $myaffix, $flagname, $rootremove, + $addtoroot, $rootname, $comment); + } else { + $affix_line=sprintf("%s %s %-5s %-11s %s", + $myaffix, $flagname, $rootremove, + $addtoroot, $rootname); + } + $rootremove = "0"; + $rootname = ''; + $addtoroot = ''; + $comment = ''; + @comments = (); + push @flag_array,$affix_line; + debugprint "$affix_line\n"; + } else { + # + } +} +shipoutflag; + +close AFFIXFILE; + +if ( $replacements ){ + &process_replacements($replacements); +} + +__END__ + +=head1 NAME + +B<ispellaff2myspell> - A program to convert ispell affix tables to myspell format. + +=head1 SYNOPSIS + + ispellaff2myspell [options] <affixfile> --myheader your_header + + Options: + + --affixfile=s Affix file + --bylocale Use current locale setup for upper/lowercase + conversion + --charset=s Use specified charset for upper/lowercase + conversion (defaults to latin1) + --debug Print debugging info + --extraflags=s Allow some non alphabetic flags + --lowercase=s Lowercase string + --myheader=s Header file + --printcomments Print commented lines in output + --replacements=s Replacements file + --split=i Split flags with more that i entries + --uppercase=s Uppercase string + +=head1 DESCRIPTION + +B<ispellaff2myspell> is a script that will convert ispell affix tables +to myspell format in a more or less successful way. + +This script does not create the dict file. Something like + +( echo `cat mydict.words+ | wc -l`; cat mydict.words+ ) > mydict.dict + +should do the work, with mydict.words+ being the munched wordlist + +=head1 OPTIONS + +=over 8 + +=item B<--affixfile=s> + +Affix file. You can put it directly in the command line. + +=item B<--bylocale> + +Use current locale setup for upper/lowercase conversion. Make sure +that the selected locale match the dictionary one, or you might get +into trouble. + +=item B<--charset=s> + +Use specified charset for upper/lowercase conversion (defaults to latin1). +Currently allowed values for charset are: latin0, latin1, latin2, latin3. + +=item B<--debug> + +Print some debugging info. + +=item B<--extraflags:s> + +Allows some non alphabetic flags. + +When invoked with no value the supported flags are currently those +corresponding to chars represented with the escape char B<\> as +first char. B<\> will be stripped. + +When given with the flag prefix will allow that flag and strip the +given prefix. Be careful when giving the prefix to properly escape chars, +e.g. you will need B<-e "\\\\"> or B<-e '\\'> for flags like B<\[> to be stripped to +B<[>. Otherwise you might even get errors. Use B<-e "^"> to allow all +flags and pass them unmodified. + +You will need a call to -e for each flag type, e.g., +B<-e "\\\\" -e "~\\\\"> (or B<-e '\\' -e '~\\'>). + +When a prefix is explicitely set, the default value (anything starting by B<\>) +is disabled and you need to enable it explicitely as in previous example. + +=item B<--lowercase=s> + +Lowercase string. Manually set the string of lowercase chars. This +requires B<--uppercase> having exactly that string but uppercase. + +=item B<--myheader=s> + +Header file. The myspell aff header. You need to write it +manually. This can contain everything you want to be before the affix table + +=item B<--printcomments> + +Print commented lines in output. + +=item B<--replacements=file> + +Add a pre-defined replacements table taken from 'file' to the .aff file. +Will skip lines not beginning with REP, and set the replacements number +appropriately. + +=item B<--split=i> + +Split flags with more that i entries. This can be of interest for flags +having a lot of entries. Will split the flag in chunks containing B<i> +entries. + +=item B<--uppercase=s> + +Uppercase string. Manually set the sring of uppercase chars. This +requires B<--lowercase> having exactly that string but lowercase. + +=back + +If your encoding is currently unsupported you can send me a file with +the two strings of lower and uppercase chars. Note that they must match +exactly but case changed. It will look something like + + $lowercase='a-z������������������������������'; + $uppercase='A-Z������������������������������'; + +=head1 SEE ALSO + +The OpenOffice.org Lingucomponent Project home page + +L<http://lingucomponent.openoffice.org/index.html> + +and the document + +L<http://lingucomponent.openoffice.org/affix.readme> + +that provides information about the basics of the myspell affix file format. + +You can also take a look at + + /usr/share/doc/libmyspell-dev/affix.readme.gz + /usr/share/doc/libmyspell-dev/README.compoundwords + /usr/share/doc/libmyspell-dev/README.replacetable + +in your Debian system. + +=head1 AUTHORS + +Agustin Martin <[email protected]> + +=cut diff --git a/debian/rules b/debian/rules index 92cc2eb..8b687cb 100755 --- a/debian/rules +++ b/debian/rules @@ -46,6 +46,8 @@ build-stamp: config.status $(MAKE) $(MAKE) check + pod2man debian/ispellaff2myspell \ + > debian/ispellaff2myspell.1 touch build-stamp @@ -56,6 +58,7 @@ clean: -$(MAKE) distclean ./debian/rules unpatch + rm -f debian/ispellaff2myspell.1 dh_clean @@ -75,7 +78,7 @@ binary-arch: build install dh_testroot dh_installchangelogs ChangeLog dh_installdocs - dh_installexamples + dh_installexamples src/tools/example.cxx dh_install dh_installman dh_link @@ -85,6 +88,7 @@ binary-arch: build install dh_makeshlibs dh_installdeb dh_shlibdeps + dh_perl dh_gencontrol dh_md5sums dh_builddeb -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-openoffice/hunspell.git

