Below is the full bag of tricks:

#!/bin/bash
#
# Build a word list for a hunspell dictionary by unmunching it, adding
# slightly modified dictionary words (to provoke suggestions), optionally
# adding words from other.txt, and letting hunspell both suggest and verify.
#
# Usage: <script> DICT
#   DICT  language id: name of the hunspell dictionary without extension.
#         DICT.dic and DICT.aff must exist.
#
# Optional input:
#   other.txt  extra candidate words (a similar language, or just noise),
#              read from the current directory when present.
#
# Output:
#   DICT.in    all candidate words, unique and byte-wise sorted
#   DICT.okay  the candidates that hunspell accepts as correct
#
# Exit status: 0 on success, 2 on a usage error or missing dictionary file,
# 1 when a processing step fails.

# Print a diagnostic on stderr.
err() {
  printf '%s\n' "$*" >&2
}

# Filter: remove the affix flags (the first "/" and everything after it)
# from every line read on stdin.
strip_flags() {
  sed 's|/.*$||'
}

# Filter: convert the "& word count offset: sugg1, sugg2" records of
# `hunspell -a` output into one suggestion per line; all other lines
# (correct words, misses without suggestions, the banner) are dropped.
suggestions_to_words() {
  awk '/^&/ { sub(/^&.*: /, ""); gsub(/, /, "\n"); print }'
}

# Run the whole pipeline for dictionary $1 (existence already checked).
# The body is a subshell so that pipefail, the EXIT trap and the variables
# below stay private to this call. The dictionary name is deliberately NOT
# stored in LANG: that is the locale variable, and overwriting it changes how
# sed, sort and hunspell treat multi-byte characters.
build_wordlist() (
  set -o pipefail
  dict=$1

  # Intermediate files live in a private directory that is removed on any
  # exit path, so an interrupted run cannot leak stale words into the next.
  workdir=$(mktemp -d) || { err "CANNOT CREATE A TEMPORARY DIRECTORY"; exit 1; }
  trap 'rm -rf -- "$workdir"' EXIT
  words=$workdir/words.txt
  plain=$workdir/plain.txt
  suggestions=$workdir/suggestions.txt

  # try to unmunch; this step is best effort, the plain dictionary is
  # used as input anyway
  echo "UNMUNCHING"
  if ! unmunch "$dict.dic" "$dict.aff" | strip_flags > "$words"; then
    err " unmunch failed or is not installed; continuing without its output"
  fi

  # use dictionary as input anyway, removing flags; the first line of a
  # .dic file is the entry count, not a word, so it is skipped
  echo "MAKING DICTIONARY WITHOUT FLAGS"
  if ! sed '1{/^[0-9][0-9]*[[:space:]]*$/d;}' < "$dict.dic" \
    | strip_flags > "$plain"; then
    err "CANNOT READ $dict.dic"
    exit 1
  fi

  # add a small modifier to the dic words to generate more alternatives:
  # the last character of every word is replaced by "x"
  echo "ADDING MODIFIED DICTIONARY ITEMS FOR MORE SUGGESTIONS"
  cat "$plain" >> "$words"
  sed 's/.$/x/' "$plain" >> "$words"

  # add random words (languages that look a bit like it or just noise)
  echo "ADDING OTHER WORDS IF AVAILABLE"
  if [[ -f other.txt ]]; then
    cat other.txt >> "$words"
  else
    echo " no other.txt present; add one (from similar language) for better results"
  fi

  # sorting and getting unique; byte-wise comparison so that two different
  # words are never merged by locale collation rules
  echo "MAKING WORDS IN LIST UNIQUE"
  if ! LC_ALL=C sort -u "$words" > "$dict.in"; then
    err "CANNOT WRITE $dict.in"
    exit 1
  fi

  # use the input to generate suggestions
  echo "USING HUNSPELL TO GET SUGGESTIONS (TERRIBLY SLOW!)"
  if ! hunspell -i utf-8 -d "$dict" -a "$dict.in" \
    | suggestions_to_words > "$suggestions"; then
    err "HUNSPELL FAILED WHILE GENERATING SUGGESTIONS"
    exit 1
  fi

  # merge the suggestions into the word list, keeping it unique
  # (sort -o may safely name one of its own input files)
  echo "ADD SUGGESTIONS TO WORDS LIST"
  if ! LC_ALL=C sort -u -o "$dict.in" "$dict.in" "$suggestions"; then
    err "CANNOT UPDATE $dict.in"
    exit 1
  fi

  # get all correct words
  echo "SPELLCHECK ALL WORDS TO GET CORRECT WORDS"
  if ! hunspell -i utf-8 -G -d "$dict" "$dict.in" > "$dict.okay"; then
    err "HUNSPELL FAILED WHILE CHECKING $dict.in"
    exit 1
  fi
)

# Validate the command line and the dictionary files, then build the list.
main() {
  if [[ $# -lt 1 || -z $1 ]]; then
    err "ENTER THE NAME OF THE DICTIONARY FILE WITHOUT .DIC AS A PARAMTER"
    return 2
  fi

  # set the language id (name of hunspell dic without extension)
  local dict=$1

  if [[ ! -f "$dict.dic" ]]; then
    err "$dict.dic IS MISSING"
    return 2
  fi
  if [[ ! -f "$dict.aff" ]]; then
    err "$dict.aff IS MISSING"
    return 2
  fi
  if ! command -v hunspell > /dev/null; then
    err "hunspell IS NOT INSTALLED"
    return 1
  fi

  build_wordlist "$dict"
}

main "$@"
------------------------------------------------------------------------------ _______________________________________________ Languagetool-devel mailing list Languagetool-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-devel