Below is the full bag of tricks:

#!/bin/bash
#
# Build a large word list from a hunspell dictionary and keep the words that
# hunspell accepts as correctly spelled.
#
# Usage: <script> LANG_ID
#   LANG_ID  name of the hunspell dictionary without extension; LANG_ID.dic
#            and LANG_ID.aff must exist in the current directory.
#
# Optional input:
#   other.txt  extra words (e.g. from a similar language) that are added to
#              the candidates fed to hunspell.
#
# Outputs (written to the current directory):
#   LANG_ID.in    candidate words followed by the suggestions hunspell made
#   LANG_ID.okay  the lines of LANG_ID.in that hunspell reports as correct
#
# Exit status: 0 on success; non-zero when the argument, a dictionary file or
# hunspell is missing, or when a required step fails.
#
# NOTE: `set -e` / `pipefail` are deliberately not enabled: unmunch is a
# best-effort step and `grep` legitimately returns 1 when hunspell offers no
# suggestions. Critical steps are checked explicitly instead.

# Remove hunspell flags: drop everything from the first "/" on each line.
# Reads the given files, or stdin when called without arguments.
strip_flags() {
  sed -e 's/\/.*$//' -- "$@"
}

# Run the whole generation pipeline.
# Arguments: $1 - dictionary id (file name without extension)
#            $2 - existing scratch directory for intermediate files
# Outputs:   progress messages on stdout; writes $1.in and $1.okay
# Returns:   0 on success, non-zero if a required step fails
build_word_lists() {
  local lang_id=$1
  local work_dir=$2
  local dic="${lang_id}.dic"
  local aff="${lang_id}.aff"
  local words="${work_dir}/words.txt"
  local bare="${work_dir}/bare.txt"
  local suggestions="${work_dir}/suggestions.txt"

  # Best effort: the plain dictionary is used as input below even if
  # unmunch is unavailable or fails.
  echo "UNMUNCHING"
  unmunch "$dic" "$aff" | strip_flags > "$words"

  echo "MAKING DICTIONARY WITHOUT FLAGS"
  strip_flags "$dic" > "$bare" || return 1

  # Replace the last character of every dictionary word by "x" so hunspell
  # is given near-misses to make suggestions for. This relies on the
  # caller's (UTF-8) locale so that "." matches a whole character.
  echo "ADDING MODIFIED DICTIONARY ITEMS FOR MORE SUGGESTIONS"
  cat -- "$bare" >> "$words" || return 1
  sed -e 's/.$/x/' -- "$bare" >> "$words" || return 1

  echo "ADDING OTHER WORDS IF AVAILABLE"
  if [[ -f other.txt ]]; then
    cat -- other.txt >> "$words" || return 1
  else
    echo "  no other.txt present; add one (from similar language) for better results"
  fi

  # Byte-wise sort: exact, locale-independent de-duplication.
  echo "MAKING WORDS IN LIST UNIQUE"
  LC_ALL=C sort -u -- "$words" > "${lang_id}.in" || return 1

  # In -a (pipe) mode hunspell marks misspelt words that have suggestions
  # with "&"; only those lines are kept.
  echo "USING HUNSPELL TO GET SUGGESTIONS (TERRIBLY SLOW!)"
  hunspell -i utf-8 -d "$lang_id" -a "${lang_id}.in" | grep '&' > "$suggestions"

  # Drop the "& word count offset: " prefix and put each suggestion on its
  # own line ("\n" in the replacement requires GNU sed, as in the original).
  echo "ADD SUGGESTIONS TO WORDS LIST"
  sed -e 's/&.*: //' -e 's/, /\n/g' -- "$suggestions" \
    | LC_ALL=C sort -u >> "${lang_id}.in" || return 1

  echo "SPELLCHECK ALL WORDS TO GET CORRECT WORDS"
  hunspell -i utf-8 -G -d "$lang_id" "${lang_id}.in" > "${lang_id}.okay"
}

# Validate the command line and run the pipeline in a private scratch dir.
# Arguments: $1 - dictionary id (file name without extension)
# Returns:   0 on success, non-zero on any error
main() {
  # Do not call this variable LANG: LANG is the locale environment variable
  # and overwriting it changes how sed, sort and hunspell treat the text.
  local lang_id=${1:-}
  local work_dir
  local status=0

  if [[ -z "$lang_id" ]]; then
    echo "ENTER THE NAME OF THE DICTIONARY FILE WITHOUT .DIC AS A PARAMTER" >&2
    return 1
  fi
  if [[ ! -f "${lang_id}.dic" ]]; then
    echo "${lang_id}.dic  IS MISSING" >&2
    return 1
  fi
  if [[ ! -f "${lang_id}.aff" ]]; then
    echo "${lang_id}.aff IS MISSING" >&2
    return 1
  fi
  if ! command -v hunspell > /dev/null; then
    echo "hunspell IS MISSING" >&2
    return 1
  fi

  # Intermediate files live in a fresh directory so stale leftovers from an
  # aborted run cannot leak into the result and no user file is clobbered.
  work_dir=$(mktemp -d) || return 1
  build_word_lists "$lang_id" "$work_dir" || status=$?
  rm -rf -- "${work_dir:?}"
  return "$status"
}

main "$@"




------------------------------------------------------------------------------
_______________________________________________
Languagetool-devel mailing list
Languagetool-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-devel

Reply via email to