Source: frog Source-Version: 0.13.7-1 Severity: important Tags: patch Usertags: icu63
Dear Maintainer, ICU 63.1 was recently released, packaged, and uploaded to experimental. Its transition is going to start soon. However, your package fails to build with this version. I attach a patch which fixes the problem. Please check whether it works with the version in Sid and upload the package when it's feasible for you. Thanks, Laszlo/GCS
Description: fix FTBFS with ICU 63.1 Add icu namespace. Author: Laszlo Boszormenyi (GCS) <gcs@debian.org> Last-Update: 2018-11-09 --- --- frog-0.13.7.orig/include/frog/mblem_mod.h +++ frog-0.13.7/include/frog/mblem_mod.h @@ -52,7 +52,7 @@ class Mblem { bool init( const TiCC::Configuration& ); void addDeclaration( folia::Document& doc ) const; void Classify( folia::Word * ); - void Classify( const UnicodeString& ); + void Classify( const icu::UnicodeString& ); std::vector<std::pair<std::string,std::string> > getResult() const; void filterTag( const std::string& ); void makeUnique(); @@ -64,8 +64,8 @@ class Mblem { void create_MBlem_defaults(); bool readsettings( const std::string& dir, const std::string& fname ); void addLemma( folia::Word *, const std::string&) ; - std::string make_instance( const UnicodeString& in ); - void getFoLiAResult( folia::Word *, const UnicodeString& ); + std::string make_instance( const icu::UnicodeString& in ); + void getFoLiAResult( folia::Word *, const icu::UnicodeString& ); Timbl::TimblAPI *myLex; std::string punctuation; size_t history; --- frog-0.13.7.orig/include/frog/mbma_brackets.h +++ frog-0.13.7/include/frog/mbma_brackets.h @@ -71,12 +71,12 @@ public: Status status() const { return _status; }; void set_status( const Status s ) { _status = s; }; - virtual UnicodeString morpheme() const { return "";}; + virtual icu::UnicodeString morpheme() const { return "";}; virtual std::string inflection() const { return ""; }; virtual std::string original() const { return ""; }; virtual int infixpos() const { return -1; }; virtual bool isglue() const { return false; }; - virtual UnicodeString put( bool = true ) const; + virtual icu::UnicodeString put( bool = true ) const; virtual BaseBracket *append( BaseBracket * ){ abort(); }; virtual bool isNested() { return false; }; virtual void resolveGlue(){ abort(); }; @@ -102,9 +102,9 @@ public: class BracketLeaf: public BaseBracket { public: BracketLeaf( const RulePart&, int, TiCC::LogStream& ); - 
BracketLeaf( CLEX::Type, const UnicodeString&, int, TiCC::LogStream& ); - UnicodeString put( bool = true ) const; - UnicodeString morpheme() const { return morph; }; + BracketLeaf( CLEX::Type, const icu::UnicodeString&, int, TiCC::LogStream& ); + icu::UnicodeString put( bool = true ) const; + icu::UnicodeString morpheme() const { return morph; }; std::string inflection() const { return inflect; }; std::string original() const { return orig; }; int infixpos() const { return ifpos; }; @@ -115,7 +115,7 @@ public: private: int ifpos; bool glue; - UnicodeString morph; + icu::UnicodeString morph; std::string orig; std::string inflect; }; @@ -127,7 +127,7 @@ class BracketNest: public BaseBracket { ~BracketNest(); bool isNested() { return true; }; void clearEmptyNodes(); - UnicodeString put( bool = true ) const; + icu::UnicodeString put( bool = true ) const; bool testMatch( std::list<BaseBracket*>& result, const std::list<BaseBracket*>::iterator& rpos, std::list<BaseBracket*>::iterator& bpos ); --- frog-0.13.7.orig/include/frog/mbma_mod.h +++ frog-0.13.7/include/frog/mbma_mod.h @@ -65,7 +65,7 @@ class Mbma { bool init( const TiCC::Configuration& ); void addDeclaration( folia::Document& doc ) const; void Classify( folia::Word * ); - void Classify( const UnicodeString& ); + void Classify( const icu::UnicodeString& ); void filterHeadTag( const std::string& ); void filterSubTags( const std::vector<std::string>& ); void assign_compounds(); @@ -73,8 +73,8 @@ class Mbma { std::vector<std::pair<std::string,std::string>> getResults( ) const; void setDeepMorph( bool b ){ doDeepMorph = b; }; void clearAnalysis(); - Rule* matchRule( const std::vector<std::string>&, const UnicodeString& ); - std::vector<Rule*> execute( const UnicodeString& , + Rule* matchRule( const std::vector<std::string>&, const icu::UnicodeString& ); + std::vector<Rule*> execute( const icu::UnicodeString& , const std::vector<std::string>& ); static std::map<std::string,std::string> TAGconv; static std::string 
mbma_tagset; @@ -85,10 +85,10 @@ class Mbma { bool readsettings( const std::string&, const std::string& ); void fillMaps(); void init_cgn( const std::string&, const std::string& ); - Transliterator * init_trans(); - UnicodeString filterDiacritics( const UnicodeString& ) const; - void getFoLiAResult( folia::Word *, const UnicodeString& ) const; - std::vector<std::string> make_instances( const UnicodeString& word ); + icu::Transliterator * init_trans(); + icu::UnicodeString filterDiacritics( const icu::UnicodeString& ) const; + void getFoLiAResult( folia::Word *, const icu::UnicodeString& ) const; + std::vector<std::string> make_instances( const icu::UnicodeString& word ); CLEX::Type getFinalTag( const std::list<BaseBracket*>& ); int debugFlag; void addMorph( folia::MorphologyLayer *, @@ -105,7 +105,7 @@ class Mbma { std::vector<Rule*> analysis; std::string version; TiCC::LogStream *mbmaLog; - Transliterator *transliterator; + icu::Transliterator *transliterator; Tokenizer::UnicodeFilter *filter; bool doDeepMorph; }; --- frog-0.13.7.orig/include/frog/mbma_rule.h +++ frog-0.13.7/include/frog/mbma_rule.h @@ -43,11 +43,11 @@ public: void get_edits( const std::string& ); CLEX::Type ResultClass; std::vector<CLEX::Type> RightHand; - UnicodeString ins; - UnicodeString del; - UnicodeString hide; - UnicodeString uchar; - UnicodeString morpheme; + icu::UnicodeString ins; + icu::UnicodeString del; + icu::UnicodeString hide; + icu::UnicodeString uchar; + icu::UnicodeString morpheme; std::string inflect; bool is_affix; bool is_glue; @@ -62,13 +62,13 @@ class BracketNest; class Rule { public: Rule( const std::vector<std::string>&, - const UnicodeString&, + const icu::UnicodeString&, TiCC::LogStream&, int ); ~Rule(); std::vector<std::string> extract_morphemes() const; std::string morpheme_string( bool = false ) const; - UnicodeString getKey( bool ); + icu::UnicodeString getKey( bool ); bool performEdits(); void getCleanInflect(); void reduceZeroNodes(); @@ -77,8 +77,8 @@ public: 
std::vector<RulePart> rules; int debugFlag; CLEX::Type tag; - UnicodeString sortkey; - UnicodeString orig_word; + icu::UnicodeString sortkey; + icu::UnicodeString orig_word; std::string description; std::string inflection; Compound::Type compound; --- frog-0.13.7.orig/src/Parser.cxx +++ frog-0.13.7/src/Parser.cxx @@ -822,7 +822,7 @@ parseData Parser::prepareParse( const ve string head; string mod; for ( const auto& mwu : mwuv ){ - UnicodeString tmp; + icu::UnicodeString tmp; #pragma omp critical(foliaupdate) { tmp = mwu->text(); @@ -853,7 +853,7 @@ parseData Parser::prepareParse( const ve i += mwuv.size()-1; } else { - UnicodeString tmp; + icu::UnicodeString tmp; #pragma omp critical(foliaupdate) { tmp = word->text(); --- frog-0.13.7.orig/src/iob_tagger_mod.cxx +++ frog-0.13.7/src/iob_tagger_mod.cxx @@ -265,7 +265,7 @@ void IOBTagger::Classify( const vector<W if ( !swords.empty() ) { string sentence; // the tagger needs the whole sentence for ( const auto& sword : swords ){ - UnicodeString word; + icu::UnicodeString word; #pragma omp critical(foliaupdate) { word = sword->text(); --- frog-0.13.7.orig/src/mblem_mod.cxx +++ frog-0.13.7/src/mblem_mod.cxx @@ -169,11 +169,11 @@ Mblem::~Mblem(){ delete mblemLog; } -string Mblem::make_instance( const UnicodeString& in ) { +string Mblem::make_instance( const icu::UnicodeString& in ) { if (debug) { LOG << "making instance from: " << in << endl; } - UnicodeString instance = ""; + icu::UnicodeString instance = ""; size_t length = in.length(); for ( size_t i=0; i < history; i++) { size_t j = length - history + i; @@ -266,7 +266,7 @@ void Mblem::makeUnique( ){ } } -void Mblem::getFoLiAResult( Word *word, const UnicodeString& uWord ){ +void Mblem::getFoLiAResult( Word *word, const icu::UnicodeString& uWord ){ if ( mblemResult.empty() ){ // just return the word as a lemma string result = UnicodeToUTF8( uWord ); @@ -294,7 +294,7 @@ void Mblem::addDeclaration( Document& do void Mblem::Classify( Word *sword ){ if ( 
sword->isinstance(PlaceHolder_t ) ) return; - UnicodeString uword; + icu::UnicodeString uword; string pos; string token_class; #pragma omp critical(foliaupdate) @@ -322,7 +322,7 @@ void Mblem::Classify( Word *sword ){ // we have to strip a few letters to get a lemma auto const& it2 = it1->second.find( token_class ); if ( it2 != it1->second.end() ){ - uword = UnicodeString( uword, 0, uword.length() - it2->second ); + uword = icu::UnicodeString( uword, 0, uword.length() - it2->second ); string word = UnicodeToUTF8(uword); addLemma( sword, word ); return; @@ -343,7 +343,7 @@ void Mblem::Classify( Word *sword ){ getFoLiAResult( sword, uword ); } -void Mblem::Classify( const UnicodeString& uWord ){ +void Mblem::Classify( const icu::UnicodeString& uWord ){ mblemResult.clear(); string inst = make_instance(uWord); string classString; @@ -360,7 +360,7 @@ void Mblem::Classify( const UnicodeStrin int index = 0; while ( index < numParts ) { string partS = parts[index++]; - UnicodeString lemma; + icu::UnicodeString lemma; string restag; string::size_type pos = partS.find("+"); if ( pos == string::npos ){ @@ -376,9 +376,9 @@ void Mblem::Classify( const UnicodeStrin throw runtime_error( "invalid editstring: " + partS ); restag = edits[0]; // the first one is the POS tag - UnicodeString insstr; - UnicodeString delstr; - UnicodeString prefix; + icu::UnicodeString insstr; + icu::UnicodeString delstr; + icu::UnicodeString prefix; for ( const auto& edit : edits ){ if ( edit == edits.front() ){ continue; @@ -422,7 +422,7 @@ void Mblem::Classify( const UnicodeStrin LOG << "prefixpos = " << prefixpos << endl; } if (prefixpos >= 0) { - lemma = UnicodeString( uWord, 0L, prefixpos ); + lemma = icu::UnicodeString( uWord, 0L, prefixpos ); prefixpos = prefixpos + prefix.length(); } if (debug){ @@ -441,13 +441,13 @@ void Mblem::Classify( const UnicodeStrin lemma += uWord; } else { - UnicodeString part = UnicodeString( uWord, prefixpos, uWord.length() - delstr.length() - prefixpos ); + 
icu::UnicodeString part = icu::UnicodeString( uWord, prefixpos, uWord.length() - delstr.length() - prefixpos ); lemma += part + insstr; } } else if ( insstr.isEmpty() ){ // no replacement, just take part after the prefix - lemma += UnicodeString( uWord, prefixpos, uWord.length() ); // uWord; + lemma += icu::UnicodeString( uWord, prefixpos, uWord.length() ); // uWord; } else { // but replace if possible --- frog-0.13.7.orig/src/mblem_prog.cxx +++ frog-0.13.7/src/mblem_prog.cxx @@ -168,7 +168,7 @@ void Test( istream& in ){ if ( useTagger ){ vector<TagResult> tagrv = tagger.tagLine( s ); for ( const auto& tr : tagrv ){ - UnicodeString uWord = folia::UTF8ToUnicode(tr.word()); + icu::UnicodeString uWord = folia::UTF8ToUnicode(tr.word()); myMblem.Classify( uWord ); myMblem.filterTag( tr.assignedTag() ); vector<pair<string,string> > res = myMblem.getResult(); @@ -184,7 +184,7 @@ void Test( istream& in ){ vector<string> parts; TiCC::split( s, parts ); for ( const auto& w : parts ){ - UnicodeString uWord = folia::UTF8ToUnicode(w); + icu::UnicodeString uWord = folia::UTF8ToUnicode(w); myMblem.Classify( uWord ); vector<pair<string,string> > res = myMblem.getResult(); string line = w + "\t"; --- frog-0.13.7.orig/src/mbma_brackets.cxx +++ frog-0.13.7/src/mbma_brackets.cxx @@ -283,7 +283,7 @@ BracketLeaf::BracketLeaf( const RulePart } BracketLeaf::BracketLeaf( CLEX::Type t, - const UnicodeString& us, + const icu::UnicodeString& us, int flag, LogStream& l ): BaseBracket( t, vector<CLEX::Type>(), flag, l ), @@ -314,17 +314,17 @@ BracketNest::~BracketNest(){ } } -UnicodeString BaseBracket::put( bool full ) const { - UnicodeString result = "[err?]"; +icu::UnicodeString BaseBracket::put( bool full ) const { + icu::UnicodeString result = "[err?]"; if ( full ){ - UnicodeString s = UTF8ToUnicode(toString(cls)); + icu::UnicodeString s = UTF8ToUnicode(toString(cls)); result += s; } return result; } -UnicodeString BracketLeaf::put( bool full ) const { - UnicodeString result; 
+icu::UnicodeString BracketLeaf::put( bool full ) const { + icu::UnicodeString result; if ( !morph.isEmpty() ){ result += "["; result += morph; @@ -341,10 +341,10 @@ UnicodeString BracketLeaf::put( bool ful return result; } -UnicodeString BracketNest::put( bool full ) const { - UnicodeString result = "[ "; +icu::UnicodeString BracketNest::put( bool full ) const { + icu::UnicodeString result = "[ "; for ( auto const& it : parts ){ - UnicodeString m = it->put( full ); + icu::UnicodeString m = it->put( full ); if ( !m.isEmpty() ){ result += m + " "; } --- frog-0.13.7.orig/src/mbma_mod.cxx +++ frog-0.13.7/src/mbma_mod.cxx @@ -109,9 +109,9 @@ void Mbma::init_cgn( const string& main, } } -Transliterator *Mbma::init_trans( ){ +icu::Transliterator *Mbma::init_trans( ){ UErrorCode stat = U_ZERO_ERROR; - Transliterator *t = Transliterator::createInstance( "NFD; [:M:] Remove; NFC", + icu::Transliterator *t = icu::Transliterator::createInstance( "NFD; [:M:] Remove; NFC", UTRANS_FORWARD, stat ); if ( U_FAILURE( stat ) ){ @@ -213,14 +213,14 @@ void Mbma::cleanUp(){ clearAnalysis(); } -vector<string> Mbma::make_instances( const UnicodeString& word ){ +vector<string> Mbma::make_instances( const icu::UnicodeString& word ){ vector<string> insts; insts.reserve( word.length() ); for ( long i=0; i < word.length(); ++i ) { if (debugFlag > 10){ LOG << "itt #:" << i << endl; } - UnicodeString inst; + icu::UnicodeString inst; for ( long j=i ; j <= i + RIGHT + LEFT; ++j ) { if (debugFlag > 10){ LOG << " " << j-LEFT << ": "; @@ -308,7 +308,7 @@ void Mbma::clearAnalysis(){ } Rule* Mbma::matchRule( const std::vector<std::string>& ana, - const UnicodeString& word ){ + const icu::UnicodeString& word ){ Rule *rule = new Rule( ana, word, *mbmaLog, debugFlag ); if ( rule->performEdits() ){ rule->reduceZeroNodes(); @@ -340,7 +340,7 @@ Rule* Mbma::matchRule( const std::vector } } -vector<Rule*> Mbma::execute( const UnicodeString& word, +vector<Rule*> Mbma::execute( const icu::UnicodeString& word, 
const vector<string>& classes ){ vector<vector<string> > allParts = generate_all_perms( classes ); if ( debugFlag ){ @@ -683,9 +683,9 @@ void Mbma::filterSubTags( const vector<s // we still might have doubles. (different Rule's yielding the same result) // reduce these // - map<UnicodeString, Rule*> unique; + map<icu::UnicodeString, Rule*> unique; for ( const auto& ait : highConf ){ - UnicodeString tmp = ait->getKey( doDeepMorph ); + icu::UnicodeString tmp = ait->getKey( doDeepMorph ); unique[tmp] = ait; } // so now we have map of 'equal' analysis. @@ -737,7 +737,7 @@ void Mbma::assign_compounds(){ } } -void Mbma::getFoLiAResult( Word *fword, const UnicodeString& uword ) const { +void Mbma::getFoLiAResult( Word *fword, const icu::UnicodeString& uword ) const { if ( analysis.size() == 0 ){ // fallback option: use the word and pretend it's a morpheme ;-) if ( debugFlag ){ @@ -779,9 +779,9 @@ void Mbma::addDeclaration( Document& doc } } -UnicodeString Mbma::filterDiacritics( const UnicodeString& in ) const { +icu::UnicodeString Mbma::filterDiacritics( const icu::UnicodeString& in ) const { if ( transliterator ){ - UnicodeString result = in; + icu::UnicodeString result = in; transliterator->transliterate( result ); return result; } @@ -794,7 +794,7 @@ void Mbma::Classify( Word* sword ){ if ( sword->isinstance(PlaceHolder_t) ){ return; } - UnicodeString uWord; + icu::UnicodeString uWord; PosAnnotation *pos; string head; string token_class; @@ -826,7 +826,7 @@ void Mbma::Classify( Word* sword ){ } } else { - UnicodeString lWord = uWord; + icu::UnicodeString lWord = uWord; if ( head != "SPEC" ){ lWord.toLower(); } @@ -846,9 +846,9 @@ void Mbma::Classify( Word* sword ){ } } -void Mbma::Classify( const UnicodeString& word ){ +void Mbma::Classify( const icu::UnicodeString& word ){ clearAnalysis(); - UnicodeString uWord = filterDiacritics( word ); + icu::UnicodeString uWord = filterDiacritics( word ); vector<string> insts = make_instances( uWord ); vector<string> classes; 
classes.reserve( insts.size() ); --- frog-0.13.7.orig/src/mbma_prog.cxx +++ frog-0.13.7/src/mbma_prog.cxx @@ -187,7 +187,7 @@ void Test( istream& in ){ if ( useTagger ){ vector<TagResult> tagv = tagger.tagLine( s ); for ( const auto& tr : tagv ){ - UnicodeString uWord = folia::UTF8ToUnicode( tr.word() ); + icu::UnicodeString uWord = folia::UTF8ToUnicode( tr.word() ); vector<string> v; size_t num = TiCC::split_at_first_of( tr.assignedTag(), v, "(,)" ); @@ -225,7 +225,7 @@ void Test( istream& in ){ vector<string> parts; TiCC::split( s, parts ); for ( auto const& w : parts ){ - UnicodeString uWord = folia::UTF8ToUnicode(w); + icu::UnicodeString uWord = folia::UTF8ToUnicode(w); uWord.toLower(); myMbma.Classify( uWord ); myMbma.assign_compounds(); --- frog-0.13.7.orig/src/mbma_rule.cxx +++ frog-0.13.7/src/mbma_rule.cxx @@ -236,7 +236,7 @@ RulePart::RulePart( const string& rs, co } Rule::Rule( const vector<string>& parts, - const UnicodeString& s, + const icu::UnicodeString& s, TiCC::LogStream& ls, int flag ): debugFlag( flag ), @@ -301,7 +301,7 @@ vector<string> Rule::extract_morphemes( vector<string> morphemes; morphemes.reserve( rules.size() ); for ( const auto& it : rules ){ - UnicodeString morpheme = it.morpheme; + icu::UnicodeString morpheme = it.morpheme; if ( !morpheme.isEmpty() ){ morphemes.push_back( UnicodeToUTF8(morpheme) ); } @@ -312,7 +312,7 @@ vector<string> Rule::extract_morphemes( string Rule::morpheme_string( bool structured ) const { string result; if ( structured ){ - UnicodeString us = brackets->put(true); + icu::UnicodeString us = brackets->put(true); result = UnicodeToUTF8( us ); } else { @@ -343,7 +343,7 @@ bool Rule::performEdits(){ for ( int j=0; j < cur->del.length(); ++j ){ if ( (k + j) < rules.size() ){ if ( rules[k+j].uchar != cur->del[j] ){ - UnicodeString tmp(cur->del[j]); + icu::UnicodeString tmp(cur->del[j]); LOG << "Hmm: deleting " << cur->del << " is impossible. (" << rules[k+j].uchar << " != " << tmp << ")." 
<< endl; @@ -352,7 +352,7 @@ bool Rule::performEdits(){ } } else { - UnicodeString tmp(cur->del[j]); + icu::UnicodeString tmp(cur->del[j]); LOG << "Hmm: deleting " << cur->del << " is impossible. (beyond end of the rule)" << endl; @@ -370,7 +370,7 @@ bool Rule::performEdits(){ } bool inserted = false; - UnicodeString part; // store to-be-inserted particles here! + icu::UnicodeString part; // store to-be-inserted particles here! if ( !cur->hide.isEmpty() ){ last->morpheme += cur->uchar; // add to prevvoius morheme cur->uchar = ""; @@ -490,10 +490,10 @@ void Rule::resolve_inflections(){ } } -UnicodeString Rule::getKey( bool deep ){ +icu::UnicodeString Rule::getKey( bool deep ){ if ( deep ){ if ( sortkey.isEmpty() ){ - UnicodeString tmp; + icu::UnicodeString tmp; stringstream ss; ss << brackets << endl; tmp = UTF8ToUnicode(ss.str()); @@ -503,7 +503,7 @@ UnicodeString Rule::getKey( bool deep ){ } else { vector<string> morphs = extract_morphemes(); - UnicodeString tmp; + icu::UnicodeString tmp; // create an unique string for ( auto const& mor : morphs ){ tmp += UTF8ToUnicode(mor) + "++"; --- frog-0.13.7.orig/src/mwu_chunker_mod.cxx +++ frog-0.13.7/src/mwu_chunker_mod.cxx @@ -116,7 +116,7 @@ void Mwu::reset(){ } void Mwu::add( Word *word ){ - UnicodeString tmp; + icu::UnicodeString tmp; #pragma omp critical(foliaupdate) { tmp = word->text(); --- frog-0.13.7.orig/src/ner_tagger_mod.cxx +++ frog-0.13.7/src/ner_tagger_mod.cxx @@ -404,7 +404,7 @@ void NERTagger::Classify( const vector<W vector<string> words; string sentence; // the tagger needs the whole sentence for ( const auto& sw : swords ){ - UnicodeString word; + icu::UnicodeString word; #pragma omp critical(foliaupdate) { word = sw->text(); --- frog-0.13.7.orig/src/pos_tagger_mod.cxx +++ frog-0.13.7/src/pos_tagger_mod.cxx @@ -250,7 +250,7 @@ void POSTagger::Classify( const vector<W if ( !swords.empty() ) { string sentence; // the tagger needs the whole sentence for ( size_t w = 0; w < swords.size(); ++w ) { - 
UnicodeString word; + icu::UnicodeString word; #pragma omp critical(foliaupdate) { word = swords[w]->text();