Hi,
I would like to add a new simple LM class named LanguageModelHybKen (in
HybKen.h and HybKen.cpp) which will inherit from LanguageModelKen.
In Factory.cpp, I added as follows:
...
//#include "moses/LM/Ken.h"
#include "moses/LM/HybKen.h"
...
class KenFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(ConstructKenLM(line));
}
};
class HybKenFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(ConstructHybKenLM(line));
}
};
...
Add("KENLM", new KenFactory());
Add("HKENLM", new HybKenFactory());
...
I've created HybKen.h as follows:
#ifndef moses_LanguageModelHybKen_h
#define moses_LanguageModelHybKen_h
//#include <string>
//#include <boost/shared_ptr.hpp>
//#include "lm/word_index.hh"
//#include "moses/LM/Base.h"
//#include "moses/Hypothesis.h"
//#include "moses/TypeDef.h"
//#include "moses/Word.h"
#include "moses/LM/Ken.h"
namespace Moses
{
LanguageModel *ConstructHybKenLM(const std::string &line);
//! This will also load. Returns a templated KenLM class
LanguageModel *ConstructHybKenLM(const std::string &line, const std::string
&file, const std::string &fileM, FactorType factorType, bool lazy);
void LoadMapping(const std::string &f, std::map<std::string, std::string>&
m);
/*
* An implementation of single factor LM using Kenneth's code.
*/
template <class Model> class LanguageModelHybKen : public
LanguageModelKen<Model>
{
...
Factory.cpp, HybKen.h and HybKen.cpp are attached for your reference.
But I always got the compilation error message: "*moses/FF/Factory.cpp:166:
error: undefined reference to 'Moses::ConstructHybKenLM(std::string const&)*
'".
As far as I can tell, Moses::ConstructHybKenLM(std::string const&) is already
defined inside the Moses namespace (in HybKen.cpp), so I do not understand why
the linker cannot find it.
May I ask for your help?
Thank you!
--
Cheers,
Vu
#include "moses/FF/Factory.h"
#include "moses/StaticData.h"
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
#include "moses/FF/LexicalReordering/LexicalReordering.h"
#include "moses/FF/BleuScoreFeature.h"
#include "moses/FF/TargetWordInsertionFeature.h"
#include "moses/FF/SourceWordDeletionFeature.h"
#include "moses/FF/GlobalLexicalModel.h"
#include "moses/FF/GlobalLexicalModelUnlimited.h"
#include "moses/FF/UnknownWordPenaltyProducer.h"
#include "moses/FF/WordTranslationFeature.h"
#include "moses/FF/TargetBigramFeature.h"
#include "moses/FF/TargetNgramFeature.h"
#include "moses/FF/PhraseBoundaryFeature.h"
#include "moses/FF/PhrasePairFeature.h"
#include "moses/FF/PhraseLengthFeature.h"
#include "moses/FF/DistortionScoreProducer.h"
#include "moses/FF/SparseHieroReorderingFeature.h"
#include "moses/FF/WordPenaltyProducer.h"
#include "moses/FF/InputFeature.h"
#include "moses/FF/PhrasePenalty.h"
#include "moses/FF/OSM-Feature/OpSequenceModel.h"
#include "moses/FF/ControlRecombination.h"
#include "moses/FF/ExternalFeature.h"
#include "moses/FF/ConstrainedDecoding.h"
#include "moses/FF/CoveredReferenceFeature.h"
#include "moses/FF/TreeStructureFeature.h"
#include "moses/FF/SoftMatchingFeature.h"
#include "moses/FF/SourceGHKMTreeInputMatchFeature.h"
#include "moses/FF/HyperParameterAsWeight.h"
#include "moses/FF/SetSourcePhrase.h"
#include "CountNonTerms.h"
#include "ReferenceComparison.h"
#include "RuleScope.h"
#include "MaxSpanFreeNonTermSource.h"
#include "NieceTerminal.h"
#include "SpanLength.h"
#include "SyntaxRHS.h"
#include "SkeletonChangeInput.h"
#include "moses/FF/SkeletonStatelessFF.h"
#include "moses/FF/SkeletonStatefulFF.h"
#include "moses/LM/SkeletonLM.h"
#include "moses/TranslationModel/SkeletonPT.h"
#ifdef HAVE_CMPH
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
#endif
#ifdef PT_UG
#include "moses/TranslationModel/UG/mmsapt.h"
#endif
#ifdef HAVE_PROBINGPT
#include "moses/TranslationModel/ProbingPT/ProbingPT.h"
#endif
//#include "moses/LM/Ken.h"
#include "moses/LM/HybKen.h"
#ifdef LM_IRST
#include "moses/LM/IRST.h"
#endif
#ifdef LM_SRI
#include "moses/LM/SRI.h"
#endif
#ifdef LM_MAXENT_SRI
#include "moses/LM/MaxEntSRI.h"
#endif
#ifdef LM_RAND
#include "moses/LM/Rand.h"
#endif
#ifdef HAVE_SYNLM
#include "moses/SyntacticLanguageModel.h"
#endif
#ifdef LM_NEURAL
#include "moses/LM/NeuralLMWrapper.h"
#endif
#ifdef LM_DALM
#include "moses/LM/DALMWrapper.h"
#endif
#ifdef LM_LBL
#include "moses/LM/oxlm/LBLLM.h"
#endif
#include "ExampleSLFF.h"
#include "ExampleSFFF.h"
#include "util/exception.hh"
#include <vector>
namespace Moses
{
// Abstract factory for feature functions.  Each concrete subclass knows how
// to build one family of feature functions from its moses.ini line.
class FeatureFactory
{
public:
virtual ~FeatureFactory() {}
// Construct the feature described by 'line' and register it with StaticData
// (concrete factories call DefaultSetup on the newly built feature).
virtual void Create(const std::string &line) = 0;
protected:
// Registers a newly constructed feature and wires up its weights.
template <class F> static void DefaultSetup(F *feature);
FeatureFactory() {}
};
// Registers a freshly constructed feature function with StaticData and
// attaches its weights.  Weights come from the ini file when present;
// otherwise, for features that have score components, the feature's own
// DefaultWeights() are used.
template <class F> void FeatureFactory::DefaultSetup(F *feature)
{
StaticData &static_data = StaticData::InstanceNonConst();
const string &featureName = feature->GetScoreProducerDescription();
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
if (feature->IsTuneable() || weights.size()) {
// if it's tuneable, the ini file MUST have weights
// even if it's not tuneable, people can still set the weights in the ini file
static_data.SetWeights(feature, weights);
} else if (feature->GetNumScoreComponents() > 0) {
std::vector<float> defaultWeights = feature->DefaultWeights();
static_data.SetWeights(feature, defaultWeights);
}
}
namespace
{
// Generic factory: constructs F directly from the ini line.
template <class F> class DefaultFeatureFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(new F(line));
}
};
// KenLM uses a factory function (ConstructKenLM) instead of a direct
// constructor because the concrete templated class is chosen at load time
// from the model file's binary format.
class KenFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(ConstructKenLM(line));
}
};
// Same pattern for the hybrid KenLM wrapper declared in moses/LM/HybKen.h.
class HybKenFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(ConstructHybKenLM(line));
}
};
} // namespace
// Populates the registry with every feature function known to this build.
// MOSES_FNAME registers a feature under its class name, MOSES_FNAME2 under an
// explicit ini-file name.  Optional features are guarded by the same
// preprocessor symbols that guard their headers at the top of this file.
FeatureRegistry::FeatureRegistry()
{
// Feature with same name as class
#define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >());
// Feature with different name than class.
#define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >());
MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor);
MOSES_FNAME(PhraseDictionaryOnDisk);
MOSES_FNAME(PhraseDictionaryMemory);
MOSES_FNAME(PhraseDictionaryScope3);
MOSES_FNAME(PhraseDictionaryMultiModel);
MOSES_FNAME(PhraseDictionaryMultiModelCounts);
MOSES_FNAME(PhraseDictionaryALSuffixArray);
MOSES_FNAME(PhraseDictionaryDynSuffixArray);
MOSES_FNAME(PhraseDictionaryTransliteration);
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
MOSES_FNAME(GlobalLexicalModel);
//MOSES_FNAME(GlobalLexicalModelUnlimited); This was commented out in the original
MOSES_FNAME(SourceWordDeletionFeature);
MOSES_FNAME(TargetWordInsertionFeature);
MOSES_FNAME(PhraseBoundaryFeature);
MOSES_FNAME(PhraseLengthFeature);
MOSES_FNAME(WordTranslationFeature);
MOSES_FNAME(TargetBigramFeature);
MOSES_FNAME(TargetNgramFeature);
MOSES_FNAME(PhrasePairFeature);
MOSES_FNAME(LexicalReordering);
MOSES_FNAME2("Generation", GenerationDictionary);
MOSES_FNAME(BleuScoreFeature);
MOSES_FNAME2("Distortion", DistortionScoreProducer);
MOSES_FNAME2("WordPenalty", WordPenaltyProducer);
MOSES_FNAME(InputFeature);
MOSES_FNAME(OpSequenceModel);
MOSES_FNAME(PhrasePenalty);
MOSES_FNAME2("UnknownWordPenalty", UnknownWordPenaltyProducer);
MOSES_FNAME(ControlRecombination);
MOSES_FNAME(ConstrainedDecoding);
MOSES_FNAME(CoveredReferenceFeature);
MOSES_FNAME(ExternalFeature);
MOSES_FNAME(SourceGHKMTreeInputMatchFeature);
MOSES_FNAME(TreeStructureFeature);
MOSES_FNAME(SoftMatchingFeature);
MOSES_FNAME(HyperParameterAsWeight);
MOSES_FNAME(SetSourcePhrase);
MOSES_FNAME(CountNonTerms);
MOSES_FNAME(ReferenceComparison);
MOSES_FNAME(RuleScope);
MOSES_FNAME(MaxSpanFreeNonTermSource);
MOSES_FNAME(NieceTerminal);
MOSES_FNAME(SparseHieroReorderingFeature);
MOSES_FNAME(SpanLength);
MOSES_FNAME(SyntaxRHS);
MOSES_FNAME(SkeletonChangeInput);
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);
MOSES_FNAME(SkeletonLM);
MOSES_FNAME(SkeletonPT);
MOSES_FNAME2("ExampleSLFF", ExampleSLFF);//stateless feature function example
MOSES_FNAME2("ExampleSFFF", ExampleSFFF);//stateful feature function example
// Optional components below depend on build-time flags.
#ifdef HAVE_CMPH
MOSES_FNAME(PhraseDictionaryCompact);
#endif
#ifdef PT_UG
MOSES_FNAME(Mmsapt);
MOSES_FNAME2("PhraseDictionaryBitextSampling",Mmsapt); // that's an alias for Mmsapt!
#endif
#ifdef HAVE_PROBINGPT
MOSES_FNAME(ProbingPT);
#endif
#ifdef HAVE_SYNLM
MOSES_FNAME(SyntacticLanguageModel);
#endif
#ifdef LM_IRST
MOSES_FNAME2("IRSTLM", LanguageModelIRST);
#endif
#ifdef LM_SRI
MOSES_FNAME2("SRILM", LanguageModelSRI);
#endif
#ifdef LM_MAXENT_SRI
MOSES_FNAME2("MaxEntLM", LanguageModelMaxEntSRI);
#endif
#ifdef LM_RAND
MOSES_FNAME2("RANDLM", LanguageModelRandLM);
#endif
#ifdef LM_NEURAL
MOSES_FNAME2("NeuralLM", NeuralLMWrapper);
#endif
#ifdef LM_DALM
MOSES_FNAME2("DALM", LanguageModelDALM);
#endif
#ifdef LM_LBL
MOSES_FNAME2("LBLLM-LM", LBLLM<oxlm::LM>);
MOSES_FNAME2("LBLLM-FactoredLM", LBLLM<oxlm::FactoredLM>);
MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM<oxlm::FactoredMaxentLM>);
#endif
// KenLM-based LMs go through dedicated factories (see anonymous namespace
// above) because the concrete model class is chosen at load time.
Add("KENLM", new KenFactory());
Add("HKENLM", new HybKenFactory());
}
// Nothing to do: registry_ holds boost::shared_ptr owners which release the
// factories automatically.
FeatureRegistry::~FeatureRegistry()
{
}
// Register 'factory' under 'name', taking ownership of the pointer.
// Throws (via UTIL_THROW_IF2) if the name is already registered; the
// shared_ptr still owns the factory while the exception unwinds.
void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory)
{
  boost::shared_ptr<FeatureFactory> owned(factory);
  const bool inserted = registry_.insert(std::make_pair(name, owned)).second;
  UTIL_THROW_IF2(!inserted, "Duplicate feature name " << name);
}
namespace
{
// Thrown by FeatureRegistry::Construct when no factory is registered for a name.
class UnknownFeatureException : public util::Exception {};
}
void FeatureRegistry::Construct(const std::string &name, const std::string &line)
{
Map::iterator i = registry_.find(name);
UTIL_THROW_IF(i == registry_.end(), UnknownFeatureException, "Feature name " << name << " is not registered.");
i->second->Create(line);
}
void FeatureRegistry::PrintFF() const
{
vector<string> ffs;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first;
ffs.push_back(ffName);
}
vector<string>::const_iterator iterVec;
std::sort(ffs.begin(), ffs.end());
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
const string &ffName = *iterVec;
std::cerr << ffName << " ";
}
std::cerr << std::endl;
}
} // namespace Moses
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdlib.h>
#include <boost/shared_ptr.hpp>
#include "lm/binary_format.hh"
#include "lm/enumerate_vocab.hh"
#include "lm/left.hh"
#include "lm/model.hh"
#include "util/exception.hh"
#include "HybKen.h"
#include "Base.h"
#include "moses/FF/FFState.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/FactorCollection.h"
#include "moses/Phrase.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/ChartHypothesis.h"
#include "moses/Incremental.h"
#include "moses/UserMessage.h"
using namespace std;
namespace Moses
{
// Loads the KenLM model from 'file' and the word->tag mapping from 'fileM'.
// 'factorType' selects which factor of each Word is scored; 'lazy' chooses
// util::LAZY vs POPULATE_OR_READ as the load method.
template <class Model> LanguageModelHybKen<Model>::LanguageModelHybKen(const std::string &line, const std::string &file, const std::string& fileM, FactorType factorType, bool lazy)
// FIX: a dependent base class must be named with its template argument;
// plain "LanguageModelKen(line)" does not compile inside a class template.
// NOTE(review): confirm LanguageModelKen<Model> actually exposes a
// constructor taking only the feature line.
  :LanguageModelKen<Model>(line)
{
  // FIX: m_factorType is an inherited member (brought in via a
  // using-declaration in HybKen.h), so it cannot appear in this class's
  // mem-initializer list; assign it in the body instead.
  m_factorType = factorType;
  lm::ngram::Config config;
  IFVERBOSE(1) {
    config.messages = &std::cerr;
  }
  else {
    config.messages = NULL;
  }
  FactorCollection &collection = FactorCollection::Instance();
  // NOTE(review): m_lmIdLookup, m_ngram and m_beginSentenceFactor are
  // private in LanguageModelKen; they must be made protected (or the model
  // loading delegated to the base constructor) for this to compile.
  MappingBuilder builder(collection, m_lmIdLookup);
  config.enumerate_vocab = &builder;
  config.load_method = lazy ? util::LAZY : util::POPULATE_OR_READ;
  m_ngram.reset(new Model(file.c_str(), config));
  m_beginSentenceFactor = collection.AddFactor(BOS_);
  //load mapping file (fileMapping)
  LoadMapping(fileM, m_mapW2P);
}
// Private copy constructor (used for duplication, mirroring LanguageModelKen).
// NOTE(review): this initializer list is ill-formed as written — m_ngram,
// m_lmIdLookup, m_factorType and m_beginSentenceFactor are members of the
// base class LanguageModelKen<Model> and cannot be initialized here, and
// LanguageModel is not this class's direct base.  Consider delegating to
// LanguageModelKen<Model>'s copy constructor and copying only m_mapW2P —
// confirm the base copy constructor is accessible to derived classes.
template <class Model> LanguageModelHybKen<Model>::LanguageModelHybKen(const LanguageModelHybKen<Model> &copy_from)
:LanguageModel(copy_from.GetArgLine()),
m_ngram(copy_from.m_ngram),
// TODO: don't copy this.
m_lmIdLookup(copy_from.m_lmIdLookup),
m_factorType(copy_from.m_factorType),
m_beginSentenceFactor(copy_from.m_beginSentenceFactor),
m_mapW2P(copy_from.m_mapW2P)
{
}
// Maps a surface word to its tag via the word->tag table loaded from the
// mapping file; words with no entry pass through unchanged.
// FIX: this is a member of a class template, so the definition needs the
// "template <class Model>" prefix — without it the original did not compile.
// NOTE(review): the casts (std::string)word and (Word)iter->second assume
// conversions between Word and std::string exist; Moses' Word does not
// normally provide these — confirm, or convert via the word's factor string.
// NOTE(review): GetTag is called from const members (CalcScore etc.), so it
// should be declared const here and in HybKen.h.
template <class Model> Word LanguageModelHybKen<Model>::GetTag(const Word& word)
{
std::map<string, string>::iterator iter;
if ((iter = m_mapW2P.find((std::string)word)) != m_mapW2P.end())//found
return (Word)iter->second;
return word;//otherwise
}
// Scores 'phrase' with the wrapped KenLM model, passing every word through
// GetTag() before translating it to a KenLM vocabulary id.  Mirrors
// LanguageModelKen<Model>::CalcScore.
//   fullScore  - total LM score of the phrase (Moses-transformed)
//   ngramScore - score of the n-grams wholly inside the phrase (i.e. beyond
//                the first Order()-1 boundary words)
//   oovCount   - number of words that mapped to the unknown-word id (0)
// NOTE(review): GetTag() is declared non-const in HybKen.h; calling it from
// this const member will not compile until it is made const.
template <class Model> void LanguageModelHybKen<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;
oovCount = 0;
if (!phrase.GetSize()) return;
lm::ngram::ChartState discarded_sadly;
lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);
size_t position;
// <s> is scored as context, not as a word of the phrase.
if (m_beginSentenceFactor == GetTag(phrase.GetWord(0)).GetFactor(m_factorType)) {
scorer.BeginSentence();
position = 1;
} else {
position = 0;
}
size_t ngramBoundary = m_ngram->Order() - 1;
size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
// First loop: words whose n-gram context reaches outside the phrase.
for (; position < end_loop; ++position) {
const Word &word = GetTag(phrase.GetWord(position));
if (word.IsNonTerminal()) {
// Non-terminals break the n-gram chain: flush and restart the scorer.
fullScore += scorer.Finish();
scorer.Reset();
} else {
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
if (!index) ++oovCount;
}
}
// Everything scored so far depends on outside context; remember it so the
// context-independent part can be isolated below.
float before_boundary = fullScore + scorer.Finish();
for (; position < phrase.GetSize(); ++position) {
const Word &word = GetTag(phrase.GetWord(position));
if (word.IsNonTerminal()) {
fullScore += scorer.Finish();
scorer.Reset();
} else {
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
if (!index) ++oovCount;
}
}
fullScore += scorer.Finish();
ngramScore = TransformLMScore(fullScore - before_boundary);
fullScore = TransformLMScore(fullScore);
}
// Phrase-based decoding hook: scores the words added by 'hypo' given the LM
// state 'ps' of the previous hypothesis, adds the score to 'out', and
// returns the new LM state.  Identical to LanguageModelKen's version except
// that every word is passed through GetTag() first.
// NOTE(review): GetTag() must be const for these calls to compile (see
// HybKen.h); std::auto_ptr is deprecated but matches the surrounding code.
template <class Model> FFState *LanguageModelHybKen<Model>::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
{
const lm::ngram::State &in_state = static_cast<const KenLMState&>(*ps).state;
std::auto_ptr<KenLMState> ret(new KenLMState());
// Empty target side: LM state is unchanged.
if (!hypo.GetCurrTargetLength()) {
ret->state = in_state;
return ret.release();
}
const std::size_t begin = hypo.GetCurrTargetWordsRange().GetStartPos();
//[begin, end) in STL-like fashion.
const std::size_t end = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
// Only the first Order()-1 words need explicit state chaining.
const std::size_t adjust_end = std::min(end, begin + m_ngram->Order() - 1);
std::size_t position = begin;
typename Model::State aux_state;
// Two states are ping-ponged so each Score() call reads one and writes the other.
typename Model::State *state0 = &ret->state, *state1 = &aux_state;
float score = m_ngram->Score(in_state, TranslateID(GetTag(hypo.GetWord(position))), *state0);
++position;
for (; position < adjust_end; ++position) {
score += m_ngram->Score(*state0, TranslateID(GetTag(hypo.GetWord(position))), *state1);
std::swap(state0, state1);
}
if (hypo.IsSourceCompleted()) {
// Score end of sentence.
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
score += m_ngram->FullScoreForgotState(&indices.front(), last, m_ngram->GetVocabulary().EndSentence(), ret->state).prob;
} else if (adjust_end < end) {
// Get state after adding a long phrase.
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
m_ngram->GetState(&indices.front(), last, ret->state);
} else if (state0 != &ret->state) {
// Short enough phrase that we can just reuse the state.
ret->state = *state0;
}
score = TransformLMScore(score);
// With the OOV feature enabled the score vector is [lm, oov].
if (OOVFeatureEnabled()) {
std::vector<float> scores(2);
scores[0] = score;
scores[1] = 0.0;
out->PlusEquals(this, scores);
} else {
out->PlusEquals(this, score);
}
return ret.release();
}
// Chart (hierarchical) decoding hook: scores the target phrase of 'hypo',
// stitching in the previously computed chart states of its non-terminals,
// and returns the new chart LM state.  Words are passed through GetTag()
// before lookup, otherwise identical to LanguageModelKen's version.
template <class Model> FFState *LanguageModelHybKen<Model>::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const
{
LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM();
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
const TargetPhrase &target = hypo.GetCurrTargetPhrase();
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
target.GetAlignNonTerm().GetNonTermIndexMap();
const size_t size = hypo.GetCurrTargetPhrase().GetSize();
size_t phrasePos = 0;
// Special cases for first word.
if (size) {
const Word &word = GetTag(hypo.GetCurrTargetPhrase().GetWord(0));
if (word.GetFactor(m_factorType) == m_beginSentenceFactor) {
// Begin of sentence
ruleScore.BeginSentence();
phrasePos++;
} else if (word.IsNonTerminal()) {
// Non-terminal is first so we can copy instead of rescoring.
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetFFState(featureID))->GetChartState();
// The sub-derivation's LM score is recovered from its score breakdown.
float prob = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
ruleScore.BeginNonTerminal(prevState, prob);
phrasePos++;
}
}
for (; phrasePos < size; phrasePos++) {
const Word &word = GetTag(hypo.GetCurrTargetPhrase().GetWord(phrasePos));
if (word.IsNonTerminal()) {
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetFFState(featureID))->GetChartState();
float prob = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
ruleScore.NonTerminal(prevState, prob);
} else {
ruleScore.Terminal(TranslateID(word));
}
}
float score = ruleScore.Finish();
score = TransformLMScore(score);
// Assign (not PlusEquals): the rule score replaces the sub-derivation scores.
accumulator->Assign(this, score);
return newState;
}
// Debug/trace output: for each word of 'phrase' (mapped through GetTag()),
// prints "ngram_length:score" pairs — and ":unk" for OOVs — in the form
// "|lm=(...)| ", starting from the begin-of-sentence state.
template <class Model> void LanguageModelHybKen<Model>::ReportHistoryOrder(std::ostream &out, const Phrase &phrase) const
{
out << "|lm=(";
if (!phrase.GetSize()) return;
typename Model::State aux_state;
typename Model::State start_of_sentence_state = m_ngram->BeginSentenceState();
// Ping-pong between two states: FullScore reads *state0, writes *state1.
typename Model::State *state0 = &start_of_sentence_state;
typename Model::State *state1 = &aux_state;
for (std::size_t position=0; position<phrase.GetSize(); position++) {
const lm::WordIndex idx = TranslateID(GetTag(phrase.GetWord(position)));
lm::FullScoreReturn ret(m_ngram->FullScore(*state0, idx, *state1));
if (position) out << ",";
out << (int) ret.ngram_length << ":" << TransformLMScore(ret.prob);
if (idx == 0) out << ":unk";
std::swap(state0, state1);
}
out << ")| ";
}
/**
 * Parse a feature line of the form
 *   "HKENLM factor=<i> order=<n> path=<lm> pathM=<mapping> lazyken=<0|1>"
 * and build the hybrid KenLM feature.  "order" is accepted but ignored (the
 * model file determines it) and "name" is consumed by the LM base class;
 * other unknown keys are silently skipped.
 */
LanguageModel *ConstructHybKenLM(const std::string &line)
{
  FactorType factor = 0;
  std::string lmFile;
  std::string mapFile = "";
  bool loadLazily = false;
  std::vector<std::string> tokens = Tokenize(line);
  for (size_t tok = 1; tok < tokens.size(); ++tok) {
    std::vector<std::string> keyValue = Tokenize(tokens[tok], "=");
    UTIL_THROW_IF2(keyValue.size() != 2,
                   "Incorrect format of KenLM property: " << tokens[tok]);
    const std::string &key = keyValue[0];
    const std::string &value = keyValue[1];
    if (key == "factor") {
      factor = Scan<FactorType>(value);
    } else if (key == "order") {
      // the model file determines the order; nothing to do
    } else if (key == "path") {
      lmFile = value;
    } else if (key == "pathM") {
      mapFile = value;
    } else if (key == "lazyken") {
      loadLazily = Scan<bool>(value);
    } else if (key == "name") {
      // that's ok. do nothing, passes onto LM constructor
    }
  }
  return ConstructHybKenLM(line, lmFile, mapFile, factor, loadLazily);
}
/**
 * Load the LM file, sniffing its binary format to select the matching KenLM
 * model template, and return the corresponding hybrid LM wrapper.  Files not
 * recognized as binary (e.g. ARPA text) are loaded with the probing model.
 */
LanguageModel *ConstructHybKenLM(const std::string &line, const std::string &file, const std::string &fileM, FactorType factorType, bool lazy)
{
  lm::ngram::ModelType model_type;
  if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
    switch(model_type) {
      // FIX: instantiate the class template LanguageModelHybKen, not the
      // factory function ConstructHybKenLM — 'new' on a function template is
      // invalid, which is why this translation unit failed to build and left
      // ConstructHybKenLM(line) unresolved at link time.
    case lm::ngram::PROBING:
      return new LanguageModelHybKen<lm::ngram::ProbingModel>(line, file, fileM, factorType, lazy);
    case lm::ngram::REST_PROBING:
      return new LanguageModelHybKen<lm::ngram::RestProbingModel>(line, file, fileM, factorType, lazy);
    case lm::ngram::TRIE:
      return new LanguageModelHybKen<lm::ngram::TrieModel>(line, file, fileM, factorType, lazy);
    case lm::ngram::QUANT_TRIE:
      return new LanguageModelHybKen<lm::ngram::QuantTrieModel>(line, file, fileM, factorType, lazy);
    case lm::ngram::ARRAY_TRIE:
      return new LanguageModelHybKen<lm::ngram::ArrayTrieModel>(line, file, fileM, factorType, lazy);
    case lm::ngram::QUANT_ARRAY_TRIE:
      return new LanguageModelHybKen<lm::ngram::QuantArrayTrieModel>(line, file, fileM, factorType, lazy);
    default:
      UTIL_THROW2("Unrecognized kenlm model type " << model_type);
    }
  } else {
    return new LanguageModelHybKen<lm::ngram::ProbingModel>(line, file, fileM, factorType, lazy);
  }
}
/**
 * Load the word->tag mapping used by the hybrid LM.
 *
 * @param f path to a whitespace-separated two-column file ("word tag" per
 *          line).  An empty path is a no-op; an unreadable file leaves m
 *          empty.
 * @param m output map, cleared before loading.  Lines that do not contain
 *          exactly two tokens are skipped.
 */
void LoadMapping(const std::string &f, std::map<std::string, std::string>& m)
{
  if (f.empty()) return;
  m.clear();
  std::ifstream inpf(f.c_str(), std::ios::in | std::ios::binary);
  if (!inpf.is_open()) {
    return;
  }
  // FIX: drive the loop with getline() itself instead of testing eof()
  // first — the old while(!eof()) pattern can process the final line twice
  // (pre-C++11 getline leaves the buffer untouched on failure) and spins
  // forever if the stream enters a fail state without reaching EOF.
  std::string line;
  while (std::getline(inpf, line)) {
    std::istringstream fields(line);
    std::string word, tag, extra;
    // Accept only lines with exactly two whitespace-separated tokens,
    // matching the original Tokenize(line).size() == 2 check.
    if ((fields >> word >> tag) && !(fields >> extra)) {
      m.insert(std::make_pair(word, tag));
    }
  }
}
}
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelHybKen_h
#define moses_LanguageModelHybKen_h
//#include <string>
//#include <boost/shared_ptr.hpp>
//#include "lm/word_index.hh"
//#include "moses/LM/Base.h"
//#include "moses/Hypothesis.h"
//#include "moses/TypeDef.h"
//#include "moses/Word.h"
#include "moses/LM/Ken.h"
namespace Moses
{
LanguageModel *ConstructHybKenLM(const std::string &line);
//! This will also load. Returns a templated KenLM class
LanguageModel *ConstructHybKenLM(const std::string &line, const std::string &file, const std::string &fileM, FactorType factorType, bool lazy);
void LoadMapping(const std::string &f, std::map<std::string, std::string>& m);
/*
* An implementation of single factor LM using Kenneth's code.
*/
// Hybrid KenLM wrapper: behaves like LanguageModelKen<Model> but maps each
// word through a word->tag table (loaded from the "pathM" file) before
// scoring, via GetTag().
template <class Model> class LanguageModelHybKen : public LanguageModelKen<Model>
{
public:
// line: full feature line; file: LM file; fileM: word->tag mapping file;
// factorType: factor to score; lazy: mmap lazily vs populate on load.
LanguageModelHybKen(const std::string &line, const std::string &file, const std::string &fileM, FactorType factorType, bool lazy);
//virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
//virtual void IncrementalCallback(Incremental::Manager &manager) const;
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const;
//virtual bool IsUseable(const FactorMask &mask) const;
protected:
// These lines are required to make the parent class's protected members visible to this class
// NOTE(review): this only works if the members really are protected in
// LanguageModelKen — in stock Moses several of them are private.
using LanguageModelKen<Model>::m_ngram;
using LanguageModelKen<Model>::m_beginSentenceFactor;
using LanguageModelKen<Model>::m_factorType;
using LanguageModelKen<Model>::TranslateID;
private:
LanguageModelHybKen(const LanguageModelHybKen<Model> &copy_from);
//std::vector<lm::WordIndex> m_lmIdLookup;
// Word -> tag lookup; returns the word unchanged when no mapping exists.
// NOTE(review): called from the const members above, so it should be
// declared const (and the definition in HybKen.cpp updated to match).
Word GetTag(const Word& word);
public:
// word -> tag table loaded by LoadMapping(); public exposure looks
// unintentional — consider making it private.
std::map<std::string, std::string> m_mapW2P;
};
} // namespace Moses
#endif
_______________________________________________
Moses-support mailing list
[email protected]
http://mailman.mit.edu/mailman/listinfo/moses-support