Hi,
I would like to add a new simple LM class named LanguageModelHybKen (in
HybKen.h and HybKen.cpp) which will inherit from LanguageModelKen.
In Factory.cpp, I added as follows:
...
//#include "moses/LM/Ken.h"
#include "moses/LM/HybKen.h"
...
class KenFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(ConstructKenLM(line));
}
};
class HybKenFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(ConstructHybKenLM(line));
}
};
...
Add("KENLM", new KenFactory());
Add("HKENLM", new HybKenFactory());
...
I've created HybKen.h as follows:
#ifndef moses_LanguageModelHybKen_h
#define moses_LanguageModelHybKen_h
//#include <string>
//#include <boost/shared_ptr.hpp>
//#include "lm/word_index.hh"
//#include "moses/LM/Base.h"
//#include "moses/Hypothesis.h"
//#include "moses/TypeDef.h"
//#include "moses/Word.h"
#include "moses/LM/Ken.h"
namespace Moses
{
LanguageModel *ConstructHybKenLM(const std::string &line);
//! This will also load. Returns a templated KenLM class
LanguageModel *ConstructHybKenLM(const std::string &line, const std::string
&file, const std::string &fileM, FactorType factorType, bool lazy);
void LoadMapping(const std::string &f, std::map<std::string, std::string>&
m);
/*
* An implementation of single factor LM using Kenneth's code.
*/
template <class Model> class LanguageModelHybKen : public
LanguageModelKen<Model>
{
...
Factory.cpp, HybKen.h and HybKen.cpp are attached for your reference.
But I always got the compilation error message: "*moses/FF/Factory.cpp:166:
error: undefined reference to 'Moses::ConstructHybKenLM(std::string const&)*
'".
As far as I can tell, Moses::ConstructHybKenLM(std::string const&) is already
defined inside the Moses namespace (in HybKen.cpp), so I do not understand why
the linker cannot find it.
May I ask for your help?
Thank you!
--
Cheers,
Vu
#include "moses/FF/Factory.h"
#include "moses/StaticData.h"
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
#include "moses/FF/LexicalReordering/LexicalReordering.h"
#include "moses/FF/BleuScoreFeature.h"
#include "moses/FF/TargetWordInsertionFeature.h"
#include "moses/FF/SourceWordDeletionFeature.h"
#include "moses/FF/GlobalLexicalModel.h"
#include "moses/FF/GlobalLexicalModelUnlimited.h"
#include "moses/FF/UnknownWordPenaltyProducer.h"
#include "moses/FF/WordTranslationFeature.h"
#include "moses/FF/TargetBigramFeature.h"
#include "moses/FF/TargetNgramFeature.h"
#include "moses/FF/PhraseBoundaryFeature.h"
#include "moses/FF/PhrasePairFeature.h"
#include "moses/FF/PhraseLengthFeature.h"
#include "moses/FF/DistortionScoreProducer.h"
#include "moses/FF/SparseHieroReorderingFeature.h"
#include "moses/FF/WordPenaltyProducer.h"
#include "moses/FF/InputFeature.h"
#include "moses/FF/PhrasePenalty.h"
#include "moses/FF/OSM-Feature/OpSequenceModel.h"
#include "moses/FF/ControlRecombination.h"
#include "moses/FF/ExternalFeature.h"
#include "moses/FF/ConstrainedDecoding.h"
#include "moses/FF/CoveredReferenceFeature.h"
#include "moses/FF/TreeStructureFeature.h"
#include "moses/FF/SoftMatchingFeature.h"
#include "moses/FF/SourceGHKMTreeInputMatchFeature.h"
#include "moses/FF/HyperParameterAsWeight.h"
#include "moses/FF/SetSourcePhrase.h"
#include "CountNonTerms.h"
#include "ReferenceComparison.h"
#include "RuleScope.h"
#include "MaxSpanFreeNonTermSource.h"
#include "NieceTerminal.h"
#include "SpanLength.h"
#include "SyntaxRHS.h"
#include "SkeletonChangeInput.h"
#include "moses/FF/SkeletonStatelessFF.h"
#include "moses/FF/SkeletonStatefulFF.h"
#include "moses/LM/SkeletonLM.h"
#include "moses/TranslationModel/SkeletonPT.h"
#ifdef HAVE_CMPH
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
#endif
#ifdef PT_UG
#include "moses/TranslationModel/UG/mmsapt.h"
#endif
#ifdef HAVE_PROBINGPT
#include "moses/TranslationModel/ProbingPT/ProbingPT.h"
#endif
//#include "moses/LM/Ken.h"
#include "moses/LM/HybKen.h"
#ifdef LM_IRST
#include "moses/LM/IRST.h"
#endif
#ifdef LM_SRI
#include "moses/LM/SRI.h"
#endif
#ifdef LM_MAXENT_SRI
#include "moses/LM/MaxEntSRI.h"
#endif
#ifdef LM_RAND
#include "moses/LM/Rand.h"
#endif
#ifdef HAVE_SYNLM
#include "moses/SyntacticLanguageModel.h"
#endif
#ifdef LM_NEURAL
#include "moses/LM/NeuralLMWrapper.h"
#endif
#ifdef LM_DALM
#include "moses/LM/DALMWrapper.h"
#endif
#ifdef LM_LBL
#include "moses/LM/oxlm/LBLLM.h"
#endif
#include "ExampleSLFF.h"
#include "ExampleSFFF.h"
#include "util/exception.hh"
#include <vector>
namespace Moses
{
// Abstract factory for feature functions.  Each concrete subclass knows how
// to build one family of feature functions from its moses.ini line.
class FeatureFactory
{
public:
virtual ~FeatureFactory() {}
// Construct the feature described by 'line' and register it with StaticData
// (concrete factories call DefaultSetup on the newly built feature).
virtual void Create(const std::string &line) = 0;
protected:
// Registers a newly constructed feature and wires up its weights.
template <class F> static void DefaultSetup(F *feature);
FeatureFactory() {}
};
// Registers a freshly constructed feature function with StaticData and
// attaches its weights.  Weights come from the ini file when present;
// otherwise, for features that have score components, the feature's own
// DefaultWeights() are used.
template <class F> void FeatureFactory::DefaultSetup(F *feature)
{
StaticData &static_data = StaticData::InstanceNonConst();
const string &featureName = feature->GetScoreProducerDescription();
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
if (feature->IsTuneable() || weights.size()) {
// if it's tuneable, the ini file MUST have weights
// even if it's not tuneable, people can still set the weights in the ini file
static_data.SetWeights(feature, weights);
} else if (feature->GetNumScoreComponents() > 0) {
std::vector<float> defaultWeights = feature->DefaultWeights();
static_data.SetWeights(feature, defaultWeights);
}
}
namespace
{
// Generic factory: constructs F directly from the ini line.
template <class F> class DefaultFeatureFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(new F(line));
}
};
// KenLM uses a factory function (ConstructKenLM) instead of a direct
// constructor because the concrete templated class is chosen at load time
// from the model file's binary format.
class KenFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(ConstructKenLM(line));
}
};
// Same pattern for the hybrid KenLM wrapper declared in moses/LM/HybKen.h.
class HybKenFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(ConstructHybKenLM(line));
}
};
} // namespace
// Populates the registry with every feature function known to this build.
// MOSES_FNAME registers a feature under its class name, MOSES_FNAME2 under an
// explicit ini-file name.  Optional features are guarded by the same
// preprocessor symbols that guard their headers at the top of this file.
FeatureRegistry::FeatureRegistry()
{
// Feature with same name as class
#define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >());
// Feature with different name than class.
#define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >());
MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor);
MOSES_FNAME(PhraseDictionaryOnDisk);
MOSES_FNAME(PhraseDictionaryMemory);
MOSES_FNAME(PhraseDictionaryScope3);
MOSES_FNAME(PhraseDictionaryMultiModel);
MOSES_FNAME(PhraseDictionaryMultiModelCounts);
MOSES_FNAME(PhraseDictionaryALSuffixArray);
MOSES_FNAME(PhraseDictionaryDynSuffixArray);
MOSES_FNAME(PhraseDictionaryTransliteration);
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
MOSES_FNAME(GlobalLexicalModel);
//MOSES_FNAME(GlobalLexicalModelUnlimited); This was commented out in the original
MOSES_FNAME(SourceWordDeletionFeature);
MOSES_FNAME(TargetWordInsertionFeature);
MOSES_FNAME(PhraseBoundaryFeature);
MOSES_FNAME(PhraseLengthFeature);
MOSES_FNAME(WordTranslationFeature);
MOSES_FNAME(TargetBigramFeature);
MOSES_FNAME(TargetNgramFeature);
MOSES_FNAME(PhrasePairFeature);
MOSES_FNAME(LexicalReordering);
MOSES_FNAME2("Generation", GenerationDictionary);
MOSES_FNAME(BleuScoreFeature);
MOSES_FNAME2("Distortion", DistortionScoreProducer);
MOSES_FNAME2("WordPenalty", WordPenaltyProducer);
MOSES_FNAME(InputFeature);
MOSES_FNAME(OpSequenceModel);
MOSES_FNAME(PhrasePenalty);
MOSES_FNAME2("UnknownWordPenalty", UnknownWordPenaltyProducer);
MOSES_FNAME(ControlRecombination);
MOSES_FNAME(ConstrainedDecoding);
MOSES_FNAME(CoveredReferenceFeature);
MOSES_FNAME(ExternalFeature);
MOSES_FNAME(SourceGHKMTreeInputMatchFeature);
MOSES_FNAME(TreeStructureFeature);
MOSES_FNAME(SoftMatchingFeature);
MOSES_FNAME(HyperParameterAsWeight);
MOSES_FNAME(SetSourcePhrase);
MOSES_FNAME(CountNonTerms);
MOSES_FNAME(ReferenceComparison);
MOSES_FNAME(RuleScope);
MOSES_FNAME(MaxSpanFreeNonTermSource);
MOSES_FNAME(NieceTerminal);
MOSES_FNAME(SparseHieroReorderingFeature);
MOSES_FNAME(SpanLength);
MOSES_FNAME(SyntaxRHS);
MOSES_FNAME(SkeletonChangeInput);
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);
MOSES_FNAME(SkeletonLM);
MOSES_FNAME(SkeletonPT);
MOSES_FNAME2("ExampleSLFF", ExampleSLFF);//stateless feature function example
MOSES_FNAME2("ExampleSFFF", ExampleSFFF);//stateful feature function example
// Optional components below depend on build-time flags.
#ifdef HAVE_CMPH
MOSES_FNAME(PhraseDictionaryCompact);
#endif
#ifdef PT_UG
MOSES_FNAME(Mmsapt);
MOSES_FNAME2("PhraseDictionaryBitextSampling",Mmsapt); // that's an alias for Mmsapt!
#endif
#ifdef HAVE_PROBINGPT
MOSES_FNAME(ProbingPT);
#endif
#ifdef HAVE_SYNLM
MOSES_FNAME(SyntacticLanguageModel);
#endif
#ifdef LM_IRST
MOSES_FNAME2("IRSTLM", LanguageModelIRST);
#endif
#ifdef LM_SRI
MOSES_FNAME2("SRILM", LanguageModelSRI);
#endif
#ifdef LM_MAXENT_SRI
MOSES_FNAME2("MaxEntLM", LanguageModelMaxEntSRI);
#endif
#ifdef LM_RAND
MOSES_FNAME2("RANDLM", LanguageModelRandLM);
#endif
#ifdef LM_NEURAL
MOSES_FNAME2("NeuralLM", NeuralLMWrapper);
#endif
#ifdef LM_DALM
MOSES_FNAME2("DALM", LanguageModelDALM);
#endif
#ifdef LM_LBL
MOSES_FNAME2("LBLLM-LM", LBLLM<oxlm::LM>);
MOSES_FNAME2("LBLLM-FactoredLM", LBLLM<oxlm::FactoredLM>);
MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM<oxlm::FactoredMaxentLM>);
#endif
// KenLM-based LMs go through dedicated factories (see anonymous namespace
// above) because the concrete model class is chosen at load time.
Add("KENLM", new KenFactory());
Add("HKENLM", new HybKenFactory());
}
// Nothing to do: registry_ holds boost::shared_ptr owners which release the
// factories automatically.
FeatureRegistry::~FeatureRegistry()
{
}
// Register 'factory' under 'name', taking ownership of the pointer.
// Throws (via UTIL_THROW_IF2) if the name is already registered; the
// shared_ptr still owns the factory while the exception unwinds.
void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory)
{
  boost::shared_ptr<FeatureFactory> owned(factory);
  const bool inserted = registry_.insert(std::make_pair(name, owned)).second;
  UTIL_THROW_IF2(!inserted, "Duplicate feature name " << name);
}
namespace
{
// Thrown by FeatureRegistry::Construct when no factory is registered for a name.
class UnknownFeatureException : public util::Exception {};
}
void FeatureRegistry::Construct(const std::string &name, const std::string &line)
{
Map::iterator i = registry_.find(name);
UTIL_THROW_IF(i == registry_.end(), UnknownFeatureException, "Feature name " << name << " is not registered.");
i->second->Create(line);
}
void FeatureRegistry::PrintFF() const
{
vector<string> ffs;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first;
ffs.push_back(ffName);
}
vector<string>::const_iterator iterVec;
std::sort(ffs.begin(), ffs.end());
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
const string &ffName = *iterVec;
std::cerr << ffName << " ";
}
std::cerr << std::endl;
}
} // namespace Moses
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdlib.h>
#include <boost/shared_ptr.hpp>
#include "lm/binary_format.hh"
#include "lm/enumerate_vocab.hh"
#include "lm/left.hh"
#include "lm/model.hh"
#include "util/exception.hh"
#include "HybKen.h"
#include "Base.h"
#include "moses/FF/FFState.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/FactorCollection.h"
#include "moses/Phrase.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "moses/ChartHypothesis.h"
#include "moses/Incremental.h"
#include "moses/UserMessage.h"
using namespace std;
namespace Moses
{
// Loads the KenLM model from 'file' and the word->tag mapping from 'fileM'.
// 'factorType' selects which factor of each Word is scored; 'lazy' chooses
// util::LAZY vs POPULATE_OR_READ as the load method.
template <class Model> LanguageModelHybKen<Model>::LanguageModelHybKen(const std::string &line, const std::string &file, const std::string& fileM, FactorType factorType, bool lazy)
// FIX: a dependent base class must be named with its template argument;
// plain "LanguageModelKen(line)" does not compile inside a class template.
// NOTE(review): confirm LanguageModelKen<Model> actually exposes a
// constructor taking only the feature line.
  :LanguageModelKen<Model>(line)
{
  // FIX: m_factorType is an inherited member (brought in via a
  // using-declaration in HybKen.h), so it cannot appear in this class's
  // mem-initializer list; assign it in the body instead.
  m_factorType = factorType;
  lm::ngram::Config config;
  IFVERBOSE(1) {
    config.messages = &std::cerr;
  }
  else {
    config.messages = NULL;
  }
  FactorCollection &collection = FactorCollection::Instance();
  // NOTE(review): m_lmIdLookup, m_ngram and m_beginSentenceFactor are
  // private in LanguageModelKen; they must be made protected (or the model
  // loading delegated to the base constructor) for this to compile.
  MappingBuilder builder(collection, m_lmIdLookup);
  config.enumerate_vocab = &builder;
  config.load_method = lazy ? util::LAZY : util::POPULATE_OR_READ;
  m_ngram.reset(new Model(file.c_str(), config));
  m_beginSentenceFactor = collection.AddFactor(BOS_);
  //load mapping file (fileMapping)
  LoadMapping(fileM, m_mapW2P);
}
// Private copy constructor (used for duplication, mirroring LanguageModelKen).
// NOTE(review): this initializer list is ill-formed as written — m_ngram,
// m_lmIdLookup, m_factorType and m_beginSentenceFactor are members of the
// base class LanguageModelKen<Model> and cannot be initialized here, and
// LanguageModel is not this class's direct base.  Consider delegating to
// LanguageModelKen<Model>'s copy constructor and copying only m_mapW2P —
// confirm the base copy constructor is accessible to derived classes.
template <class Model> LanguageModelHybKen<Model>::LanguageModelHybKen(const LanguageModelHybKen<Model> &copy_from)
:LanguageModel(copy_from.GetArgLine()),
m_ngram(copy_from.m_ngram),
// TODO: don't copy this.
m_lmIdLookup(copy_from.m_lmIdLookup),
m_factorType(copy_from.m_factorType),
m_beginSentenceFactor(copy_from.m_beginSentenceFactor),
m_mapW2P(copy_from.m_mapW2P)
{
}
// Maps a surface word to its tag via the word->tag table loaded from the
// mapping file; words with no entry pass through unchanged.
// FIX: this is a member of a class template, so the definition needs the
// "template <class Model>" prefix — without it the original did not compile.
// NOTE(review): the casts (std::string)word and (Word)iter->second assume
// conversions between Word and std::string exist; Moses' Word does not
// normally provide these — confirm, or convert via the word's factor string.
// NOTE(review): GetTag is called from const members (CalcScore etc.), so it
// should be declared const here and in HybKen.h.
template <class Model> Word LanguageModelHybKen<Model>::GetTag(const Word& word)
{
std::map<string, string>::iterator iter;
if ((iter = m_mapW2P.find((std::string)word)) != m_mapW2P.end())//found
return (Word)iter->second;
return word;//otherwise
}
// Scores 'phrase' with the wrapped KenLM model, passing every word through
// GetTag() before translating it to a KenLM vocabulary id.  Mirrors
// LanguageModelKen<Model>::CalcScore.
//   fullScore  - total LM score of the phrase (Moses-transformed)
//   ngramScore - score of the n-grams wholly inside the phrase (i.e. beyond
//                the first Order()-1 boundary words)
//   oovCount   - number of words that mapped to the unknown-word id (0)
// NOTE(review): GetTag() is declared non-const in HybKen.h; calling it from
// this const member will not compile until it is made const.
template <class Model> void LanguageModelHybKen<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;
oovCount = 0;
if (!phrase.GetSize()) return;
lm::ngram::ChartState discarded_sadly;
lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);
size_t position;
// <s> is scored as context, not as a word of the phrase.
if (m_beginSentenceFactor == GetTag(phrase.GetWord(0)).GetFactor(m_factorType)) {
scorer.BeginSentence();
position = 1;
} else {
position = 0;
}
size_t ngramBoundary = m_ngram->Order() - 1;
size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
// First loop: words whose n-gram context reaches outside the phrase.
for (; position < end_loop; ++position) {
const Word &word = GetTag(phrase.GetWord(position));
if (word.IsNonTerminal()) {
// Non-terminals break the n-gram chain: flush and restart the scorer.
fullScore += scorer.Finish();
scorer.Reset();
} else {
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
if (!index) ++oovCount;
}
}
// Everything scored so far depends on outside context; remember it so the
// context-independent part can be isolated below.
float before_boundary = fullScore + scorer.Finish();
for (; position < phrase.GetSize(); ++position) {
const Word &word = GetTag(phrase.GetWord(position));
if (word.IsNonTerminal()) {
fullScore += scorer.Finish();
scorer.Reset();
} else {
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
if (!index) ++oovCount;
}
}
fullScore += scorer.Finish();
ngramScore = TransformLMScore(fullScore - before_boundary);
fullScore = TransformLMScore(fullScore);
}
// Phrase-based decoding hook: scores the words added by 'hypo' given the LM
// state 'ps' of the previous hypothesis, adds the score to 'out', and
// returns the new LM state.  Identical to LanguageModelKen's version except
// that every word is passed through GetTag() first.
// NOTE(review): GetTag() must be const for these calls to compile (see
// HybKen.h); std::auto_ptr is deprecated but matches the surrounding code.
template <class Model> FFState *LanguageModelHybKen<Model>::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
{
const lm::ngram::State &in_state = static_cast<const KenLMState&>(*ps).state;
std::auto_ptr<KenLMState> ret(new KenLMState());
// Empty target side: LM state is unchanged.
if (!hypo.GetCurrTargetLength()) {
ret->state = in_state;
return ret.release();
}
const std::size_t begin = hypo.GetCurrTargetWordsRange().GetStartPos();
//[begin, end) in STL-like fashion.
const std::size_t end = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
// Only the first Order()-1 words need explicit state chaining.
const std::size_t adjust_end = std::min(end, begin + m_ngram->Order() - 1);
std::size_t position = begin;
typename Model::State aux_state;
// Two states are ping-ponged so each Score() call reads one and writes the other.
typename Model::State *state0 = &ret->state, *state1 = &aux_state;
float score = m_ngram->Score(in_state, TranslateID(GetTag(hypo.GetWord(position))), *state0);
++position;
for (; position < adjust_end; ++position) {
score += m_ngram->Score(*state0, TranslateID(GetTag(hypo.GetWord(position))), *state1);
std::swap(state0, state1);
}
if (hypo.IsSourceCompleted()) {
// Score end of sentence.
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
score += m_ngram->FullScoreForgotState(&indices.front(), last, m_ngram->GetVocabulary().EndSentence(), ret->state).prob;
} else if (adjust_end < end) {
// Get state after adding a long phrase.
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
m_ngram->GetState(&indices.front(), last, ret->state);
} else if (state0 != &ret->state) {
// Short enough phrase that we can just reuse the state.
ret->state = *state0;
}
score = TransformLMScore(score);
// With the OOV feature enabled the score vector is [lm, oov].
if (OOVFeatureEnabled()) {
std::vector<float> scores(2);
scores[0] = score;
scores[1] = 0.0;
out->PlusEquals(this, scores);
} else {
out->PlusEquals(this, score);
}
return ret.release();
}
// Chart (hierarchical) decoding hook: scores the target phrase of 'hypo',
// stitching in the previously computed chart states of its non-terminals,
// and returns the new chart LM state.  Words are passed through GetTag()
// before lookup, otherwise identical to LanguageModelKen's version.
template <class Model> FFState *LanguageModelHybKen<Model>::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const
{
LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM();
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
const TargetPhrase &target = hypo.GetCurrTargetPhrase();
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
target.GetAlignNonTerm().GetNonTermIndexMap();
const size_t size = hypo.GetCurrTargetPhrase().GetSize();
size_t phrasePos = 0;
// Special cases for first word.
if (size) {
const Word &word = GetTag(hypo.GetCurrTargetPhrase().GetWord(0));
if (word.GetFactor(m_factorType) == m_beginSentenceFactor) {
// Begin of sentence
ruleScore.BeginSentence();
phrasePos++;
} else if (word.IsNonTerminal()) {
// Non-terminal is first so we can copy instead of rescoring.
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetFFState(featureID))->GetChartState();
// The sub-derivation's LM score is recovered from its score breakdown.
float prob = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
ruleScore.BeginNonTerminal(prevState, prob);
phrasePos++;
}
}
for (; phrasePos < size; phrasePos++) {
const Word &word = GetTag(hypo.GetCurrTargetPhrase().GetWord(phrasePos));
if (word.IsNonTerminal()) {
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetFFState(featureID))->GetChartState();
float prob = UntransformLMScore(prevHypo->GetScoreBreakdown().GetScoresForProducer(this)[0]);
ruleScore.NonTerminal(prevState, prob);
} else {
ruleScore.Terminal(TranslateID(word));
}
}
float score = ruleScore.Finish();
score = TransformLMScore(score);
// Assign (not PlusEquals): the rule score replaces the sub-derivation scores.
accumulator->Assign(this, score);
return newState;
}
// Debug/trace output: for each word of 'phrase' (mapped through GetTag()),
// prints "ngram_length:score" pairs — and ":unk" for OOVs — in the form
// "|lm=(...)| ", starting from the begin-of-sentence state.
template <class Model> void LanguageModelHybKen<Model>::ReportHistoryOrder(std::ostream &out, const Phrase &phrase) const
{
out << "|lm=(";
if (!phrase.GetSize()) return;
typename Model::State aux_state;
typename Model::State start_of_sentence_state = m_ngram->BeginSentenceState();
// Ping-pong between two states: FullScore reads *state0, writes *state1.
typename Model::State *state0 = &start_of_sentence_state;
typename Model::State *state1 = &aux_state;
for (std::size_t position=0; position<phrase.GetSize(); position++) {
const lm::WordIndex idx = TranslateID(GetTag(phrase.GetWord(position)));
lm::FullScoreReturn ret(m_ngram->FullScore(*state0, idx, *state1));
if (position) out << ",";
out << (int) ret.ngram_length << ":" << TransformLMScore(ret.prob);
if (idx == 0) out << ":unk";
std::swap(state0, state1);
}
out << ")| ";
}
/**
 * Parse a feature line of the form
 *   "HKENLM factor=<i> order=<n> path=<lm> pathM=<mapping> lazyken=<0|1>"
 * and build the hybrid KenLM feature.  "order" is accepted but ignored (the
 * model file determines it) and "name" is consumed by the LM base class;
 * other unknown keys are silently skipped.
 */
LanguageModel *ConstructHybKenLM(const std::string &line)
{
  FactorType factor = 0;
  std::string lmFile;
  std::string mapFile = "";
  bool loadLazily = false;
  std::vector<std::string> tokens = Tokenize(line);
  for (size_t tok = 1; tok < tokens.size(); ++tok) {
    std::vector<std::string> keyValue = Tokenize(tokens[tok], "=");
    UTIL_THROW_IF2(keyValue.size() != 2,
                   "Incorrect format of KenLM property: " << tokens[tok]);
    const std::string &key = keyValue[0];
    const std::string &value = keyValue[1];
    if (key == "factor") {
      factor = Scan<FactorType>(value);
    } else if (key == "order") {
      // the model file determines the order; nothing to do
    } else if (key == "path") {
      lmFile = value;
    } else if (key == "pathM") {
      mapFile = value;
    } else if (key == "lazyken") {
      loadLazily = Scan<bool>(value);
    } else if (key == "name") {
      // that's ok. do nothing, passes onto LM constructor
    }
  }
  return ConstructHybKenLM(line, lmFile, mapFile, factor, loadLazily);
}
/**
 * Load the LM file, sniffing its binary format to select the matching KenLM
 * model template, and return the corresponding hybrid LM wrapper.  Files not
 * recognized as binary (e.g. ARPA text) are loaded with the probing model.
 */
LanguageModel *ConstructHybKenLM(const std::string &line, const std::string &file, const std::string &fileM, FactorType factorType, bool lazy)
{
  lm::ngram::ModelType model_type;
  if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
    switch(model_type) {
      // FIX: instantiate the class template LanguageModelHybKen, not the
      // factory function ConstructHybKenLM — 'new' on a function template is
      // invalid, which is why this translation unit failed to build and left
      // ConstructHybKenLM(line) unresolved at link time.
    case lm::ngram::PROBING:
      return new LanguageModelHybKen<lm::ngram::ProbingModel>(line, file, fileM, factorType, lazy);
    case lm::ngram::REST_PROBING:
      return new LanguageModelHybKen<lm::ngram::RestProbingModel>(line, file, fileM, factorType, lazy);
    case lm::ngram::TRIE:
      return new LanguageModelHybKen<lm::ngram::TrieModel>(line, file, fileM, factorType, lazy);
    case lm::ngram::QUANT_TRIE:
      return new LanguageModelHybKen<lm::ngram::QuantTrieModel>(line, file, fileM, factorType, lazy);
    case lm::ngram::ARRAY_TRIE:
      return new LanguageModelHybKen<lm::ngram::ArrayTrieModel>(line, file, fileM, factorType, lazy);
    case lm::ngram::QUANT_ARRAY_TRIE:
      return new LanguageModelHybKen<lm::ngram::QuantArrayTrieModel>(line, file, fileM, factorType, lazy);
    default:
      UTIL_THROW2("Unrecognized kenlm model type " << model_type);
    }
  } else {
    return new LanguageModelHybKen<lm::ngram::ProbingModel>(line, file, fileM, factorType, lazy);
  }
}
/**
 * Load the word->tag mapping used by the hybrid LM.
 *
 * @param f path to a whitespace-separated two-column file ("word tag" per
 *          line).  An empty path is a no-op; an unreadable file leaves m
 *          empty.
 * @param m output map, cleared before loading.  Lines that do not contain
 *          exactly two tokens are skipped.
 */
void LoadMapping(const std::string &f, std::map<std::string, std::string>& m)
{
  if (f.empty()) return;
  m.clear();
  std::ifstream inpf(f.c_str(), std::ios::in | std::ios::binary);
  if (!inpf.is_open()) {
    return;
  }
  // FIX: drive the loop with getline() itself instead of testing eof()
  // first — the old while(!eof()) pattern can process the final line twice
  // (pre-C++11 getline leaves the buffer untouched on failure) and spins
  // forever if the stream enters a fail state without reaching EOF.
  std::string line;
  while (std::getline(inpf, line)) {
    std::istringstream fields(line);
    std::string word, tag, extra;
    // Accept only lines with exactly two whitespace-separated tokens,
    // matching the original Tokenize(line).size() == 2 check.
    if ((fields >> word >> tag) && !(fields >> extra)) {
      m.insert(std::make_pair(word, tag));
    }
  }
}
}
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelHybKen_h
#define moses_LanguageModelHybKen_h
//#include <string>
//#include <boost/shared_ptr.hpp>
//#include "lm/word_index.hh"
//#include "moses/LM/Base.h"
//#include "moses/Hypothesis.h"
//#include "moses/TypeDef.h"
//#include "moses/Word.h"
#include "moses/LM/Ken.h"
namespace Moses
{
LanguageModel *ConstructHybKenLM(const std::string &line);
//! This will also load. Returns a templated KenLM class
LanguageModel *ConstructHybKenLM(const std::string &line, const std::string &file, const std::string &fileM, FactorType factorType, bool lazy);
void LoadMapping(const std::string &f, std::map<std::string, std::string>& m);
/*
* An implementation of single factor LM using Kenneth's code.
*/
// Hybrid KenLM wrapper: behaves like LanguageModelKen<Model> but maps each
// word through a word->tag table (loaded from the "pathM" file) before
// scoring, via GetTag().
template <class Model> class LanguageModelHybKen : public LanguageModelKen<Model>
{
public:
// line: full feature line; file: LM file; fileM: word->tag mapping file;
// factorType: factor to score; lazy: mmap lazily vs populate on load.
LanguageModelHybKen(const std::string &line, const std::string &file, const std::string &fileM, FactorType factorType, bool lazy);
//virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
//virtual void IncrementalCallback(Incremental::Manager &manager) const;
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const;
//virtual bool IsUseable(const FactorMask &mask) const;
protected:
// These lines are required to make the parent class's protected members visible to this class
// NOTE(review): this only works if the members really are protected in
// LanguageModelKen — in stock Moses several of them are private.
using LanguageModelKen<Model>::m_ngram;
using LanguageModelKen<Model>::m_beginSentenceFactor;
using LanguageModelKen<Model>::m_factorType;
using LanguageModelKen<Model>::TranslateID;
private:
LanguageModelHybKen(const LanguageModelHybKen<Model> &copy_from);
//std::vector<lm::WordIndex> m_lmIdLookup;
// Word -> tag lookup; returns the word unchanged when no mapping exists.
// NOTE(review): called from the const members above, so it should be
// declared const (and the definition in HybKen.cpp updated to match).
Word GetTag(const Word& word);
public:
// word -> tag table loaded by LoadMapping(); public exposure looks
// unintentional — consider making it private.
std::map<std::string, std::string> m_mapW2P;
};
} // namespace Moses
#endif
_______________________________________________
Moses-support mailing list
[email protected]
http://mailman.mit.edu/mailman/listinfo/moses-support