On 4/4/2010 23:55, Giuseppe Attardi wrote:
Has anyone developed a tool for querying a phrase table directly?

-- Beppe

I modified the code submitted by Felipe Sanchez to get it to work with the latest release of Moses. Unfortunately, it only works with some tables; on larger ones (a 2.8 GB gzip file) it crashes while freeing memory,
apparently in the call to FileExists(), right at the beginning.

Could someone please try it on a different installation to check whether the problem persists?

The program can be called like this:

    pmoses --table tablefile word1 ... wordn

The first time it is called, tablefile is converted to binary format.

Thank you

-- Beppe

/*
 * Copyright (C) 2009 Felipe Sánchez-Martínez
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "TypeDef.h"
#include "PhraseDictionaryTreeAdaptor.h"
#include "Phrase.h"
#include "TargetPhraseCollection.h"
#include "LMList.h"
#include "ScoreComponentCollection.h"

using namespace std;
using namespace Moses;

// Normalise the blanks in a string.
//
// Removes every leading and trailing space character (' ') and collapses
// each run of consecutive spaces inside the string into a single space.
// Only the space character is treated as blank; tabs and newlines are
// copied through unchanged.
//
// str: the text to normalise (taken by value, the caller's copy is untouched).
// Returns the normalised text; an empty string if str is empty or consists
// solely of spaces.
std::string trim(std::string str) {
  const std::string::size_type first = str.find_first_not_of(' ');
  if (first == std::string::npos) {
    // Empty input, or nothing but spaces.
    return std::string();
  }
  const std::string::size_type last = str.find_last_not_of(' ');

  // Single pass over the kept range instead of repeated erase() calls,
  // which shifted the tail of the string on every removed character.
  std::string result;
  result.reserve(last - first + 1);
  bool previousWasSpace = false;
  for (std::string::size_type pos = first; pos <= last; ++pos) {
    const char current = str[pos];
    const bool isSpace = (current == ' ');
    if (!(isSpace && previousWasSpace)) {
      result += current;
    }
    previousWasSpace = isSpace;
  }
  return result;
}

int main (int argc, char *argv[]) {
  vector<FactorType> input, output;
  vector<float> weight;
  size_t numScoreComponent=5;
  unsigned numInputScores=0;
  int tableLimit=0;
  int weightWP=0;
  LMList lmList;
  string filePath="/path/to/phrase-table";

  string source_str="";
  for (unsigned i=1; i<argc; i++) {
    if (!strcmp(argv[i],"--table")) {
      if (++i >= argc) {
        cerr << "Error: Missing argument to --table" << endl;
        exit(1);
      } else
        filePath = argv[i];
    } else if (!strcmp(argv[i],"--help")) {
      cerr << "Usage: " <<  argv[0] << " [options] phrase" << endl
           << "   --help\tprint this message" << endl
           << "   --table path\tpath to phrase table" << endl;
      exit(2);
    } else {
      if (source_str.length()>0)
        source_str += " ";
      source_str += argv[i];
    }
  }

  cerr<<"numScoreComponent: "<<numScoreComponent<<endl;
  cerr<<"numInputScores: "<<numInputScores<<endl;

  cerr<<"Table limit: "<<tableLimit<<endl;
  cerr<<"WeightWordPenalty: "<<weightWP<<endl;
  cerr<<"Source phrase: \""<<source_str<<"\""<<endl;

  input.push_back(0);
  output.push_back(0);
  
  weight.push_back(0);
  weight.push_back(0);
  weight.push_back(0);
  weight.push_back(0);
  weight.push_back(0);                          

  const PhraseDictionaryFeature* feature = new 
PhraseDictionaryFeature(numScoreComponent, numInputScores, input, output, 
filePath, weight, tableLimit);
  PhraseDictionaryTreeAdaptor *pd=new 
PhraseDictionaryTreeAdaptor(numScoreComponent, numInputScores, feature);
  
  if (!pd->Load(input, output, filePath, weight, tableLimit, lmList, weightWP)) 
{
    delete pd;
    return false;
  }
                                
  cerr<<"-------------------------------------------------"<<endl;
  FactorDirection direction;
  Phrase phrase(direction);
  phrase.CreateFromString(input, source_str, "|");
  TargetPhraseCollection *tpc = (TargetPhraseCollection*) 
pd->GetTargetPhraseCollection(phrase);

  if (tpc) {
    TargetPhraseCollection::iterator iterTargetPhrase;
    for (iterTargetPhrase = tpc->begin(); iterTargetPhrase != tpc->end();  
++iterTargetPhrase) {
      //cerr<<(*(*iterTargetPhrase))<<endl;
    
      stringstream strs;
      strs<<static_cast<const Phrase&>(*(*iterTargetPhrase));   
      cerr<<source_str<<" => \""<< trim(strs.str()) <<"\" ";
      ScoreComponentCollection scc = (*iterTargetPhrase)->GetScoreBreakdown();
      cerr<<"Scores: ";
      for (unsigned i=0; i<scc.size(); i++)
        cerr<<scc[i]<<" ";
      cerr<<endl;
    }
  }
  cerr<<"-------------------------------------------------"<<endl;
}

_______________________________________________
Moses-support mailing list
[email protected]
http://mailman.mit.edu/mailman/listinfo/moses-support

Reply via email to