00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "Decoder.h"
00021 #include "moses/Manager.h"
00022 #include "moses/ChartManager.h"
00023 #include "moses/Sentence.h"
00024 #include "moses/InputType.h"
00025 #include "moses/Phrase.h"
00026 #include "moses/TrellisPathList.h"
00027 #include "moses/ChartTrellisPathList.h"
00028 #include "moses/ChartTrellisPath.h"
00029
00030 using namespace std;
00031 using namespace Moses;
00032
00033
00034 namespace Mira
00035 {
00036
/**
 * Duplicate a string into a freshly allocated, NUL-terminated char array.
 *
 * The array is allocated with new[] and holds s.size()+1 characters; the
 * caller owns it and is responsible for releasing it with delete[].
 */
static char* strToChar(const std::string& s)
{
  const std::string::size_type length = s.size();
  char* buffer = new char[length + 1];
  s.copy(buffer, length);
  buffer[length] = '\0';
  return buffer;
}
00046
00047 MosesDecoder::MosesDecoder(const string& inifile, int debuglevel, int argc, vector<string> decoder_params)
00048 : m_manager(NULL)
00049 {
00050 static int BASE_ARGC = 8;
00051 Parameter* params = new Parameter();
00052 char ** mosesargv = new char*[BASE_ARGC + argc];
00053 mosesargv[0] = strToChar("-f");
00054 mosesargv[1] = strToChar(inifile);
00055 mosesargv[2] = strToChar("-v");
00056 stringstream dbgin;
00057 dbgin << debuglevel;
00058 mosesargv[3] = strToChar(dbgin.str());
00059 mosesargv[4] = strToChar("-use-persistent-cache");
00060 mosesargv[5] = strToChar("0");
00061 mosesargv[6] = strToChar("-persistent-cache-size");
00062 mosesargv[7] = strToChar("0");
00063
00064 for (int i = 0; i < argc; ++i) {
00065 char *cstr = &(decoder_params[i])[0];
00066 mosesargv[BASE_ARGC + i] = cstr;
00067 }
00068
00069 if (!params->LoadParam(BASE_ARGC + argc,mosesargv)) {
00070 cerr << "Loading static data failed, exit." << endl;
00071 exit(1);
00072 }
00073 StaticData::LoadDataStatic(params, "mira");
00074 for (int i = 0; i < BASE_ARGC; ++i) {
00075 delete[] mosesargv[i];
00076 }
00077 delete[] mosesargv;
00078
00079
00080 assert(false);
00081 }
00082
00083 void MosesDecoder::cleanup(bool chartDecoding)
00084 {
00085 delete m_manager;
00086 if (chartDecoding)
00087 delete m_chartManager;
00088 else
00089 delete m_sentence;
00090 }
00091
00092 vector< vector<const Word*> > MosesDecoder::getNBest(const std::string& source,
00093 size_t sentenceid,
00094 size_t nBestSize,
00095 float bleuObjectiveWeight,
00096 float bleuScoreWeight,
00097 vector< ScoreComponentCollection>& featureValues,
00098 vector< float>& bleuScores,
00099 vector< float>& modelScores,
00100 size_t numReturnedTranslations,
00101 bool realBleu,
00102 bool distinct,
00103 bool avgRefLength,
00104 size_t rank,
00105 size_t epoch,
00106 string filename)
00107 {
00108 StaticData &staticData = StaticData::InstanceNonConst();
00109 bool chartDecoding = staticData.IsChart();
00110 initialize(staticData, source, sentenceid, bleuObjectiveWeight, bleuScoreWeight, avgRefLength, chartDecoding);
00111
00112
00113 if (chartDecoding) {
00114 return runChartDecoder(source, sentenceid, nBestSize, bleuObjectiveWeight, bleuScoreWeight,
00115 featureValues, bleuScores, modelScores, numReturnedTranslations, realBleu, distinct, rank, epoch);
00116 } else {
00117 SearchAlgorithm search = staticData.GetSearchAlgorithm();
00118 return runDecoder(source, sentenceid, nBestSize, bleuObjectiveWeight, bleuScoreWeight,
00119 featureValues, bleuScores, modelScores, numReturnedTranslations, realBleu, distinct, rank, epoch,
00120 search, filename);
00121 }
00122 }
00123
00124 vector< vector<const Word*> > MosesDecoder::runDecoder(const std::string& source,
00125 size_t sentenceid,
00126 size_t nBestSize,
00127 float bleuObjectiveWeight,
00128 float bleuScoreWeight,
00129 vector< ScoreComponentCollection>& featureValues,
00130 vector< float>& bleuScores,
00131 vector< float>& modelScores,
00132 size_t numReturnedTranslations,
00133 bool realBleu,
00134 bool distinct,
00135 size_t rank,
00136 size_t epoch,
00137 SearchAlgorithm& search,
00138 string filename)
00139 {
00140
00141 m_manager = new Moses::Manager(0,*m_sentence, search);
00142 m_manager->ProcessSentence();
00143 TrellisPathList nBestList;
00144 m_manager->CalcNBest(nBestSize, nBestList, distinct);
00145
00146
00147 if (filename != "") {
00148 ofstream out(filename.c_str());
00149 if (!out) {
00150 ostringstream msg;
00151 msg << "Unable to open " << filename;
00152 throw runtime_error(msg.str());
00153 }
00154
00155
00156 out.close();
00157 }
00158
00159
00160 Moses::TrellisPathList::const_iterator iter;
00161 for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
00162 const Moses::TrellisPath &path = **iter;
00163 featureValues.push_back(path.GetScoreBreakdown());
00164 float bleuScore, dynBleuScore, realBleuScore;
00165 if (realBleu) realBleuScore = m_bleuScoreFeature->CalculateBleu(path.GetTargetPhrase());
00166 else dynBleuScore = getBleuScore(featureValues.back());
00167 bleuScore = realBleu ? realBleuScore : dynBleuScore;
00168 bleuScores.push_back(bleuScore);
00169
00170
00171 float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
00172 modelScores.push_back(scoreWithoutBleu);
00173
00174 if (iter != nBestList.begin())
00175 cerr << endl;
00176 cerr << "Rank " << rank << ", epoch " << epoch << ", \"" << path.GetTargetPhrase() << "\", score: "
00177 << scoreWithoutBleu << ", Bleu: " << bleuScore << ", total: " << path.GetTotalScore();
00178 if (m_bleuScoreFeature->Enabled() && realBleu)
00179 cerr << " (d-bleu: " << dynBleuScore << ", r-bleu: " << realBleuScore << ") ";
00180
00181
00182 setBleuScore(featureValues.back(), 0);
00183 }
00184
00185
00186 vector< vector<const Word*> > translations;
00187 for (size_t i=0; i < numReturnedTranslations && i < nBestList.GetSize(); ++i) {
00188 const TrellisPath &path = nBestList.at(i);
00189 Phrase phrase = path.GetTargetPhrase();
00190
00191 vector<const Word*> translation;
00192 for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
00193 const Word &word = phrase.GetWord(pos);
00194 Word *newWord = new Word(word);
00195 translation.push_back(newWord);
00196 }
00197 translations.push_back(translation);
00198 }
00199
00200 return translations;
00201 }
00202
00203 vector< vector<const Word*> > MosesDecoder::runChartDecoder(const std::string& source,
00204 size_t sentenceid,
00205 size_t nBestSize,
00206 float bleuObjectiveWeight,
00207 float bleuScoreWeight,
00208 vector< ScoreComponentCollection>& featureValues,
00209 vector< float>& bleuScores,
00210 vector< float>& modelScores,
00211 size_t numReturnedTranslations,
00212 bool realBleu,
00213 bool distinct,
00214 size_t rank,
00215 size_t epoch)
00216 {
00217
00218 m_chartManager = new ChartManager(*m_sentence);
00219 m_chartManager->ProcessSentence();
00220 ChartTrellisPathList nBestList;
00221 m_chartManager->CalcNBest(nBestSize, nBestList, distinct);
00222
00223
00224 ChartTrellisPathList::const_iterator iter;
00225 for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
00226 const Moses::ChartTrellisPath &path = **iter;
00227 featureValues.push_back(path.GetScoreBreakdown());
00228 float bleuScore, dynBleuScore, realBleuScore;
00229 dynBleuScore = getBleuScore(featureValues.back());
00230 realBleuScore = m_bleuScoreFeature->CalculateBleu(path.GetOutputPhrase());
00231 bleuScore = realBleu ? realBleuScore : dynBleuScore;
00232 bleuScores.push_back(bleuScore);
00233
00234
00235 float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
00236 modelScores.push_back(scoreWithoutBleu);
00237
00238 if (iter != nBestList.begin())
00239 cerr << endl;
00240 cerr << "Rank " << rank << ", epoch " << epoch << ", \"" << path.GetOutputPhrase() << "\", score: "
00241 << scoreWithoutBleu << ", Bleu: " << bleuScore << ", total: " << path.GetTotalScore();
00242 if (m_bleuScoreFeature->Enabled() && realBleu)
00243 cerr << " (d-bleu: " << dynBleuScore << ", r-bleu: " << realBleuScore << ") ";
00244
00245
00246 setBleuScore(featureValues.back(), 0);
00247 }
00248
00249
00250 vector< vector<const Word*> > translations;
00251 for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
00252 const ChartTrellisPath &path = **iter;
00253 Phrase phrase = path.GetOutputPhrase();
00254
00255 vector<const Word*> translation;
00256 for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
00257 const Word &word = phrase.GetWord(pos);
00258 Word *newWord = new Word(word);
00259 translation.push_back(newWord);
00260 }
00261 translations.push_back(translation);
00262 }
00263
00264 return translations;
00265 }
00266
00267 void MosesDecoder::outputNBestList(const std::string& source, size_t sentenceid,
00268 size_t nBestSize, float bleuObjectiveWeight, float bleuScoreWeight,
00269 bool distinctNbest, bool avgRefLength, string filename, ofstream& streamOut)
00270 {
00271 StaticData &staticData = StaticData::InstanceNonConst();
00272 bool chartDecoding = staticData.IsChart();
00273 initialize(staticData, source, sentenceid, bleuObjectiveWeight, bleuScoreWeight, avgRefLength, chartDecoding);
00274
00275 if (chartDecoding) {
00276 m_chartManager = new ChartManager(*m_sentence);
00277 m_chartManager->ProcessSentence();
00278 ChartTrellisPathList nBestList;
00279 m_chartManager->CalcNBest(nBestSize, nBestList, distinctNbest);
00280
00281 cerr << "generate nbest list " << filename << endl;
00282 cerr << "not implemented.." << endl;
00283 exit(1);
00284 if (filename != "") {
00285 ofstream out(filename.c_str());
00286 if (!out) {
00287 ostringstream msg;
00288 msg << "Unable to open " << filename;
00289 throw runtime_error(msg.str());
00290 }
00291
00292
00293
00294 out.close();
00295 } else {
00296
00297 }
00298 } else {
00299
00300 m_manager = new Moses::Manager(0,*m_sentence, staticData.GetSearchAlgorithm());
00301 m_manager->ProcessSentence();
00302 TrellisPathList nBestList;
00303 m_manager->CalcNBest(nBestSize, nBestList, distinctNbest);
00304
00305 if (filename != "") {
00306 ofstream out(filename.c_str());
00307 if (!out) {
00308 ostringstream msg;
00309 msg << "Unable to open " << filename;
00310 throw runtime_error(msg.str());
00311 }
00312
00313
00314 out.close();
00315 } else {
00316
00317 streamOut.flush();
00318 }
00319 }
00320 }
00321
00322 void MosesDecoder::initialize(StaticData& staticData, const std::string& source, size_t sentenceid,
00323 float bleuObjectiveWeight, float bleuScoreWeight, bool avgRefLength, bool chartDecoding)
00324 {
00325 m_sentence = new Sentence();
00326 stringstream in(source + "\n");
00327 const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
00328 m_sentence->Read(in,inputFactorOrder);
00329
00330
00331
00332 staticData.ReLoadBleuScoreFeatureParameter(bleuObjectiveWeight*bleuScoreWeight);
00333
00334 m_bleuScoreFeature->SetCurrSourceLength((*m_sentence).GetSize());
00335 if (chartDecoding)
00336 m_bleuScoreFeature->SetCurrNormSourceLength((*m_sentence).GetSize()-2);
00337 else
00338 m_bleuScoreFeature->SetCurrNormSourceLength((*m_sentence).GetSize());
00339
00340 if (avgRefLength)
00341 m_bleuScoreFeature->SetCurrAvgRefLength(sentenceid);
00342 else
00343 m_bleuScoreFeature->SetCurrShortestRefLength(sentenceid);
00344 m_bleuScoreFeature->SetCurrReferenceNgrams(sentenceid);
00345 }
00346
00347 float MosesDecoder::getBleuScore(const ScoreComponentCollection& scores)
00348 {
00349 return scores.GetScoreForProducer(m_bleuScoreFeature);
00350 }
00351
00352 void MosesDecoder::setBleuScore(ScoreComponentCollection& scores, float bleu)
00353 {
00354 scores.Assign(m_bleuScoreFeature, bleu);
00355 }
00356
00357 ScoreComponentCollection MosesDecoder::getWeights()
00358 {
00359 return StaticData::Instance().GetAllWeights();
00360 }
00361
00362 void MosesDecoder::setWeights(const ScoreComponentCollection& weights)
00363 {
00364 StaticData::InstanceNonConst().SetAllWeights(weights);
00365 }
00366
00367 void MosesDecoder::updateHistory(const vector<const Word*>& words)
00368 {
00369 m_bleuScoreFeature->UpdateHistory(words);
00370 }
00371
00372 void MosesDecoder::updateHistory(const vector< vector< const Word*> >& words, vector<size_t>& sourceLengths, vector<size_t>& ref_ids, size_t rank, size_t epoch)
00373 {
00374 m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
00375 }
00376
00377 void MosesDecoder::printBleuFeatureHistory(std::ostream& out)
00378 {
00379 m_bleuScoreFeature->PrintHistory(out);
00380 }
00381
00382 size_t MosesDecoder::getClosestReferenceLength(size_t ref_id, int hypoLength)
00383 {
00384 return m_bleuScoreFeature->GetClosestRefLength(ref_id, hypoLength);
00385 }
00386
00387 size_t MosesDecoder::getShortestReferenceIndex(size_t ref_id)
00388 {
00389 return m_bleuScoreFeature->GetShortestRefIndex(ref_id);
00390 }
00391
00392 void MosesDecoder::setBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
00393 bool scaleByInverseLength, bool scaleByAvgInverseLength,
00394 float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu)
00395 {
00396 m_bleuScoreFeature->SetBleuParameters(disable, sentenceBleu, scaleByInputLength, scaleByAvgInputLength,
00397 scaleByInverseLength, scaleByAvgInverseLength,
00398 scaleByX, historySmoothing, scheme, simpleHistoryBleu);
00399 }
00400 }
00401