00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00028 #include <cstring>
00029 #include <iostream>
00030 #include <fstream>
00031 #include <map>
00032 #include <string>
00033 #include <vector>
00034
00035 #include <boost/program_options.hpp>
00036 #include <boost/scoped_ptr.hpp>
00037
00038 #include "moses/InputPath.h"
00039 #include "moses/Parameter.h"
00040 #include "moses/TranslationModel/PhraseDictionary.h"
00041 #include "moses/Timer.h"
00042 #include "moses/StaticData.h"
00043
00044 #include "util/file_piece.hh"
00045 #include "util/string_piece.hh"
00046 #include "util/tokenize_piece.hh"
00047 #include "util/double-conversion/double-conversion.h"
00048 #include "util/exception.hh"
00049
00050
00051 using namespace Moses;
00052 using namespace std;
00053
00054 namespace po = boost::program_options;
00055 typedef multimap<float,string> Lines;
00056
00057 static void usage(const po::options_description& desc, char const** argv)
00058 {
00059 cerr << "Usage: " + string(argv[0]) + " [options] input-file output-file" << endl;
00060 cerr << desc << endl;
00061 }
00062
00063
/**
 * Write the best-scoring entries of a score->line map to a stream.
 *
 * Entries are visited from the highest key to the lowest (reverse iteration
 * over the multimap) and each mapped string is written on its own line.
 *
 * @param lines       scored lines, keyed by score (same type as the Lines
 *                    typedef); taken by reference so no strings are copied
 * @param maxPhrases  maximum number of entries to write; 0 writes nothing
 * @param out         destination stream; flushed once after the last entry
 */
static void outputTopN(const std::multimap<float, std::string>& lines, size_t maxPhrases, std::ostream& out)
{
  typedef std::multimap<float, std::string>::const_reverse_iterator ReverseIter;

  size_t written = 0;
  // Test the limit before writing so that maxPhrases == 0 emits nothing.
  for (ReverseIter i = lines.rbegin(); i != lines.rend() && written < maxPhrases; ++i, ++written) {
    out << i->second << '\n';
  }
  // One flush per call instead of one per line keeps completed groups
  // visible without paying for a flush on every phrase.
  out.flush();
}
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098 int main(int argc, char const** argv)
00099 {
00100 bool help;
00101 string input_file;
00102 string config_file;
00103 size_t maxPhrases = 100;
00104
00105
00106 po::options_description desc("Allowed options");
00107 desc.add_options()
00108 ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
00109 ("input-file,i", po::value<string>(&input_file), "Input file")
00110 ("config-file,f", po::value<string>(&config_file), "Config file")
00111 ("max-phrases,n", po::value<size_t>(&maxPhrases), "Maximum target phrases per source phrase")
00112 ;
00113
00114 po::options_description cmdline_options;
00115 cmdline_options.add(desc);
00116 po::variables_map vm;
00117 po::parsed_options parsed = po::command_line_parser(argc,argv).
00118 options(cmdline_options).run();
00119 po::store(parsed, vm);
00120 po::notify(vm);
00121 if (help) {
00122 usage(desc, argv);
00123 exit(0);
00124 }
00125 if (input_file.empty()) {
00126 cerr << "ERROR: Please specify an input file" << endl << endl;
00127 usage(desc, argv);
00128 exit(1);
00129 }
00130 if (config_file.empty()) {
00131 cerr << "ERROR: Please specify a config file" << endl << endl;
00132 usage(desc, argv);
00133 exit(1);
00134 }
00135
00136 vector<string> mosesargs;
00137 mosesargs.push_back(argv[0]);
00138 mosesargs.push_back("-f");
00139 mosesargs.push_back(config_file);
00140
00141 boost::scoped_ptr<Parameter> params(new Parameter());
00142 char const** mosesargv = new char const*[mosesargs.size()];
00143 for (size_t i = 0; i < mosesargs.size(); ++i) {
00144 mosesargv[i] = mosesargs[i].c_str();
00145
00146
00147 }
00148
00149 if (!params->LoadParam(mosesargs.size(), mosesargv)) {
00150 params->Explain();
00151 exit(1);
00152 }
00153
00154 ResetUserTime();
00155 if (!StaticData::LoadDataStatic(params.get(),argv[0])) {
00156 exit(1);
00157 }
00158
00159 const StaticData &staticData = StaticData::Instance();
00160
00161
00162 PhraseDictionary* phraseTable = NULL;
00163 const vector<FeatureFunction*>& ffs = FeatureFunction::GetFeatureFunctions();
00164 for (size_t i = 0; i < ffs.size(); ++i) {
00165 PhraseDictionary* maybePhraseTable = dynamic_cast< PhraseDictionary*>(ffs[i]);
00166 if (maybePhraseTable) {
00167 UTIL_THROW_IF(phraseTable,util::Exception,"Can only score translations with one phrase table");
00168 phraseTable = maybePhraseTable;
00169 }
00170 }
00171 UTIL_THROW_IF(!phraseTable,util::Exception,"Unable to find scoring phrase table");
00172
00173
00174
00175
00176
00177
00178 std::ostream *progress = NULL;
00179 IFVERBOSE(1) progress = &std::cerr;
00180 util::FilePiece in(input_file.c_str(), progress);
00181
00182
00183 vector<float> scoreVector;
00184 StringPiece line;
00185
00186 double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
00187
00188 string previous;
00189 Lines lines;
00190
00191
00192 while(true) {
00193 try {
00194 line = in.ReadLine();
00195 } catch (const util::EndOfFileException &e) {
00196 break;
00197 }
00198
00199 util::TokenIter<util::MultiCharacter> pipes(line, "|||");
00200 StringPiece sourcePhraseString(*pipes);
00201 StringPiece targetPhraseString(*++pipes);
00202 StringPiece scoreString(*++pipes);
00203 scoreVector.clear();
00204 for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
00205 int processed;
00206 float score = converter.StringToFloat(s->data(), s->length(), &processed);
00207 UTIL_THROW_IF2(isnan(score), "Bad score " << *s);
00208 scoreVector.push_back(FloorScore(TransformScore(score)));
00209 }
00210
00211 if (sourcePhraseString != previous) {
00212 outputTopN(lines, maxPhrases, cout);
00213 previous = sourcePhraseString.as_string();
00214 lines.clear();
00215 }
00216
00217 ScoreComponentCollection scores;
00218 scores.Assign(phraseTable,scoreVector);
00219 float score = scores.InnerProduct(staticData.GetAllWeights());
00220 lines.insert(pair<float,string>(score,line.as_string()));
00221
00222 }
00223 if (!lines.empty()) {
00224 outputTopN(lines, maxPhrases, cout);
00225 }
00226
00227
00228
00229
00230
00231 return 0;
00232 }