00001
00002
00003
00004
00005
00006
00007
00008
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include <algorithm>

#include <boost/program_options.hpp>
#include <boost/scoped_ptr.hpp>

#include "util/exception.hh"
#include "util/random.hh"

#include "BleuScorer.h"
#include "HopeFearDecoder.h"
#include "MiraFeatureVector.h"
#include "MiraWeightVector.h"

#include "Scorer.h"
#include "ScorerFactory.h"

using namespace std;
using namespace MosesTuning;

namespace po = boost::program_options;
00057
00058 int main(int argc, char** argv)
00059 {
00060 bool help;
00061 string denseInitFile;
00062 string sparseInitFile;
00063 string type = "nbest";
00064 string sctype = "BLEU";
00065 string scconfig = "";
00066 vector<string> scoreFiles;
00067 vector<string> featureFiles;
00068 vector<string> referenceFiles;
00069 string hgDir;
00070 int seed;
00071 string outputFile;
00072 float c = 0.01;
00073 float decay = 0.999;
00074 int n_iters = 60;
00075 bool streaming = false;
00076 bool streaming_out = false;
00077 bool no_shuffle = false;
00078 bool model_bg = false;
00079 bool verbose = false;
00080 bool safe_hope = false;
00081 size_t hgPruning = 50;
00082
00083
00084 po::options_description desc("Allowed options");
00085 desc.add_options()
00086 ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
00087 ("type,t", po::value<string>(&type), "Either nbest or hypergraph")
00088 ("sctype", po::value<string>(&sctype), "the scorer type (default BLEU)")
00089 ("scconfig,c", po::value<string>(&scconfig), "configuration string passed to scorer")
00090 ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
00091 ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
00092 ("hgdir,H", po::value<string> (&hgDir), "Directory containing hypergraphs")
00093 ("reference,R", po::value<vector<string> > (&referenceFiles), "Reference files, only required for hypergraph mira")
00094 ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
00095 ("output-file,o", po::value<string>(&outputFile), "Output file")
00096 ("cparam,C", po::value<float>(&c), "MIRA C-parameter, lower for more regularization (default 0.01)")
00097 ("decay,D", po::value<float>(&decay), "BLEU background corpus decay rate (default 0.999)")
00098 ("iters,J", po::value<int>(&n_iters), "Number of MIRA iterations to run (default 60)")
00099 ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features. This should have 'name= value' on each line, or (legacy) should be the Moses mert 'init.opt' format.")
00100 ("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
00101 ("streaming", po::value(&streaming)->zero_tokens()->default_value(false), "Stream n-best lists to save memory, implies --no-shuffle")
00102 ("streaming-out", po::value(&streaming_out)->zero_tokens()->default_value(false), "Stream weights to stdout after each sentence")
00103 ("no-shuffle", po::value(&no_shuffle)->zero_tokens()->default_value(false), "Don't shuffle hypotheses before each epoch")
00104 ("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background")
00105 ("verbose", po::value(&verbose)->zero_tokens()->default_value(false), "Verbose updates")
00106 ("safe-hope", po::value(&safe_hope)->zero_tokens()->default_value(false), "Mode score's influence on hope decoding is limited")
00107 ("hg-prune", po::value<size_t>(&hgPruning), "Prune hypergraphs to have this many edges per reference word")
00108 ;
00109
00110 po::options_description cmdline_options;
00111 cmdline_options.add(desc);
00112 po::variables_map vm;
00113 po::store(po::command_line_parser(argc,argv).
00114 options(cmdline_options).run(), vm);
00115 po::notify(vm);
00116 if (help) {
00117 cout << "Usage: " + string(argv[0]) + " [options]" << endl;
00118 cout << desc << endl;
00119 exit(0);
00120 }
00121
00122 cerr << "kbmira with c=" << c << " decay=" << decay << " no_shuffle=" << no_shuffle << endl;
00123
00124 if (vm.count("random-seed")) {
00125 cerr << "Initialising random seed to " << seed << endl;
00126 util::rand_init(seed);
00127 } else {
00128 cerr << "Initialising random seed from system clock" << endl;
00129 util::rand_init();
00130 }
00131
00132
00133 pair<MiraWeightVector*, size_t> ret = InitialiseWeights(denseInitFile, sparseInitFile, type, verbose);
00134 boost::scoped_ptr<MiraWeightVector> wv(ret.first);
00135 size_t initDenseSize = ret.second;
00136
00137
00138 if(sctype != "BLEU" && type == "hypergraph") {
00139 UTIL_THROW(util::Exception, "hypergraph mira only supports BLEU");
00140 }
00141 boost::scoped_ptr<Scorer> scorer(ScorerFactory::getScorer(sctype, scconfig));
00142
00143
00144 vector<ValType> bg(scorer->NumberOfScores(), 1);
00145
00146 boost::scoped_ptr<HopeFearDecoder> decoder;
00147 if (type == "nbest") {
00148 decoder.reset(new NbestHopeFearDecoder(featureFiles, scoreFiles, streaming, no_shuffle, safe_hope, scorer.get()));
00149 } else if (type == "hypergraph") {
00150 decoder.reset(new HypergraphHopeFearDecoder(hgDir, referenceFiles, initDenseSize, streaming, no_shuffle, safe_hope, hgPruning, *wv, scorer.get()));
00151 } else {
00152 UTIL_THROW(util::Exception, "Unknown batch mira type: '" << type << "'");
00153 }
00154
00155
00156 if (!streaming_out)
00157 cerr << "Initial BLEU = " << decoder->Evaluate(wv->avg()) << endl;
00158 ValType bestBleu = 0;
00159 for(int j=0; j<n_iters; j++) {
00160
00161 int iNumExamples = 0;
00162 int iNumUpdates = 0;
00163 ValType totalLoss = 0.0;
00164 size_t sentenceIndex = 0;
00165 for(decoder->reset(); !decoder->finished(); decoder->next()) {
00166 HopeFearData hfd;
00167 decoder->HopeFear(bg,*wv,&hfd);
00168
00169
00170 if (!hfd.hopeFearEqual && hfd.hopeBleu > hfd.fearBleu) {
00171
00172 MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures;
00173
00174
00175 ValType delta = hfd.hopeBleu - hfd.fearBleu;
00176
00177 ValType diff_score = wv->score(diff);
00178 ValType loss = delta - diff_score;
00179 if(verbose) {
00180 cerr << "Updating sent " << sentenceIndex << endl;
00181 cerr << "Wght: " << *wv << endl;
00182 cerr << "Hope: " << hfd.hopeFeatures << " BLEU:" << hfd.hopeBleu << " Score:" << wv->score(hfd.hopeFeatures) << endl;
00183 cerr << "Fear: " << hfd.fearFeatures << " BLEU:" << hfd.fearBleu << " Score:" << wv->score(hfd.fearFeatures) << endl;
00184 cerr << "Diff: " << diff << " BLEU:" << delta << " Score:" << diff_score << endl;
00185 cerr << "Loss: " << loss << " Scale: " << 1 << endl;
00186 cerr << endl;
00187 }
00188 if(loss > 0) {
00189 ValType eta = min(c, loss / diff.sqrNorm());
00190 wv->update(diff,eta);
00191 totalLoss+=loss;
00192 iNumUpdates++;
00193 }
00194
00195 for(size_t k=0; k<bg.size(); k++) {
00196 bg[k]*=decay;
00197 if(model_bg)
00198 bg[k]+=hfd.modelStats[k];
00199 else
00200 bg[k]+=hfd.hopeStats[k];
00201 }
00202 }
00203 iNumExamples++;
00204 ++sentenceIndex;
00205 if (streaming_out)
00206 cout << *wv << endl;
00207 }
00208
00209 cerr << iNumUpdates << "/" << iNumExamples << " updates"
00210 << ", avg loss = " << (totalLoss / iNumExamples);
00211
00212
00213
00214 AvgWeightVector avg = wv->avg();
00215 ValType bleu = decoder->Evaluate(avg);
00216 cerr << ", BLEU = " << bleu << endl;
00217 if(bleu > bestBleu) {
00218
00219
00220
00221
00222
00223
00224
00225
00226 ostream* out;
00227 ofstream outFile;
00228 if (!outputFile.empty() ) {
00229 outFile.open(outputFile.c_str());
00230 if (!(outFile)) {
00231 cerr << "Error: Failed to open " << outputFile << endl;
00232 exit(1);
00233 }
00234 out = &outFile;
00235 } else {
00236 out = &cout;
00237 }
00238 for(size_t i=0; i<avg.size(); i++) {
00239 if(i<initDenseSize)
00240 *out << "F" << i << " " << avg.weight(i) << endl;
00241 else {
00242 if(abs(avg.weight(i))>1e-8)
00243 *out << SparseVector::decode(i-initDenseSize) << " " << avg.weight(i) << endl;
00244 }
00245 }
00246 outFile.close();
00247 bestBleu = bleu;
00248 }
00249 }
00250 cerr << "Best BLEU = " << bestBleu << endl;
00251 }
00252
00253
00254
00255
00256