00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <ctime>
00023 #include <iostream>
00024 #include <iterator>
00025 #include <fstream>
00026 #include <sstream>
00027 #include <algorithm>
00028 #include "Parameter.h"
00029 #include "Util.h"
00030 #include "InputFileStream.h"
00031 #include "StaticData.h"
00032 #include "UserMessage.h"
00033
00034 using namespace std;
00035
00036 namespace Moses
00037 {
00039 Parameter::Parameter()
00040 {
00041 AddParam("beam-threshold", "b", "threshold for threshold pruning");
00042 AddParam("config", "f", "location of the configuration file");
00043 AddParam("continue-partial-translation", "cpt", "start from nonempty hypothesis");
00044 AddParam("decoding-graph-backoff", "dpb", "only use subsequent decoding paths for unknown spans of given length");
00045 AddParam("dlm-model", "Order, factor and vocabulary file for discriminative LM. Use * for filename to indicate unlimited vocabulary.");
00046 AddParam("drop-unknown", "du", "drop unknown words instead of copying them");
00047 AddParam("disable-discarding", "dd", "disable hypothesis discarding");
00048 AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default");
00049 AddParam("generation-file", "location and properties of the generation table");
00050 AddParam("global-lexical-file", "gl", "discriminatively trained global lexical translation model file");
00051 AddParam("glm-feature", "discriminatively trained global lexical translation feature, sparse producer");
00052 AddParam("input-factors", "list of factors in the input");
00053 AddParam("input-file", "i", "location of the input file to be translated");
00054 AddParam("inputtype", "text (0), confusion network (1), word lattice (2) (default = 0)");
00055 AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
00056 AddParam("lmodel-file", "location and properties of the language models");
00057 AddParam("lmodel-dub", "dictionary upper bounds of language models");
00058 AddParam("lmodel-oov-feature", "add language model oov feature, one per model");
00059 AddParam("mapping", "description of decoding steps");
00060 AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
00061 AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
00062 AddParam("max-phrase-length", "maximum phrase length (default 20)");
00063 AddParam("n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
00064 AddParam("lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
00065 AddParam("n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
00066 AddParam("print-all-derivations", "to print all derivations in search graph");
00067 AddParam("output-factors", "list of factors in the output");
00068 AddParam("phrase-drop-allowed", "da", "if present, allow dropping of source words");
00069 AddParam("report-all-factors", "report all factors in output, not just first");
00070 AddParam("report-all-factors-in-n-best", "Report all factors in n-best-lists. Default is false");
00071 #ifdef HAVE_SYNLM
00072 AddParam("slmodel-file", "location of the syntactic language model file(s)");
00073 AddParam("slmodel-factor", "factor to use with syntactic language model");
00074 AddParam("slmodel-beam", "beam width to use with syntactic language model's parser");
00075 #endif
00076 AddParam("stack", "s", "maximum stack size for histogram pruning");
00077 AddParam("stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)");
00078 AddParam("threads","th", "number of threads to use in decoding (defaults to single-threaded)");
00079 AddParam("translation-details", "T", "for each best hypothesis, report translation details to the given file");
00080 AddParam("ttable-file", "location and properties of the translation tables");
00081 AddParam("translation-option-threshold", "tot", "threshold for translation options relative to best for input phrase");
00082 AddParam("early-discarding-threshold", "edt", "threshold for constructing hypotheses based on estimate cost");
00083 AddParam("verbose", "v", "verbosity level of the logging");
00084 AddParam("references", "Reference file(s) - used for bleu score feature");
00085 AddParam("output-factors", "list if factors in the output");
00086 AddParam("cache-path", "?");
00087 AddParam("distortion-limit", "dl", "distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited)");
00088 AddParam("monotone-at-punctuation", "mp", "do not reorder over punctuation");
00089 AddParam("distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
00090 AddParam("distortion", "configurations for each factorized/lexicalized reordering model.");
00091 AddParam("early-distortion-cost", "edc", "include estimate of distortion cost yet to be incurred in the score [Moore & Quirk 2007]. Default is no");
00092 AddParam("xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore'");
00093 AddParam("xml-brackets", "xb", "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode" );
00094 AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
00095 AddParam("lminimum-bayes-risk", "lmbr", "use lattice miminum Bayes risk to determine best translation");
00096 AddParam("mira", "do mira training");
00097 AddParam("consensus-decoding", "con", "use consensus decoding (De Nero et. al. 2009)");
00098 AddParam("mbr-size", "number of translation candidates considered in MBR decoding (default 200)");
00099 AddParam("mbr-scale", "scaling factor to convert log linear score probability in MBR decoding (default 1.0)");
00100 AddParam("lmbr-thetas", "theta(s) for lattice mbr calculation");
00101 AddParam("lmbr-pruning-factor", "average number of nodes/word wanted in pruned lattice");
00102 AddParam("lmbr-p", "unigram precision value for lattice mbr");
00103 AddParam("lmbr-r", "ngram precision decay value for lattice mbr");
00104 AddParam("lmbr-map-weight", "weight given to map solution when doing lattice MBR (default 0)");
00105 AddParam("lattice-hypo-set", "to use lattice as hypo set during lattice MBR");
00106 AddParam("clean-lm-cache", "clean language model caches after N translations (default N=1)");
00107 AddParam("use-persistent-cache", "cache translation options across sentences (default true)");
00108 AddParam("persistent-cache-size", "maximum size of cache for translation options (default 10,000 input phrases)");
00109 AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");
00110 AddParam("output-word-graph", "owg", "Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos");
00111 AddParam("time-out", "seconds after which is interrupted (-1=no time-out, default is -1)");
00112 AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
00113 AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
00114 AddParam("unpruned-search-graph", "usg", "When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses");
00115 AddParam("output-search-graph-slf", "slf", "Output connected hypotheses of search into specified directory, one file per sentence, in HTK standard lattice format (SLF)");
00116 AddParam("output-search-graph-hypergraph", "Output connected hypotheses of search into specified directory, one file per sentence, in a hypergraph format (see Kenneth Heafield's lazy hypergraph decoder)");
00117 AddParam("include-lhs-in-search-graph", "lhssg", "When outputting chart search graph, include the label of the LHS of the rule (useful when using syntax)");
00118 #ifdef HAVE_PROTOBUF
00119 AddParam("output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");
00120 #endif
00121 AddParam("cube-pruning-pop-limit", "cbp", "How many hypotheses should be popped for each stack. (default = 1000)");
00122 AddParam("cube-pruning-diversity", "cbd", "How many hypotheses should be created for each coverage. (default = 0)");
00123 AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing. (default = 0)");
00124 AddParam("constraint", "Location of the file with target sentences to produce constraining the search");
00125 AddParam("description", "Source language, target language, description");
00126 AddParam("max-chart-span", "maximum num. of source word chart rules can consume (default 10)");
00127 AddParam("non-terminals", "list of non-term symbols, space separated");
00128 AddParam("rule-limit", "a little like table limit. But for chart decoding rules. Default is DEFAULT_MAX_TRANS_OPT_SIZE");
00129 AddParam("source-label-overlap", "What happens if a span already has a label. 0=add more. 1=replace. 2=discard. Default is 0");
00130 AddParam("output-hypo-score", "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
00131 AddParam("unknown-lhs", "file containing target lhs of unknown words. 1 per line: LHS prob");
00132 AddParam("phrase-pair-feature", "Source and target factors for phrase pair feature");
00133 AddParam("phrase-boundary-source-feature", "Source factors for phrase boundary feature");
00134 AddParam("phrase-boundary-target-feature", "Target factors for phrase boundary feature");
00135 AddParam("phrase-length-feature", "Count features for source length, target length, both of each phrase");
00136 AddParam("target-word-insertion-feature", "Count feature for each unaligned target word");
00137 AddParam("source-word-deletion-feature", "Count feature for each unaligned source word");
00138 AddParam("word-translation-feature", "Count feature for word translation according to word alignment");
00139 AddParam("cube-pruning-lazy-scoring", "cbls", "Don't fully score a hypothesis until it is popped");
00140 AddParam("parsing-algorithm", "Which parsing algorithm to use. 0=CYK+, 1=scope-3. (default = 0)");
00141 AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing, 4=stack with batched lm requests (default = 0)");
00142 AddParam("constraint", "Location of the file with target sentences to produce constraining the search");
00143 AddParam("link-param-count", "Number of parameters on word links when using confusion networks or lattices (default = 1)");
00144 AddParam("description", "Source language, target language, description");
00145
00146 AddParam("max-chart-span", "maximum num. of source word chart rules can consume (default 10)");
00147 AddParam("non-terminals", "list of non-term symbols, space separated");
00148 AddParam("rule-limit", "a little like table limit. But for chart decoding rules. Default is DEFAULT_MAX_TRANS_OPT_SIZE");
00149 AddParam("source-label-overlap", "What happens if a span already has a label. 0=add more. 1=replace. 2=discard. Default is 0");
00150 AddParam("output-hypo-score", "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
00151 AddParam("unknown-lhs", "file containing target lhs of unknown words. 1 per line: LHS prob");
00152 AddParam("show-weights", "print feature weights and exit");
00153 AddParam("start-translation-id", "Id of 1st input. Default = 0");
00154 AddParam("output-unknowns", "Output the unknown (OOV) words to the given file, one line per sentence");
00155
00156
00157 AddParam("minlexr-memory", "Load lexical reordering table in minlexr format into memory");
00158 AddParam("minphr-memory", "Load phrase table in minphr format into memory");
00159
00160 AddParam("print-alignment-info", "Output word-to-word alignment to standard out, separated from translation by |||. Word-to-word alignments are takne from the phrase table if any. Default is false");
00161 AddParam("include-segmentation-in-n-best", "include phrasal segmentation in the n-best list. default is false");
00162 AddParam("print-alignment-info-in-n-best", "Include word-to-word alignment in the n-best list. Word-to-word alignments are takne from the phrase table if any. Default is false");
00163 AddParam("alignment-output-file", "print output word alignments into given file");
00164 AddParam("sort-word-alignment", "Sort word alignments for more consistent display. 0=no sort (default), 1=target order");
00165 AddParam("report-segmentation", "t", "report phrase segmentation in the output");
00166 AddParam("link-param-count", "DEPRECATED. DO NOT USE. Number of parameters on word links when using confusion networks or lattices (default = 1)");
00167
00168 AddParam("weight-slm", "slm", "DEPRECATED. DO NOT USE. weight(s) for syntactic language model");
00169 AddParam("weight-bl", "bl", "DEPRECATED. DO NOT USE. weight for bleu score feature");
00170 AddParam("weight-d", "d", "DEPRECATED. DO NOT USE. weight(s) for distortion (reordering components)");
00171 AddParam("weight-dlm", "dlm", "DEPRECATED. DO NOT USE. weight for discriminative LM feature function (on top of sparse weights)");
00172 AddParam("weight-lr", "lr", "DEPRECATED. DO NOT USE. weight(s) for lexicalized reordering, if not included in weight-d");
00173 AddParam("weight-generation", "g", "DEPRECATED. DO NOT USE. weight(s) for generation components");
00174 AddParam("weight-i", "I", "DEPRECATED. DO NOT USE. weight(s) for word insertion - used for parameters from confusion network and lattice input links");
00175 AddParam("weight-l", "lm", "DEPRECATED. DO NOT USE. weight(s) for language models");
00176 AddParam("weight-lex", "lex", "DEPRECATED. DO NOT USE. weight for global lexical model");
00177 AddParam("weight-glm", "glm", "DEPRECATED. DO NOT USE. weight for global lexical feature, sparse producer");
00178 AddParam("weight-wt", "wt", "DEPRECATED. DO NOT USE. weight for word translation feature");
00179 AddParam("weight-pp", "pp", "DEPRECATED. DO NOT USE. weight for phrase pair feature");
00180 AddParam("weight-pb", "pb", "DEPRECATED. DO NOT USE. weight for phrase boundary feature");
00181 AddParam("weight-t", "tm", "DEPRECATED. DO NOT USE. weights for translation model components");
00182 AddParam("weight-w", "w", "DEPRECATED. DO NOT USE. weight for word penalty");
00183 AddParam("weight-u", "u", "DEPRECATED. DO NOT USE. weight for unknown word penalty");
00184 AddParam("weight-e", "e", "DEPRECATED. DO NOT USE. weight for word deletion");
00185 AddParam("text-type", "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
00186 AddParam("input-scores", "DEPRECATED. DO NOT USE. 2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)");
00187
00188 AddParam("weight-file", "wf", "feature weights file. Do *not* put weights for 'core' features in here - they go in moses.ini");
00189
00190 AddParam("weight", "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
00191 AddParam("weight-overwrite", "special parameter for mert. All on 1 line. Overrides weights specified in 'weights' argument");
00192 AddParam("feature-overwrite", "Override arguments in a particular featureu function with a particular key");
00193
00194 AddParam("feature", "");
00195 AddParam("print-id", "prefix translations with id. Default if false");
00196
00197 AddParam("alternate-weight-setting", "aws", "alternate set of weights to used per xml specification");
00198 }
00199
/** Destructor — nothing to release explicitly; members clean up themselves. */
Parameter::~Parameter()
{
}
00203
00205 void Parameter::AddParam(const string ¶mName, const string &description)
00206 {
00207 m_valid[paramName] = true;
00208 m_description[paramName] = description;
00209 }
00210
00212 void Parameter::AddParam(const string ¶mName, const string &abbrevName, const string &description)
00213 {
00214 m_valid[paramName] = true;
00215 m_valid[abbrevName] = true;
00216 m_abbreviation[paramName] = abbrevName;
00217 m_fullname[abbrevName] = paramName;
00218 m_description[paramName] = description;
00219 }
00220
00222 void Parameter::Explain()
00223 {
00224 cerr << "Usage:" << endl;
00225 for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++) {
00226 const string paramName = iterParam->first;
00227 const string paramDescription = iterParam->second;
00228 cerr << "\t-" << paramName;
00229 PARAM_STRING::const_iterator iterAbbr = m_abbreviation.find( paramName );
00230 if ( iterAbbr != m_abbreviation.end() )
00231 cerr << " (" << iterAbbr->second << ")";
00232 cerr << ": " << paramDescription << endl;
00233 }
00234 }
00235
00239 bool Parameter::isOption(const char* token)
00240 {
00241 if (! token) return false;
00242 std::string tokenString(token);
00243 size_t length = tokenString.size();
00244 if (length > 0 && tokenString.substr(0,1) != "-") return false;
00245 if (length > 1 && tokenString.substr(1,1).find_first_not_of("0123456789") == 0) return true;
00246 return false;
00247 }
00248
/** Load parameters from a configuration file by simulating the command
 *  line "executable -f <filePath>" and delegating to the argc/argv
 *  overload.
 */
bool Parameter::LoadParam(const string &filePath)
{
  // The (char**) cast drops constness to match LoadParam(int, char*[]);
  // the strings are only read. filePath.c_str() outlives the call.
  const char *argv[] = {"executable", "-f", filePath.c_str() };
  return LoadParam(3, (char**) argv);
}
00255
/** Parse parameters from the command line and the mandatory -f/-config
 *  configuration file, then normalise old-style weight arguments.
 *  @return false if no config file was given, the file could not be read,
 *          an unknown switch was present, or Validate() failed.
 */
bool Parameter::LoadParam(int argc, char* argv[])
{
  // the config file (-f or -config) is mandatory
  string configPath;
  if ( (configPath = FindParam("-f", argc, argv)) == ""
       && (configPath = FindParam("-config", argc, argv)) == "") {
    PrintCredit();
    Explain();

    cerr << endl;
    UserMessage::Add("No configuration file was specified. Use -config or -f");
    cerr << endl;
    return false;
  } else {
    if (!ReadConfigFile(configPath)) {
      UserMessage::Add("Could not read "+configPath);
      return false;
    }
  }

  // command-line switches (long names) override config-file settings
  for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++) {
    const string paramName = iterParam->first;
    OverwriteParam("-" + paramName, paramName, argc, argv);
  }

  // same for the abbreviated switch names
  for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin(); iterParam != m_abbreviation.end(); iterParam++) {
    const string paramName = iterParam->first;
    const string paramShortName = iterParam->second;
    OverwriteParam("-" + paramShortName, paramName, argc, argv);
  }

  // at verbosity >= 1 (the default), echo every defined parameter to stderr
  int verbose = 1;
  if (m_setting.find("verbose") != m_setting.end() &&
      m_setting["verbose"].size() > 0)
    verbose = Scan<int>(m_setting["verbose"][0]);
  if (verbose >= 1) {
    TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl);
    for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ; iterParam != m_setting.end(); iterParam++) {
      TRACE_ERR( "\t" << iterParam->first << ": ");
      for ( size_t i = 0; i < iterParam->second.size(); i++ )
        TRACE_ERR( iterParam->second[i] << " ");
      TRACE_ERR( endl);
    }
  }

  // convert old-style weight arguments to the [feature]/[weight] form,
  // unless the config already uses the new "feature" syntax
  if (!isParamSpecified("feature"))
    ConvertWeightArgs();
  CreateWeightsMap();
  WeightOverwrite();

  // reject any dash-prefixed argv token that is not a registered switch
  bool noErrorFlag = true;
  for (int i = 0 ; i < argc ; i++) {
    if (isOption(argv[i])) {
      string paramSwitch = (string) argv[i];
      string paramName = paramSwitch.substr(1);
      if (m_valid.find(paramName) == m_valid.end()) {
        UserMessage::Add("illegal switch: " + paramSwitch);
        noErrorFlag = false;
      }
    }
  }

  // final cross-parameter validation
  return Validate() && noErrorFlag;
}
00329
/** Return a mutable reference to the weight vector for feature @a name.
 *  map::operator[] inserts an empty vector if the name is unknown.
 *  Side effect: echoes the current weights to stderr (debug output).
 */
std::vector<float> &Parameter::GetWeights(const std::string &name)
{
  std::vector<float> &ret = m_weights[name];

  cerr << "WEIGHT " << name << "=";
  for (size_t i = 0; i < ret.size(); ++i) {
    cerr << ret[i] << ",";
  }
  cerr << endl;
  return ret;
}
00341
00342 void Parameter::SetWeight(const std::string &name, size_t ind, float weight)
00343 {
00344 PARAM_VEC &newWeights = m_setting["weight"];
00345 string line = name + SPrint(ind) + "= " + SPrint(weight);
00346 newWeights.push_back(line);
00347 }
00348
00349 void Parameter::SetWeight(const std::string &name, size_t ind, const vector<float> &weights)
00350 {
00351 PARAM_VEC &newWeights = m_setting["weight"];
00352 string line = name + SPrint(ind) + "=";
00353
00354 for (size_t i = 0; i < weights.size(); ++i) {
00355 line += " " + SPrint(weights[i]);
00356 }
00357 newWeights.push_back(line);
00358 }
00359
00360 void Parameter::AddWeight(const std::string &name, size_t ind, const std::vector<float> &weights)
00361 {
00362 PARAM_VEC &newWeights = m_setting["weight"];
00363
00364 string sought = name + SPrint(ind) + "=";
00365 for (size_t i = 0; i < newWeights.size(); ++i) {
00366 string &line = newWeights[i];
00367 if (line.find(sought) == 0) {
00368
00369 for (size_t i = 0; i < weights.size(); ++i) {
00370 line += " " + SPrint(weights[i]);
00371 }
00372 return;
00373 }
00374 }
00375
00376
00377 SetWeight(name, ind, weights);
00378 }
00379
/** Move every value of old-style parameter @a oldWeightName into "weight"
 *  lines named @a newWeightName, then erase the old entry.
 *
 *  NOTE(review): 'ind' is initialised to 0 and never incremented, so all
 *  weights are emitted under index 0 ("<newWeightName>0=") — looks
 *  deliberate for features that occur once, but confirm for repeats.
 */
void Parameter::ConvertWeightArgsSingleWeight(const string &oldWeightName, const string &newWeightName)
{
  size_t ind = 0;
  PARAM_MAP::iterator iterMap;

  iterMap = m_setting.find(oldWeightName);
  if (iterMap != m_setting.end()) {
    const PARAM_VEC &weights = iterMap->second;
    for (size_t i = 0; i < weights.size(); ++i) {
      SetWeight(newWeightName, ind, Scan<float>(weights[i]));
    }

    m_setting.erase(iterMap);
  }
}
00395
/** Convert old-style phrase-table settings (ttable-file, ttable-limit,
 *  the weight list passed in as @a oldWeightName, weight-i, input-scores)
 *  into [feature]/[weight] entries, then erase the old settings.
 */
void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
{
  // weight-i: weights on confusion-net/lattice input links. They are
  // credited to the binary phrase dictionary and mirrored into
  // input-scores ("1 0" for one weight, "1 1" for two).
  if (isParamSpecified("weight-i")) {
    vector<float> inputWeights = Scan<float>(m_setting["weight-i"]);
    PARAM_VEC &numInputScores = m_setting["input-scores"];
    if (inputWeights.size() == 1) {
      CHECK(numInputScores.size() == 0);
      numInputScores.push_back("1");
      numInputScores.push_back("0");
    } else if (inputWeights.size() == 2) {
      CHECK(numInputScores.size() == 0);
      numInputScores.push_back("1");
      numInputScores.push_back("1");
    }

    SetWeight("PhraseDictionaryBinary", 0, inputWeights);
  }

  VERBOSE(2,"Creating phrase table features" << endl);

  size_t numInputScores = 0;       // score components on input links
  size_t numRealWordsInInput = 0;  // extra "real word" score component (0 or 1)
  map<string, size_t> ptIndices;   // running index per phrase-table type

  if (GetParam("input-scores").size()) {
    numInputScores = Scan<size_t>(GetParam("input-scores")[0]);
  }

  if (GetParam("input-scores").size() > 1) {
    numRealWordsInInput = Scan<size_t>(GetParam("input-scores")[1]);
  }

  if (GetParam("ttable-file").size() > 0) {
    const vector<string> &translationVector = GetParam("ttable-file");
    vector<size_t> maxTargetPhrase = Scan<size_t>(GetParam("ttable-limit"));

    if(maxTargetPhrase.size() == 1 && translationVector.size() > 1) {
      // a single ttable-limit applies uniformly to every table
      VERBOSE(1, "Using uniform ttable-limit of " << maxTargetPhrase[0] << " for all translation tables." << endl);
      for(size_t i = 1; i < translationVector.size(); i++)
        maxTargetPhrase.push_back(maxTargetPhrase[0]);
    } else if(maxTargetPhrase.size() != 1 && maxTargetPhrase.size() < translationVector.size()) {
      stringstream strme;
      strme << "You specified " << translationVector.size() << " translation tables, but only " << maxTargetPhrase.size() << " ttable-limits.";
      UserMessage::Add(strme.str());
      return;
    }

    const PARAM_VEC &oldWeights = m_setting[oldWeightName];

    // old weights are a flat list consumed table by table
    size_t currOldInd = 0;
    for(size_t currDict = 0 ; currDict < translationVector.size(); currDict++) {
      stringstream ptLine;

      vector<string> token = Tokenize(translationVector[currDict]);

      if(currDict == 0 && token.size() == 4) {
        UserMessage::Add("Phrase table specification in old 4-field format. No longer supported");
        return;
      }
      CHECK(token.size() >= 5);

      // token[0] = implementation, [1] = input factors, [2] = output
      // factors, [3] = num score components, [4] = path
      PhraseTableImplementation implementation = (PhraseTableImplementation) Scan<int>(token[0]);

      string ptType;
      switch (implementation) {
      case Memory:
        ptType = "PhraseDictionaryMemory";
        break;
      case Binary:
        ptType = "PhraseDictionaryBinary";
        break;
      case OnDisk:
        ptType = "PhraseDictionaryOnDisk";
        break;
      case SCFG:
        ptType = "PhraseDictionaryMemory";
        break;
      case Compact:
        ptType = "PhraseDictionaryCompact";
        break;
      default:
        // NOTE(review): unknown implementations silently leave ptType empty
        break;
      }

      // running index among tables of the same type
      size_t ptInd;
      if (ptIndices.find(ptType) == ptIndices.end()) {
        ptIndices[ptType] = 0;
        ptInd = 0;
      } else {
        ptInd = ++ptIndices[ptType];
      }

      // NOTE(review): token.size()==4 can no longer reach this point for
      // currDict==0 (rejected above); the ternary looks like a leftover
      size_t numFFInd = (token.size() == 4) ? 2 : 3;
      size_t numFF = Scan<size_t>(token[numFFInd]);

      // pull this table's weights off the flat old-weight list
      vector<float> weights(numFF);
      for (size_t currFF = 0; currFF < numFF; ++currFF) {
        CHECK(currOldInd < oldWeights.size());
        float weight = Scan<float>(oldWeights[currOldInd]);
        weights[currFF] = weight;

        ++currOldInd;
      }
      AddWeight(ptType, ptInd, weights);

      // assemble the [feature] line for this table
      ptLine << ptType << " ";
      ptLine << "input-factor=" << token[1] << " ";
      ptLine << "output-factor=" << token[2] << " ";
      ptLine << "path=" << token[4] << " ";

      vector<FactorType> input = Tokenize<FactorType>(token[1], ",")
                                 ,output = Tokenize<FactorType>(token[2], ",");
      size_t numScoreComponent = Scan<size_t>(token[3]);
      string filePath= token[4];

      if(currDict==0) {
        // only the first phrase table absorbs the input-link scores
        numScoreComponent += numInputScores + numRealWordsInInput;
      }

      ptLine << "num-features=" << numScoreComponent << " ";
      ptLine << "table-limit=" << maxTargetPhrase[currDict] << " ";

      if (implementation == SuffixArray) {
        // suffix-array tables carry two extra path fields
        ptLine << "target-path=" << token[5] << " ";
        ptLine << "alignment-path=" << token[6] << " ";
      }

      AddFeature(ptLine.str());
    }
  }

  // old-style parameters have been consumed
  m_setting.erase("weight-i");
  m_setting.erase(oldWeightName);
  m_setting.erase("ttable-file");
  m_setting.erase("ttable-limit");

}
00545
00546 void Parameter::AddFeature(const std::string &line)
00547 {
00548 PARAM_VEC &features = m_setting["feature"];
00549 features.push_back(line);
00550 }
00551
/** Convert old-style reordering settings (weight-d + distortion-file)
 *  into Distortion / LexicalReordering [feature]/[weight] entries,
 *  then erase the old settings.
 */
void Parameter::ConvertWeightArgsDistortion()
{
  const string oldWeightName = "weight-d";
  const string oldLexReordingName = "distortion-file";

  const PARAM_VEC &oldWeights = GetParam(oldWeightName);

  if (oldWeights.size() > 0) {
    // phrase-based decoding (search-algorithm absent, "0" or "1"):
    // the first weight-d value is the distance-based distortion weight
    if (!isParamSpecified("search-algorithm") ||
        (GetParam("search-algorithm").size() > 0
         && (Trim(GetParam("search-algorithm")[0]) == "0"
             ||Trim(GetParam("search-algorithm")[0]) == "1"
            )
        )
       ) {
      AddFeature("Distortion");
      SetWeight("Distortion", 0, Scan<float>(oldWeights[0]));
    }

    // remaining weight-d values belong to the lexicalized reordering tables
    size_t currOldInd = 1;
    const PARAM_VEC &lextable = GetParam(oldLexReordingName);

    for (size_t indTable = 0; indTable < lextable.size(); ++indTable) {
      // each distortion-file line: <factors> <type> <numFeatures> <path>
      const string &line = lextable[indTable];
      vector<string> toks = Tokenize(line);

      size_t numFF = Scan<size_t>(toks[2]);

      // consume numFF weights off the flat weight-d list
      vector<float> weights(numFF);
      for (size_t currFF = 0; currFF < numFF; ++currFF) {
        CHECK(currOldInd < oldWeights.size());
        float weight = Scan<float>(oldWeights[currOldInd]);
        weights[currFF] = weight;

        ++currOldInd;
      }
      SetWeight("LexicalReordering", indTable, weights);

      stringstream strme;
      strme << "LexicalReordering "
            << "type=" << toks[1] << " ";

      // factor spec is "<input>-<output>"
      vector<FactorType> factors = Tokenize<FactorType>(toks[0], "-");
      CHECK(factors.size() == 2);
      strme << "input-factor=" << factors[0]
            << " output-factor=" << factors[1] << " ";

      strme << "num-features=" << toks[2] << " ";
      strme << "path=" << toks[3];

      AddFeature(strme.str());
    }
  }

  m_setting.erase(oldWeightName);
  m_setting.erase(oldLexReordingName);

}
00614
/** Convert old-style language-model settings (weight-l + lmodel-file)
 *  into [feature]/[weight] entries, one per language model, then erase
 *  the old settings.
 */
void Parameter::ConvertWeightArgsLM()
{
  const string oldWeightName = "weight-l";
  const string oldFeatureName = "lmodel-file";

  // chart decoding unless search-algorithm is "0" or "1" (phrase-based)
  bool isChartDecoding = true;
  if (!isParamSpecified("search-algorithm") ||
      (GetParam("search-algorithm").size() > 0
       && (Trim(GetParam("search-algorithm")[0]) == "0"
           ||Trim(GetParam("search-algorithm")[0]) == "1"
          )
      )
     ) {
    isChartDecoding = false;
  }

  // optional per-model count of extra OOV feature weights
  vector<int> oovWeights;
  if (isParamSpecified("lmodel-oov-feature")) {
    oovWeights = Scan<int>(m_setting["lmodel-oov-feature"]);
  }

  PARAM_MAP::iterator iterMap;

  iterMap = m_setting.find(oldWeightName);
  if (iterMap != m_setting.end()) {

    // old weights form a flat list consumed model by model
    size_t currOldInd = 0;
    const PARAM_VEC &weights = iterMap->second;
    const PARAM_VEC &models = m_setting[oldFeatureName];
    for (size_t lmIndex = 0; lmIndex < models.size(); ++lmIndex) {
      // each lmodel-file line: <type> <factor> <order> <path>
      const string &line = models[lmIndex];
      vector<string> modelToks = Tokenize(line);

      int lmType = Scan<int>(modelToks[0]);

      string newFeatureName;
      switch (lmType) {
      case 0:
        newFeatureName = "SRILM";
        break;
      case 1:
        newFeatureName = "IRSTLM";
        break;
      case 8:
      case 9:
        newFeatureName = "KENLM";
        break;
      default:
        // unsupported LM type: hard failure
        abort();
      }

      // one main weight per model, plus any OOV feature weights
      size_t numFF = 1;
      if (oovWeights.size() > lmIndex)
        numFF += oovWeights[lmIndex];

      vector<float> weightsLM(numFF);
      for (size_t currFF = 0; currFF < numFF; ++currFF) {
        CHECK(currOldInd < weights.size());
        weightsLM[currFF] = Scan<float>(weights[currOldInd]);
        if (isChartDecoding) {
          weightsLM[currFF] = UntransformLMScore(weightsLM[currFF]);
        }

        ++currOldInd;
      }

      SetWeight(newFeatureName, lmIndex, weightsLM);

      string featureLine = newFeatureName + " "
                           + "factor=" + modelToks[1] + " "
                           + "order=" + modelToks[2] + " "
                           + "num-features=" + SPrint(numFF) + " ";
      // types 8/9 are both KenLM; 9 requests lazy loading
      if (lmType == 9) {
        featureLine += "lazyken=1 ";
      } else if (lmType == 8) {
        featureLine += "lazyken=0 ";
      }

      featureLine += "path=" + modelToks[3];

      AddFeature(featureLine);
    }

    m_setting.erase(iterMap);
  }

  m_setting.erase(oldFeatureName);
}
00703
/** Convert old-style generation settings (@a oldWeightName +
 *  generation-file) into [feature]/[weight] entries named
 *  @a newWeightName, then erase the old settings.
 */
void Parameter::ConvertWeightArgsGeneration(const std::string &oldWeightName, const std::string &newWeightName)
{
  string oldFeatureName = "generation-file";

  PARAM_VEC &oldWeights = m_setting[oldWeightName];

  if (oldWeights.size() > 0) {
    // old weights form a flat list consumed table by table
    size_t currOldInd = 0;
    PARAM_VEC &models = m_setting[oldFeatureName];

    for (size_t indTable = 0; indTable < models.size(); ++indTable) {
      // each generation-file line: <inFactor> <outFactor> <numFeatures> <path>
      string &line = models[indTable];
      vector<string> modelToks = Tokenize(line);

      size_t numFF = Scan<size_t>(modelToks[2]);

      vector<float> weights(numFF);
      for (size_t currFF = 0; currFF < numFF; ++currFF) {
        CHECK(currOldInd < oldWeights.size());
        float weight = Scan<float>(oldWeights[currOldInd]);
        weights[currFF] = weight;

        ++currOldInd;
      }
      SetWeight(newWeightName, indTable, weights);

      // NOTE(review): the feature line is hard-coded to "Generation" while
      // the weights use newWeightName — confirm callers always pass
      // newWeightName == "Generation".
      stringstream strme;
      strme << "Generation "
            << "input-factor=" << modelToks[0] << " "
            << "output-factor=" << modelToks[1] << " "
            << "num-features=" << modelToks[2] << " "
            << "path=" << modelToks[3];
      AddFeature(strme.str());
    }
  }

  m_setting.erase(oldWeightName);
  m_setting.erase(oldFeatureName);
}
00744
00745 void Parameter::ConvertWeightArgsWordPenalty()
00746 {
00747 const std::string oldWeightName = "weight-w";
00748 const std::string newWeightName = "WordPenalty";
00749
00750 bool isChartDecoding = true;
00751 if (!isParamSpecified("search-algorithm") ||
00752 (GetParam("search-algorithm").size() > 0
00753 && (Trim(GetParam("search-algorithm")[0]) == "0"
00754 ||Trim(GetParam("search-algorithm")[0]) == "1"
00755 )
00756 )
00757 ) {
00758 isChartDecoding = false;
00759 }
00760
00761 PARAM_MAP::iterator iterMap;
00762
00763 iterMap = m_setting.find(oldWeightName);
00764 if (iterMap != m_setting.end()) {
00765 const PARAM_VEC &weights = iterMap->second;
00766 for (size_t i = 0; i < weights.size(); ++i) {
00767 float weight = Scan<float>(weights[i]);
00768 if (isChartDecoding) {
00769 weight *= 0.434294482;
00770 }
00771 SetWeight(newWeightName, i, weight);
00772 }
00773
00774 m_setting.erase(iterMap);
00775 }
00776
00777 }
00778
00779 void Parameter::ConvertWeightArgs()
00780 {
00781
00782 CHECK( m_setting.count("weight-dlm") == 0);
00783
00784
00785 if (m_setting.count("weight") &&
00786 (m_setting.count("weight-i") || m_setting.count("weight-t") || m_setting.count("weight-w") ||
00787 m_setting.count("weight-l") || m_setting.count("weight-u") || m_setting.count("weight-lex") ||
00788 m_setting.count("weight-generation") || m_setting.count("weight-lr") || m_setting.count("weight-d")
00789 )) {
00790 cerr << "Do not mix old and new format for specify weights";
00791 }
00792
00793 ConvertWeightArgsWordPenalty();
00794 ConvertWeightArgsLM();
00795 ConvertWeightArgsSingleWeight("weight-slm", "SyntacticLM");
00796 ConvertWeightArgsSingleWeight("weight-u", "UnknownWordPenalty");
00797 ConvertWeightArgsGeneration("weight-generation", "Generation");
00798 ConvertWeightArgsDistortion();
00799
00800
00801 ConvertWeightArgsSingleWeight("weight-lr", "LexicalReordering");
00802 ConvertWeightArgsSingleWeight("weight-bl", "BleuScoreFeature");
00803 ConvertWeightArgsSingleWeight("weight-glm", "GlobalLexicalModel");
00804 ConvertWeightArgsSingleWeight("weight-wt", "WordTranslationFeature");
00805 ConvertWeightArgsSingleWeight("weight-pp", "PhrasePairFeature");
00806 ConvertWeightArgsSingleWeight("weight-pb", "PhraseBoundaryFeature");
00807
00808 ConvertWeightArgsSingleWeight("weight-e", "WordDeletion");
00809 ConvertWeightArgsSingleWeight("weight-lex", "GlobalLexicalReordering");
00810
00811 AddFeature("WordPenalty");
00812 AddFeature("UnknownWordPenalty");
00813
00814 ConvertWeightArgsPhraseModel("weight-t");
00815
00816 }
00817
00818 void Parameter::CreateWeightsMap()
00819 {
00820 PARAM_VEC &vec = m_setting["weight"];
00821 for (size_t i = 0; i < vec.size(); ++i) {
00822 const string &line = vec[i];
00823 vector<string> toks = Tokenize(line);
00824 CHECK(toks.size() >= 2);
00825
00826 string name = toks[0];
00827 name = name.substr(0, name.size() - 1);
00828
00829 vector<float> weights(toks.size() - 1);
00830 for (size_t i = 1; i < toks.size(); ++i) {
00831 float weight = Scan<float>(toks[i]);
00832 weights[i - 1] = weight;
00833 }
00834 m_weights[name] = weights;
00835 }
00836
00837 }
00838
00839 void Parameter::WeightOverwrite()
00840 {
00841 PARAM_VEC &vec = m_setting["weight-overwrite"];
00842
00843 if (vec.size() == 0)
00844 return;
00845
00846
00847 CHECK(vec.size() == 1);
00848
00849 string name("");
00850 vector<float> weights;
00851 vector<string> toks = Tokenize(vec[0]);
00852 for (size_t i = 0; i < toks.size(); ++i) {
00853 const string &tok = toks[i];
00854
00855 if (tok.substr(tok.size() - 1, 1) == "=") {
00856
00857
00858 if (name != "") {
00859
00860 m_weights[name] = weights;
00861 weights.clear();
00862 }
00863
00864 name = tok.substr(0, tok.size() - 1);
00865 } else {
00866
00867 float weight = Scan<float>(toks[i]);
00868 weights.push_back(weight);
00869 }
00870 }
00871
00872 m_weights[name] = weights;
00873
00874 }
00875
00877 bool Parameter::Validate()
00878 {
00879 bool noErrorFlag = true;
00880
00881 PARAM_MAP::const_iterator iterParams;
00882 for (iterParams = m_setting.begin(); iterParams != m_setting.end(); ++iterParams) {
00883 const std::string &key = iterParams->first;
00884
00885 if (m_valid.find(key) == m_valid.end()) {
00886 UserMessage::Add("Unknown parameter " + key);
00887 noErrorFlag = false;
00888 }
00889 }
00890
00891 if (m_setting["lmodel-dub"].size() > 0) {
00892 if (m_setting["lmodel-file"].size() != m_setting["lmodel-dub"].size()) {
00893 stringstream errorMsg("");
00894 errorMsg << "Config and parameters specify "
00895 << static_cast<int>(m_setting["lmodel-file"].size())
00896 << " language model files (lmodel-file), but "
00897 << static_cast<int>(m_setting["lmodel-dub"].size())
00898 << " LM upperbounds (lmodel-dub)"
00899 << endl;
00900 UserMessage::Add(errorMsg.str());
00901 noErrorFlag = false;
00902 }
00903 }
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925 if (noErrorFlag && m_setting["input-file"].size() == 1) {
00926 noErrorFlag = FileExists(m_setting["input-file"][0]);
00927 if (!noErrorFlag) {
00928 stringstream errorMsg("");
00929 errorMsg << endl << "Input file " << m_setting["input-file"][0] << " does not exist";
00930 UserMessage::Add(errorMsg.str());
00931 }
00932 }
00933
00934 if (noErrorFlag) {
00935 std::vector<std::string> ext;
00936
00937 ext.push_back("");
00938 ext.push_back(".gz");
00939 noErrorFlag = FilesExist("generation-file", 3, ext);
00940 }
00941
00942 if (noErrorFlag) {
00943 std::vector<std::string> ext;
00944
00945 ext.push_back("");
00946 ext.push_back(".gz");
00947
00948 ext.push_back(".binlexr.idx");
00949
00950 ext.push_back(".minlexr");
00951 noErrorFlag = FilesExist("distortion-file", 3, ext);
00952 }
00953 return noErrorFlag;
00954 }
00955
00957 bool Parameter::FilesExist(const string ¶mName, int fieldNo, std::vector<std::string> const& extensions)
00958 {
00959 typedef std::vector<std::string> StringVec;
00960 StringVec::const_iterator iter;
00961
00962 PARAM_MAP::const_iterator iterParam = m_setting.find(paramName);
00963 if (iterParam == m_setting.end()) {
00964
00965 return true;
00966 }
00967 const StringVec &pathVec = (*iterParam).second;
00968 for (iter = pathVec.begin() ; iter != pathVec.end() ; ++iter) {
00969 StringVec vec = Tokenize(*iter);
00970
00971 size_t tokenizeIndex;
00972 if (fieldNo == -1)
00973 tokenizeIndex = vec.size() - 1;
00974 else
00975 tokenizeIndex = static_cast<size_t>(fieldNo);
00976
00977 if (tokenizeIndex >= vec.size()) {
00978 stringstream errorMsg("");
00979 errorMsg << "Expected at least " << (tokenizeIndex+1) << " tokens per entry in '"
00980 << paramName << "', but only found "
00981 << vec.size();
00982 UserMessage::Add(errorMsg.str());
00983 return false;
00984 }
00985 const string &pathStr = vec[tokenizeIndex];
00986
00987 bool fileFound=0;
00988 for(size_t i=0; i<extensions.size() && !fileFound; ++i) {
00989 fileFound|=FileExists(pathStr + extensions[i]);
00990 }
00991 if(!fileFound) {
00992 stringstream errorMsg("");
00993 errorMsg << "File " << pathStr << " does not exist";
00994 UserMessage::Add(errorMsg.str());
00995 return false;
00996 }
00997 }
00998 return true;
00999 }
01000
01002
01003
01004 string Parameter::FindParam(const string ¶mSwitch, int argc, char* argv[])
01005 {
01006 for (int i = 0 ; i < argc ; i++) {
01007 if (string(argv[i]) == paramSwitch) {
01008 if (i+1 < argc) {
01009 return argv[i+1];
01010 } else {
01011 stringstream errorMsg("");
01012 errorMsg << "Option " << paramSwitch << " requires a parameter!";
01013 UserMessage::Add(errorMsg.str());
01014
01015 }
01016 }
01017 }
01018 return "";
01019 }
01020
01026 void Parameter::OverwriteParam(const string ¶mSwitch, const string ¶mName, int argc, char* argv[])
01027 {
01028 int startPos = -1;
01029 for (int i = 0 ; i < argc ; i++) {
01030 if (string(argv[i]) == paramSwitch) {
01031 startPos = i+1;
01032 break;
01033 }
01034 }
01035 if (startPos < 0)
01036 return;
01037
01038 int index = 0;
01039 m_setting[paramName];
01040 while (startPos < argc && (!isOption(argv[startPos]))) {
01041 if (m_setting[paramName].size() > (size_t)index)
01042 m_setting[paramName][index] = argv[startPos];
01043 else
01044 m_setting[paramName].push_back(argv[startPos]);
01045 index++;
01046 startPos++;
01047 }
01048 }
01049
01050
01052 bool Parameter::ReadConfigFile(const string &filePath )
01053 {
01054 InputFileStream inFile(filePath);
01055 string line, paramName;
01056 while(getline(inFile, line)) {
01057
01058 size_t comPos = line.find_first_of("#");
01059 if (comPos != string::npos)
01060 line = line.substr(0, comPos);
01061
01062 line = Trim(line);
01063
01064 if (line.size() == 0) {
01065
01066 } else if (line[0]=='[') {
01067
01068 for (size_t currPos = 0 ; currPos < line.size() ; currPos++) {
01069 if (line[currPos] == ']') {
01070 paramName = line.substr(1, currPos - 1);
01071 break;
01072 }
01073 }
01074 } else {
01075
01076 m_setting[paramName].push_back(line);
01077 }
01078 }
01079 return true;
01080 }
01081
/** One contributor entry for the credits listing.  sortId is drawn from
 *  rand() at construction so that sorting a vector of Credits produces a
 *  run-dependent (shuffled) order.
 *  Fix: constructor previously took its strings by value and assigned them
 *  in the body (extra copies); now const-ref parameters + member init list.
 */
struct Credit {
  std::string name, contact, currentPursuits, areaResponsibility;
  int sortId;  // random key in [0, 1000) used only for ordering

  Credit(const std::string &name, const std::string &contact,
         const std::string &currentPursuits, const std::string &areaResponsibility)
    : name(name)
    , contact(contact)
    , currentPursuits(currentPursuits)
    , areaResponsibility(areaResponsibility)
    , sortId(rand() % 1000) {
  }

  /// Order solely by the random key — yields a shuffled credits list.
  bool operator<(const Credit &other) const {
    return sortId < other.sortId;
  }

};
01107
01108 std::ostream& operator<<(std::ostream &os, const Credit &credit)
01109 {
01110 os << credit.name;
01111 if (credit.contact != "")
01112 os << "\t contact: " << credit.contact;
01113 if (credit.currentPursuits != "")
01114 os << " " << credit.currentPursuits;
01115 if (credit.areaResponsibility != "")
01116 os << " I'll answer question on: " << credit.areaResponsibility;
01117 return os;
01118 }
01119
// Print the LGPL licence banner and a randomly-shuffled list of
// contributors to stderr.  Purely informational output.
void Parameter::PrintCredit()
{
  vector<Credit> everyone;
  // Seed with the wall clock so the credits order differs on every run;
  // Credit's ctor draws its sort key from rand().
  srand ( time(NULL) );

  everyone.push_back(Credit("Nicola Bertoldi"
                            , "911"
                            , ""
                            , "scripts & other stuff"));
  everyone.push_back(Credit("Ondrej Bojar"
                            , ""
                            , "czech this out!"
                            , ""));
  everyone.push_back(Credit("Chris Callison-Burch"
                            , "anytime, anywhere"
                            , "international playboy"
                            , ""));
  everyone.push_back(Credit("Alexandra Constantin"
                            , ""
                            , "eu sunt varza"
                            , ""));
  everyone.push_back(Credit("Brooke Cowan"
                            , "brooke@csail.mit.edu"
                            , "if you're going to san francisco, be sure to wear a flower in your hair"
                            , ""));
  everyone.push_back(Credit("Chris Dyer"
                            , "can't. i'll be out driving my mustang"
                            , "driving my mustang"
                            , ""));
  everyone.push_back(Credit("Marcello Federico"
                            , "federico at itc at it"
                            , "Researcher at ITC-irst, Trento, Italy"
                            , "IRST language model"));
  everyone.push_back(Credit("Evan Herbst"
                            , "Small college in upstate New York"
                            , ""
                            , ""));
  everyone.push_back(Credit("Philipp Koehn"
                            , "only between 2 and 4am"
                            , ""
                            , "Nothing fazes this dude"));
  everyone.push_back(Credit("Christine Moran"
                            , "weird building at MIT"
                            , ""
                            , ""));
  everyone.push_back(Credit("Wade Shen"
                            , "via morse code"
                            , "buying another laptop"
                            , ""));
  everyone.push_back(Credit("Richard Zens"
                            , "richard at aachen dot de"
                            , ""
                            , "ambiguous source input, confusion networks, confusing source code"));
  everyone.push_back(Credit("Hieu Hoang", "http://www.hoang.co.uk/hieu/"
                            , "phd student at Edinburgh Uni. Original Moses developer"
                            , "general queries/ flames on Moses."));

  // Credit::operator< compares the random sortId, so this shuffles.
  sort(everyone.begin(), everyone.end());

  // Licence banner; " at " __TIME__ relies on adjacent-literal concatenation.
  cerr << "Moses - A beam search decoder for phrase-based statistical machine translation models" << endl
       << "Copyright (C) 2006 University of Edinburgh" << endl << endl

       << "This library is free software; you can redistribute it and/or" << endl
       << "modify it under the terms of the GNU Lesser General Public" << endl
       << "License as published by the Free Software Foundation; either" << endl
       << "version 2.1 of the License, or (at your option) any later version." << endl << endl

       << "This library is distributed in the hope that it will be useful," << endl
       << "but WITHOUT ANY WARRANTY; without even the implied warranty of" << endl
       << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU" << endl
       << "Lesser General Public License for more details." << endl << endl

       << "You should have received a copy of the GNU Lesser General Public" << endl
       << "License along with this library; if not, write to the Free Software" << endl
       << "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA" << endl << endl
       << "***********************************************************************" << endl << endl
       << "Built on " << __DATE__ << " at " __TIME__ << endl << endl
       << "WHO'S FAULT IS THIS GODDAM SOFTWARE:" << endl;

  // Stream every (shuffled) entry, one per line.
  ostream_iterator<Credit> out(cerr, "\n");
  copy(everyone.begin(), everyone.end(), out);
  cerr << endl << endl;
}
01204
01208 void Parameter::OverwriteParam(const string ¶mName, PARAM_VEC values)
01209 {
01210 VERBOSE(2,"Overwriting parameter " << paramName);
01211
01212 m_setting[paramName];
01213 if (m_setting[paramName].size() > 1) {
01214 VERBOSE(2," (the parameter had " << m_setting[paramName].size() << " previous values)");
01215 CHECK(m_setting[paramName].size() == values.size());
01216 } else {
01217 VERBOSE(2," (the parameter does not have previous values)");
01218 m_setting[paramName].resize(values.size());
01219 }
01220 VERBOSE(2," with the following values:");
01221 int i=0;
01222 for (PARAM_VEC::iterator iter = values.begin(); iter != values.end() ; iter++, i++) {
01223 m_setting[paramName][i] = *iter;
01224 VERBOSE(2, " " << *iter);
01225 }
01226 VERBOSE(2, std::endl);
01227 }
01228
01229 std::set<std::string> Parameter::GetWeightNames() const
01230 {
01231 std::set<std::string> ret;
01232 std::map<std::string, std::vector<float> >::const_iterator iter;
01233 for (iter = m_weights.begin(); iter != m_weights.end(); ++iter) {
01234 const string &key = iter->first;
01235 ret.insert(key);
01236 }
01237 return ret;
01238 }
01239
01240 void Parameter::Save(const std::string path)
01241 {
01242 ofstream file;
01243 file.open(path.c_str());
01244
01245 PARAM_MAP::const_iterator iterOuter;
01246 for (iterOuter = m_setting.begin(); iterOuter != m_setting.end(); ++iterOuter) {
01247 const std::string §ionName = iterOuter->first;
01248 file << "[" << sectionName << "]" << endl;
01249
01250 const PARAM_VEC &values = iterOuter->second;
01251
01252 PARAM_VEC::const_iterator iterInner;
01253 for (iterInner = values.begin(); iterInner != values.end(); ++iterInner) {
01254 const std::string &value = *iterInner;
01255 file << value << endl;
01256 }
01257
01258 file << endl;
01259 }
01260
01261
01262 file.close();
01263 }
01264
01265 }
01266
01267