00001 #include "M2Scorer.h"
00002
00003 #include <algorithm>
00004 #include <fstream>
00005 #include <stdexcept>
00006 #include <sstream>
00007 #include <cstdlib>
00008
00009 #include <boost/lexical_cast.hpp>
00010
00011
00012 using namespace std;
00013
00014 namespace MosesTuning
00015 {
00016
00017 M2Scorer::M2Scorer(const string& config)
00018 : StatisticsBasedScorer("M2Scorer", config),
00019 beta_(Scan<float>(getConfig("beta", "0.5"))),
00020 max_unchanged_words_(Scan<int>(getConfig("max_unchanged_words", "2"))),
00021 truecase_(Scan<bool>(getConfig("truecase", "false"))),
00022 verbose_(Scan<bool>(getConfig("verbose", "false"))),
00023 m2_(max_unchanged_words_, beta_, truecase_)
00024 {}
00025
00026 void M2Scorer::setReferenceFiles(const vector<string>& referenceFiles)
00027 {
00028 for(size_t i = 0; i < referenceFiles.size(); ++i) {
00029 m2_.ReadM2(referenceFiles[i]);
00030 break;
00031 }
00032 }
00033
00034 void M2Scorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
00035 {
00036 string sentence = trimStr(this->preprocessSentence(text));
00037 std::vector<ScoreStatsType> stats(4, 0);
00038 m2_.SufStats(sentence, sid, stats);
00039 entry.set(stats);
00040 }
00041
00042 float M2Scorer::calculateScore(const vector<ScoreStatsType>& comps) const
00043 {
00044
00045 if (comps.size() != NumberOfScores()) {
00046 throw runtime_error("Size of stat vector for M2Scorer is not " + NumberOfScores());
00047 }
00048
00049 float beta = beta_;
00050
00051
00052 float p = 0.0;
00053 float r = 0.0;
00054 float f = 0.0;
00055
00056 if(comps[1] != 0)
00057 p = comps[0] / (double)comps[1];
00058 else
00059 p = 1.0;
00060
00061 if(comps[2] != 0)
00062 r = comps[0] / (double)comps[2];
00063 else
00064 r = 1.0;
00065
00066 float denom = beta * beta * p + r;
00067 if(denom != 0)
00068 f = (1.0 + beta * beta) * p * r / denom;
00069 else
00070 f = 0.0;
00071
00072 if(verbose_)
00073 std::cerr << comps[0] << " " << comps[1] << " " << comps[2] << std::endl;
00074
00075 if(verbose_)
00076 std::cerr << p << " " << r << " " << f << std::endl;
00077
00078 return f;
00079 }
00080
00081 float M2Scorer::getReferenceLength(const vector<ScoreStatsType>& comps) const
00082 {
00083 return comps[3];
00084 }
00085
00086 std::vector<ScoreStatsType> randomStats(float decay, int max)
00087 {
00088 int gold = rand() % max;
00089 int prop = rand() % max;
00090 int corr = 0.0;
00091
00092 if(std::min(prop, gold) > 0)
00093 corr = rand() % std::min(prop, gold);
00094
00095
00096
00097 std::vector<ScoreStatsType> stats(3, 0.0);
00098 stats[0] = corr * decay;
00099 stats[1] = prop * decay;
00100 stats[2] = gold * decay;
00101
00102 return stats;
00103 }
00104
00105 float sentenceM2(const std::vector<ScoreStatsType>& stats)
00106 {
00107 float beta = 0.5;
00108
00109 std::vector<ScoreStatsType> smoothStats(3, 0.0);
00110 smoothStats[0] += stats[0];
00111 smoothStats[1] += stats[1];
00112 smoothStats[2] += stats[2];
00113
00114 float p = 0.0;
00115 float r = 0.0;
00116 float f = 0.0;
00117
00118 if(smoothStats[1] != 0)
00119 p = smoothStats[0] / smoothStats[1];
00120 else
00121 p = 1.0;
00122
00123 if(smoothStats[2] != 0)
00124 r = smoothStats[0] / smoothStats[2];
00125 else
00126 r = 1.0;
00127
00128 float denom = beta * beta * p + r;
00129 if(denom != 0)
00130 f = (1.0 + beta * beta) * p * r / denom;
00131 else
00132 f = 0.0;
00133
00134 return f;
00135 }
00136
00137 }