00001
00002 #include <vector>
00003 #include <boost/algorithm/string/predicate.hpp>
00004 #include <boost/foreach.hpp>
00005 #include <boost/format.hpp>
00006 #include "util/exception.hh"
00007 #include "util/string_stream.hh"
00008 #include "ScoreComponentCollection.h"
00009 #include "StaticData.h"
00010 #include "moses/FF/StatelessFeatureFunction.h"
00011 #include "moses/FF/StatefulFeatureFunction.h"
00012
00013 using namespace std;
00014 using namespace boost::algorithm;
00015
00016 namespace Moses
00017 {
00018 void ScorePair::PlusEquals(const ScorePair &other)
00019 {
00020 PlusEquals(other.denseScores);
00021 std::map<StringPiece, float>::const_iterator iter;
00022 for (iter = other.sparseScores.begin(); iter != other.sparseScores.end(); ++iter) {
00023 PlusEquals(iter->first, iter->second);
00024 }
00025 }
00026
00027 void ScorePair::PlusEquals(const StringPiece &key, float value)
00028 {
00029 std::map<StringPiece, float>::iterator iter;
00030 iter = sparseScores.find(key);
00031 if (iter == sparseScores.end()) {
00032 sparseScores[key] = value;
00033 } else {
00034 float &existingval = iter->second;
00035 existingval += value;
00036 }
00037 }
00038
00039 std::ostream& operator<<(std::ostream& os, const ScorePair& rhs)
00040 {
00041 for (size_t i = 0; i < rhs.denseScores.size(); ++i) {
00042 os << rhs.denseScores[i] << ",";
00043 }
00044
00045 std::map<StringPiece, float>::const_iterator iter;
00046 for (iter = rhs.sparseScores.begin(); iter != rhs.sparseScores.end(); ++iter) {
00047 os << iter->first << "=" << iter->second << ",";
00048 }
00049
00050 return os;
00051 }
00052
00053
00054 size_t ScoreComponentCollection::s_denseVectorSize = 0;
00055
00056 ScoreComponentCollection::
00057 ScoreComponentCollection()
00058 : m_scores(s_denseVectorSize)
00059 {}
00060
00061
00062 void
00063 ScoreComponentCollection::
00064 RegisterScoreProducer(FeatureFunction* scoreProducer)
00065 {
00066 size_t start = s_denseVectorSize;
00067 s_denseVectorSize = scoreProducer->SetIndex(s_denseVectorSize);
00068 VERBOSE(1, "FeatureFunction: "
00069 << scoreProducer->GetScoreProducerDescription()
00070 << " start: " << start
00071 << " end: " << (s_denseVectorSize-1) << endl);
00072 }
00073
00074
00075 float
00076 ScoreComponentCollection::
00077 GetWeightedScore() const
00078 {
00079 return m_scores.inner_product(StaticData::Instance().GetAllWeights().m_scores);
00080 }
00081
00082 void ScoreComponentCollection::MultiplyEquals(float scalar)
00083 {
00084 m_scores *= scalar;
00085 }
00086
00087
00088 void ScoreComponentCollection::MultiplyEquals(const FeatureFunction* sp, float scalar)
00089 {
00090 std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
00091 for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
00092 const std::string &name = i->first.name();
00093 if (starts_with(name, prefix))
00094 m_scores[i->first] = i->second * scalar;
00095 }
00096 }
00097
00098
00099 size_t ScoreComponentCollection::GetNumberWeights(const FeatureFunction* sp)
00100 {
00101 std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
00102 size_t weights = 0;
00103 for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
00104 const std::string &name = i->first.name();
00105 if (starts_with(name, prefix))
00106 weights++;
00107 }
00108 return weights;
00109 }
00110
00111 void ScoreComponentCollection::DivideEquals(float scalar)
00112 {
00113 m_scores /= scalar;
00114 }
00115
00116 void ScoreComponentCollection::CoreDivideEquals(float scalar)
00117 {
00118 m_scores.coreDivideEquals(scalar);
00119 }
00120
00121 void ScoreComponentCollection::DivideEquals(const ScoreComponentCollection& rhs)
00122 {
00123 m_scores.divideEquals(rhs.m_scores);
00124 }
00125
00126 void ScoreComponentCollection::MultiplyEquals(const ScoreComponentCollection& rhs)
00127 {
00128 m_scores *= rhs.m_scores;
00129 }
00130
00131 void ScoreComponentCollection::MultiplyEqualsBackoff(const ScoreComponentCollection& rhs, float backoff)
00132 {
00133 m_scores.multiplyEqualsBackoff(rhs.m_scores, backoff);
00134 }
00135
00136 void ScoreComponentCollection::MultiplyEquals(float core_r0, float sparse_r0)
00137 {
00138 m_scores.multiplyEquals(core_r0, sparse_r0);
00139 }
00140
00141 std::ostream& operator<<(std::ostream& os, const ScoreComponentCollection& rhs)
00142 {
00143 os << rhs.m_scores;
00144 return os;
00145 }
00146 void ScoreComponentCollection::L1Normalise()
00147 {
00148 m_scores /= m_scores.l1norm_coreFeatures();
00149 }
00150
00151 float ScoreComponentCollection::GetL1Norm() const
00152 {
00153 return m_scores.l1norm();
00154 }
00155
00156 float ScoreComponentCollection::GetL2Norm() const
00157 {
00158 return m_scores.l2norm();
00159 }
00160
00161 float ScoreComponentCollection::GetLInfNorm() const
00162 {
00163 return m_scores.linfnorm();
00164 }
00165
00166 size_t ScoreComponentCollection::L1Regularize(float lambda)
00167 {
00168 return m_scores.l1regularize(lambda);
00169 }
00170
00171 void ScoreComponentCollection::L2Regularize(float lambda)
00172 {
00173 m_scores.l2regularize(lambda);
00174 }
00175
00176 size_t ScoreComponentCollection::SparseL1Regularize(float lambda)
00177 {
00178 return m_scores.sparseL1regularize(lambda);
00179 }
00180
00181 void ScoreComponentCollection::SparseL2Regularize(float lambda)
00182 {
00183 m_scores.sparseL2regularize(lambda);
00184 }
00185
00186 void ScoreComponentCollection::Save(ostream& out, bool multiline) const
00187 {
00188 string sep = " ";
00189 string linesep = "\n";
00190 if (!multiline) {
00191 sep = "=";
00192 linesep = " ";
00193 }
00194
00195 std::vector<FeatureFunction*> const& all_ff
00196 = FeatureFunction::GetFeatureFunctions();
00197 BOOST_FOREACH(FeatureFunction const* ff, all_ff) {
00198 string name = ff->GetScoreProducerDescription();
00199 size_t i = ff->GetIndex();
00200 if (ff->GetNumScoreComponents() == 1)
00201 out << name << sep << m_scores[i] << linesep;
00202 else {
00203 size_t stop = i + ff->GetNumScoreComponents();
00204 boost::format fmt("%s_%d");
00205 for (size_t k = 1; i < stop; ++i, ++k)
00206 out << fmt % name % k << sep << m_scores[i] << linesep;
00207 }
00208 }
00209
00210 m_scores.write(out,sep,linesep);
00211 }
00212
00213 void ScoreComponentCollection::Save(const string& filename) const
00214 {
00215 ofstream out(filename.c_str());
00216 if (!out) {
00217 util::StringStream msg;
00218 msg << "Unable to open " << filename;
00219 throw runtime_error(msg.str());
00220 }
00221 Save(out);
00222 out.close();
00223 }
00224
00225 void
00226 ScoreComponentCollection::
00227 Assign(const FeatureFunction* sp, const string &line)
00228 {
00229 istringstream istr(line);
00230 while(istr) {
00231 string namestring;
00232 FValue value;
00233 istr >> namestring;
00234 if (!istr) break;
00235 istr >> value;
00236 FName fname(sp->GetScoreProducerDescription(), namestring);
00237 m_scores[fname] = value;
00238 }
00239 }
00240
00241 void
00242 ScoreComponentCollection::
00243 Assign(const FeatureFunction* sp, const std::vector<float>& scores)
00244 {
00245 size_t numScores = sp->GetNumScoreComponents();
00246 size_t offset = sp->GetIndex();
00247
00248 if (scores.size() != numScores) {
00249 UTIL_THROW(util::Exception, "Feature function "
00250 << sp->GetScoreProducerDescription() << " specified "
00251 << numScores << " dense scores or weights. Actually has "
00252 << scores.size());
00253 }
00254
00255 for (size_t i = 0; i < scores.size(); ++i) {
00256 m_scores[i + offset] = scores[i];
00257 }
00258 }
00259
00260
00261 void ScoreComponentCollection::InvertDenseFeatures(const FeatureFunction* sp)
00262 {
00263
00264 Scores old_scores = GetScoresForProducer(sp);
00265 Scores new_scores(old_scores.size());
00266
00267 for (size_t i = 0; i != old_scores.size(); ++i) {
00268 new_scores[i] = -old_scores[i];
00269 }
00270
00271 Assign(sp, new_scores);
00272 }
00273
00274 void ScoreComponentCollection::ZeroDenseFeatures(const FeatureFunction* sp)
00275 {
00276 size_t numScores = sp->GetNumScoreComponents();
00277 Scores vec(numScores, 0);
00278
00279 Assign(sp, vec);
00280 }
00281
00283 FVector
00284 ScoreComponentCollection::
00285 GetVectorForProducer(const FeatureFunction* sp) const
00286 {
00287 FVector fv(s_denseVectorSize);
00288 std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
00289 for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
00290 std::stringstream name;
00291 name << i->first;
00292 if (starts_with(name.str(), prefix))
00293 fv[i->first] = i->second;
00294 }
00295 return fv;
00296 }
00297
00298 void ScoreComponentCollection::PlusEquals(const FeatureFunction* sp, const ScorePair &scorePair)
00299 {
00300 PlusEquals(sp, scorePair.denseScores);
00301
00302 std::map<StringPiece, float>::const_iterator iter;
00303 for (iter = scorePair.sparseScores.begin(); iter != scorePair.sparseScores.end(); ++iter) {
00304 const StringPiece &key = iter->first;
00305 float value = iter->second;
00306 PlusEquals(sp, key, value);
00307 }
00308 }
00309
00310 void
00311 ScoreComponentCollection::
00312 OutputAllFeatureScores(std::ostream &out, bool with_labels) const
00313 {
00314 std::string lastName = "";
00315 const vector<const StatefulFeatureFunction*>& sff
00316 = StatefulFeatureFunction::GetStatefulFeatureFunctions();
00317 for( size_t i=0; i<sff.size(); i++ ) {
00318 const StatefulFeatureFunction *ff = sff[i];
00319 if (ff->IsTuneable()) {
00320 OutputFeatureScores(out, ff, lastName, with_labels);
00321 }
00322 }
00323 const vector<const StatelessFeatureFunction*>& slf
00324 = StatelessFeatureFunction::GetStatelessFeatureFunctions();
00325 for( size_t i=0; i<slf.size(); i++ ) {
00326 const StatelessFeatureFunction *ff = slf[i];
00327 if (ff->IsTuneable()) {
00328 OutputFeatureScores(out, ff, lastName, with_labels);
00329 }
00330 }
00331 }
00332
00333 void
00334 ScoreComponentCollection::
00335 OutputFeatureScores(std::ostream& out, FeatureFunction const* ff,
00336 std::string &lastName, bool with_labels) const
00337 {
00338
00339
00340
00341
00342 if (ff->HasTuneableComponents()) {
00343 if( with_labels && lastName != ff->GetScoreProducerDescription() ) {
00344 lastName = ff->GetScoreProducerDescription();
00345 out << " " << lastName << "=";
00346 }
00347 vector<float> scores = GetScoresForProducer( ff );
00348 for (size_t j = 0; j<scores.size(); ++j) {
00349 if (ff->IsTuneableComponent(j)) {
00350 out << " " << scores[j];
00351 }
00352 }
00353 }
00354
00355
00356 const FVector scores = GetVectorForProducer( ff );
00357 for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
00358 out << " " << i->first << "= " << i->second;
00359 }
00360 }
00361
00362 }
00363
00364