00001
00002 #include <vector>
00003
00004 #include "ScoreComponentCollection.h"
00005 #include "StaticData.h"
00006
00007 using namespace std;
00008
00009 namespace Moses
00010 {
00011
00012 ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
00013 size_t ScoreComponentCollection::s_denseVectorSize = 0;
00014
00015 ScoreComponentCollection::ScoreComponentCollection() : m_scores(s_denseVectorSize)
00016 {}
00017
00018
00019 void ScoreComponentCollection::RegisterScoreProducer
00020 (const FeatureFunction* scoreProducer)
00021 {
00022 size_t start = s_denseVectorSize;
00023 size_t end = start + scoreProducer->GetNumScoreComponents();
00024 VERBOSE(1, "FeatureFunction: " << scoreProducer->GetScoreProducerDescription() << " start: " << start << " end: " << end << endl);
00025 s_scoreIndexes[scoreProducer] = pair<size_t,size_t>(start,end);
00026 s_denseVectorSize = end;
00027 }
00028
00029
00030 float ScoreComponentCollection::GetWeightedScore() const
00031 {
00032 return m_scores.inner_product(StaticData::Instance().GetAllWeights().m_scores);
00033 }
00034
00035 void ScoreComponentCollection::MultiplyEquals(float scalar)
00036 {
00037 m_scores *= scalar;
00038 }
00039
00040
00041 void ScoreComponentCollection::MultiplyEquals(const FeatureFunction* sp, float scalar) {
00042 std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
00043 for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
00044 std::stringstream name;
00045 name << i->first;
00046 if (name.str().substr( 0, prefix.length() ).compare( prefix ) == 0)
00047 m_scores[i->first] = i->second * scalar;
00048 }
00049 }
00050
00051
00052 size_t ScoreComponentCollection::GetNumberWeights(const FeatureFunction* sp) {
00053 std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
00054 size_t weights = 0;
00055 for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
00056 std::stringstream name;
00057 name << i->first;
00058 if (name.str().substr( 0, prefix.length() ).compare( prefix ) == 0)
00059 weights++;
00060 }
00061 return weights;
00062 }
00063
00064 void ScoreComponentCollection::DivideEquals(float scalar)
00065 {
00066 m_scores /= scalar;
00067 }
00068
00069 void ScoreComponentCollection::CoreDivideEquals(float scalar)
00070 {
00071 m_scores.coreDivideEquals(scalar);
00072 }
00073
00074 void ScoreComponentCollection::DivideEquals(const ScoreComponentCollection& rhs)
00075 {
00076 m_scores.divideEquals(rhs.m_scores);
00077 }
00078
00079 void ScoreComponentCollection::MultiplyEquals(const ScoreComponentCollection& rhs)
00080 {
00081 m_scores *= rhs.m_scores;
00082 }
00083
00084 void ScoreComponentCollection::MultiplyEqualsBackoff(const ScoreComponentCollection& rhs, float backoff)
00085 {
00086 m_scores.multiplyEqualsBackoff(rhs.m_scores, backoff);
00087 }
00088
00089 void ScoreComponentCollection::MultiplyEquals(float core_r0, float sparse_r0)
00090 {
00091 m_scores.multiplyEquals(core_r0, sparse_r0);
00092 }
00093
00094 std::ostream& operator<<(std::ostream& os, const ScoreComponentCollection& rhs)
00095 {
00096 os << rhs.m_scores;
00097 return os;
00098 }
00099 void ScoreComponentCollection::L1Normalise() {
00100 m_scores /= m_scores.l1norm_coreFeatures();
00101 }
00102
00103 float ScoreComponentCollection::GetL1Norm() const {
00104 return m_scores.l1norm();
00105 }
00106
00107 float ScoreComponentCollection::GetL2Norm() const {
00108 return m_scores.l2norm();
00109 }
00110
00111 float ScoreComponentCollection::GetLInfNorm() const {
00112 return m_scores.linfnorm();
00113 }
00114
00115 size_t ScoreComponentCollection::L1Regularize(float lambda) {
00116 return m_scores.l1regularize(lambda);
00117 }
00118
00119 void ScoreComponentCollection::L2Regularize(float lambda) {
00120 m_scores.l2regularize(lambda);
00121 }
00122
00123 size_t ScoreComponentCollection::SparseL1Regularize(float lambda) {
00124 return m_scores.sparseL1regularize(lambda);
00125 }
00126
00127 void ScoreComponentCollection::SparseL2Regularize(float lambda) {
00128 m_scores.sparseL2regularize(lambda);
00129 }
00130
00131 void ScoreComponentCollection::Save(ostream& out) const {
00132 ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
00133 for (; iter != s_scoreIndexes.end(); ++iter ) {
00134 string name = iter->first->GetScoreProducerDescription();
00135 IndexPair ip = iter->second;
00136 if (ip.second-ip.first == 1) {
00137 out << name << " " << m_scores[ip.first] << endl;
00138 } else {
00139 for (size_t i=ip.first; i < ip.second; ++i) {
00140 ostringstream fullname;
00141 fullname << name << "_" << (i + 1 - ip.first);
00142 out << fullname.str() << " " << m_scores[i] << endl;
00143 }
00144 }
00145 }
00146
00147
00148 m_scores.write(out);
00149 }
00150
00151 void ScoreComponentCollection::Save(const string& filename) const {
00152 ofstream out(filename.c_str());
00153 if (!out) {
00154 ostringstream msg;
00155 msg << "Unable to open " << filename;
00156 throw runtime_error(msg.str());
00157 }
00158 Save(out);
00159 out.close();
00160 }
00161
00162 void ScoreComponentCollection::Assign(const FeatureFunction* sp, const string line) {
00163 istringstream istr(line);
00164 while(istr) {
00165 string namestring;
00166 FValue value;
00167 istr >> namestring;
00168 if (!istr) break;
00169 istr >> value;
00170 FName fname(sp->GetScoreProducerDescription(), namestring);
00171 m_scores[fname] = value;
00172 }
00173 }
00174
00175 void ScoreComponentCollection::ZeroDenseFeatures(const FeatureFunction* sp)
00176 {
00177 size_t numScores = sp->GetNumScoreComponents();
00178 Scores vec(numScores, 0);
00179
00180 Assign(sp, vec);
00181 }
00182
00184 FVector ScoreComponentCollection::GetVectorForProducer(const FeatureFunction* sp) const
00185 {
00186 FVector fv(s_denseVectorSize);
00187 std::string prefix = sp->GetScoreProducerDescription() + FName::SEP;
00188 for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
00189 std::stringstream name;
00190 name << i->first;
00191 if (name.str().substr( 0, prefix.length() ).compare( prefix ) == 0)
00192 fv[i->first] = i->second;
00193 }
00194 return fv;
00195 }
00196
00197 }
00198
00199