00001 #include "PhraseDistanceFeature.h"
00002
00003 #include <vector>
00004 #include <boost/foreach.hpp>
00005 #include "moses/InputType.h"
00006 #include "moses/ScoreComponentCollection.h"
00007 #include "moses/StaticData.h"
00008 #include "util/exception.hh"
00009
00010 using namespace std;
00011
00012 namespace Moses
00013 {
00014 PhraseDistanceFeature::PhraseDistanceFeature(const string &line)
00015 : StatelessFeatureFunction(2, line)
00016 , m_space("")
00017 , m_spaceID(0)
00018 , m_measure(EuclideanDistance)
00019 {
00020 ReadParameters();
00021 }
00022
00023 void PhraseDistanceFeature::EvaluateWithSourceContext(const InputType &input
00024 , const InputPath &inputPath
00025 , const TargetPhrase &targetPhrase
00026 , const StackVec *stackVec
00027 , ScoreComponentCollection &scoreBreakdown
00028 , ScoreComponentCollection *estimatedScores) const
00029 {
00030 vector<float> scores(m_numScoreComponents, 0);
00031 bool broken = false;
00032
00033 map<size_t const, vector<float> >::const_iterator ii;
00034 if (input.m_coordMap) {
00035 ii = input.m_coordMap->find(m_spaceID);
00036 } else {
00037 TRACE_ERR("No coordinates for space " << m_space << " on input (specify with coord XML tag)" << endl);
00038 TRACE_ERR("Scores for " << m_description << " will be incorrect and probably all zeros" << endl);
00039 broken = true;
00040 }
00041 if (ii == input.m_coordMap->end()) {
00042 TRACE_ERR("No coordinates for space " << m_space << " on input (specify with coord XML tag)" << endl);
00043 TRACE_ERR("Scores for " << m_description << " will be incorrect and probably all zeros" << endl);
00044 broken = true;
00045 }
00046
00047 vector<SPTR<vector<float> > > const* tpp = targetPhrase.GetCoordList(m_spaceID);
00048 if (tpp == NULL) {
00049 TRACE_ERR("No coordinates for space " << m_space << " on target phrase (PhraseDictionary implementation needs to set)" << endl);
00050 TRACE_ERR("Scores for " << m_description << " will be incorrect and probably all zeros" << endl);
00051 broken = true;
00052 }
00053
00054 if (!broken) {
00055 vector<float> const& inputCoord = ii->second;
00056 vector<SPTR<vector<float> > > const& tpCoord = *tpp;
00057
00058 vector<float> centroid = vector<float>(inputCoord.size(), 0);
00059 BOOST_FOREACH(SPTR<vector<float> > const coord, tpCoord) {
00060 for (size_t i = 0; i < inputCoord.size(); ++i) {
00061 centroid[i] += (*coord)[i];
00062 }
00063 }
00064 for (size_t i = 0; i < inputCoord.size(); ++i) {
00065 centroid[i] /= tpCoord.size();
00066 }
00067
00068
00069 float inputDistance = 0;
00070 float centroidDistance = 0;
00071 if (m_measure == EuclideanDistance) {
00072 BOOST_FOREACH(SPTR<vector<float> > const coord, tpCoord) {
00073 float pointInputDistance = 0;
00074 float pointCentroidDistance = 0;
00075 for (size_t i = 0; i < inputCoord.size(); ++i) {
00076 pointInputDistance += pow(inputCoord[i] - (*coord)[i], 2);
00077 pointCentroidDistance += pow(centroid[i] - (*coord)[i], 2);
00078 }
00079 inputDistance += sqrt(pointInputDistance);
00080 centroidDistance += sqrt(pointCentroidDistance);
00081 }
00082 } else if (m_measure == TotalVariationDistance) {
00083 BOOST_FOREACH(SPTR<vector<float> > const coord, tpCoord) {
00084 float pointInputDistance = 0;
00085 float pointCentroidDistance = 0;
00086 for (size_t i = 0; i < inputCoord.size(); ++i) {
00087 pointInputDistance += abs(inputCoord[i] - (*coord)[i]);
00088 pointCentroidDistance += abs(centroid[i] - (*coord)[i]);
00089 }
00090 inputDistance += pointInputDistance / 2;
00091 centroidDistance += pointCentroidDistance / 2;
00092 }
00093 }
00094 inputDistance /= tpCoord.size();
00095 centroidDistance /= tpCoord.size();
00096
00097 scores[0] = log(max(inputDistance, Moses::FLOAT_EPSILON));
00098 scores[1] = log(max(centroidDistance, Moses::FLOAT_EPSILON));
00099 }
00100
00101 scoreBreakdown.Assign(this, scores);
00102 return;
00103 }
00104
00105 void PhraseDistanceFeature::SetParameter(const string& key, const string& value)
00106 {
00107 if (key == "space") {
00108 m_space = value;
00109 m_spaceID = StaticData::InstanceNonConst().MapCoordSpace(m_space);
00110 } else if (key == "measure") {
00111 if (value == "euc") {
00112 m_measure = EuclideanDistance;
00113 } else if (value == "var") {
00114 m_measure = TotalVariationDistance;
00115 } else {
00116 UTIL_THROW2("Unknown measure " << value << ", choices: euc var");
00117 }
00118 } else {
00119 StatelessFeatureFunction::SetParameter(key, value);
00120 }
00121 }
00122
00123 }