00001 #include <boost/functional/hash.hpp>
00002 #include <vector>
00003 #include <algorithm>
00004 #include <iterator>
00005 #include <boost/foreach.hpp>
00006 #include "CoveredReferenceFeature.h"
00007 #include "moses/ScoreComponentCollection.h"
00008 #include "moses/Hypothesis.h"
00009 #include "moses/Manager.h"
00010 #include "moses/ChartHypothesis.h"
00011 #include "moses/ChartManager.h"
00012 #include "moses/StaticData.h"
00013 #include "moses/InputFileStream.h"
00014 #include "moses/Util.h"
00015 #include "util/exception.hh"
00016
00017 using namespace std;
00018
00019 namespace Moses
00020 {
00021
00022 size_t CoveredReferenceState::hash() const
00023 {
00024 UTIL_THROW2("TODO:Haven't figure this out yet");
00025 }
00026
00027 bool CoveredReferenceState::operator==(const FFState& other) const
00028 {
00029 UTIL_THROW2("TODO:Haven't figure this out yet");
00030 }
00031
00033
00034 void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
00035 , const InputPath &inputPath
00036 , const TargetPhrase &targetPhrase
00037 , const StackVec *stackVec
00038 , ScoreComponentCollection &scoreBreakdown
00039 , ScoreComponentCollection *estimatedScores) const
00040 {
00041 long id = input.GetTranslationId();
00042 boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
00043 multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase);
00044 multiset<string> covered;
00045 set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
00046 refIt->second.begin(), refIt->second.end(),
00047 inserter(covered, covered.begin()));
00048 vector<float> scores;
00049 scores.push_back(covered.size());
00050
00051 scoreBreakdown.Assign(this, scores);
00052 estimatedScores->Assign(this, scores);
00053 }
00054
00055 void CoveredReferenceFeature::Load(AllOptions::ptr const& opts)
00056 {
00057 m_options = opts;
00058 InputFileStream refFile(m_path);
00059 std::string line;
00060 const StaticData &staticData = StaticData::Instance();
00061 long sentenceID = opts->output.start_translation_id;
00062 while (getline(refFile, line)) {
00063 vector<string> words = Tokenize(line, " ");
00064 multiset<string> wordSet;
00065
00066 copy(words.begin(), words.end(), inserter(wordSet, wordSet.begin()));
00067 m_refs.insert(make_pair(sentenceID++, wordSet));
00068 }
00069 }
00070
00071 void CoveredReferenceFeature::SetParameter(const std::string& key, const std::string& value)
00072 {
00073 if (key == "path") {
00074 m_path = value;
00075 } else {
00076 StatefulFeatureFunction::SetParameter(key, value);
00077 }
00078 }
00079
00080 FFState* CoveredReferenceFeature::EvaluateWhenApplied(
00081 const Hypothesis& cur_hypo,
00082 const FFState* prev_state,
00083 ScoreComponentCollection* accumulator) const
00084 {
00085 const CoveredReferenceState &prev = static_cast<const CoveredReferenceState&>(*prev_state);
00086 CoveredReferenceState *ret = new CoveredReferenceState(prev);
00087
00088 const Manager &mgr = cur_hypo.GetManager();
00089 const InputType &input = mgr.GetSource();
00090 long id = input.GetTranslationId();
00091
00092
00093 multiset<string> remaining;
00094 boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id);
00095 if (refIt == m_refs.end()) UTIL_THROW(util::Exception, "Sentence id out of range: " + SPrint<long>(id));
00096 set_difference(refIt->second.begin(), refIt->second.end(),
00097 ret->m_coveredRef.begin(), ret->m_coveredRef.end(),
00098 inserter(remaining, remaining.begin()));
00099
00100
00101 multiset<string> wordsInPhrase = GetWordsInPhrase(cur_hypo.GetCurrTargetPhrase());
00102 multiset<string> newCovered;
00103 set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(),
00104 remaining.begin(), remaining.end(),
00105 inserter(newCovered, newCovered.begin()));
00106
00107 vector<float> estimateScore =
00108 cur_hypo.GetCurrTargetPhrase().GetScoreBreakdown().GetScoresForProducer(this);
00109 vector<float> scores;
00110 scores.push_back(newCovered.size() - estimateScore[0]);
00111 accumulator->PlusEquals(this, scores);
00112
00113
00114 multiset<string>::const_iterator newCoveredIt;
00115 for (newCoveredIt = newCovered.begin(); newCoveredIt != newCovered.end(); newCoveredIt++) {
00116 ret->m_coveredRef.insert(*newCoveredIt);
00117 }
00118 return ret;
00119 }
00120
00121 FFState* CoveredReferenceFeature::EvaluateWhenApplied(
00122 const ChartHypothesis& ,
00123 int ,
00124 ScoreComponentCollection* accumulator) const
00125 {
00126 UTIL_THROW(util::Exception, "Not implemented");
00127 }
00128
00129 }