00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef moses_cmd_LatticeMBR_h
00011 #define moses_cmd_LatticeMBR_h
00012
00013 #include <map>
00014 #include <vector>
00015 #include <set>
00016 #include "Hypothesis.h"
00017 #include "Manager.h"
00018 #include "TrellisPathList.h"
00019
00020 using namespace Moses;
00021
/**
 * Adds two quantities that are stored as natural logarithms.
 *
 * Computes log(exp(log_a) + exp(log_b)). The larger operand is factored out,
 * so the argument handed to exp() is never positive.
 *
 * When the two operands compare equal, their difference is taken to be zero
 * rather than computed. For finite values this gives the same result as the
 * subtraction, but it also keeps the result well defined when both operands
 * are the same infinity (for example two zero probabilities stored as -inf),
 * where the subtraction would yield inf - inf = NaN.
 *
 * @param log_a first operand, in log space
 * @param log_b second operand, in log space
 * @return the sum of the two operands, in log space
 */
template<class T>
T log_sum (T log_a, T log_b)
{
  const T larger  = (log_a < log_b) ? log_b : log_a;
  const T smaller = (log_a < log_b) ? log_a : log_b;

  // Equal operands (including equal infinities) differ by exactly zero.
  const T diff = (smaller == larger) ? T(0) : smaller - larger;

  return larger + log ( 1 + exp ( diff ));
}
00033
00034 class Edge;
00035
00036 typedef std::vector< const Hypothesis *> Lattice;
00037 typedef std::vector<const Edge*> Path;
00038 typedef std::map<Path, size_t> PathCounts;
00039 typedef std::map<Phrase, PathCounts > NgramHistory;
00040
00041 class Edge {
00042 const Hypothesis* m_tailNode;
00043 const Hypothesis* m_headNode;
00044 float m_score;
00045 TargetPhrase m_targetPhrase;
00046 NgramHistory m_ngrams;
00047
00048 public:
00049 Edge(const Hypothesis* from, const Hypothesis* to, float score, const TargetPhrase& targetPhrase) : m_tailNode(from), m_headNode(to), m_score(score), m_targetPhrase(targetPhrase) {
00050
00051 }
00052
00053 const Hypothesis* GetHeadNode() const {
00054 return m_headNode;
00055 }
00056
00057 const Hypothesis* GetTailNode() const {
00058 return m_tailNode;
00059 }
00060
00061 float GetScore() const {
00062 return m_score;
00063 }
00064
00065 size_t GetWordsSize() const {
00066 return m_targetPhrase.GetSize();
00067 }
00068
00069 const Phrase& GetWords() const {
00070 return m_targetPhrase;
00071 }
00072
00073 friend std::ostream& operator<< (std::ostream& out, const Edge& edge);
00074
00075 const NgramHistory& GetNgrams( std::map<const Hypothesis*, std::vector<Edge> > & incomingEdges) ;
00076
00077 bool operator < (const Edge & compare) const;
00078
00079 void GetPhraseSuffix(const Phrase& origPhrase, size_t lastN, Phrase& targetPhrase) const;
00080
00081 void storeNgramHistory(const Phrase& phrase, Path & path, size_t count = 1){
00082 m_ngrams[phrase][path]+= count;
00083 }
00084
00085 };
00086
00090 class NgramScores {
00091 public:
00092 NgramScores() {}
00093
00095 void addScore(const Hypothesis* node, const Phrase& ngram, float score);
00096
00098 typedef std::map<const Phrase*, float>::const_iterator NodeScoreIterator;
00099 NodeScoreIterator nodeBegin(const Hypothesis* node);
00100 NodeScoreIterator nodeEnd(const Hypothesis* node);
00101
00102 private:
00103 std::set<Phrase> m_ngrams;
00104 std::map<const Hypothesis*, std::map<const Phrase*, float> > m_scores;
00105 };
00106
00107
00109 class LatticeMBRSolution {
00110 public:
00112 LatticeMBRSolution(const TrellisPath& path, bool isMap);
00113 const std::vector<float>& GetNgramScores() const {return m_ngramScores;}
00114 const std::vector<Word>& GetWords() const {return m_words;}
00115 float GetMapScore() const {return m_mapScore;}
00116 float GetScore() const {return m_score;}
00117
00119 void CalcScore(std::map<Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);
00120
00121 private:
00122 std::vector<Word> m_words;
00123 float m_mapScore;
00124 std::vector<float> m_ngramScores;
00125 float m_score;
00126 };
00127
00128 struct LatticeMBRSolutionComparator {
00129 bool operator()(const LatticeMBRSolution& a, const LatticeMBRSolution& b) {
00130 return a.GetScore() > b.GetScore();
00131 }
00132 };
00133
00134 void pruneLatticeFB(Lattice & connectedHyp, std::map < const Hypothesis*, std::set <const Hypothesis* > > & outgoingHyps, std::map<const Hypothesis*, std::vector<Edge> >& incomingEdges,
00135 const std::vector< float> & estimatedScores, const Hypothesis*, size_t edgeDensity,float scale);
00136
00137
00138 void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
00139
00140 void calcNgramExpectations(Lattice & connectedHyp, std::map<const Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Phrase,
00141 float>& finalNgramScores, bool posteriors);
00142 void GetOutputFactors(const TrellisPath &path, std::vector <Word> &translation);
00143 void extract_ngrams(const std::vector<Word >& sentence, std::map < Phrase, int > & allngrams);
00144 bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b);
00145 std::vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList);
00146 const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
00147
00148 #endif