00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef moses_cmd_LatticeMBR_h
00011 #define moses_cmd_LatticeMBR_h
00012
00013 #include <map>
00014 #include <vector>
00015 #include <set>
00016 #include "Hypothesis.h"
00017 #include "Manager.h"
00018 #include "TrellisPathList.h"
00019
00020 using namespace Moses;
00021
00022
00023
00024 class Edge;
00025
00026 typedef std::vector< const Hypothesis *> Lattice;
00027 typedef std::vector<const Edge*> Path;
00028 typedef std::map<Path, size_t> PathCounts;
00029 typedef std::map<Phrase, PathCounts > NgramHistory;
00030
00031 class Edge
00032 {
00033 const Hypothesis* m_tailNode;
00034 const Hypothesis* m_headNode;
00035 float m_score;
00036 TargetPhrase m_targetPhrase;
00037 NgramHistory m_ngrams;
00038
00039 public:
00040 Edge(const Hypothesis* from, const Hypothesis* to, float score, const TargetPhrase& targetPhrase) : m_tailNode(from), m_headNode(to), m_score(score), m_targetPhrase(targetPhrase) {
00041
00042 }
00043
00044 const Hypothesis* GetHeadNode() const {
00045 return m_headNode;
00046 }
00047
00048 const Hypothesis* GetTailNode() const {
00049 return m_tailNode;
00050 }
00051
00052 float GetScore() const {
00053 return m_score;
00054 }
00055
00056 size_t GetWordsSize() const {
00057 return m_targetPhrase.GetSize();
00058 }
00059
00060 const Phrase& GetWords() const {
00061 return m_targetPhrase;
00062 }
00063
00064 friend std::ostream& operator<< (std::ostream& out, const Edge& edge);
00065
00066 const NgramHistory& GetNgrams( std::map<const Hypothesis*, std::vector<Edge> > & incomingEdges) ;
00067
00068 bool operator < (const Edge & compare) const;
00069
00070 void GetPhraseSuffix(const Phrase& origPhrase, size_t lastN, Phrase& targetPhrase) const;
00071
00072 void storeNgramHistory(const Phrase& phrase, Path & path, size_t count = 1) {
00073 m_ngrams[phrase][path]+= count;
00074 }
00075
00076 };
00077
00081 class NgramScores
00082 {
00083 public:
00084 NgramScores() {}
00085
00087 void addScore(const Hypothesis* node, const Phrase& ngram, float score);
00088
00090 typedef std::map<const Phrase*, float>::const_iterator NodeScoreIterator;
00091 NodeScoreIterator nodeBegin(const Hypothesis* node);
00092 NodeScoreIterator nodeEnd(const Hypothesis* node);
00093
00094 private:
00095 std::set<Phrase> m_ngrams;
00096 std::map<const Hypothesis*, std::map<const Phrase*, float> > m_scores;
00097 };
00098
00099
00101 class LatticeMBRSolution
00102 {
00103 public:
00105 LatticeMBRSolution(const TrellisPath& path, bool isMap);
00106 const std::vector<float>& GetNgramScores() const {
00107 return m_ngramScores;
00108 }
00109 const std::vector<Word>& GetWords() const {
00110 return m_words;
00111 }
00112 float GetMapScore() const {
00113 return m_mapScore;
00114 }
00115 float GetScore() const {
00116 return m_score;
00117 }
00118
00120 void CalcScore(std::map<Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);
00121
00122 private:
00123 std::vector<Word> m_words;
00124 float m_mapScore;
00125 std::vector<float> m_ngramScores;
00126 float m_score;
00127 };
00128
00129 struct LatticeMBRSolutionComparator {
00130 bool operator()(const LatticeMBRSolution& a, const LatticeMBRSolution& b) {
00131 return a.GetScore() > b.GetScore();
00132 }
00133 };
00134
00135 void pruneLatticeFB(Lattice & connectedHyp, std::map < const Hypothesis*, std::set <const Hypothesis* > > & outgoingHyps, std::map<const Hypothesis*, std::vector<Edge> >& incomingEdges,
00136 const std::vector< float> & estimatedScores, const Hypothesis*, size_t edgeDensity,float scale);
00137
00138
00139 void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
00140
00141 void calcNgramExpectations(Lattice & connectedHyp, std::map<const Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Phrase,
00142 float>& finalNgramScores, bool posteriors);
00143 void GetOutputFactors(const TrellisPath &path, std::vector <Word> &translation);
00144 void extract_ngrams(const std::vector<Word >& sentence, std::map < Phrase, int > & allngrams);
00145 bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b);
00146 std::vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList);
00147 const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
00148
00149 #endif