00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <fstream>
00023 #include <string>
00024 #include <iterator>
00025 #include <algorithm>
00026 #include "PhraseDictionaryMemory.h"
00027 #include "moses/FactorCollection.h"
00028 #include "moses/Word.h"
00029 #include "moses/Util.h"
00030 #include "moses/InputFileStream.h"
00031 #include "moses/StaticData.h"
00032 #include "moses/Range.h"
00033 #include "moses/TranslationModel/RuleTable/LoaderFactory.h"
00034 #include "moses/TranslationModel/RuleTable/Loader.h"
00035 #include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h"
00036 #include "moses/InputPath.h"
00037
00038 using namespace std;
00039
00040 namespace Moses
00041 {
00042 PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line)
00043 : RuleTableTrie(line)
00044 {
00045 ReadParameters();
00046
00047
00048 m_maxCacheSize = 0;
00049
00050 }
00051
00052 TargetPhraseCollection::shared_ptr
00053 PhraseDictionaryMemory::
00054 GetOrCreateTargetPhraseCollection(const Phrase &source,
00055 const TargetPhrase &target,
00056 const Word *sourceLHS)
00057 {
00058 PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS);
00059 return currNode.GetTargetPhraseCollection();
00060 }
00061
00062 TargetPhraseCollection::shared_ptr
00063 PhraseDictionaryMemory::
00064 GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
00065 {
00066 Phrase source(sourceOrig);
00067 source.OnlyTheseFactors(m_inputFactors);
00068
00069
00070 const size_t size = source.GetSize();
00071
00072 const PhraseDictionaryNodeMemory *currNode = &m_collection;
00073 for (size_t pos = 0 ; pos < size ; ++pos) {
00074 const Word& word = source.GetWord(pos);
00075 currNode = currNode->GetChild(word);
00076 if (currNode == NULL)
00077 return TargetPhraseCollection::shared_ptr();
00078 }
00079
00080 return currNode->GetTargetPhraseCollection();
00081 }
00082
00083 PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source
00084 , const TargetPhrase &target
00085 , const Word *sourceLHS)
00086 {
00087 const size_t size = source.GetSize();
00088
00089 const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
00090 AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
00091
00092 PhraseDictionaryNodeMemory *currNode = &m_collection;
00093 for (size_t pos = 0 ; pos < size ; ++pos) {
00094 const Word& word = source.GetWord(pos);
00095
00096 if (word.IsNonTerminal()) {
00097
00098 const Word &sourceNonTerm = word;
00099
00100 UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
00101 "No alignment for non-term at position " << pos);
00102 UTIL_THROW_IF2(iterAlign->first != pos,
00103 "Alignment info incorrect at position " << pos);
00104
00105 size_t targetNonTermInd = iterAlign->second;
00106 ++iterAlign;
00107 const Word &targetNonTerm = target.GetWord(targetNonTermInd);
00108 #if defined(UNLABELLED_SOURCE)
00109 currNode = currNode->GetOrCreateNonTerminalChild(targetNonTerm);
00110 #else
00111 currNode = currNode->GetOrCreateChild(sourceNonTerm, targetNonTerm);
00112 #endif
00113 } else {
00114 currNode = currNode->GetOrCreateChild(word);
00115 }
00116
00117 UTIL_THROW_IF2(currNode == NULL,
00118 "Node not found at position " << pos);
00119 }
00120
00121
00122
00123
00124 return *currNode;
00125 }
00126
00127 ChartRuleLookupManager *PhraseDictionaryMemory::CreateRuleLookupManager(
00128 const ChartParser &parser,
00129 const ChartCellCollectionBase &cellCollection,
00130 std::size_t )
00131 {
00132 return new ChartRuleLookupManagerMemory(parser, cellCollection, *this);
00133 }
00134
00135 void PhraseDictionaryMemory::SortAndPrune()
00136 {
00137 if (GetTableLimit()) {
00138 m_collection.Sort(GetTableLimit());
00139 }
00140 }
00141
00142 void
00143 PhraseDictionaryMemory::
00144 GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
00145 {
00146 InputPathList::const_iterator iter;
00147 for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
00148 InputPath &inputPath = **iter;
00149 const Phrase &phrase = inputPath.GetPhrase();
00150 const InputPath *prevPath = inputPath.GetPrevPath();
00151
00152 const PhraseDictionaryNodeMemory *prevPtNode = NULL;
00153
00154 if (prevPath) {
00155 prevPtNode = static_cast<const PhraseDictionaryNodeMemory*>(prevPath->GetPtNode(*this));
00156 } else {
00157
00158 assert(phrase.GetSize() == 1);
00159 prevPtNode = &GetRootNode();
00160 }
00161
00162
00163 if (!SatisfyBackoff(inputPath)) {
00164 continue;
00165 }
00166
00167 if (prevPtNode) {
00168 Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
00169 lastWord.OnlyTheseFactors(m_inputFactors);
00170
00171 const PhraseDictionaryNodeMemory *ptNode = prevPtNode->GetChild(lastWord);
00172 TargetPhraseCollection::shared_ptr targetPhrases;
00173 if (ptNode) {
00174 targetPhrases = ptNode->GetTargetPhraseCollection();
00175 }
00176 inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
00177 }
00178 }
00179 }
00180
00181 TO_STRING_BODY(PhraseDictionaryMemory);
00182
00183
00184 ostream& operator<<(ostream& out, const PhraseDictionaryMemory& phraseDict)
00185 {
00186 typedef PhraseDictionaryNodeMemory::TerminalMap TermMap;
00187 typedef PhraseDictionaryNodeMemory::NonTerminalMap NonTermMap;
00188
00189 const PhraseDictionaryNodeMemory &coll = phraseDict.m_collection;
00190 for (NonTermMap::const_iterator p = coll.m_nonTermMap.begin(); p != coll.m_nonTermMap.end(); ++p) {
00191 #if defined(UNLABELLED_SOURCE)
00192 const Word &targetNonTerm = p->first;
00193 out << targetNonTerm;
00194 #else
00195 const Word &sourceNonTerm = p->first.first;
00196 out << sourceNonTerm;
00197 #endif
00198 }
00199 for (TermMap::const_iterator p = coll.m_sourceTermMap.begin(); p != coll.m_sourceTermMap.end(); ++p) {
00200 const Word &sourceTerm = p->first;
00201 out << sourceTerm;
00202 }
00203 return out;
00204 }
00205
00206 }