Moses: /disk4/html/www/moses/doxygen/mosesdecoder/moses/TranslationModel/PhraseDictionaryMemory.cpp Source File

00001 // vim:tabstop=2
00002 
00003 /***********************************************************************
00004  Moses - factored phrase-based language decoder
00005  Copyright (C) 2006 University of Edinburgh
00006 
00007  This library is free software; you can redistribute it and/or
00008  modify it under the terms of the GNU Lesser General Public
00009  License as published by the Free Software Foundation; either
00010  version 2.1 of the License, or (at your option) any later version.
00011 
00012  This library is distributed in the hope that it will be useful,
00013  but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  Lesser General Public License for more details.
00016 
00017  You should have received a copy of the GNU Lesser General Public
00018  License along with this library; if not, write to the Free Software
00019  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00020  ***********************************************************************/
00021 
00022 #include <fstream>
00023 #include <string>
00024 #include <iterator>
00025 #include <algorithm>
00026 #include "PhraseDictionaryMemory.h"
00027 #include "moses/FactorCollection.h"
00028 #include "moses/Word.h"
00029 #include "moses/Util.h"
00030 #include "moses/InputFileStream.h"
00031 #include "moses/StaticData.h"
00032 #include "moses/Range.h"
00033 #include "moses/TranslationModel/RuleTable/LoaderFactory.h"
00034 #include "moses/TranslationModel/RuleTable/Loader.h"
00035 #include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerMemory.h"
00036 #include "moses/InputPath.h"
00037 
00038 using namespace std;
00039 
00040 namespace Moses
00041 {
00042 PhraseDictionaryMemory::PhraseDictionaryMemory(const std::string &line)
00043   : RuleTableTrie(line)
00044 {
00045   ReadParameters();
00046 
00047   // caching for memory pt is pointless
00048   m_maxCacheSize = 0;
00049 
00050 }
00051 
00052 TargetPhraseCollection::shared_ptr
00053 PhraseDictionaryMemory::
00054 GetOrCreateTargetPhraseCollection(const Phrase &source,
00055                                   const TargetPhrase &target,
00056                                   const Word *sourceLHS)
00057 {
00058   PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS);
00059   return currNode.GetTargetPhraseCollection();
00060 }
00061 
00062 TargetPhraseCollection::shared_ptr
00063 PhraseDictionaryMemory::
00064 GetTargetPhraseCollectionLEGACY(const Phrase& sourceOrig) const
00065 {
00066   Phrase source(sourceOrig);
00067   source.OnlyTheseFactors(m_inputFactors);
00068 
00069   // exactly like CreateTargetPhraseCollection, but don't create
00070   const size_t size = source.GetSize();
00071 
00072   const PhraseDictionaryNodeMemory *currNode = &m_collection;
00073   for (size_t pos = 0 ; pos < size ; ++pos) {
00074     const Word& word = source.GetWord(pos);
00075     currNode = currNode->GetChild(word);
00076     if (currNode == NULL)
00077       return TargetPhraseCollection::shared_ptr();
00078   }
00079 
00080   return currNode->GetTargetPhraseCollection();
00081 }
00082 
00083 PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source
00084     , const TargetPhrase &target
00085     , const Word *sourceLHS)
00086 {
00087   const size_t size = source.GetSize();
00088 
00089   const AlignmentInfo &alignmentInfo = target.GetAlignNonTerm();
00090   AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
00091 
00092   PhraseDictionaryNodeMemory *currNode = &m_collection;
00093   for (size_t pos = 0 ; pos < size ; ++pos) {
00094     const Word& word = source.GetWord(pos);
00095 
00096     if (word.IsNonTerminal()) {
00097       // indexed by source label 1st
00098       const Word &sourceNonTerm = word;
00099 
00100       UTIL_THROW_IF2(iterAlign == alignmentInfo.end(),
00101                      "No alignment for non-term at position " << pos);
00102       UTIL_THROW_IF2(iterAlign->first != pos,
00103                      "Alignment info incorrect at position " << pos);
00104 
00105       size_t targetNonTermInd = iterAlign->second;
00106       ++iterAlign;
00107       const Word &targetNonTerm = target.GetWord(targetNonTermInd);
00108 #if defined(UNLABELLED_SOURCE)
00109       currNode = currNode->GetOrCreateNonTerminalChild(targetNonTerm);
00110 #else
00111       currNode = currNode->GetOrCreateChild(sourceNonTerm, targetNonTerm);
00112 #endif
00113     } else {
00114       currNode = currNode->GetOrCreateChild(word);
00115     }
00116 
00117     UTIL_THROW_IF2(currNode == NULL,
00118                    "Node not found at position " << pos);
00119   }
00120 
00121   // finally, the source LHS
00122   //currNode = currNode->GetOrCreateChild(sourceLHS);
00123 
00124   return *currNode;
00125 }
00126 
00127 ChartRuleLookupManager *PhraseDictionaryMemory::CreateRuleLookupManager(
00128   const ChartParser &parser,
00129   const ChartCellCollectionBase &cellCollection,
00130   std::size_t /*maxChartSpan */)
00131 {
00132   return new ChartRuleLookupManagerMemory(parser, cellCollection, *this);
00133 }
00134 
00135 void PhraseDictionaryMemory::SortAndPrune()
00136 {
00137   if (GetTableLimit()) {
00138     m_collection.Sort(GetTableLimit());
00139   }
00140 }
00141 
00142 void
00143 PhraseDictionaryMemory::
00144 GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
00145 {
00146   InputPathList::const_iterator iter;
00147   for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
00148     InputPath &inputPath = **iter;
00149     const Phrase &phrase = inputPath.GetPhrase();
00150     const InputPath *prevPath = inputPath.GetPrevPath();
00151 
00152     const PhraseDictionaryNodeMemory *prevPtNode = NULL;
00153 
00154     if (prevPath) {
00155       prevPtNode = static_cast<const PhraseDictionaryNodeMemory*>(prevPath->GetPtNode(*this));
00156     } else {
00157       // Starting subphrase.
00158       assert(phrase.GetSize() == 1);
00159       prevPtNode = &GetRootNode();
00160     }
00161 
00162     // backoff
00163     if (!SatisfyBackoff(inputPath)) {
00164       continue;
00165     }
00166 
00167     if (prevPtNode) {
00168       Word lastWord = phrase.GetWord(phrase.GetSize() - 1);
00169       lastWord.OnlyTheseFactors(m_inputFactors);
00170 
00171       const PhraseDictionaryNodeMemory *ptNode = prevPtNode->GetChild(lastWord);
00172       TargetPhraseCollection::shared_ptr targetPhrases;
00173       if (ptNode) {
00174         targetPhrases = ptNode->GetTargetPhraseCollection();
00175       }
00176       inputPath.SetTargetPhrases(*this, targetPhrases, ptNode);
00177     }
00178   }
00179 }
00180 
00181 TO_STRING_BODY(PhraseDictionaryMemory);
00182 
00183 // friend
00184 ostream& operator<<(ostream& out, const PhraseDictionaryMemory& phraseDict)
00185 {
00186   typedef PhraseDictionaryNodeMemory::TerminalMap TermMap;
00187   typedef PhraseDictionaryNodeMemory::NonTerminalMap NonTermMap;
00188 
00189   const PhraseDictionaryNodeMemory &coll = phraseDict.m_collection;
00190   for (NonTermMap::const_iterator p = coll.m_nonTermMap.begin(); p != coll.m_nonTermMap.end(); ++p) {
00191 #if defined(UNLABELLED_SOURCE)
00192     const Word &targetNonTerm = p->first;
00193     out << targetNonTerm;
00194 #else
00195     const Word &sourceNonTerm = p->first.first;
00196     out << sourceNonTerm;
00197 #endif
00198   }
00199   for (TermMap::const_iterator p = coll.m_sourceTermMap.begin(); p != coll.m_sourceTermMap.end(); ++p) {
00200     const Word &sourceTerm = p->first;
00201     out << sourceTerm;
00202   }
00203   return out;
00204 }
00205 
00206 }