00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "ChartRuleLookupManagerOnDisk.h"
00021
00022 #include <algorithm>
00023
00024 #include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
00025 #include "moses/StaticData.h"
00026 #include "moses/ChartParserCallback.h"
00027 #include "DotChartOnDisk.h"
00028 #include "OnDiskPt/TargetPhraseCollection.h"
00029
00030 using namespace std;
00031
00032 namespace Moses
00033 {
00034
00035 ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
00036 const InputType &sentence,
00037 const ChartCellCollectionBase &cellColl,
00038 const PhraseDictionaryOnDisk &dictionary,
00039 OnDiskPt::OnDiskWrapper &dbWrapper,
00040 const std::vector<FactorType> &inputFactorsVec,
00041 const std::vector<FactorType> &outputFactorsVec,
00042 const std::string &filePath)
00043 : ChartRuleLookupManagerCYKPlus(sentence, cellColl)
00044 , m_dictionary(dictionary)
00045 , m_dbWrapper(dbWrapper)
00046 , m_inputFactorsVec(inputFactorsVec)
00047 , m_outputFactorsVec(outputFactorsVec)
00048 , m_filePath(filePath)
00049 {
00050 CHECK(m_expandableDottedRuleListVec.size() == 0);
00051 size_t sourceSize = sentence.GetSize();
00052 m_expandableDottedRuleListVec.resize(sourceSize);
00053
00054 for (size_t ind = 0; ind < m_expandableDottedRuleListVec.size(); ++ind) {
00055 DottedRuleOnDisk *initDottedRule = new DottedRuleOnDisk(m_dbWrapper.GetRootSourceNode());
00056
00057 DottedRuleStackOnDisk *processedStack = new DottedRuleStackOnDisk(sourceSize - ind + 1);
00058 processedStack->Add(0, initDottedRule);
00059
00060 m_expandableDottedRuleListVec[ind] = processedStack;
00061 }
00062 }
00063
00064 ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk()
00065 {
00066 std::map<UINT64, const TargetPhraseCollection*>::const_iterator iterCache;
00067 for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) {
00068 delete iterCache->second;
00069 }
00070 m_cache.clear();
00071
00072 RemoveAllInColl(m_expandableDottedRuleListVec);
00073 RemoveAllInColl(m_sourcePhraseNode);
00074 }
00075
00076 void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
00077 const WordsRange &range,
00078 ChartParserCallback &outColl)
00079 {
00080 const StaticData &staticData = StaticData::Instance();
00081 size_t relEndPos = range.GetEndPos() - range.GetStartPos();
00082 size_t absEndPos = range.GetEndPos();
00083
00084
00085 DottedRuleStackOnDisk &expandableDottedRuleList = *m_expandableDottedRuleListVec[range.GetStartPos()];
00086
00087
00088 expandableDottedRuleList.SortSavedNodes();
00089
00090 const DottedRuleStackOnDisk::SavedNodeColl &savedNodeColl = expandableDottedRuleList.GetSavedNodeColl();
00091
00092
00093 const ChartCellLabel &sourceWordLabel = GetSourceAt(absEndPos);
00094
00095 for (size_t ind = 0; ind < (savedNodeColl.size()) ; ++ind) {
00096 const SavedNodeOnDisk &savedNode = *savedNodeColl[ind];
00097
00098 const DottedRuleOnDisk &prevDottedRule = savedNode.GetDottedRule();
00099 const OnDiskPt::PhraseNode &prevNode = prevDottedRule.GetLastNode();
00100 size_t startPos = prevDottedRule.IsRoot() ? range.GetStartPos() : prevDottedRule.GetWordsRange().GetEndPos() + 1;
00101
00102
00103 if (startPos == absEndPos) {
00104 OnDiskPt::Word *sourceWordBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceWordLabel.GetLabel());
00105
00106 if (sourceWordBerkeleyDb != NULL) {
00107 const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper);
00108 if (node != NULL) {
00109
00110
00111
00112 DottedRuleOnDisk *dottedRule = new DottedRuleOnDisk(*node, sourceWordLabel, prevDottedRule);
00113 expandableDottedRuleList.Add(relEndPos+1, dottedRule);
00114
00115
00116 m_sourcePhraseNode.push_back(node);
00117 }
00118
00119 delete sourceWordBerkeleyDb;
00120 }
00121 }
00122
00123
00124 size_t endPos, stackInd;
00125 if (startPos > absEndPos)
00126 continue;
00127 else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) {
00128
00129 endPos = absEndPos - 1;
00130 stackInd = relEndPos;
00131 } else {
00132 endPos = absEndPos;
00133 stackInd = relEndPos + 1;
00134 }
00135
00136
00137
00138
00139 const ChartCellLabelSet &chartNonTermSet =
00140 GetTargetLabelSet(startPos, endPos);
00141
00142
00143
00144
00145
00146 const NonTerminalSet &sourceLHSSet = GetSentence().GetLabelSet(startPos, endPos);
00147
00148 NonTerminalSet::const_iterator iterSourceLHS;
00149 for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) {
00150 const Word &sourceLHS = *iterSourceLHS;
00151
00152 OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceLHS);
00153
00154 if (sourceLHSBerkeleyDb == NULL) {
00155 delete sourceLHSBerkeleyDb;
00156 continue;
00157 }
00158
00159 const OnDiskPt::PhraseNode *sourceNode = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
00160 delete sourceLHSBerkeleyDb;
00161
00162 if (sourceNode == NULL)
00163 continue;
00164
00165
00166 ChartCellLabelSet::const_iterator iterChartNonTerm;
00167 for (iterChartNonTerm = chartNonTermSet.begin(); iterChartNonTerm != chartNonTermSet.end(); ++iterChartNonTerm) {
00168 const ChartCellLabel &cellLabel = iterChartNonTerm->second;
00169
00170
00171
00172
00173 bool doSearch = true;
00174
00175
00176 if (doSearch) {
00177
00178 OnDiskPt::Word *chartNonTermBerkeleyDb = m_dbWrapper.ConvertFromMoses(Output, m_outputFactorsVec, cellLabel.GetLabel());
00179
00180 if (chartNonTermBerkeleyDb == NULL)
00181 continue;
00182
00183 const OnDiskPt::PhraseNode *node = sourceNode->GetChild(*chartNonTermBerkeleyDb, m_dbWrapper);
00184 delete chartNonTermBerkeleyDb;
00185
00186 if (node == NULL)
00187 continue;
00188
00189
00190
00191 DottedRuleOnDisk *dottedRule = new DottedRuleOnDisk(*node, cellLabel, prevDottedRule);
00192 expandableDottedRuleList.Add(stackInd, dottedRule);
00193
00194 m_sourcePhraseNode.push_back(node);
00195 }
00196 }
00197
00198 delete sourceNode;
00199
00200 }
00201
00202
00203 DottedRuleCollOnDisk &nodes = expandableDottedRuleList.Get(relEndPos + 1);
00204
00205
00206 DottedRuleCollOnDisk::const_iterator iterDottedRuleColl;
00207 for (iterDottedRuleColl = nodes.begin(); iterDottedRuleColl != nodes.end(); ++iterDottedRuleColl) {
00208
00209 const DottedRuleOnDisk &prevDottedRule = **iterDottedRuleColl;
00210 if (prevDottedRule.Done())
00211 continue;
00212 prevDottedRule.Done(true);
00213
00214 const OnDiskPt::PhraseNode &prevNode = prevDottedRule.GetLastNode();
00215
00216
00217 const NonTerminalSet &lhsSet = GetSentence().GetLabelSet(range.GetStartPos(), range.GetEndPos());
00218 NonTerminalSet::const_iterator iterLabelSet;
00219 for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) {
00220 const Word &sourceLHS = *iterLabelSet;
00221
00222 OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceLHS);
00223 if (sourceLHSBerkeleyDb == NULL)
00224 continue;
00225
00226 const TargetPhraseCollection *targetPhraseCollection = NULL;
00227 const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
00228 if (node) {
00229 UINT64 tpCollFilePos = node->GetValue();
00230 std::map<UINT64, const TargetPhraseCollection*>::const_iterator iterCache = m_cache.find(tpCollFilePos);
00231 if (iterCache == m_cache.end()) {
00232
00233 const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);
00234
00235 std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
00236 targetPhraseCollection
00237 = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
00238 ,m_outputFactorsVec
00239 ,m_dictionary
00240 ,weightT
00241 ,m_filePath
00242 , m_dbWrapper.GetVocab());
00243
00244 delete tpcollBerkeleyDb;
00245 m_cache[tpCollFilePos] = targetPhraseCollection;
00246 } else {
00247
00248 targetPhraseCollection = iterCache->second;
00249 }
00250
00251 CHECK(targetPhraseCollection);
00252 if (!targetPhraseCollection->IsEmpty()) {
00253 AddCompletedRule(prevDottedRule, *targetPhraseCollection,
00254 range, outColl);
00255 }
00256
00257 }
00258
00259 delete node;
00260 delete sourceLHSBerkeleyDb;
00261 }
00262 }
00263 }
00264
00265
00266 }
00267
00268 }