00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "Parser.h"
00021
00022 #include "moses/ChartParser.h"
00023 #include "moses/ChartTranslationOptionList.h"
00024 #include "moses/InputType.h"
00025 #include "moses/NonTerminal.h"
00026 #include "moses/TranslationModel/RuleTable/UTrieNode.h"
00027 #include "moses/TranslationModel/RuleTable/UTrie.h"
00028 #include "moses/StaticData.h"
00029 #include "ApplicableRuleTrie.h"
00030 #include "StackLattice.h"
00031 #include "StackLatticeBuilder.h"
00032 #include "StackLatticeSearcher.h"
00033 #include "VarSpanTrieBuilder.h"
00034
00035 #include <memory>
00036 #include <vector>
00037
00038 namespace Moses
00039 {
00040
00041 void Scope3Parser::GetChartRuleCollection(
00042 const InputPath &inputPath,
00043 size_t last,
00044 ChartParserCallback &outColl)
00045 {
00046 const Range &range = inputPath.GetWordsRange();
00047 const size_t start = range.GetStartPos();
00048 const size_t end = range.GetEndPos();
00049
00050 std::vector<std::pair<const UTrieNode *, const VarSpanNode *> > &pairVec
00051 = m_ruleApplications[start][end-start+1];
00052
00053 MatchCallback matchCB(range, outColl);
00054 for (std::vector<std::pair<const UTrieNode *, const VarSpanNode *> >::const_iterator p = pairVec.begin(); p != pairVec.end(); ++p) {
00055 const UTrieNode &ruleNode = *(p->first);
00056 const VarSpanNode &varSpanNode = *(p->second);
00057
00058 const UTrieNode::LabelMap &labelMap = ruleNode.GetLabelMap();
00059
00060 if (varSpanNode.m_rank == 0) {
00061 assert(labelMap.size() == 1);
00062 TargetPhraseCollection::shared_ptr tpc = labelMap.begin()->second;
00063 matchCB.m_tpc = tpc;
00064 matchCB(m_emptyStackVec);
00065 } else {
00066 varSpanNode.CalculateRanges(start, end, m_ranges);
00067 m_latticeBuilder.Build(start, end, ruleNode, varSpanNode, m_ranges,
00068 *this, m_lattice,
00069 m_quickCheckTable);
00070 StackLatticeSearcher<MatchCallback> searcher(m_lattice, m_ranges);
00071 UTrieNode::LabelMap::const_iterator p = labelMap.begin();
00072 for (; p != labelMap.end(); ++p) {
00073 const std::vector<int> &labels = p->first;
00074 TargetPhraseCollection::shared_ptr tpc = p->second;
00075 assert(labels.size() == varSpanNode.m_rank);
00076 bool failCheck = false;
00077 for (size_t i = 0; i < varSpanNode.m_rank; ++i) {
00078 if (!m_quickCheckTable[i][labels[i]]) {
00079 failCheck = true;
00080 break;
00081 }
00082 }
00083 if (failCheck) {
00084 continue;
00085 }
00086 matchCB.m_tpc = tpc;
00087 searcher.Search(labels, matchCB);
00088 }
00089 }
00090 }
00091 }
00092
00093 void Scope3Parser::Init()
00094 {
00095 InitRuleApplicationVector();
00096
00097
00098 SentenceMap sentMap;
00099 FillSentenceMap(sentMap);
00100
00101
00102 const UTrieNode &rootNode = m_ruleTable.GetRootNode();
00103 std::auto_ptr<ApplicableRuleTrie> art(new ApplicableRuleTrie(-1, -1, rootNode));
00104 art->Extend(rootNode, -1, sentMap, false);
00105
00106
00107
00108
00109 VarSpanTrieBuilder vstBuilder;
00110 m_varSpanTrie = vstBuilder.Build(*art);
00111
00112
00113 AddRulesToCells(*art, std::make_pair<int, int>(-1, -1), GetParser().GetSize()-1, 0);
00114 }
00115
00116 void Scope3Parser::InitRuleApplicationVector()
00117 {
00118 const size_t sourceSize = GetParser().GetSize();
00119 m_ruleApplications.resize(sourceSize);
00120 for (size_t start = 0; start < sourceSize; ++start) {
00121 size_t maxSpan = sourceSize-start+1;
00122 m_ruleApplications[start].resize(maxSpan+1);
00123 }
00124 }
00125
00126 void Scope3Parser::FillSentenceMap(SentenceMap &sentMap)
00127 {
00128 for (size_t i = 0; i < GetParser().GetSize(); ++i) {
00129 const Word &word = GetParser().GetInputPath(i, i).GetLastWord();
00130 sentMap[word].push_back(i);
00131 }
00132 }
00133
00134 void Scope3Parser::AddRulesToCells(
00135 const ApplicableRuleTrie &node,
00136 std::pair<int, int> start,
00137 int maxPos,
00138 int depth)
00139 {
00140 if (depth > 0) {
00141
00142 if (start.first == -1 && start.second == -1) {
00143 assert(depth == 1);
00144 start.first = std::max(0, node.m_start);
00145 start.second = node.m_start;
00146 } else if (start.second < 0) {
00147 assert(depth > 1);
00148 if (node.m_start == -1) {
00149 --start.second;
00150 } else {
00151 int numSplitPoints = -1 - start.second;
00152 start.second = node.m_start - (numSplitPoints+1);
00153 }
00154 }
00155 }
00156
00157 if (node.m_node->HasRules()) {
00158 assert(depth > 0);
00159 assert(node.m_vstNode);
00160
00161 std::pair<int, int> end;
00162 if (node.m_end == -1) {
00163 end.first = (*(node.m_vstNode->m_label))[2];
00164 end.second = (*(node.m_vstNode->m_label))[3];
00165 assert(end.first != -1);
00166 if (end.second == -1) {
00167 end.second = maxPos;
00168 }
00169 } else {
00170 assert(node.m_start == node.m_end);
00171 end.first = end.second = node.m_start;
00172 }
00173
00174 int s2 = start.second;
00175 if (s2 < 0) {
00176 int numSplitPoints = -1 - s2;
00177 s2 = maxPos - numSplitPoints;
00178 }
00179 for (int i = start.first; i <= s2; ++i) {
00180 int e1 = std::max(i+depth-1, end.first);
00181 for (int j = e1; j <= end.second; ++j) {
00182 size_t span = j-i+1;
00183 assert(span >= 1);
00184 if (m_maxChartSpan && span > m_maxChartSpan) {
00185 break;
00186 }
00187 m_ruleApplications[i][span].push_back(std::make_pair(node.m_node,
00188 node.m_vstNode));
00189 }
00190 }
00191 }
00192
00193 for (std::vector<ApplicableRuleTrie*>::const_iterator p = node.m_children.begin(); p != node.m_children.end(); ++p) {
00194 AddRulesToCells(**p, start, maxPos, depth+1);
00195 }
00196 }
00197
00198 }