00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "TranslationOptionCollectionText.h"
00023 #include "Sentence.h"
00024 #include "DecodeStep.h"
00025 #include "DecodeStepTranslation.h"
00026 #include "FactorCollection.h"
00027 #include "Range.h"
00028 #include <list>
00029 #include "TranslationTask.h"
00030
00031 using namespace std;
00032
00033 namespace Moses
00034 {
00036 TranslationOptionCollectionText::
00037 TranslationOptionCollectionText(ttasksptr const& ttask, Sentence const &input)
00038
00039 : TranslationOptionCollection(ttask,input)
00040
00041 {
00042 size_t maxNoTransOptPerCoverage
00043 = ttask->options()->search.max_trans_opt_per_cov;
00044 float translationOptionThreshold
00045 = ttask->options()->search.trans_opt_threshold;
00046 size_t size = input.GetSize();
00047 m_inputPathMatrix.resize(size);
00048 for (size_t phaseSize = 1; phaseSize <= size; ++phaseSize) {
00049 for (size_t startPos = 0; startPos < size - phaseSize + 1; ++startPos) {
00050 size_t endPos = startPos + phaseSize -1;
00051 vector<InputPath*> &vec = m_inputPathMatrix[startPos];
00052
00053 Range range(startPos, endPos);
00054 Phrase subphrase(input.GetSubString(Range(startPos, endPos)));
00055 const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
00056
00057 InputPath *path;
00058 if (range.GetNumWordsCovered() == 1) {
00059 path = new InputPath(ttask.get(), subphrase, labels, range, NULL, NULL);
00060 vec.push_back(path);
00061 } else {
00062 const InputPath &prevPath = GetInputPath(startPos, endPos - 1);
00063 path = new InputPath(ttask.get(), subphrase, labels, range, &prevPath, NULL);
00064 vec.push_back(path);
00065 }
00066
00067 m_inputPathQueue.push_back(path);
00068 }
00069 }
00070 }
00071
00072
00073
00074
00075 void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos)
00076 {
00077 const InputPath &inputPath = GetInputPath(sourcePos, sourcePos);
00078 ProcessOneUnknownWord(inputPath,sourcePos);
00079 }
00080
00084 bool TranslationOptionCollectionText::HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const
00085 {
00086 Sentence const& source=static_cast<Sentence const&>(m_source);
00087 return source.XmlOverlap(startPosition,endPosition);
00088 }
00089
00093 bool TranslationOptionCollectionText::ViolatesXmlOptionsConstraint(size_t startPosition, size_t endPosition, TranslationOption *transOpt) const
00094 {
00095
00096 Sentence const& source=static_cast<Sentence const&>(m_source);
00097 if (!source.XmlOverlap(startPosition,endPosition)) {
00098 return false;
00099 }
00100 vector <TranslationOption*> xmlOptions;
00101 source.GetXmlTranslationOptions(xmlOptions);
00102 for(size_t i=0; i<xmlOptions.size(); i++) {
00103 const Range &range = xmlOptions[i]->GetSourceWordsRange();
00104
00105 if (range.GetStartPos() <= startPosition && range.GetEndPos() >= endPosition &&
00106 (range.GetStartPos() < startPosition || range.GetEndPos() > endPosition)) {
00107 return true;
00108 }
00109
00110 if ((range.GetStartPos() < startPosition && range.GetEndPos() >= startPosition && range.GetEndPos() < endPosition) ||
00111 (range.GetEndPos() > endPosition && range.GetStartPos() <= endPosition && range.GetStartPos() > startPosition)) {
00112 return true;
00113 }
00114
00115 if (range.GetStartPos() >= startPosition && range.GetEndPos() <= endPosition) {
00116 const TargetPhrase &phrase = transOpt->GetTargetPhrase();
00117 const TargetPhrase &xmlPhrase = xmlOptions[i]->GetTargetPhrase();
00118
00119 if (phrase.GetSize() < xmlPhrase.GetSize()) {
00120 return true;
00121 }
00122
00123 for(size_t offset=0; offset <= phrase.GetSize()-xmlPhrase.GetSize(); offset++) {
00124 bool match = true;
00125
00126 for(size_t wordPos=0; match && wordPos < xmlPhrase.GetSize(); wordPos++) {
00127 if (phrase.GetFactor( wordPos+offset,0 )->Compare(*(xmlPhrase.GetFactor( wordPos,0 )))) {
00128 match = false;
00129 }
00130 }
00131 if (match) {
00132 return false;
00133 }
00134 }
00135 return true;
00136 }
00137 }
00138 return false;
00139 }
00140
00144 void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPos, size_t endPos)
00145 {
00146 Sentence const& source=static_cast<Sentence const&>(m_source);
00147 InputPath &inputPath = GetInputPath(startPos,endPos);
00148
00149 vector <TranslationOption*> xmlOptions;
00150 source.GetXmlTranslationOptions(xmlOptions,startPos,endPos);
00151
00152
00153 for(size_t i=0; i<xmlOptions.size(); i++) {
00154 TranslationOption *transOpt = xmlOptions[i];
00155 transOpt->SetInputPath(inputPath);
00156 Add(transOpt);
00157 }
00158
00159 };
00160
00161 InputPath &TranslationOptionCollectionText::GetInputPath(size_t startPos, size_t endPos)
00162 {
00163 size_t offset = endPos - startPos;
00164 assert(offset < m_inputPathMatrix[startPos].size());
00165 return *m_inputPathMatrix[startPos][offset];
00166 }
00167
00168 void TranslationOptionCollectionText::CreateTranslationOptions()
00169 {
00170 GetTargetPhraseCollectionBatch();
00171 TranslationOptionCollection::CreateTranslationOptions();
00172 }
00173
00182 bool
00183 TranslationOptionCollectionText::
00184 CreateTranslationOptionsForRange
00185 (const DecodeGraph &decodeGraph, size_t startPos, size_t endPos,
00186 bool adhereTableLimit, size_t graphInd)
00187 {
00188 InputPath &inputPath = GetInputPath(startPos, endPos);
00189
00190 return
00191 TranslationOptionCollection::
00192 CreateTranslationOptionsForRange
00193 (decodeGraph, startPos, endPos, adhereTableLimit, graphInd, inputPath);
00194 }
00195
00196
00197 }
00198