00001 #include "GlueRuleSynthesizer.h" 00002 00003 #include <sstream> 00004 00005 #include "moses/FF/UnknownWordPenaltyProducer.h" 00006 #include <boost/scoped_ptr.hpp> 00007 00008 namespace Moses 00009 { 00010 namespace Syntax 00011 { 00012 namespace T2S 00013 { 00014 00015 void 00016 GlueRuleSynthesizer:: 00017 SynthesizeRule(const InputTree::Node &node) 00018 { 00019 const Word &sourceLhs = node.pvertex.symbol; 00020 boost::scoped_ptr<Phrase> sourceRhs(SynthesizeSourcePhrase(node)); 00021 TargetPhrase *tp = SynthesizeTargetPhrase(node, *sourceRhs); 00022 TargetPhraseCollection::shared_ptr tpc 00023 = GetOrCreateTargetPhraseCollection(m_ruleTrie, sourceLhs, *sourceRhs); 00024 tpc->Add(tp); 00025 } 00026 00027 Phrase* 00028 GlueRuleSynthesizer:: 00029 SynthesizeSourcePhrase(const InputTree::Node &node) 00030 { 00031 Phrase *phrase = new Phrase(node.children.size()); 00032 for (std::vector<InputTree::Node*>::const_iterator p = node.children.begin(); 00033 p != node.children.end(); ++p) { 00034 phrase->AddWord((*p)->pvertex.symbol); 00035 } 00036 /* 00037 TODO What counts as an OOV? 00038 phrase->AddWord() = sourceWord; 00039 phrase->GetWord(0).SetIsOOV(true); 00040 */ 00041 return phrase; 00042 } 00043 00044 TargetPhrase* 00045 GlueRuleSynthesizer:: 00046 SynthesizeTargetPhrase(const InputTree::Node &node, const Phrase &sourceRhs) 00047 { 00048 const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = 00049 UnknownWordPenaltyProducer::Instance(); 00050 00051 TargetPhrase *targetPhrase = new TargetPhrase(); 00052 00053 util::StringStream alignmentSS; 00054 for (std::size_t i = 0; i < node.children.size(); ++i) { 00055 const Word &symbol = node.children[i]->pvertex.symbol; 00056 if (symbol.IsNonTerminal()) { 00057 targetPhrase->AddWord(m_output_default_nonterminal); 00058 } else { 00059 // TODO Check this 00060 Word &targetWord = targetPhrase->AddWord(); 00061 targetWord.CreateUnknownWord(symbol); 00062 } 00063 alignmentSS << i << "-" << i << " "; 00064 } 00065 00066 // Assign the lowest possible score so that glue rules are only used when 00067 // absolutely required. 00068 float score = LOWEST_SCORE; 00069 targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score); 00070 targetPhrase->EvaluateInIsolation(sourceRhs); 00071 Word *targetLhs = new Word(m_output_default_nonterminal); 00072 targetPhrase->SetTargetLHS(targetLhs); 00073 targetPhrase->SetAlignmentInfo(alignmentSS.str()); 00074 00075 return targetPhrase; 00076 } 00077 00078 } // T2S 00079 } // Syntax 00080 } // Moses