00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "XmlTreeParser.h"
00021
00022 #include "ParseTree.h"
00023 #include "tables-core.h"
00024 #include "XmlException.h"
00025 #include "XmlTree.h"
00026
00027 #include <cassert>
00028 #include <vector>
00029
00030 using namespace MosesTraining;
00031
00032 namespace Moses
00033 {
00034 namespace GHKM
00035 {
00036
00037 XmlTreeParser::XmlTreeParser(std::set<std::string> &labelSet,
00038 std::map<std::string, int> &topLabelSet)
00039 : m_labelSet(labelSet)
00040 , m_topLabelSet(topLabelSet)
00041 {
00042 }
00043
00044 std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
00045 {
00046 m_line = line;
00047 m_tree.Clear();
00048 try {
00049 if (!ProcessAndStripXMLTags(m_line, m_tree, m_labelSet, m_topLabelSet,
00050 false)) {
00051 throw Exception("");
00052 }
00053 } catch (const XmlException &e) {
00054 throw Exception(e.getMsg());
00055 }
00056 m_tree.ConnectNodes();
00057 SyntaxNode *root = m_tree.GetTop();
00058 assert(root);
00059 m_words = tokenize(m_line.c_str());
00060 return ConvertTree(*root, m_words);
00061 }
00062
00063
00064 std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
00065 const SyntaxNode &tree,
00066 const std::vector<std::string> &words)
00067 {
00068 std::auto_ptr<ParseTree> root(new ParseTree(tree.GetLabel()));
00069 root->SetPcfgScore(tree.GetPcfgScore());
00070 const std::vector<SyntaxNode*> &children = tree.GetChildren();
00071 if (children.empty()) {
00072 if (tree.GetStart() != tree.GetEnd()) {
00073 std::ostringstream msg;
00074 msg << "leaf node covers multiple words (" << tree.GetStart()
00075 << "-" << tree.GetEnd() << "): this is currently unsupported";
00076 throw Exception(msg.str());
00077 }
00078 std::auto_ptr<ParseTree> leaf(new ParseTree(words[tree.GetStart()]));
00079 leaf->SetParent(root.get());
00080 root->AddChild(leaf.release());
00081 } else {
00082 for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
00083 p != children.end(); ++p) {
00084 assert(*p);
00085 std::auto_ptr<ParseTree> child = ConvertTree(**p, words);
00086 child->SetParent(root.get());
00087 root->AddChild(child.release());
00088 }
00089 }
00090 return root;
00091 }
00092
00093 }
00094 }