00001 #include "InternalTree.h"
00002 #include "moses/StaticData.h"
00003
00004 namespace Moses
00005 {
00006
00007 InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool nonterminal)
00008 {
00009
00010 std::vector<FactorType> const& oFactors
00011 = StaticData::Instance().options()->output.factor_order;
00012 if (len > 0) {
00013 m_value.CreateFromString(Output, oFactors, StringPiece(line).substr(start, len),
00014 nonterminal);
00015 }
00016 }
00017
00018 InternalTree::InternalTree(const std::string & line, const bool nonterminal)
00019 {
00020
00021 size_t found = line.find_first_of("[] ");
00022
00023 if (found == line.npos) {
00024 m_value.CreateFromString(Output,
00025 StaticData::Instance().options()->output.factor_order,
00026 line, nonterminal);
00027 } else {
00028 AddSubTree(line, 0);
00029 }
00030 }
00031
00032 size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
00033 {
00034
00035 char token = 0;
00036 size_t len = 0;
00037 bool has_value = false;
00038
00039 while (token != ']' && pos != std::string::npos) {
00040 size_t oldpos = pos;
00041 pos = line.find_first_of("[] ", pos);
00042 if (pos == std::string::npos) break;
00043 token = line[pos];
00044 len = pos-oldpos;
00045
00046 if (token == '[') {
00047 if (has_value) {
00048 m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
00049 pos = m_children.back()->AddSubTree(line, pos+1);
00050 } else {
00051 if (len > 0) {
00052 m_value.CreateFromString(Output,
00053 StaticData::Instance().options()->output.factor_order,
00054 StringPiece(line).substr(oldpos, len), false);
00055 has_value = true;
00056 }
00057 pos = AddSubTree(line, pos+1);
00058 }
00059 } else if (token == ' ' || token == ']') {
00060 if (len > 0 && !has_value) {
00061 m_value.CreateFromString(Output,
00062 StaticData::Instance().options()->output.factor_order,
00063 StringPiece(line).substr(oldpos, len), true);
00064 has_value = true;
00065 } else if (len > 0) {
00066 m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
00067 }
00068 if (token == ' ') {
00069 pos++;
00070 }
00071 }
00072 }
00073
00074 if (pos == std::string::npos) {
00075 return line.size();
00076 }
00077 return std::min(line.size(),pos+1);
00078
00079 }
00080
00081 std::string InternalTree::GetString(bool start) const
00082 {
00083
00084 std::string ret = "";
00085 if (!start) {
00086 ret += " ";
00087 }
00088
00089 if (!IsTerminal()) {
00090 ret += "[";
00091 }
00092
00093 ret += m_value.GetString(StaticData::Instance().options()->output.factor_order, false);
00094 for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
00095 ret += (*it)->GetString(false);
00096 }
00097
00098 if (!IsTerminal()) {
00099 ret += "]";
00100 }
00101 return ret;
00102
00103 }
00104
00105
00106 void InternalTree::Combine(const std::vector<TreePointer> &previous)
00107 {
00108
00109 std::vector<TreePointer>::iterator it;
00110 bool found = false;
00111 leafNT next_leafNT(this);
00112 for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
00113 found = next_leafNT(it);
00114 if (found) {
00115 *it = *it_prev;
00116 } else {
00117 std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
00118 }
00119 }
00120 }
00121
00122
00123 void InternalTree::Unbinarize()
00124 {
00125
00126
00127 if (m_value.GetString(0).empty() || m_value.GetString(0).as_string()[0] == '^') {
00128 return;
00129 }
00130
00131
00132 for (std::vector<TreePointer>::iterator it = m_children.begin(); it != m_children.end(); ++it) {
00133 if (!(*it)->IsTerminal() && (*it)->GetLabel().GetString(0).as_string()[0] == '^') {
00134 std::vector<TreePointer> new_children;
00135 GetUnbinarizedChildren(new_children);
00136 m_children = new_children;
00137 break;
00138 }
00139 }
00140
00141
00142 for (std::vector<TreePointer>::iterator it = m_children.begin(); it != m_children.end(); ++it) {
00143 (*it)->Unbinarize();
00144 }
00145 }
00146
00147
00148 void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
00149 {
00150 for (std::vector<TreePointer>::const_iterator itx = m_children.begin(); itx != m_children.end(); ++itx) {
00151 const StringPiece label = (*itx)->GetLabel().GetString(0);
00152 if (!label.empty() && label.as_string()[0] == '^') {
00153 (*itx)->GetUnbinarizedChildren(ret);
00154 } else {
00155 ret.push_back(*itx);
00156 }
00157 }
00158 }
00159
00160 bool InternalTree::FlatSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const
00161 {
00162 for (it = m_children.begin(); it != m_children.end(); ++it) {
00163 if ((*it)->GetLabel() == label) {
00164 return true;
00165 }
00166 }
00167 return false;
00168 }
00169
00170 bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const
00171 {
00172 for (it = m_children.begin(); it != m_children.end(); ++it) {
00173 if ((*it)->GetLabel() == label) {
00174 return true;
00175 }
00176 std::vector<TreePointer>::const_iterator it2;
00177 if ((*it)->RecursiveSearch(label, it2)) {
00178 it = it2;
00179 return true;
00180 }
00181 }
00182 return false;
00183 }
00184
00185 bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
00186 {
00187 for (it = m_children.begin(); it != m_children.end(); ++it) {
00188 if ((*it)->GetLabel() == label) {
00189 parent = this;
00190 return true;
00191 }
00192 std::vector<TreePointer>::const_iterator it2;
00193 if ((*it)->RecursiveSearch(label, it2, parent)) {
00194 it = it2;
00195 return true;
00196 }
00197 }
00198 return false;
00199 }
00200
00201 }