00001 #include "StaticData.h"
00002 #include "WordLattice.h"
00003 #include "PCNTools.h"
00004 #include "Util.h"
00005 #include "FloydWarshall.h"
00006
00007 namespace Moses
00008 {
00009 WordLattice::WordLattice() {}
00010
00011 size_t WordLattice::GetColumnIncrement(size_t i, size_t j) const
00012 {
00013 return next_nodes[i][j];
00014 }
00015
00016 void WordLattice::Print(std::ostream& out) const
00017 {
00018 out<<"word lattice: "<<data.size()<<"\n";
00019 for(size_t i=0; i<data.size(); ++i) {
00020 out<<i<<" -- ";
00021 for(size_t j=0; j<data[i].size(); ++j) {
00022 out<<"("<<data[i][j].first.ToString()<<", ";
00023 for(std::vector<float>::const_iterator scoreIterator = data[i][j].second.begin(); scoreIterator<data[i][j].second.end(); scoreIterator++) {
00024 out<<*scoreIterator<<", ";
00025 }
00026 out << GetColumnIncrement(i,j) << ") ";
00027 }
00028
00029 out<<"\n";
00030 }
00031 out<<"\n\n";
00032 }
00033
00034 int WordLattice::InitializeFromPCNDataType(const PCN::CN& cn, const std::vector<FactorType>& factorOrder, const std::string& debug_line)
00035 {
00036 size_t numLinkParams = StaticData::Instance().GetNumLinkParams();
00037 size_t numLinkWeights = StaticData::Instance().GetNumInputScores();
00038 size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
00039
00040
00041 bool addRealWordCount = ((numLinkParams + 1) == numLinkWeights);
00042 data.resize(cn.size());
00043 next_nodes.resize(cn.size());
00044 for(size_t i=0; i<cn.size(); ++i) {
00045 const PCN::CNCol& col = cn[i];
00046 if (col.empty()) return false;
00047 data[i].resize(col.size());
00048 next_nodes[i].resize(col.size());
00049 for (size_t j=0; j<col.size(); ++j) {
00050 const PCN::CNAlt& alt = col[j];
00051
00052
00053
00054 if (alt.first.second.size() != numLinkParams) {
00055 TRACE_ERR("ERROR: need " << numLinkParams << " link parameters, found " << alt.first.second.size() << " while reading column " << i << " from " << debug_line << "\n");
00056 return false;
00057 }
00058
00059
00060 std::vector<float>::const_iterator probsIterator;
00061 data[i][j].second = std::vector<float>(0);
00062 for(probsIterator = alt.first.second.begin(); probsIterator < alt.first.second.end(); probsIterator++) {
00063 IFVERBOSE(1) {
00064 if (*probsIterator < 0.0f) {
00065 TRACE_ERR("WARN: neg probability: " << *probsIterator << "\n");
00066
00067 }
00068 if (*probsIterator > 1.0f) {
00069 TRACE_ERR("WARN: probability > 1: " << *probsIterator << "\n");
00070
00071 }
00072 }
00073 data[i][j].second.push_back(std::max(static_cast<float>(log(*probsIterator)), LOWEST_SCORE));
00074 }
00075
00076 if (addRealWordCount) {
00077
00078 float value = (alt.first.first=="" || alt.first.first==EPSILON) ? 0.0f : -1.0f;
00079 data[i][j].second.push_back(value);
00080 }
00081 String2Word(alt.first.first,data[i][j].first,factorOrder);
00082 next_nodes[i][j] = alt.second;
00083
00084 if(next_nodes[i][j] > maxSizePhrase) {
00085 TRACE_ERR("ERROR: Jump length " << next_nodes[i][j] << " in word lattice exceeds maximum phrase length " << maxSizePhrase << ".\n");
00086 TRACE_ERR("ERROR: Increase max-phrase-length to process this lattice.\n");
00087 return false;
00088 }
00089 }
00090 }
00091 if (!cn.empty()) {
00092 std::vector<std::vector<bool> > edges(0);
00093 this->GetAsEdgeMatrix(edges);
00094 floyd_warshall(edges,distances);
00095
00096 IFVERBOSE(2) {
00097 TRACE_ERR("Shortest paths:\n");
00098 for (size_t i=0; i<edges.size(); ++i) {
00099 for (size_t j=0; j<edges.size(); ++j) {
00100 int d = distances[i][j];
00101 if (d > 99999) {
00102 d=-1;
00103 }
00104 TRACE_ERR("\t" << d);
00105 }
00106 TRACE_ERR("\n");
00107 }
00108 }
00109 }
00110 return !cn.empty();
00111 }
00112
00113 int WordLattice::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
00114 {
00115 Clear();
00116 std::string line;
00117 if(!getline(in,line)) return 0;
00118 std::map<std::string, std::string> meta=ProcessAndStripSGML(line);
00119 if (meta.find("id") != meta.end()) {
00120 this->SetTranslationId(atol(meta["id"].c_str()));
00121 }
00122
00123 PCN::CN cn = PCN::parsePCN(line);
00124 return InitializeFromPCNDataType(cn, factorOrder, line);
00125 }
00126
00127 void WordLattice::GetAsEdgeMatrix(std::vector<std::vector<bool> >& edges) const
00128 {
00129 edges.resize(data.size()+1,std::vector<bool>(data.size()+1, false));
00130 for (size_t i=0; i<data.size(); ++i) {
00131 for (size_t j=0; j<data[i].size(); ++j) {
00132 edges[i][i+next_nodes[i][j]] = true;
00133 }
00134 }
00135 }
00136
00137 int WordLattice::ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const
00138 {
00139 int result;
00140
00141 if (prev.GetStartPos() == NOT_FOUND && current.GetStartPos() == 0) {
00142 result = 0;
00143
00144 VERBOSE(4, "Word lattice distortion: monotonic initial step\n");
00145 } else if (prev.GetEndPos()+1 == current.GetStartPos()) {
00146 result = 0;
00147
00148 VERBOSE(4, "Word lattice distortion: monotonic step from " << prev.GetEndPos() << " to " << current.GetStartPos() << "\n");
00149 } else if (prev.GetStartPos() == NOT_FOUND) {
00150 result = distances[0][current.GetStartPos()];
00151
00152 VERBOSE(4, "Word lattice distortion: initial step from 0 to " << current.GetStartPos() << " of length " << result << "\n");
00153 if (result < 0 || result > 99999) {
00154 TRACE_ERR("prev: " << prev << "\ncurrent: " << current << "\n");
00155 TRACE_ERR("A: got a weird distance from 0 to " << (current.GetStartPos()+1) << " of " << result << "\n");
00156 }
00157 } else if (prev.GetEndPos() > current.GetStartPos()) {
00158 result = distances[current.GetStartPos()][prev.GetEndPos() + 1];
00159
00160 VERBOSE(4, "Word lattice distortion: backward step from " << (prev.GetEndPos()+1) << " to " << current.GetStartPos() << " of length " << result << "\n");
00161 if (result < 0 || result > 99999) {
00162 TRACE_ERR("prev: " << prev << "\ncurrent: " << current << "\n");
00163 TRACE_ERR("B: got a weird distance from "<< current.GetStartPos() << " to " << prev.GetEndPos()+1 << " of " << result << "\n");
00164 }
00165 } else {
00166 result = distances[prev.GetEndPos() + 1][current.GetStartPos()];
00167
00168 VERBOSE(4, "Word lattice distortion: forward step from " << (prev.GetEndPos()+1) << " to " << current.GetStartPos() << " of length " << result << "\n");
00169 if (result < 0 || result > 99999) {
00170 TRACE_ERR("prev: " << prev << "\ncurrent: " << current << "\n");
00171 TRACE_ERR("C: got a weird distance from "<< prev.GetEndPos()+1 << " to " << current.GetStartPos() << " of " << result << "\n");
00172 }
00173 }
00174
00175 return result;
00176 }
00177
00178 bool WordLattice::CanIGetFromAToB(size_t start, size_t end) const
00179 {
00180
00181 return distances[start][end] < 100000;
00182 }
00183
00184
00185 }
00186