00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "PhraseOrientation.h"
00021
00022 #include <iostream>
00023 #include <sstream>
00024 #include <limits>
00025 #include <cassert>
00026
00027 #include <boost/assign/list_of.hpp>
00028
00029 namespace MosesTraining
00030 {
00031
00032 std::vector<float> PhraseOrientation::m_l2rOrientationPriorCounts = boost::assign::list_of(0)(0)(0)(0)(0);
00033 std::vector<float> PhraseOrientation::m_r2lOrientationPriorCounts = boost::assign::list_of(0)(0)(0)(0)(0);
00034
00035 PhraseOrientation::PhraseOrientation(int sourceSize,
00036 int targetSize,
00037 const Alignment &alignment)
00038 : m_countF(sourceSize)
00039 , m_countE(targetSize)
00040 {
00041
00042 std::vector<std::vector<int> > alignedToS;
00043 for(int i=0; i<m_countF; ++i) {
00044 std::vector< int > dummy;
00045 alignedToS.push_back(dummy);
00046 }
00047 for(int i=0; i<m_countE; ++i) {
00048 std::vector< int > dummy;
00049 m_alignedToT.push_back(dummy);
00050 }
00051 std::vector<int> alignedCountS(m_countF,0);
00052
00053 for (Alignment::const_iterator a=alignment.begin(); a!=alignment.end(); ++a) {
00054 alignedToS[a->first].push_back(a->second);
00055 alignedCountS[a->first]++;
00056 m_alignedToT[a->second].push_back(a->first);
00057 }
00058
00059 Init(sourceSize, targetSize, m_alignedToT, alignedToS, alignedCountS);
00060 }
00061
00062
00063 PhraseOrientation::PhraseOrientation(int sourceSize,
00064 int targetSize,
00065 const Moses::AlignmentInfo &alignTerm,
00066 const Moses::AlignmentInfo &alignNonTerm)
00067 : m_countF(sourceSize)
00068 , m_countE(targetSize)
00069 {
00070
00071 std::vector<std::vector<int> > alignedToS;
00072 for(int i=0; i<m_countF; ++i) {
00073 std::vector< int > dummy;
00074 alignedToS.push_back(dummy);
00075 }
00076 for(int i=0; i<m_countE; ++i) {
00077 std::vector< int > dummy;
00078 m_alignedToT.push_back(dummy);
00079 }
00080 std::vector<int> alignedCountS(m_countF,0);
00081
00082 for (Moses::AlignmentInfo::const_iterator it=alignTerm.begin();
00083 it!=alignTerm.end(); ++it) {
00084 alignedToS[it->first].push_back(it->second);
00085 alignedCountS[it->first]++;
00086 m_alignedToT[it->second].push_back(it->first);
00087 }
00088
00089 for (Moses::AlignmentInfo::const_iterator it=alignNonTerm.begin();
00090 it!=alignNonTerm.end(); ++it) {
00091 alignedToS[it->first].push_back(it->second);
00092 alignedCountS[it->first]++;
00093 m_alignedToT[it->second].push_back(it->first);
00094 }
00095
00096 Init(sourceSize, targetSize, m_alignedToT, alignedToS, alignedCountS);
00097 }
00098
00099 PhraseOrientation::PhraseOrientation(int sourceSize,
00100 int targetSize,
00101 const std::vector<std::vector<int> > &alignedToT,
00102 const std::vector<std::vector<int> > &alignedToS,
00103 const std::vector<int> &alignedCountS)
00104 : m_countF(sourceSize)
00105 , m_countE(targetSize)
00106 , m_alignedToT(alignedToT)
00107 {
00108 Init(sourceSize, targetSize, m_alignedToT, alignedToS, alignedCountS);
00109 }
00110
00111
00112 void PhraseOrientation::Init(int sourceSize,
00113 int targetSize,
00114 const std::vector<std::vector<int> > &alignedToT,
00115 const std::vector<std::vector<int> > &alignedToS,
00116 const std::vector<int> &alignedCountS)
00117 {
00118 for (int startF=0; startF<m_countF; ++startF) {
00119 for (int endF=startF; endF<m_countF; ++endF) {
00120
00121 int minE = std::numeric_limits<int>::max();
00122 int maxE = -1;
00123 for (int fi=startF; fi<=endF; ++fi) {
00124 for (size_t i=0; i<alignedToS[fi].size(); ++i) {
00125 int ei = alignedToS[fi][i];
00126 if (ei<minE) {
00127 minE = ei;
00128 }
00129 if (ei>maxE) {
00130 maxE = ei;
00131 }
00132 }
00133 }
00134
00135 m_minAndMaxAlignedToSourceSpan[ std::pair<int,int>(startF,endF) ] = std::pair<int,int>(minE,maxE);
00136 }
00137 }
00138
00139
00140
00141 for (int startE=0; startE<m_countE; ++startE) {
00142 for (int endE=startE; endE<m_countE; ++endE) {
00143
00144 int minF = std::numeric_limits<int>::max();
00145 int maxF = -1;
00146 std::vector< int > usedF = alignedCountS;
00147 for (int ei=startE; ei<=endE; ++ei) {
00148 for (size_t i=0; i<alignedToT[ei].size(); ++i) {
00149 int fi = alignedToT[ei][i];
00150 if (fi<minF) {
00151 minF = fi;
00152 }
00153 if (fi>maxF) {
00154 maxF = fi;
00155 }
00156 usedF[fi]--;
00157 }
00158 }
00159
00160 m_minAndMaxAlignedToTargetSpan[ std::pair<int,int>(startE,endE) ] = std::pair<int,int>(minF,maxF);
00161
00162 if (maxF >= 0) {
00163
00164
00165 bool out_of_bounds = false;
00166 for (int fi=minF; fi<=maxF && !out_of_bounds; ++fi)
00167 if (usedF[fi]>0) {
00168
00169 out_of_bounds = true;
00170 }
00171
00172
00173 if (!out_of_bounds) {
00174
00175 for (int startF=minF;
00176 (startF>=0 &&
00177 (startF==minF || alignedCountS[startF]==0));
00178 startF--) {
00179
00180 for (int endF=maxF;
00181 (endF<m_countF &&
00182 (endF==maxF || alignedCountS[endF]==0));
00183 endF++) {
00184
00185 InsertPhraseVertices(m_topLeft, m_topRight, m_bottomLeft, m_bottomRight,
00186 startF, startE, endF, endE);
00187 }
00188 }
00189 }
00190 }
00191 }
00192 }
00193 }
00194
00195
00196 void PhraseOrientation::InsertVertex( HSentenceVertices & corners, int x, int y )
00197 {
00198 std::set<int> tmp;
00199 tmp.insert(x);
00200 std::pair< HSentenceVertices::iterator, bool > ret = corners.insert( std::pair<int, std::set<int> > (y, tmp) );
00201 if (ret.second == false) {
00202 ret.first->second.insert(x);
00203 }
00204 }
00205
00206
00207 void PhraseOrientation::InsertPhraseVertices(HSentenceVertices & topLeft,
00208 HSentenceVertices & topRight,
00209 HSentenceVertices & bottomLeft,
00210 HSentenceVertices & bottomRight,
00211 int startF, int startE, int endF, int endE)
00212 {
00213
00214 InsertVertex(topLeft, startF, startE);
00215 InsertVertex(topRight, endF, startE);
00216 InsertVertex(bottomLeft, startF, endE);
00217 InsertVertex(bottomRight, endF, endE);
00218 }
00219
00220
00221 const std::string PhraseOrientation::GetOrientationInfoString(int startF, int endF, REO_DIR direction) const
00222 {
00223 boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator foundMinMax
00224 = m_minAndMaxAlignedToSourceSpan.find( std::pair<int,int>(startF,endF) );
00225
00226 if ( foundMinMax != m_minAndMaxAlignedToSourceSpan.end() ) {
00227 int startE = (foundMinMax->second).first;
00228 int endE = (foundMinMax->second).second;
00229
00230
00231
00232
00233
00234
00235 return GetOrientationInfoString(startF, startE, endF, endE, direction);
00236 } else {
00237 std::cerr << "PhraseOrientation::GetOrientationInfoString(): Error: not able to determine phrase orientation" << std::endl;
00238 std::exit(1);
00239 }
00240 }
00241
00242
00243 const std::string PhraseOrientation::GetOrientationInfoString(int startF, int startE, int endF, int endE, REO_DIR direction) const
00244 {
00245 REO_CLASS hierPrevOrient=REO_CLASS_UNKNOWN, hierNextOrient=REO_CLASS_UNKNOWN;
00246
00247 if ( direction == REO_DIR_L2R || direction == REO_DIR_BIDIR )
00248 hierPrevOrient = GetOrientationInfo(startF, startE, endF, endE, REO_DIR_L2R);
00249
00250 if ( direction == REO_DIR_R2L || direction == REO_DIR_BIDIR )
00251 hierNextOrient = GetOrientationInfo(startF, startE, endF, endE, REO_DIR_R2L);
00252
00253 switch (direction) {
00254 case REO_DIR_L2R:
00255 return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR);
00256 break;
00257 case REO_DIR_R2L:
00258 return GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
00259 break;
00260 case REO_DIR_BIDIR:
00261 return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
00262 break;
00263 default:
00264 return GetOrientationString(hierPrevOrient, REO_MODEL_TYPE_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MODEL_TYPE_MSLR);
00265 break;
00266 }
00267 return "PhraseOrientationERROR";
00268 }
00269
00270
00271 PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientationInfo(int startF, int endF, REO_DIR direction) const
00272 {
00273 boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator foundMinMax
00274 = m_minAndMaxAlignedToSourceSpan.find( std::pair<int,int>(startF,endF) );
00275
00276 if ( foundMinMax != m_minAndMaxAlignedToSourceSpan.end() ) {
00277 int startE = (foundMinMax->second).first;
00278 int endE = (foundMinMax->second).second;
00279
00280
00281
00282
00283
00284
00285 return GetOrientationInfo(startF, startE, endF, endE, direction);
00286 } else {
00287 std::cerr << "PhraseOrientation::GetOrientationInfo(): Error: not able to determine phrase orientation" << std::endl;
00288 std::exit(1);
00289 }
00290 }
00291
00292
00293 PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const
00294 {
00295 if ( direction != REO_DIR_L2R && direction != REO_DIR_R2L ) {
00296 std::cerr << "PhraseOrientation::GetOrientationInfo(): Error: direction should be either L2R or R2L" << std::endl;
00297 std::exit(1);
00298 }
00299
00300 if ( direction == REO_DIR_L2R )
00301 return GetOrientHierModel(REO_MODEL_TYPE_MSLR,
00302 startF, endF, startE, endE, m_countF-1, 0, 0, 1,
00303 &ge, &le,
00304 m_bottomRight, m_bottomLeft);
00305
00306 if ( direction == REO_DIR_R2L )
00307 return GetOrientHierModel(REO_MODEL_TYPE_MSLR,
00308 endF, startF, endE, startE, 0, m_countF-1, m_countE-1, -1,
00309 &le, &ge,
00310 m_topLeft, m_topRight);
00311
00312 return REO_CLASS_UNKNOWN;
00313 }
00314
00315
00316
00317 PhraseOrientation::REO_CLASS PhraseOrientation::GetOrientHierModel(REO_MODEL_TYPE modelType,
00318 int startF, int endF, int startE, int endE, int countF, int zeroF, int zeroE, int unit,
00319 bool (*ge)(int, int), bool (*le)(int, int),
00320 const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const
00321 {
00322 bool leftSourceSpanIsAligned = ( (startF != zeroF) && SourceSpanIsAligned(zeroF,startF-unit) );
00323 bool topTargetSpanIsAligned = ( (startE != zeroE) && TargetSpanIsAligned(zeroE,startE-unit) );
00324
00325 if (!topTargetSpanIsAligned && !leftSourceSpanIsAligned)
00326 return REO_CLASS_LEFT;
00327
00328 HSentenceVertices::const_iterator it;
00329
00330 if (
00331 ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
00332 it->second.find(startF-unit) != it->second.end()))
00333 return REO_CLASS_LEFT;
00334
00335 if (modelType == REO_MODEL_TYPE_MONO)
00336 return REO_CLASS_UNKNOWN;
00337
00338 if (
00339 ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
00340 it->second.find(endF + unit) != it->second.end()))
00341 return REO_CLASS_RIGHT;
00342
00343 if (modelType == REO_MODEL_TYPE_MSD)
00344 return REO_CLASS_UNKNOWN;
00345
00346 for (int indexF=startF-2*unit; (*ge)(indexF, zeroF); indexF=indexF-unit) {
00347 if ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
00348 it->second.find(indexF) != it->second.end())
00349 return REO_CLASS_DLEFT;
00350 }
00351
00352 for (int indexF=endF+2*unit; (*le)(indexF, countF); indexF=indexF+unit) {
00353 if ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
00354 it->second.find(indexF) != it->second.end())
00355 return REO_CLASS_DRIGHT;
00356 }
00357
00358 return REO_CLASS_UNKNOWN;
00359 }
00360
00361 bool PhraseOrientation::SourceSpanIsAligned(int index1, int index2) const
00362 {
00363 return SpanIsAligned(index1, index2, m_minAndMaxAlignedToSourceSpan);
00364 }
00365
00366 bool PhraseOrientation::TargetSpanIsAligned(int index1, int index2) const
00367 {
00368 return SpanIsAligned(index1, index2, m_minAndMaxAlignedToTargetSpan);
00369 }
00370
00371 bool PhraseOrientation::SpanIsAligned(int index1, int index2, const boost::unordered_map< std::pair<int,int> , std::pair<int,int> > &minAndMaxAligned) const
00372 {
00373 boost::unordered_map< std::pair<int,int> , std::pair<int,int> >::const_iterator itMinAndMaxAligned =
00374 minAndMaxAligned.find(std::pair<int,int>(std::min(index1,index2),std::max(index1,index2)));
00375
00376 if (itMinAndMaxAligned == minAndMaxAligned.end()) {
00377 std::cerr << "PhraseOrientation::SourceSpanIsAligned(): Error" << std::endl;
00378 std::exit(1);
00379 } else {
00380 if (itMinAndMaxAligned->second.first == std::numeric_limits<int>::max()) {
00381 return false;
00382 }
00383 }
00384 return true;
00385 }
00386
00387
00388 const std::string PhraseOrientation::GetOrientationString(const REO_CLASS orient, const REO_MODEL_TYPE modelType)
00389 {
00390 std::ostringstream oss;
00391 WriteOrientation(oss, orient, modelType);
00392 return oss.str();
00393 }
00394
00395
00396 void PhraseOrientation::WriteOrientation(std::ostream& out, const REO_CLASS orient, const REO_MODEL_TYPE modelType)
00397 {
00398 switch(orient) {
00399 case REO_CLASS_LEFT:
00400 out << "mono";
00401 break;
00402 case REO_CLASS_RIGHT:
00403 out << "swap";
00404 break;
00405 case REO_CLASS_DLEFT:
00406 out << "dleft";
00407 break;
00408 case REO_CLASS_DRIGHT:
00409 out << "dright";
00410 break;
00411 case REO_CLASS_UNKNOWN:
00412 switch(modelType) {
00413 case REO_MODEL_TYPE_MONO:
00414 out << "nomono";
00415 break;
00416 case REO_MODEL_TYPE_MSD:
00417 out << "other";
00418 break;
00419 case REO_MODEL_TYPE_MSLR:
00420 out << "dleft";
00421 break;
00422 }
00423 break;
00424 }
00425 }
00426
00427
00428 bool PhraseOrientation::IsAligned(int fi, int ei) const
00429 {
00430 if (ei == -1 && fi == -1)
00431 return true;
00432
00433 if (ei <= -1 || fi <= -1)
00434 return false;
00435
00436 if (ei == m_countE && fi == m_countF)
00437 return true;
00438
00439 if (ei >= m_countE || fi >= m_countF)
00440 return false;
00441
00442 for (size_t i=0; i<m_alignedToT[ei].size(); ++i)
00443 if (m_alignedToT[ei][i] == fi)
00444 return true;
00445
00446 return false;
00447 }
00448
00449
00450 void PhraseOrientation::IncrementPriorCount(REO_DIR direction, REO_CLASS orient, float increment)
00451 {
00452 assert(direction==REO_DIR_L2R || direction==REO_DIR_R2L);
00453 if (direction == REO_DIR_L2R) {
00454 m_l2rOrientationPriorCounts[orient] += increment;
00455 } else if (direction == REO_DIR_R2L) {
00456 m_r2lOrientationPriorCounts[orient] += increment;
00457 }
00458 }
00459
00460
00461 void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE modelType)
00462 {
00463 std::map<std::string,float> l2rOrientationPriorCountsMap;
00464 std::map<std::string,float> r2lOrientationPriorCountsMap;
00465 for (int orient=0; orient<=REO_CLASS_UNKNOWN; ++orient) {
00466 l2rOrientationPriorCountsMap[GetOrientationString((REO_CLASS)orient, modelType)] += m_l2rOrientationPriorCounts[orient];
00467 }
00468 for (int orient=0; orient<=REO_CLASS_UNKNOWN; ++orient) {
00469 r2lOrientationPriorCountsMap[GetOrientationString((REO_CLASS)orient, modelType)] += m_r2lOrientationPriorCounts[orient];
00470 }
00471 for (std::map<std::string,float>::const_iterator l2rOrientationPriorCountsMapIt = l2rOrientationPriorCountsMap.begin();
00472 l2rOrientationPriorCountsMapIt != l2rOrientationPriorCountsMap.end(); ++l2rOrientationPriorCountsMapIt) {
00473 out << "L2R_" << l2rOrientationPriorCountsMapIt->first << " " << l2rOrientationPriorCountsMapIt->second << std::endl;
00474 }
00475 for (std::map<std::string,float>::const_iterator r2lOrientationPriorCountsMapIt = r2lOrientationPriorCountsMap.begin();
00476 r2lOrientationPriorCountsMapIt != r2lOrientationPriorCountsMap.end(); ++r2lOrientationPriorCountsMapIt) {
00477 out << "R2L_" << r2lOrientationPriorCountsMapIt->first << " " << r2lOrientationPriorCountsMapIt->second << std::endl;
00478 }
00479 }
00480
00481 }