00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include <algorithm>
00020 #include <set>
00021 #include "AlignmentInfo.h"
00022 #include "TypeDef.h"
00023 #include "StaticData.h"
00024 #include "Util.h"
00025 #include "util/exception.hh"
00026
00027 namespace Moses
00028 {
00029
00030 AlignmentInfo::AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
00031 : m_collection(pairs)
00032 {
00033 BuildNonTermIndexMaps();
00034 }
00035
00036 AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
00037 {
00038 assert(aln.size()%2==0);
00039 for (size_t i = 0; i < aln.size(); i+= 2)
00040 m_collection.insert(std::make_pair(size_t(aln[i]),size_t(aln[i+1])));
00041 BuildNonTermIndexMaps();
00042 }
00043
00044 AlignmentInfo::AlignmentInfo(const std::string &str)
00045 {
00046 std::vector<std::string> points = Tokenize(str, " ");
00047 std::vector<std::string>::const_iterator iter;
00048 for (iter = points.begin(); iter != points.end(); iter++) {
00049 std::vector<size_t> point = Tokenize<size_t>(*iter, "-");
00050 UTIL_THROW_IF2(point.size() != 2, "Bad format of word alignment point: " << *iter);
00051 Add(point[0], point[1]);
00052 }
00053 }
00054
00055 void AlignmentInfo::BuildNonTermIndexMaps()
00056 {
00057 if (m_collection.empty()) {
00058 return;
00059 }
00060 const_iterator p = begin();
00061 size_t maxIndex = p->second;
00062 for (++p; p != end(); ++p) {
00063 if (p->second > maxIndex) {
00064 maxIndex = p->second;
00065 }
00066 }
00067 m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
00068 m_nonTermIndexMap2.resize(maxIndex+1, NOT_FOUND);
00069 size_t i = 0;
00070 for (p = begin(); p != end(); ++p) {
00071 if (m_nonTermIndexMap[p->second] != NOT_FOUND) {
00072
00073 m_nonTermIndexMap.clear();
00074 m_nonTermIndexMap2.clear();
00075 return;
00076 }
00077 m_nonTermIndexMap[p->second] = i++;
00078 m_nonTermIndexMap2[p->second] = p->first;
00079 }
00080 }
00081
00082 std::set<size_t> AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const
00083 {
00084 std::set<size_t> ret;
00085 CollType::const_iterator iter;
00086 for (iter = begin(); iter != end(); ++iter) {
00087
00088 if (iter->first == sourcePos) {
00089 ret.insert(iter->second);
00090 }
00091 }
00092 return ret;
00093 }
00094
00095 std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
00096 {
00097 std::set<size_t> ret;
00098 CollType::const_iterator iter;
00099 for (iter = begin(); iter != end(); ++iter) {
00100
00101 if (iter->second == targetPos) {
00102 ret.insert(iter->first);
00103 }
00104 }
00105 return ret;
00106 }
00107
00108
/**
 * Strict "less than" for alignment points addressed through pointers.
 *
 * Points are ordered by their second component first; ties are broken by
 * the first component. Returns false for equal points.
 */
bool
compare_target(std::pair<size_t,size_t> const* a,
               std::pair<size_t,size_t> const* b)
{
  if (a->second != b->second) {
    return a->second < b->second;
  }
  return a->first < b->first;
}
00117
00118
00119 std::vector< const std::pair<size_t,size_t>* >
00120 AlignmentInfo::
00121 GetSortedAlignments(WordAlignmentSort SortOrder) const
00122 {
00123 std::vector< const std::pair<size_t,size_t>* > ret;
00124
00125 CollType::const_iterator iter;
00126 for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
00127 const std::pair<size_t,size_t> &alignPair = *iter;
00128 ret.push_back(&alignPair);
00129 }
00130
00131 switch (SortOrder) {
00132 case NoSort:
00133 break;
00134
00135 case TargetOrder:
00136 std::sort(ret.begin(), ret.end(), compare_target);
00137 break;
00138
00139 default:
00140 UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
00141 << SortOrder);
00142 }
00143
00144 return ret;
00145
00146 }
00147
00148 std::vector<size_t> AlignmentInfo::GetSourceIndex2PosMap() const
00149 {
00150 std::set<size_t> sourcePoses;
00151
00152 CollType::const_iterator iter;
00153 for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
00154 size_t sourcePos = iter->first;
00155 sourcePoses.insert(sourcePos);
00156 }
00157 std::vector<size_t> ret(sourcePoses.begin(), sourcePoses.end());
00158 return ret;
00159 }
00160
00161 std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo)
00162 {
00163 AlignmentInfo::const_iterator iter;
00164 for (iter = alignmentInfo.begin(); iter != alignmentInfo.end(); ++iter) {
00165 out << iter->first << "-" << iter->second << " ";
00166 }
00167 return out;
00168 }
00169
00170 }