00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_WordsBitmap_h
00023 #define moses_WordsBitmap_h
00024
00025 #include <algorithm>
00026 #include <limits>
00027 #include <vector>
00028 #include <iostream>
00029 #include <cstring>
00030 #include <cmath>
00031 #include <cstdlib>
00032 #include "TypeDef.h"
00033 #include "WordsRange.h"
00034
00035 namespace Moses
00036 {
00037 typedef unsigned long WordsBitmapID;
00038
00050 class WordsBitmap
00051 {
00052 friend std::ostream& operator<<(std::ostream& out, const WordsBitmap& wordsBitmap);
00053 private:
00054 std::vector<char> m_bitmap;
00055 size_t m_firstGap;
00056
00057 WordsBitmap();
00058 WordsBitmap& operator= (const WordsBitmap& other);
00059
00061 void UpdateFirstGap(size_t startPos, size_t endPos, bool value) {
00062 if (value) {
00063
00064 if (startPos <= m_firstGap && m_firstGap <= endPos) {
00065 m_firstGap = NOT_FOUND;
00066 for (size_t i = endPos + 1 ; i < m_bitmap.size(); ++i) {
00067 if (!m_bitmap[i]) {
00068 m_firstGap = i;
00069 break;
00070 }
00071 }
00072 }
00073
00074 } else {
00075
00076 if (startPos < m_firstGap) {
00077 m_firstGap = startPos;
00078 }
00079 }
00080 }
00081
00082
00083 public:
00085 WordsBitmap(size_t size, const std::vector<bool>& initializer)
00086 :m_bitmap(initializer.begin(), initializer.end()), m_firstGap(0) {
00087
00088
00089
00090 m_bitmap.resize(size, false);
00091
00092
00093 std::vector<char>::const_iterator first_gap = std::find(
00094 m_bitmap.begin(), m_bitmap.end(), false);
00095 m_firstGap = (
00096 (first_gap == m_bitmap.end()) ?
00097 NOT_FOUND : first_gap - m_bitmap.begin());
00098 }
00099
00101 WordsBitmap(size_t size)
00102 :m_bitmap(size, false), m_firstGap(0) {
00103 }
00104
00106 WordsBitmap(const WordsBitmap ©)
00107 :m_bitmap(copy.m_bitmap), m_firstGap(copy.m_firstGap) {
00108 }
00109
00111 size_t GetNumWordsCovered() const {
00112 return std::count(m_bitmap.begin(), m_bitmap.end(), true);
00113 }
00114
00116 size_t GetFirstGapPos() const {
00117 return m_firstGap;
00118 }
00119
00120
00122 size_t GetLastGapPos() const {
00123 for (int pos = int(m_bitmap.size()) - 1 ; pos >= 0 ; pos--) {
00124 if (!m_bitmap[pos]) {
00125 return pos;
00126 }
00127 }
00128
00129 return NOT_FOUND;
00130 }
00131
00132
00134 size_t GetLastPos() const {
00135 for (int pos = int(m_bitmap.size()) - 1 ; pos >= 0 ; pos--) {
00136 if (m_bitmap[pos]) {
00137 return pos;
00138 }
00139 }
00140
00141 return NOT_FOUND;
00142 }
00143
00144 bool IsAdjacent(size_t startPos, size_t endPos) const;
00145
00147 bool GetValue(size_t pos) const {
00148 return bool(m_bitmap[pos]);
00149 }
00151 void SetValue( size_t pos, bool value ) {
00152 m_bitmap[pos] = value;
00153 UpdateFirstGap(pos, pos, value);
00154 }
00156 void
00157 SetValue( size_t startPos, size_t endPos, bool value ) {
00158 for(size_t pos = startPos ; pos <= endPos ; pos++) {
00159 m_bitmap[pos] = value;
00160 }
00161 UpdateFirstGap(startPos, endPos, value);
00162 }
00163
00164 void
00165 SetValue(WordsRange const& range, bool val) {
00166 SetValue(range.GetStartPos(), range.GetEndPos(), val);
00167 }
00168
00170 bool IsComplete() const {
00171 return GetSize() == GetNumWordsCovered();
00172 }
00174 bool Overlap(const WordsRange &compare) const {
00175 for (size_t pos = compare.GetStartPos() ; pos <= compare.GetEndPos() ; pos++) {
00176 if (m_bitmap[pos])
00177 return true;
00178 }
00179 return false;
00180 }
00182 size_t GetSize() const {
00183 return m_bitmap.size();
00184 }
00185
00186 inline size_t GetEdgeToTheLeftOf(size_t l) const {
00187 if (l == 0) return l;
00188 while (l && !m_bitmap[l-1]) {
00189 --l;
00190 }
00191 return l;
00192 }
00193
00194 inline size_t GetEdgeToTheRightOf(size_t r) const {
00195 if (r+1 == m_bitmap.size()) return r;
00196 return (
00197 std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) -
00198 m_bitmap.begin()
00199 ) - 1;
00200 }
00201
00202
00204 WordsBitmapID GetID() const {
00205 assert(m_bitmap.size() < (1<<16));
00206
00207 size_t start = GetFirstGapPos();
00208 if (start == NOT_FOUND) start = m_bitmap.size();
00209
00210 size_t end = GetLastPos();
00211 if (end == NOT_FOUND) end = 0;
00212
00213 assert(end < start || end-start <= 16);
00214 WordsBitmapID id = 0;
00215 for(size_t pos = end; pos > start; pos--) {
00216 id = id*2 + (int) GetValue(pos);
00217 }
00218 return id + (1<<16) * start;
00219 }
00220
00222 WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
00223 assert(m_bitmap.size() < (1<<16));
00224
00225 size_t start = GetFirstGapPos();
00226 if (start == NOT_FOUND) start = m_bitmap.size();
00227
00228 size_t end = GetLastPos();
00229 if (end == NOT_FOUND) end = 0;
00230
00231 if (start == startPos) start = endPos+1;
00232 if (end < endPos) end = endPos;
00233
00234 assert(end < start || end-start <= 16);
00235 WordsBitmapID id = 0;
00236 for(size_t pos = end; pos > start; pos--) {
00237 id = id*2;
00238 if (GetValue(pos) || (startPos<=pos && pos<=endPos))
00239 id++;
00240 }
00241 return id + (1<<16) * start;
00242 }
00243
00244
00245 size_t hash() const;
00246 bool operator==(const WordsBitmap& other) const;
00247 bool operator!=(const WordsBitmap& other) const {
00248 return !(*this == other);
00249 }
00250
00251 TO_STRING();
00252 };
00253
00254 }
00255 #endif