00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef MERT_HYPERGRAPH_H
00021 #define MERT_HYPERGRAPH_H
00022
00023 #include <string>
00024
00025 #include <boost/noncopyable.hpp>
00026 #include <boost/scoped_array.hpp>
00027 #include <boost/shared_ptr.hpp>
00028 #include <boost/functional/hash/hash.hpp>
00029 #include <boost/unordered_map.hpp>
00030
00031
00032 #include "util/exception.hh"
00033 #include "util/file_piece.hh"
00034 #include "util/murmur_hash.hh"
00035 #include "util/pool.hh"
00036 #include "util/string_piece.hh"
00037
00038 #include "FeatureStats.h"
00039
00040 namespace MosesTuning
00041 {
00042
00043 typedef unsigned int WordIndex;
00044 const WordIndex kMaxWordIndex = UINT_MAX;
00045 const FeatureStatsType kMinScore = -1e10;
00046
00047 template <class T> class FixedAllocator : boost::noncopyable
00048 {
00049 public:
00050 FixedAllocator() : current_(NULL), end_(NULL) {}
00051
00052 void Init(std::size_t count) {
00053 assert(!current_);
00054 array_.reset(new T[count]);
00055 current_ = array_.get();
00056 end_ = current_ + count;
00057 }
00058
00059 T &operator[](std::size_t idx) {
00060 return array_.get()[idx];
00061 }
00062 const T &operator[](std::size_t idx) const {
00063 return array_.get()[idx];
00064 }
00065
00066 T *New() {
00067 T *ret = current_++;
00068 UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end");
00069 return ret;
00070 }
00071
00072 std::size_t Capacity() const {
00073 return end_ - array_.get();
00074 }
00075
00076 std::size_t Size() const {
00077 return current_ - array_.get();
00078 }
00079
00080 private:
00081 boost::scoped_array<T> array_;
00082 T *current_, *end_;
00083 };
00084
00085
00086 class Vocab
00087 {
00088 public:
00089 Vocab();
00090
00091 typedef std::pair<const char *const, WordIndex> Entry;
00092
00093 const Entry &FindOrAdd(const StringPiece &str);
00094
00095 const Entry& Bos() const {
00096 return bos_;
00097 }
00098
00099 const Entry& Eos() const {
00100 return eos_;
00101 }
00102
00103 private:
00104 util::Pool piece_backing_;
00105
00106 struct Hash : public std::unary_function<const char *, std::size_t> {
00107 std::size_t operator()(StringPiece str) const {
00108 return util::MurmurHashNative(str.data(), str.size());
00109 }
00110 };
00111
00112 struct Equals : public std::binary_function<const char *, const char *, bool> {
00113 bool operator()(StringPiece first, StringPiece second) const {
00114 return first == second;
00115 }
00116 };
00117
00118 typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
00119 Map map_;
00120 Entry eos_;
00121 Entry bos_;
00122
00123 };
00124
00125 typedef std::vector<const Vocab::Entry*> WordVec;
00126
00127 class Vertex;
00128
00129
00130 typedef boost::shared_ptr<SparseVector> FeaturePtr;
00131
00135 class Edge
00136 {
00137 public:
00138 Edge() {
00139 features_.reset(new SparseVector());
00140 }
00141
00142 void AddWord(const Vocab::Entry *word) {
00143 words_.push_back(word);
00144 }
00145
00146 void AddChild(size_t child) {
00147 children_.push_back(child);
00148 }
00149
00150 void AddFeature(const StringPiece& name, FeatureStatsType value) {
00151
00152 features_->set(name.as_string(),value);
00153 }
00154
00155
00156 const WordVec &Words() const {
00157 return words_;
00158 }
00159
00160 const FeaturePtr& Features() const {
00161 return features_;
00162 }
00163
00164 void SetFeatures(const FeaturePtr& features) {
00165 features_ = features;
00166 }
00167
00168 const std::vector<size_t>& Children() const {
00169 return children_;
00170 }
00171
00172 FeatureStatsType GetScore(const SparseVector& weights) const {
00173 return inner_product(*(features_.get()), weights);
00174 }
00175
00176 private:
00177
00178 std::vector<const Vocab::Entry*> words_;
00179 std::vector<size_t> children_;
00180 boost::shared_ptr<SparseVector> features_;
00181 };
00182
00183
00184
00185
00186 class Vertex
00187 {
00188 public:
00189 Vertex() : sourceCovered_(0) {}
00190
00191 void AddEdge(const Edge* edge) {
00192 incoming_.push_back(edge);
00193 }
00194
00195 void SetSourceCovered(size_t sourceCovered) {
00196 sourceCovered_ = sourceCovered;
00197 }
00198
00199 const std::vector<const Edge*>& GetIncoming() const {
00200 return incoming_;
00201 }
00202
00203 size_t SourceCovered() const {
00204 return sourceCovered_;
00205 }
00206
00207 private:
00208 std::vector<const Edge*> incoming_;
00209 size_t sourceCovered_;
00210 };
00211
00212
00213 class Graph : boost::noncopyable
00214 {
00215 public:
00216 Graph(Vocab& vocab) : vocab_(vocab) {}
00217
00218 void SetCounts(std::size_t vertices, std::size_t edges) {
00219 vertices_.Init(vertices);
00220 edges_.Init(edges);
00221 }
00222
00223 Vocab &MutableVocab() {
00224 return vocab_;
00225 }
00226
00227 Edge *NewEdge() {
00228 return edges_.New();
00229 }
00230
00231 Vertex *NewVertex() {
00232 return vertices_.New();
00233 }
00234
00235 const Vertex &GetVertex(std::size_t index) const {
00236 return vertices_[index];
00237 }
00238
00239 Edge &GetEdge(std::size_t index) {
00240 return edges_[index];
00241 }
00242
00243
00244
00245
00246 void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
00247
00248 std::size_t VertexSize() const {
00249 return vertices_.Size();
00250 }
00251 std::size_t EdgeSize() const {
00252 return edges_.Size();
00253 }
00254
00255 bool IsBoundary(const Vocab::Entry* word) const {
00256 return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
00257 }
00258
00259 private:
00260 FixedAllocator<Edge> edges_;
00261 FixedAllocator<Vertex> vertices_;
00262 Vocab& vocab_;
00263 };
00264
00265 class HypergraphException : public util::Exception
00266 {
00267 public:
00268 HypergraphException() {}
00269 ~HypergraphException() throw() {}
00270 };
00271
00272
00273 void ReadGraph(util::FilePiece &from, Graph &graph);
00274
00275
00276 };
00277
00278 #endif