00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef MERT_FEATURE_STATS_H_
00010 #define MERT_FEATURE_STATS_H_
00011
00012 #include <cstring>
00013 #include <iostream>
00014 #include <map>
00015 #include <string>
00016 #include <vector>
00017
00018 #include <boost/unordered_map.hpp>
00019 #include "util/string_piece.hh"
00020 #include "Types.h"
00021
00022 namespace MosesTuning
00023 {
00024
00025
00026
00027 class SparseVector
00028 {
00029 public:
00030 typedef std::map<std::size_t,FeatureStatsType> fvector_t;
00031 typedef std::map<std::string, std::size_t> name2id_t;
00032 typedef std::vector<std::string> id2name_t;
00033
00034 FeatureStatsType get(const std::string& name) const;
00035 FeatureStatsType get(std::size_t id) const;
00036 void set(const std::string& name, FeatureStatsType value);
00037 void set(size_t id, FeatureStatsType value);
00038 void clear();
00039 void load(const std::string& file);
00040 std::size_t size() const {
00041 return m_fvector.size();
00042 }
00043
00044 void write(std::ostream& out, const std::string& sep = " ") const;
00045
00046 SparseVector& operator-=(const SparseVector& rhs);
00047 SparseVector& operator+=(const SparseVector& rhs);
00048 FeatureStatsType inner_product(const SparseVector& rhs) const;
00049
00050
00051 std::vector<std::size_t> feats() const;
00052 friend bool operator==(SparseVector const& item1, SparseVector const& item2);
00053 friend std::size_t hash_value(SparseVector const& item);
00054 static std::size_t encode(const std::string& feat);
00055 static std::string decode(std::size_t feat);
00056
00057
00058 private:
00059 static name2id_t m_name_to_id;
00060 static id2name_t m_id_to_name;
00061 fvector_t m_fvector;
00062 };
00063
00064 SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
00065 FeatureStatsType inner_product(const SparseVector& lhs, const SparseVector& rhs);
00066
00067 class FeatureStats
00068 {
00069 private:
00070 std::size_t m_available_size;
00071 std::size_t m_entries;
00072
00073
00074 featstats_t m_array;
00075 SparseVector m_map;
00076
00077 public:
00078 FeatureStats();
00079 explicit FeatureStats(const std::size_t size);
00080
00081 ~FeatureStats();
00082
00083
00084 FeatureStats(const FeatureStats &stats);
00085 FeatureStats& operator=(const FeatureStats &stats);
00086
00087 void Copy(const FeatureStats &stats);
00088
00089 bool isfull() const {
00090 return (m_entries < m_available_size) ? 0 : 1;
00091 }
00092 void expand();
00093 void add(FeatureStatsType v);
00094 void addSparse(const std::string& name, FeatureStatsType v);
00095
00096 void clear() {
00097 memset((void*)m_array, 0, GetArraySizeWithBytes());
00098 m_map.clear();
00099 }
00100
00101 void reset() {
00102 m_entries = 0;
00103 clear();
00104 }
00105
00106 FeatureStatsType get(std::size_t i) {
00107 return m_array[i];
00108 }
00109 FeatureStatsType get(std::size_t i)const {
00110 return m_array[i];
00111 }
00112 featstats_t getArray() const {
00113 return m_array;
00114 }
00115
00116 const SparseVector& getSparse() const {
00117 return m_map;
00118 }
00119
00120 void set(std::string &theString, const SparseVector& sparseWeights);
00121
00122 inline std::size_t bytes() const {
00123 return GetArraySizeWithBytes();
00124 }
00125
00126 std::size_t GetArraySizeWithBytes() const {
00127 return m_entries * sizeof(FeatureStatsType);
00128 }
00129
00130 std::size_t size() const {
00131 return m_entries;
00132 }
00133
00134 std::size_t available() const {
00135 return m_available_size;
00136 }
00137
00138 void savetxt(const std::string &file);
00139 void savetxt(std::ostream* os);
00140 void savebin(std::ostream* os);
00141 void savetxt();
00142
00143 void loadtxt(std::istream* is, const SparseVector& sparseWeights);
00144 void loadbin(std::istream* is);
00145
00149 friend std::ostream& operator<<(std::ostream& o, const FeatureStats& e);
00150 };
00151
00152 bool operator==(const FeatureStats& f1, const FeatureStats& f2);
00153
00154 }
00155
00156 #endif // MERT_FEATURE_STATS_H_