00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_Util_h
00023 #define moses_Util_h
00024
00025 #include <iostream>
00026 #include <fstream>
00027 #include <sstream>
00028 #include <string>
00029 #include <vector>
00030 #include <cmath>
00031 #include <limits>
00032 #include <map>
00033 #include <cstdlib>
00034 #include <cstring>
00035 #include "util/check.hh"
00036 #include "TypeDef.h"
00037
00038 namespace Moses
00039 {
00040
//! TRACE_ERR(str): debug trace to standard error.
//! When TRACE_ENABLE is defined, `str` is streamed into std::cerr, so it may be
//! a chain such as `"x=" << x << std::endl`. Otherwise the macro expands to an
//! empty statement and `str` is never evaluated.
//! Both forms are wrapped in do { } while (false) so the macro behaves as a
//! single statement that requires a trailing semicolon.
00050 #ifdef TRACE_ENABLE
00051 #define TRACE_ERR(str) do { std::cerr << str; } while (false)
00052 #else
00053 #define TRACE_ERR(str) do {} while (false)
00054 #endif
00055
//! VERBOSE(level, str): forwards `str` to TRACE_ERR when
//! StaticData::Instance().GetVerboseLevel() >= level.
//! IFVERBOSE(level): an `if` header with the same test, guarding whatever
//! statement or block follows it.
//! Both macros need StaticData to be declared at the point of use; this header
//! does not include it.
//! NOTE(review): VERBOSE expands to a bare `{ ... }` block and IFVERBOSE to a
//! bare `if`, so neither is do/while(false)-wrapped. Be careful when using them
//! directly before an `else`.
00058 #define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } }
00059 #define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)
00060
//! Declared here, defined elsewhere.
//! Trim takes the string to process and a set of characters to drop (default:
//! space, tab, newline, carriage return) and returns a new string.
//! Presumably the characters are stripped from both ends - confirm against the definition.
//! ToLower returns a new string; presumably a lower-cased copy of `str` - confirm
//! against the definition.
00062 const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r");
00063 const std::string ToLower(const std::string& str);
00064
//! Render any value that supports operator<< on a std::ostream as a std::string.
//! \param input value to format
//! \return the text produced by streaming `input` with default stream formatting
template<typename T>
inline std::string SPrint(const T &input)
{
  std::ostringstream formatted;
  formatted << input;
  return formatted.str();
}
00073
//! Parse a value of type T from a string using stream extraction (operator>>).
//! Leading whitespace is skipped and parsing stops at the first character that
//! cannot belong to the value, as usual for formatted stream input.
//! \param input text to parse
//! \return the parsed value, or a value-initialized T (0 for arithmetic types)
//!         when nothing can be extracted, e.g. for an empty or all-blank string
template<typename T>
inline T Scan(const std::string &input)
{
  std::stringstream stream(input);
  // Value-initialize: if the stream hits end-of-input before extraction starts,
  // operator>> leaves its target untouched, and returning an uninitialized T
  // would be undefined behaviour.
  T ret = T();
  stream >> ret;
  return ret;
}

//! Specialization for std::string: returns the input verbatim, keeping any
//! whitespace (stream extraction would stop at the first blank).
template<>
inline std::string Scan<std::string>(const std::string &input)
{
  return input;
}

//! Specialization for bool; declared here, defined elsewhere.
template<>
bool Scan<bool>(const std::string &input);
00094
//! Convert every string in `input` to a T with the single-string Scan<T>.
//! \param input strings to parse
//! \return a vector with one parsed value per input string, in the same order
template<typename T>
inline std::vector<T> Scan(const std::vector< std::string > &input)
{
  const size_t count = input.size();
  std::vector<T> parsed(count);
  for (size_t idx = 0; idx != count; ++idx) {
    parsed[idx] = Scan<T>(input[idx]);
  }
  return parsed;
}
00105
//! Convert every string in `input` to a T and store the results in `output`.
//! `output` is resized to input.size(); its previous contents are overwritten.
//! \param output receives one parsed value per input string, in the same order
//! \param input  strings to parse with the single-string Scan<T>
template<typename T>
inline void Scan(std::vector<T> &output, const std::vector< std::string > &input)
{
  const size_t count = input.size();
  output.resize(count);
  for (size_t idx = 0; idx != count; ++idx) {
    output[idx] = Scan<T>(input[idx]);
  }
}
00115
//! Return a copy of `str` in which every non-overlapping occurrence of
//! `todelStr` is replaced by `toaddStr`, scanning left to right.
//! Inserted text is never rescanned, so the call terminates even when
//! `toaddStr` itself contains `todelStr`.
//! \param str      text to search
//! \param todelStr substring to remove; if empty, `str` is returned unchanged
//! \param toaddStr text inserted in place of each occurrence (may be empty)
//! \return the rewritten string
inline std::string Replace(const std::string& str,
                           const std::string& todelStr,
                           const std::string& toaddStr)
{
  std::string newStr = str;
  // An empty pattern matches at every position and would never finish.
  if (todelStr.empty()) {
    return newStr;
  }
  size_t pos = 0;
  while ((pos = newStr.find(todelStr, pos)) != std::string::npos) {
    newStr.replace(pos, todelStr.size(), toaddStr);
    // Resume right after the inserted text. Advancing by exactly one character
    // would rescan the replacement (endless growth when it contains the
    // pattern) and would skip an adjacent match when the replacement is empty.
    pos += toaddStr.size();
  }
  return newStr;
}
00128
//! Split `str` into tokens separated by any character in `delimiters`.
//! Runs of delimiters count as one separator and leading/trailing delimiters
//! are ignored, so no empty tokens are produced.
//! \param str        text to split
//! \param delimiters set of single-character separators (default: space and tab)
//! \return the tokens in order of appearance
inline std::vector<std::string> Tokenize(const std::string& str,
    const std::string& delimiters = " \t")
{
  typedef std::string::size_type size_type;
  const size_type none = std::string::npos;

  std::vector<std::string> tokens;

  // Start of the first token, then the delimiter that ends it.
  size_type tokenBegin = str.find_first_not_of(delimiters, 0);
  size_type tokenEnd = str.find_first_of(delimiters, tokenBegin);

  while (!(tokenEnd == none && tokenBegin == none)) {
    tokens.push_back(str.substr(tokenBegin, tokenEnd - tokenBegin));
    // Skip the delimiter run, then locate the end of the next token.
    tokenBegin = str.find_first_not_of(delimiters, tokenEnd);
    tokenEnd = str.find_first_of(delimiters, tokenBegin);
  }

  return tokens;
}
00152
00153
//! Split `str` on any character in `delimiters` and append the tokens to `output`.
//! Existing elements of `output` are kept. Runs of delimiters count as one
//! separator, so no empty tokens are appended.
//! \param output     vector that receives the tokens (appended at the end)
//! \param str        text to split
//! \param delimiters set of single-character separators (default: space and tab)
inline void Tokenize(std::vector<std::string> &output
                     , const std::string& str
                     , const std::string& delimiters = " \t")
{
  typedef std::string::size_type size_type;
  const size_type none = std::string::npos;

  // Start of the first token, then the delimiter that ends it.
  size_type tokenBegin = str.find_first_not_of(delimiters, 0);
  size_type tokenEnd = str.find_first_of(delimiters, tokenBegin);

  while (!(tokenEnd == none && tokenBegin == none)) {
    output.push_back(str.substr(tokenBegin, tokenEnd - tokenBegin));
    // Skip the delimiter run, then locate the end of the next token.
    tokenBegin = str.find_first_not_of(delimiters, tokenEnd);
    tokenEnd = str.find_first_of(delimiters, tokenBegin);
  }
}
00172
00174 template<typename T>
00175 inline std::vector<T> Tokenize( const std::string &input
00176 , const std::string& delimiters = " \t")
00177 {
00178 std::vector<std::string> stringVector = Tokenize(input, delimiters);
00179 return Scan<T>( stringVector );
00180 }
00181
00182
00183 template<typename T>
00184 inline void Tokenize( std::vector<T> &output
00185 , const std::string &input
00186 , const std::string& delimiters = " \t")
00187 {
00188 std::vector<std::string> stringVector;
00189 Tokenize(stringVector, input, delimiters);
00190 return Scan<T>(output, stringVector );
00191 }
00192
//! Split `str` on every occurrence of the whole string `separator`.
//! Unlike Tokenize, the separator is matched as a unit and empty fields are
//! kept, so the result always holds (number of separators + 1) elements.
//! Fields are returned untrimmed.
//! \param str       text to split
//! \param separator multi-character separator; if empty, `str` is returned as
//!                  the only token
//! \return the fields in order of appearance
inline std::vector<std::string> TokenizeMultiCharSeparator(
  const std::string& str,
  const std::string& separator)
{
  std::vector<std::string> tokens;

  // An empty separator matches at every position without advancing, which
  // would otherwise loop forever.
  if (separator.empty()) {
    tokens.push_back(str);
    return tokens;
  }

  size_t pos = 0;
  std::string::size_type nextPos = str.find(separator, pos);

  while (nextPos != std::string::npos) {
    tokens.push_back(str.substr(pos, nextPos - pos));
    pos = nextPos + separator.size();
    nextPos = str.find(separator, pos);
  }
  // Whatever follows the last separator (possibly empty) is the final field.
  tokens.push_back(str.substr(pos));

  return tokens;
}
00215
00216
00217 inline void TokenizeMultiCharSeparator(std::vector<std::string> &output
00218 ,const std::string& str
00219 ,const std::string& separator)
00220 {
00221 size_t pos = 0;
00222
00223 std::string::size_type nextPos = str.find(separator, pos);
00224
00225 while (nextPos != std::string::npos) {
00226
00227 output.push_back(Trim(str.substr(pos, nextPos - pos)));
00228
00229 pos = nextPos + separator.size();
00230
00231 nextPos = str.find(separator, pos);
00232 }
00233 output.push_back(Trim(str.substr(pos, nextPos - pos)));
00234 }
00235
00236
//! Concatenate the elements of `items`, formatted with operator<<, placing
//! `delimiter` between consecutive elements.
//! \param delimiter text inserted between elements (not before the first or
//!                  after the last)
//! \param items     values to join
//! \return the joined string; empty when `items` is empty
template <typename T>
std::string Join(const std::string& delimiter, const std::vector<T>& items)
{
  std::ostringstream outstr;
  // Iterators avoid comparing a 32-bit index against a size_t element count.
  typename std::vector<T>::const_iterator it = items.begin();
  const typename std::vector<T>::const_iterator last = items.end();
  if (it == last) return "";
  outstr << *it;
  for (++it; it != last; ++it)
    outstr << delimiter << *it;
  return outstr.str();
}
00250
//! Convert a probability to the log domain (natural logarithm).
//! \param prob probability to convert
//! \return log(prob)
inline float TransformScore(float prob)
{
  const float logProb = log(prob);
  return logProb;
}
00256
//! Convert a natural-log score back to the linear domain.
//! \param score log-domain score
//! \return exp(score)
inline float UntransformScore(float score)
{
  const float linear = exp(score);
  return linear;
}
00262
//! Scale a language-model score by ln(10) (2.30258509299405), i.e. convert a
//! base-10 logarithm to a natural logarithm.
//! \param irstScore score to rescale
//! \return irstScore * ln(10)
inline float TransformLMScore(float irstScore)
{
  const float naturalLogOfTen = 2.30258509299405f;
  return irstScore * naturalLogOfTen;
}
00268
//! Inverse of the ln(10) scaling: divide a score by ln(10) (2.30258509299405),
//! i.e. convert a natural logarithm to a base-10 logarithm.
//! \param logNScore natural-log score
//! \return logNScore / ln(10)
inline float UntransformLMScore(float logNScore)
{
  const float naturalLogOfTen = 2.30258509299405f;
  return logNScore / naturalLogOfTen;
}
00274
00276 inline float FloorScore(float logScore)
00277 {
00278 return (std::max)(logScore , LOWEST_SCORE);
00279 }
00280
00285 inline float CalcTranslationScore(const std::vector<float> &probVector,
00286 const std::vector<float> &weightT)
00287 {
00288 CHECK(weightT.size()==probVector.size());
00289 float rv=0.0;
00290 for(float const *sb=&probVector[0],*se=sb+probVector.size(),*wb=&weightT[0];
00291 sb!=se; ++sb, ++wb)
00292 rv += TransformScore(*sb) * (*wb);
00293 return rv;
00294 }
00295
//! TO_STRING(): place inside a class definition to declare
//! `std::string ToString() const;`. The expansion already ends with a semicolon.
00302 #define TO_STRING() std::string ToString() const;
00303
//! TO_STRING_BODY(CLASS): place in an implementation file to define
//! CLASS::ToString(). The generated body streams `*this` into a
//! std::stringstream and returns its contents, so an operator<< accepting CLASS
//! must be visible where the macro is expanded.
//! The last line of the macro ends with a line continuation, so the line
//! directly after it is still part of the macro and must stay blank.
00305 #define TO_STRING_BODY(CLASS) \
00306 std::string CLASS::ToString() const \
00307 { \
00308 std::stringstream out; \
00309 out << *this; \
00310 return out.str(); \
00311 } \
00312
//! Delete every pointer stored in a container, then empty the container.
//! \param coll container whose elements can be passed to `delete`; it is left
//!             empty on return
template<class COLL>
void RemoveAllInColl(COLL &coll)
{
  typename COLL::const_iterator cursor = coll.begin();
  const typename COLL::const_iterator last = coll.end();
  while (cursor != last) {
    delete *cursor;
    ++cursor;
  }
  coll.clear();
}
00322
//! Declared here, defined elsewhere.
//! Presumably returns the path of a directory for temporary files - confirm
//! against the definition.
00324 std::string GetTempFolder();
//! Declared here, defined elsewhere. Takes a file path and returns a string.
//! Presumably the MD5 digest of that file's contents - confirm against the definition.
00326 std::string GetMD5Hash(const std::string &filePath);
00327
//! Release unused capacity of a container using the copy-and-swap idiom.
//! When capacity() exceeds size(), the container is replaced by a copy of
//! itself. Afterwards the function CHECKs that capacity() equals size().
//! \param v container providing capacity(), size(), a copy constructor and swap()
template<typename T>
inline void ShrinkToFit(T& v)
{
  const bool hasSlack = v.capacity() > v.size();
  if (hasSlack) {
    // The copy is built from the current elements; swapping hands its storage
    // to v, and the old storage is freed when `compact` goes out of scope.
    T compact(v);
    compact.swap(v);
  }
  CHECK(v.capacity()==v.size());
}
00336
//! Declared here, defined elsewhere. Presumably reports whether `filePath`
//! names an existing file - confirm against the definition.
00337 bool FileExists(const std::string& filePath);
00338
00339
//! User-time helpers, declared here and defined elsewhere.
//! NOTE(review): judging by the names, ResetUserTime restarts a timer,
//! PrintUserTime reports the elapsed time together with `message`, and
//! GetUserTime returns the elapsed time as a double. The unit and the output
//! destination are not visible here - confirm against the definitions.
00340 void ResetUserTime();
00341 void PrintUserTime(const std::string &message);
00342 double GetUserTime();
00343
00344
//! Declared here, defined elsewhere. `line` is taken by non-const reference, so
//! the callee can modify it, and a string-to-string map is returned.
//! NOTE(review): presumably strips SGML markup from `line` and returns the
//! attribute name/value pairs it found - confirm against the definition.
00345 std::map<std::string, std::string> ProcessAndStripSGML(std::string &line);
00346
//! Extract the next delimiter-separated token of `str`, starting at `first_pos`.
//! Delimiters at `first_pos` are skipped first. On return `first_pos` holds the
//! start of the following token, so repeated calls walk through the string.
//! \param str        text to read from
//! \param first_pos  in: offset at which to start looking; out: offset of the
//!                   next token, or std::string::npos converted to int (-1 on
//!                   common platforms) when no further token exists
//! \param delimiters set of single-character separators (default: space and tab)
//! \return the token found, or an empty string when none is left
inline std::string GetFirstString(const std::string& str, int& first_pos, const std::string& delimiters = " \t")
{
  typedef std::string::size_type size_type;
  const size_type none = std::string::npos;

  // Start of the token (after any leading delimiters) and the delimiter ending it.
  size_type cursor = str.find_first_not_of(delimiters, first_pos);
  const size_type tokenEnd = str.find_first_of(delimiters, cursor);

  std::string first_str;
  const bool nothingLeft = (tokenEnd == none) && (cursor == none);
  if (!nothingLeft) {
    first_str = str.substr(cursor, tokenEnd - cursor);
    // Move past the delimiter run so the caller can resume at the next token.
    cursor = str.find_first_not_of(delimiters, tokenEnd);
  }

  first_pos = cursor;
  return first_str;
}
00373
//! Add two values given as natural logarithms: returns log(exp(log_a) + exp(log_b)).
//! The larger argument is factored out so that exp() is only applied to a
//! non-positive difference and does not overflow.
//! \param log_a logarithm of the first addend
//! \param log_b logarithm of the second addend
//! \return logarithm of the sum; equal infinite arguments (e.g. both -infinity,
//!         i.e. log 0 + log 0) return that infinity rather than NaN
template<class T>
T log_sum (T log_a, T log_b)
{
  const bool aIsSmaller = log_a < log_b;
  const T hi = aIsSmaller ? log_b : log_a;
  const T lo = aIsSmaller ? log_a : log_b;
  // For equal arguments the difference is zero by definition. Computing it as
  // lo - hi would give inf - inf = NaN when both are the same infinity.
  const T diff = (lo == hi) ? T(0) : lo - hi;
  return hi + log ( 1 + exp ( diff ));
}
00385
00386 }
00387
00388 #endif