00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifdef WIN32
00023 #include <windows.h>
00024 #else
00025 #include <sys/times.h>
00026 #include <sys/resource.h>
00027 #endif
00028
00029 #include <cstring>
00030 #include <cctype>
00031 #include <algorithm>
00032 #include <stdio.h>
00033 #include <iostream>
00034 #include <iomanip>
00035 #include "TypeDef.h"
00036 #include "Util.h"
00037 #include "Timer.h"
00038 #include "util/file.hh"
00039
00040 using namespace std;
00041
00042 namespace Moses
00043 {
00044
00045
// Single shared timer instance; ResetUserTime(), PrintUserTime() and
// GetUserTime() in this file all operate on it.
Timer g_timer;
00047
/**
 * Return the directory to use for temporary files, always terminated by a
 * path separator.
 *
 * On Windows builds the TMP environment variable is consulted first, then
 * TEMP; if neither is set (or both are empty) the current directory "." is
 * used. A trailing backslash is appended when missing. On every other
 * platform the fixed path "/tmp/" is returned.
 */
std::string GetTempFolder()
{
#ifdef _WIN32
  // getenv() returns a null pointer for an unset variable; building a
  // std::string from a null pointer is undefined behaviour, so check first.
  const char *tmpPath = getenv("TMP");
  if (!tmpPath || *tmpPath == '\0')
    tmpPath = getenv("TEMP");

  std::string str = (tmpPath && *tmpPath != '\0') ? tmpPath : ".";
  // str is guaranteed non-empty here, so indexing the last character is safe.
  if (str[str.size() - 1] != '\\')
    str += "\\";
  return str;
#else
  return "/tmp/";
#endif
}
00060
/**
 * Return a copy of str with every character converted to lower case
 * according to std::tolower (i.e. the current C locale).
 *
 * Each character is converted through unsigned char before being handed to
 * std::tolower: passing a negative value (a byte >= 0x80 on platforms where
 * char is signed) other than EOF is undefined behaviour.
 */
const std::string ToLower(const std::string& str)
{
  std::string lc(str);
  for (std::string::iterator it = lc.begin(); it != lc.end(); ++it) {
    *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));
  }
  return lc;
}
00067
00068 template<>
00069 bool Scan<bool>(const std::string &input)
00070 {
00071 std::string lc = ToLower(input);
00072 if (lc == "yes" || lc == "y" || lc == "true" || lc == "1")
00073 return true;
00074 if (lc == "no" || lc == "n" || lc =="false" || lc == "0")
00075 return false;
00076 TRACE_ERR( "Scan<bool>: didn't understand '" << lc << "', returning false" << std::endl);
00077 return false;
00078 }
00079
/**
 * Report whether filePath can be opened for reading.
 *
 * The check is performed by constructing an input file stream on the path
 * and inspecting its failure state; the stream is closed again when the
 * function returns.
 */
bool FileExists(const std::string& filePath)
{
  std::ifstream probe(filePath.c_str());
  if (probe.fail()) {
    return false;
  }
  return true;
}
00085
/**
 * Return a copy of str with every leading and trailing character that
 * appears in dropChars removed. Characters in the interior are untouched.
 * If str is empty or consists solely of characters from dropChars, the
 * result is the empty string.
 */
const std::string Trim(const std::string& str, const std::string dropChars)
{
  const std::string::size_type first = str.find_first_not_of(dropChars);
  if (first == std::string::npos) {
    // Nothing to keep: empty input or only droppable characters.
    return std::string();
  }
  const std::string::size_type last = str.find_last_not_of(dropChars);
  return str.substr(first, last - first + 1);
}
00092
00093 void ResetUserTime()
00094 {
00095 g_timer.start();
00096 };
00097
00098 void PrintUserTime(const std::string &message)
00099 {
00100 g_timer.check(message.c_str());
00101 }
00102
00103 double GetUserTime()
00104 {
00105 return g_timer.get_elapsed_time();
00106 }
00107
00108 std::map<std::string, std::string> ProcessAndStripSGML(std::string &line)
00109 {
00110 std::map<std::string, std::string> meta;
00111 std::string lline = ToLower(line);
00112 if (lline.find("<seg")!=0) return meta;
00113 size_t close = lline.find(">");
00114 if (close == std::string::npos) return meta;
00115 size_t end = lline.find("</seg>");
00116 std::string seg = Trim(lline.substr(4, close-4));
00117 std::string text = line.substr(close+1, end - close - 1);
00118 for (size_t i = 1; i < seg.size(); i++) {
00119 if (seg[i] == '=' && seg[i-1] == ' ') {
00120 std::string less = seg.substr(0, i-1) + seg.substr(i);
00121 seg = less;
00122 i = 0;
00123 continue;
00124 }
00125 if (seg[i] == '=' && seg[i+1] == ' ') {
00126 std::string less = seg.substr(0, i+1);
00127 if (i+2 < seg.size()) less += seg.substr(i+2);
00128 seg = less;
00129 i = 0;
00130 continue;
00131 }
00132 }
00133 line = Trim(text);
00134 if (seg == "") return meta;
00135 for (size_t i = 1; i < seg.size(); i++) {
00136 if (seg[i] == '=') {
00137 std::string label = seg.substr(0, i);
00138 std::string val = seg.substr(i+1);
00139 if (val[0] == '"') {
00140 val = val.substr(1);
00141 size_t close = val.find('"');
00142 if (close == std::string::npos) {
00143 TRACE_ERR("SGML parse error: missing \"\n");
00144 seg = "";
00145 i = 0;
00146 } else {
00147 seg = val.substr(close+1);
00148 val = val.substr(0, close);
00149 i = 0;
00150 }
00151 } else {
00152 size_t close = val.find(' ');
00153 if (close == std::string::npos) {
00154 seg = "";
00155 i = 0;
00156 } else {
00157 seg = val.substr(close+1);
00158 val = val.substr(0, close);
00159 }
00160 }
00161 label = Trim(label);
00162 seg = Trim(seg);
00163 meta[label] = val;
00164 }
00165 }
00166 return meta;
00167 }
00168
00169 }
00170
00171