00001 #include "querying.hh"
00002 #include "util/exception.hh"
00003
00004 using namespace std;
00005
00006 namespace Moses
00007 {
00008
00009 QueryEngine::QueryEngine(const char * filepath)
00010 {
00011
00012
00013 std::string basepath(filepath);
00014 std::string path_to_config = basepath + "/config";
00015 std::string path_to_hashtable = basepath + "/probing_hash.dat";
00016 std::string path_to_source_vocabid = basepath + "/source_vocabids";
00017 std::string alignPath = basepath + "/Alignments.dat";
00018
00019 if (!FileExists(path_to_config)) {
00020 UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config);
00021 }
00022
00024 read_map(source_vocabids, path_to_source_vocabid.c_str());
00025
00026
00027 read_alignments(alignPath);
00028
00029
00030 boost::unordered_map<std::string, std::string> keyValue;
00031
00032 std::ifstream config(path_to_config.c_str());
00033 std::string line;
00034 while (getline(config, line)) {
00035 std::vector<std::string> toks = Tokenize(line, "\t");
00036 UTIL_THROW_IF2(toks.size() != 2, "Wrong config format:" << line);
00037 keyValue[ toks[0] ] = toks[1];
00038 }
00039
00040 bool found;
00041
00042 int version;
00043 found = Get(keyValue, "API_VERSION", version);
00044 if (!found) {
00045 std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl;
00046 } else if (version != API_VERSION) {
00047 std::cerr << "The ProbingPT API has changed. " << version << "!="
00048 << API_VERSION << " Please rebinarize your phrase tables." << std::endl;
00049 exit(EXIT_FAILURE);
00050 }
00051
00052
00053 int tablesize;
00054 found = Get(keyValue, "uniq_entries", tablesize);
00055 if (!found) {
00056 std::cerr << "uniq_entries not found" << std::endl;
00057 exit(EXIT_FAILURE);
00058 }
00059
00060
00061 found = Get(keyValue, "num_scores", num_scores);
00062 if (!found) {
00063 std::cerr << "num_scores not found" << std::endl;
00064 exit(EXIT_FAILURE);
00065 }
00066
00067
00068 found = Get(keyValue, "num_lex_scores", num_lex_scores);
00069 if (!found) {
00070 std::cerr << "num_lex_scores not found" << std::endl;
00071 exit(EXIT_FAILURE);
00072 }
00073
00074
00075 found = Get(keyValue, "log_prob", logProb);
00076 if (!found) {
00077 std::cerr << "logProb not found" << std::endl;
00078 exit(EXIT_FAILURE);
00079 }
00080
00081 config.close();
00082
00083
00084 table_filesize = Table::Size(tablesize, 1.2);
00085 mem = readTable(path_to_hashtable.c_str(), table_filesize);
00086 Table table_init(mem, table_filesize);
00087 table = table_init;
00088
00089 std::cerr << "Initialized successfully! " << std::endl;
00090 }
00091
00092 QueryEngine::~QueryEngine()
00093 {
00094
00095 munmap(mem, table_filesize);
00096
00097 }
00098
00099 uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const
00100 {
00101
00102
00103 return Moses::getKey(source_phrase, size);
00104 }
00105
00106 std::pair<bool, uint64_t> QueryEngine::query(uint64_t key)
00107 {
00108 std::pair<bool, uint64_t> ret;
00109
00110 const Entry * entry;
00111 ret.first = table.Find(key, entry);
00112 if (ret.first) {
00113 ret.second = entry->value;
00114 }
00115 return ret;
00116 }
00117
00118 void QueryEngine::read_alignments(const std::string &alignPath)
00119 {
00120 std::ifstream strm(alignPath.c_str());
00121
00122 string line;
00123 while (getline(strm, line)) {
00124 vector<string> toks = Tokenize(line, "\t ");
00125 UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file");
00126
00127 uint32_t alignInd = Scan<uint32_t>(toks[0]);
00128 if (alignInd >= alignColl.size()) {
00129 alignColl.resize(alignInd + 1);
00130 }
00131
00132 Alignments &aligns = alignColl[alignInd];
00133 for (size_t i = 1; i < toks.size(); ++i) {
00134 size_t pos = Scan<size_t>(toks[i]);
00135 aligns.push_back(pos);
00136 }
00137 }
00138 }
00139
00140 }
00141