00001 #include <iostream>
00002 #include <string>
00003
00004 #include "moses/Phrase.h"
00005 #include "moses/FactorCollection.h"
00006 #include "moses/Timer.h"
00007 #include "moses/InputFileStream.h"
00008 #include "moses/TranslationModel/CompactPT/BlockHashIndex.h"
00009 #include "moses/TranslationModel/CompactPT/CanonicalHuffman.h"
00010 #include "moses/TranslationModel/CompactPT/StringVector.h"
00011
00012 using namespace Moses;
00013 using namespace std;
00014
00015 Timer timer;
00016
00017 FactorList m_factorsF, m_factorsE, m_factorsC;
00018
00019 BlockHashIndex m_hash(10, 16);
00020 size_t m_numScoreComponent;
00021 bool m_multipleScoreTrees;
00022 bool m_inMemory = false;
00023
00024 typedef CanonicalHuffman<float> ScoreTree;
00025 std::vector<ScoreTree*> m_scoreTrees;
00026
00027 StringVector<unsigned char, unsigned long, MmapAllocator> m_scoresMapped;
00028 StringVector<unsigned char, unsigned long, std::allocator> m_scoresMemory;
00029
00031 void Load(const string &filePath)
00032 {
00033 std::FILE* pFile = std::fopen(filePath.c_str(), "r");
00034 UTIL_THROW_IF2(pFile == NULL, "File " << filePath << " could not be opened");
00035
00036
00037 m_hash.Load(pFile);
00038
00039
00040
00041 size_t read = 0;
00042 read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile);
00043 read += std::fread(&m_multipleScoreTrees,
00044 sizeof(m_multipleScoreTrees), 1, pFile);
00045
00046 if(m_multipleScoreTrees) {
00047 m_scoreTrees.resize(m_numScoreComponent);
00048 for(size_t i = 0; i < m_numScoreComponent; i++)
00049 m_scoreTrees[i] = new CanonicalHuffman<float>(pFile);
00050 } else {
00051 m_scoreTrees.resize(1);
00052 m_scoreTrees[0] = new CanonicalHuffman<float>(pFile);
00053 }
00054
00055 if(m_inMemory)
00056 m_scoresMemory.load(pFile, false);
00057 else
00058 m_scoresMapped.load(pFile, true);
00059
00060 }
00061
00063
00064 std::string
00065 MakeKey(const std::string& f,
00066 const std::string& e,
00067 const std::string& c)
00068 {
00069 std::string key;
00070 if(!f.empty()) key += f;
00071 if(!m_factorsE.empty()) {
00072 if(!key.empty()) key += " ||| ";
00073 key += e;
00074 }
00075 if(!m_factorsC.empty()) {
00076 if(!key.empty()) key += " ||| ";
00077 key += c;
00078 }
00079 key += " ||| ";
00080 return key;
00081 }
00082
00084
00085 std::vector<float>
00086 GetScore(const std::string& f, const std::string& e, const std::string& c)
00087 {
00088 std::string key;
00089 std::vector<float> probs;
00090
00091 key = MakeKey(f, e, c);
00092
00093 size_t index = m_hash[key];
00094 if(m_hash.GetSize() != index) {
00095 std::string scoresString;
00096 if(m_inMemory)
00097 scoresString = m_scoresMemory[index].str();
00098 else
00099 scoresString = m_scoresMapped[index].str();
00100
00101
00102 BitWrapper<> bitStream(scoresString);
00103 for(size_t i = 0; i < m_numScoreComponent; i++) {
00104 float prob = m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream);
00105 prob = exp(prob);
00106 probs.push_back(prob);
00107 }
00108
00109 return probs;
00110 } else {
00111
00112 }
00113
00114 return probs;
00115 }
00116
00118
00119 int main(int argc, char** argv)
00120 {
00121 string ptPath(argv[1]);
00122 string roPath(argv[2]);
00123
00124
00125 m_factorsF.push_back(0);
00126 m_factorsE.push_back(0);
00127
00128 Load(roPath);
00129
00130
00131 InputFileStream ptStrm(ptPath);
00132
00133 string line;
00134 while (getline(ptStrm, line)) {
00135
00136 std::vector<std::string> columns(7);
00137 std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
00138 assert(toks.size() >= 2);
00139
00140 for (size_t i = 0; i < toks.size(); ++i) {
00141 columns[i] = Trim(toks[i]);
00142 }
00143
00144 std::vector<float> scores = GetScore(columns[0], columns[1], "");
00145
00146 if (scores.size()) {
00147 if (!columns[6].empty()) {
00148 columns[6] += " ";
00149 }
00150 columns[6] += "{{LexRO ";
00151 for (size_t i = 0; i < scores.size() - 1; ++i) {
00152 columns[6] += Moses::SPrint(scores[i]);
00153 columns[6] += " ";
00154 }
00155 columns[6] += Moses::SPrint(scores[scores.size() - 1]);
00156 columns[6] += "}}";
00157 }
00158
00159
00160 for (size_t i = 0; i < columns.size() - 1; ++i) {
00161 cout << columns[i] << " ||| ";
00162 }
00163 cout << columns[columns.size() - 1] << endl;
00164 }
00165
00166 }
00167
00168