00001 #include "BiLM_NPLM.h"
00002 #include "neuralLM.h"
00003 #include "vocabulary.h"
00004
00005 namespace Moses
00006 {
00007
00008 BilingualLM_NPLM::BilingualLM_NPLM(const std::string &line)
00009 : BilingualLM(line),
00010 premultiply(true),
00011 factored(false),
00012 neuralLM_cache(1000000)
00013 {
00014
00015 NULL_string = "<null>";
00016 FactorCollection& factorFactory = FactorCollection::Instance();
00017 const Factor* NULL_factor = factorFactory.AddFactor(NULL_string);
00018 NULL_word.SetFactor(0, NULL_factor);
00019 }
00020
00021 float BilingualLM_NPLM::Score(std::vector<int>& source_words, std::vector<int>& target_words) const
00022 {
00023 source_words.reserve(source_ngrams+target_ngrams+1);
00024 source_words.insert( source_words.end(), target_words.begin(), target_words.end() );
00025 return FloorScore(m_neuralLM->lookup_ngram(source_words));
00026 }
00027
00028 const Word& BilingualLM_NPLM::getNullWord() const
00029 {
00030 return NULL_word;
00031 }
00032
00033 int BilingualLM_NPLM::getNeuralLMId(const Word& word, bool is_source_word) const
00034 {
00035 initSharedPointer();
00036
00037
00038 boost::unordered_map<const Factor*, int> * neuralLMids;
00039 int unknown_word_id;
00040 if (is_source_word) {
00041 neuralLMids = &source_neuralLMids;
00042 unknown_word_id = source_unknown_word_id;
00043 } else {
00044 neuralLMids = &target_neuralLMids;
00045 unknown_word_id = target_unknown_word_id;
00046 }
00047
00048 boost::unordered_map<const Factor*, int>::iterator it;
00049 const Factor* factor = word.GetFactor(word_factortype);
00050
00051 it = neuralLMids->find(factor);
00052
00053 if (it != neuralLMids->end()) {
00054 return it->second;
00055 }
00056
00057 if (!factored) {
00058 return unknown_word_id;
00059 }
00060
00061 const Factor* pos_factor = word.GetFactor(pos_factortype);
00062 it = neuralLMids->find(pos_factor);
00063 if (it != neuralLMids->end()) {
00064 return it->second;
00065 } else {
00066 return unknown_word_id;
00067 }
00068 }
00069
00070 void BilingualLM_NPLM::initSharedPointer() const
00071 {
00072 if (!m_neuralLM.get()) {
00073 m_neuralLM.reset(new nplm::neuralLM(*m_neuralLM_shared));
00074 }
00075 }
00076
00077 void BilingualLM_NPLM::SetParameter(const std::string& key, const std::string& value)
00078 {
00079 if (key == "order") {
00080 target_ngrams = Scan<int>(value)-1;
00081 } else if (key == "source_window") {
00082 source_ngrams = Scan<int>(value)*2+1;
00083 } else if (key == "factored") {
00084 factored = Scan<bool>(value);
00085 } else if (key == "pos_factor") {
00086 pos_factortype = Scan<FactorType>(value);
00087 } else if (key == "source_vocab") {
00088 source_vocab_path = value;
00089 } else if (key == "target_vocab") {
00090 target_vocab_path = value;
00091 } else if (key == "cache_size") {
00092 neuralLM_cache = atoi(value.c_str());
00093 } else if (key == "premultiply") {
00094 premultiply = Scan<bool>(value);
00095
00096 } else if (key == "null_word") {
00097 NULL_string = value;
00098 NULL_overwrite = true;
00099 } else {
00100 BilingualLM::SetParameter(key, value);
00101 }
00102 }
00103
00104 void BilingualLM_NPLM::loadModel()
00105 {
00106 m_neuralLM_shared = new nplm::neuralLM();
00107 m_neuralLM_shared->read(m_filePath);
00108 if (premultiply) {
00109 m_neuralLM_shared->premultiply();
00110 }
00111
00112 int ngram_order = target_ngrams + source_ngrams + 1;
00113 UTIL_THROW_IF2(
00114 ngram_order != m_neuralLM_shared->get_order(),
00115 "Wrong order of neuralLM: LM has " << m_neuralLM_shared->get_order() <<
00116 ", but Moses expects " << ngram_order);
00117
00118 m_neuralLM_shared->set_cache(neuralLM_cache);
00119
00120
00121 FactorCollection& factorFactory = FactorCollection::Instance();
00122 int wordid_counter = 0;
00123 target_unknown_word_id = wordid_counter;
00124 std::string raw_word;
00125 std::ifstream infile_target(target_vocab_path.c_str());
00126 while (infile_target >> raw_word) {
00127 const Factor * factor = factorFactory.AddFactor(raw_word);
00128 target_neuralLMids.insert(std::make_pair(factor, wordid_counter));
00129 wordid_counter++;
00130 }
00131 infile_target.close();
00132 source_unknown_word_id = wordid_counter;
00133
00134
00135 std::ifstream infile_source(source_vocab_path.c_str());
00136 while (infile_source >> raw_word) {
00137 const Factor * factor = factorFactory.AddFactor(raw_word);
00138 source_neuralLMids.insert(std::make_pair(factor, wordid_counter));
00139 wordid_counter++;
00140 }
00141 infile_source.close();
00142
00143 }
00144
00145 }