00001 #ifndef LM_NGRAM_QUERY__
00002 #define LM_NGRAM_QUERY__
00003
#include "lm/enumerate_vocab.hh"
#include "lm/model.hh"
#include "util/usage.hh"

#include <cctype>
#include <cstdlib>
#include <iostream>
#include <istream>
#include <ostream>
#include <string>
00013
00014 namespace lm {
00015 namespace ngram {
00016
00017 template <class Model> void Query(const Model &model, bool sentence_context, std::istream &in_stream, std::ostream &out_stream) {
00018 std::cerr << "Loading statistics:\n";
00019 util::PrintUsage(std::cerr);
00020 typename Model::State state, out;
00021 lm::FullScoreReturn ret;
00022 std::string word;
00023
00024 while (in_stream) {
00025 state = sentence_context ? model.BeginSentenceState() : model.NullContextState();
00026 float total = 0.0;
00027 bool got = false;
00028 unsigned int oov = 0;
00029 while (in_stream >> word) {
00030 got = true;
00031 lm::WordIndex vocab = model.GetVocabulary().Index(word);
00032 if (vocab == 0) ++oov;
00033 ret = model.FullScore(state, vocab, out);
00034 total += ret.prob;
00035 out_stream << word << '=' << vocab << ' ' << static_cast<unsigned int>(ret.ngram_length) << ' ' << ret.prob << '\t';
00036 state = out;
00037 char c;
00038 while (true) {
00039 c = in_stream.get();
00040 if (!in_stream) break;
00041 if (c == '\n') break;
00042 if (!isspace(c)) {
00043 in_stream.unget();
00044 break;
00045 }
00046 }
00047 if (c == '\n') break;
00048 }
00049 if (!got && !in_stream) break;
00050 if (sentence_context) {
00051 ret = model.FullScore(state, model.GetVocabulary().EndSentence(), out);
00052 total += ret.prob;
00053 out_stream << "</s>=" << model.GetVocabulary().EndSentence() << ' ' << static_cast<unsigned int>(ret.ngram_length) << ' ' << ret.prob << '\t';
00054 }
00055 out_stream << "Total: " << total << " OOV: " << oov << '\n';
00056 }
00057 std::cerr << "After queries:\n";
00058 util::PrintUsage(std::cerr);
00059 }
00060
00061 template <class M> void Query(const char *file, bool sentence_context, std::istream &in_stream, std::ostream &out_stream) {
00062 Config config;
00063 M model(file, config);
00064 Query(model, sentence_context, in_stream, out_stream);
00065 }
00066
00067 }
00068 }
00069
00070 #endif // LM_NGRAM_QUERY__
00071
00072