00001 #include "lm/builder/output.hh"
00002
00003 #include "lm/common/model_buffer.hh"
00004 #include "lm/common/print.hh"
00005 #include "util/file_stream.hh"
00006 #include "util/stream/multi_stream.hh"
00007
00008 #include <iostream>
00009
00010 namespace lm { namespace builder {
00011
00012 OutputHook::~OutputHook() {}
00013
00014 Output::Output(StringPiece file_base, bool keep_buffer, bool output_q)
00015 : buffer_(file_base, keep_buffer, output_q) {}
00016
00017 void Output::SinkProbs(util::stream::Chains &chains) {
00018 Apply(PROB_PARALLEL_HOOK, chains);
00019 if (!buffer_.Keep() && !Have(PROB_SEQUENTIAL_HOOK)) {
00020 chains >> util::stream::kRecycle;
00021 chains.Wait(true);
00022 return;
00023 }
00024 buffer_.Sink(chains, header_.counts_pruned);
00025 chains >> util::stream::kRecycle;
00026 chains.Wait(false);
00027 if (Have(PROB_SEQUENTIAL_HOOK)) {
00028 std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl;
00029 buffer_.Source(chains);
00030 Apply(PROB_SEQUENTIAL_HOOK, chains);
00031 chains >> util::stream::kRecycle;
00032 chains.Wait(true);
00033 }
00034 }
00035
00036 void Output::Apply(HookType hook_type, util::stream::Chains &chains) {
00037 for (boost::ptr_vector<OutputHook>::iterator entry = outputs_[hook_type].begin(); entry != outputs_[hook_type].end(); ++entry) {
00038 entry->Sink(header_, VocabFile(), chains);
00039 }
00040 }
00041
00042 void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) {
00043 if (verbose_header_) {
00044 util::FileStream out(file_.get(), 50);
00045 out << "# Input file: " << info.input_file << '\n';
00046 out << "# Token count: " << info.token_count << '\n';
00047 out << "# Smoothing: Modified Kneser-Ney" << '\n';
00048 }
00049 chains >> PrintARPA(vocab_file, file_.get(), info.counts_pruned);
00050 }
00051
00052 }}