00001 #ifndef LM_BUILDER_DEBUG_PRINT_H
00002 #define LM_BUILDER_DEBUG_PRINT_H
00003
00004 #include "lm/builder/payload.hh"
00005 #include "lm/common/print.hh"
00006 #include "lm/common/ngram_stream.hh"
00007 #include "util/file_stream.hh"
00008 #include "util/file.hh"
00009
00010 #include <boost/lexical_cast.hpp>
00011
00012 namespace lm { namespace builder {
00013
00014 template <class T> void PrintPayload(util::FileStream &to, const BuildingPayload &payload);
00015 template <> inline void PrintPayload<uint64_t>(util::FileStream &to, const BuildingPayload &payload) {
00016 to << payload.count;
00017 }
00018 template <> inline void PrintPayload<Uninterpolated>(util::FileStream &to, const BuildingPayload &payload) {
00019 to << log10(payload.uninterp.prob) << ' ' << log10(payload.uninterp.gamma);
00020 }
00021 template <> inline void PrintPayload<ProbBackoff>(util::FileStream &to, const BuildingPayload &payload) {
00022 to << payload.complete.prob << ' ' << payload.complete.backoff;
00023 }
00024
00025
00026 template <class V> class Print {
00027 public:
00028 static void DumpSeparateFiles(const VocabReconstitute &vocab, const std::string &file_base, util::stream::Chains &chains) {
00029 for (unsigned int i = 0; i < chains.size(); ++i) {
00030 std::string file(file_base + boost::lexical_cast<std::string>(i));
00031 chains[i] >> Print(vocab, util::CreateOrThrow(file.c_str()));
00032 }
00033 }
00034
00035 explicit Print(const VocabReconstitute &vocab, int fd) : vocab_(vocab), to_(fd) {}
00036
00037 void Run(const util::stream::ChainPositions &chains) {
00038 util::scoped_fd fd(to_);
00039 util::FileStream out(to_);
00040 NGramStreams<BuildingPayload> streams(chains);
00041 for (NGramStream<BuildingPayload> *s = streams.begin(); s != streams.end(); ++s) {
00042 DumpStream(*s, out);
00043 }
00044 }
00045
00046 void Run(const util::stream::ChainPosition &position) {
00047 util::scoped_fd fd(to_);
00048 util::FileStream out(to_);
00049 NGramStream<BuildingPayload> stream(position);
00050 DumpStream(stream, out);
00051 }
00052
00053 private:
00054 void DumpStream(NGramStream<BuildingPayload> &stream, util::FileStream &to) {
00055 for (; stream; ++stream) {
00056 PrintPayload<V>(to, stream->Value());
00057 for (const WordIndex *w = stream->begin(); w != stream->end(); ++w) {
00058 to << ' ' << vocab_.Lookup(*w) << '=' << *w;
00059 }
00060 to << '\n';
00061 }
00062 }
00063
00064 const VocabReconstitute &vocab_;
00065 int to_;
00066 };
00067
00068 }}
00069
00070 #endif // LM_BUILDER_DEBUG_PRINT_H