00001 #pragma once
00002
00003 #include <boost/unordered_set.hpp>
00004 #include <boost/unordered_map.hpp>
00005 #include <cstdio>
00006 #include <sstream>
00007 #include <fstream>
00008 #include <iostream>
00009 #include <string>
00010 #include <queue>
00011 #include <sys/stat.h>
00012
00013 #include "hash.hh"
00014 #include "probing_hash_utils.hh"
00015
00016 #include "util/file_piece.hh"
00017 #include "util/file.hh"
00018 #include "vocabid.hh"
00019
00020 namespace Moses
00021 {
/// A source phrase represented as a sequence of 64-bit ids, one per element.
/// NOTE(review): presumably each element is a vocabulary id for one source
/// word (see vocabid.hh) -- confirm against the callers.
typedef std::vector<uint64_t> SourcePhrase;
00023
00024
00025 class Node
00026 {
00027 typedef boost::unordered_map<uint64_t, Node> Children;
00028 Children m_children;
00029
00030 public:
00031 uint64_t key;
00032 bool done;
00033
00034 Node()
00035 :done(false)
00036 {}
00037
00038 void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0);
00039 void Write(Table &table);
00040 };
00041
00042
/**
 * Build a probing phrase table. Declaration only; the definition is not in
 * this header.
 *
 * NOTE(review): the parameter meanings below are inferred from the names
 * alone -- confirm against the implementation.
 *
 * @param phrasetable_path  presumably the input phrase table to read
 * @param basepath          presumably the location the output is written to
 * @param num_scores        presumably the number of scores per entry
 * @param num_lex_scores    presumably the number of lexicalised scores
 * @param log_prob          presumably whether scores are stored as log values
 * @param max_cache_size    presumably an upper bound on cached source phrases
 * @param scfg              presumably selects SCFG-style input
 */
void createProbingPT(
  const std::string &phrasetable_path,
  const std::string &basepath,
  int num_scores,
  int num_lex_scores,
  bool log_prob,
  int max_cache_size,
  bool scfg);
/// Returns a single 64-bit key for the given sequence of 64-bit ids.
/// Declaration only; how the key is derived is defined elsewhere.
uint64_t getKey(const std::vector<uint64_t>& source_phrase);
00047
/// Returns a new id sequence built from `vocabid_source` and `endPos`.
/// Declaration only.
/// NOTE(review): presumably the result is the prefix of `vocabid_source`
/// ending at `endPos`; whether `endPos` is inclusive cannot be told from
/// here -- confirm against the implementation.
std::vector<uint64_t> CreatePrefix(
  const std::vector<uint64_t>& vocabid_source,
  size_t endPos);
00049
/**
 * Render a vector as text for debugging.
 *
 * Every element is streamed with operator<< and followed by a single space,
 * so a non-empty result ends with a trailing space and an empty vector
 * produces an empty string.
 */
template<typename T>
std::string Debug(const std::vector<T> &vec)
{
  std::ostringstream out;
  typedef typename std::vector<T>::const_iterator Iter;
  for (Iter it = vec.begin(); it != vec.end(); ++it) {
    out << *it << " ";
  }
  return out.str();
}
00059
/// Returns a count computed from the file at `path`. Declaration only.
/// NOTE(review): by its name this counts distinct source phrases in the
/// file -- confirm against the implementation.
size_t countUniqueSource(const std::string& path);
00061
/**
 * One cache candidate: a source string, its 64-bit key and a count.
 *
 * The ordering is deliberately reversed: an item compares "less" when its
 * count is LARGER. In a std::priority_queue ordered through this operator,
 * the item with the smallest count therefore sits on top.
 */
class CacheItem
{
public:
  std::string source;
  uint64_t sourceKey;
  float count;

  CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount)
    : source(vSource),
      sourceKey(vSourceKey),
      count(vCount)
  {
  }

  /// True when this item has a strictly higher count than `other`.
  bool operator<(const CacheItem &other) const
  {
    return other.count < count;
  }
};
00078
00079 class CacheItemOrderer
00080 {
00081 public:
00082 bool operator()(const CacheItem* a, const CacheItem* b) const {
00083 return (*a) < (*b);
00084 }
00085 };
00086
00087 void serialize_cache(
00088 std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
00089 const std::string &path, float totalSourceCount);
00090
00091 }
00092