00001 #ifndef LM_BUILDER_SORT__
00002 #define LM_BUILDER_SORT__
00003
00004 #include "lm/builder/multi_stream.hh"
00005 #include "lm/builder/ngram.hh"
00006 #include "lm/word_index.hh"
00007 #include "util/stream/sort.hh"
00008
00009 #include "util/stream/timer.hh"
00010
00011 #include <functional>
00012 #include <string>
00013
00014 namespace lm {
00015 namespace builder {
00016
00017 template <class Child> class Comparator : public std::binary_function<const void *, const void *, bool> {
00018 public:
00019 explicit Comparator(std::size_t order) : order_(order) {}
00020
00021 inline bool operator()(const void *lhs, const void *rhs) const {
00022 return static_cast<const Child*>(this)->Compare(static_cast<const WordIndex*>(lhs), static_cast<const WordIndex*>(rhs));
00023 }
00024
00025 std::size_t Order() const { return order_; }
00026
00027 protected:
00028 std::size_t order_;
00029 };
00030
00031 class SuffixOrder : public Comparator<SuffixOrder> {
00032 public:
00033 explicit SuffixOrder(std::size_t order) : Comparator<SuffixOrder>(order) {}
00034
00035 inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
00036 for (std::size_t i = order_ - 1; i != 0; --i) {
00037 if (lhs[i] != rhs[i])
00038 return lhs[i] < rhs[i];
00039 }
00040 return lhs[0] < rhs[0];
00041 }
00042
00043 static const unsigned kMatchOffset = 1;
00044 };
00045
00046 class ContextOrder : public Comparator<ContextOrder> {
00047 public:
00048 explicit ContextOrder(std::size_t order) : Comparator<ContextOrder>(order) {}
00049
00050 inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
00051 for (int i = order_ - 2; i >= 0; --i) {
00052 if (lhs[i] != rhs[i])
00053 return lhs[i] < rhs[i];
00054 }
00055 return lhs[order_ - 1] < rhs[order_ - 1];
00056 }
00057 };
00058
00059 class PrefixOrder : public Comparator<PrefixOrder> {
00060 public:
00061 explicit PrefixOrder(std::size_t order) : Comparator<PrefixOrder>(order) {}
00062
00063 inline bool Compare(const WordIndex *lhs, const WordIndex *rhs) const {
00064 for (std::size_t i = 0; i < order_; ++i) {
00065 if (lhs[i] != rhs[i])
00066 return lhs[i] < rhs[i];
00067 }
00068 return false;
00069 }
00070
00071 static const unsigned kMatchOffset = 0;
00072 };
00073
00074
00075 struct AddCombiner {
00076 bool operator()(void *first_void, const void *second_void, const SuffixOrder &compare) const {
00077 NGram first(first_void, compare.Order());
00078
00079 NGram second(const_cast<void*>(second_void), compare.Order());
00080 if (memcmp(first.begin(), second.begin(), sizeof(WordIndex) * compare.Order())) return false;
00081 first.Count() += second.Count();
00082 return true;
00083 }
00084 };
00085
00086
00087
00088 template <class Compare> class Sorts : public FixedArray<util::stream::Sort<Compare> > {
00089 private:
00090 typedef util::stream::Sort<Compare> S;
00091 typedef FixedArray<S> P;
00092
00093 public:
00094 void push_back(util::stream::Chain &chain, const util::stream::SortConfig &config, const Compare &compare) {
00095 new (P::end()) S(chain, config, compare);
00096 P::Constructed();
00097 }
00098 };
00099
00100 }
00101 }
00102
00103 #endif // LM_BUILDER_SORT__