#include <ug_tsa_tree_iterator.h>
Classes | |
struct | SortByApproximateCount |
Public Types | |
typedef TKN | Token |
Public Member Functions | |
virtual | ~TSA_tree_iterator () |
TSA_tree_iterator (TSA< Token > const *s) | |
TSA_tree_iterator (TSA< Token > const *s, TSA_tree_iterator< Token > const &other) | |
TSA_tree_iterator (TSA< Token > const *r, id_type const *s, size_t const len) | |
TSA_tree_iterator (TSA< Token > const *s, Token const *kstart, size_t const len, bool full_match_only=true) | |
TSA_tree_iterator (TSA< Token > const *s, Token const *kstart, Token const *kend, bool full_match_only=true) | |
TSA_tree_iterator (TSA< Token > const *s, TokenIndex const &V, std::string const &key) | |
char const * | lower_bound (int p) const |
char const * | upper_bound (int p) const |
size_t | size () const |
Token const * | getToken (int p) const |
id_type | getSid () const |
ushort | getOffset (int p) const |
size_t | sntCnt (int p=-1) const |
size_t | rawCnt (int p=-1) const |
::uint64_t | getPid (int p=-1) const |
virtual bool | extend (Token const &id) |
virtual bool | extend (id_type id) |
virtual bool | down () |
virtual bool | over () |
virtual bool | up () |
std::string | str (TokenIndex const *V=NULL, int start=0, int stop=0) const |
bool | match (Token const *start, Token const *stop) const |
bool | match (id_type sid) const |
count_type | fillBitSet (boost::dynamic_bitset< uint64_t > &bitset) const |
count_type | markEndOfSequence (Token const *start, Token const *stop, boost::dynamic_bitset< uint64_t > &dest) const |
count_type | markSequence (Token const *start, Token const *stop, bitvector &dest) const |
count_type | markSentences (boost::dynamic_bitset< uint64_t > &bitset) const |
count_type | markOccurrences (boost::dynamic_bitset< uint64_t > &bitset, bool markOnlyStartPosition=false) const |
count_type | markOccurrences (std::vector< ushort > &dest) const |
::uint64_t | getSequenceId () const |
bitvector & | filterSentences (bitvector &foo) const |
void | tfAndRoot (bitvector const &ref, bitvector const &snt, bitvector &dest) const |
A special auxiliary function for finding trees. | |
size_t | arrayByteSpanSize (int p=-1) const |
double | ca (int p=-1) const |
double | approxOccurrenceCount (int p=-1) const |
size_t | grow (Token const *t, Token const *stop) |
size_t | grow (Token const *snt, bitvector const &cov) |
SPTR< std::vector< typename ttrack::Position > > | randomSample (int level, size_t N) const |
Randomly selects up to N occurrences of the sequence. | |
Public Attributes | |
TSA< Token > const * | root |
Protected Member Functions | |
void | showBounds (std::ostream &out) const |
Protected Attributes | |
std::vector< char const * > | lower |
std::vector< char const * > | upper |
Definition at line 46 of file ug_tsa_tree_iterator.h.
typedef TKN ugdiss::TSA_tree_iterator< TKN >::Token |
Definition at line 56 of file ug_tsa_tree_iterator.h.
virtual ugdiss::TSA_tree_iterator< TKN >::~TSA_tree_iterator | ( | ) | [inline, virtual] |
Definition at line 58 of file ug_tsa_tree_iterator.h.
ugdiss::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s | ) | [inline] |
Definition at line 336 of file ug_tsa_tree_iterator.h.
ugdiss::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
TSA_tree_iterator< Token > const & | other | |||
) | [inline] |
Definition at line 342 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA_tree_iterator< TKN >::getToken(), and ugdiss::TSA_tree_iterator< TKN >::size().
ugdiss::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | r, | |
id_type const * | s, | |||
size_t const | len | |||
) | [inline] |
Definition at line 355 of file ug_tsa_tree_iterator.h.
ugdiss::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
Token const * | kstart, | |||
size_t const | len, | |||
bool | full_match_only = true | |||
) | [inline] |
Definition at line 420 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA_tree_iterator< TKN >::lower, ugdiss::TSA_tree_iterator< TKN >::root, and ugdiss::TSA_tree_iterator< TKN >::upper.
ugdiss::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
Token const * | kstart, | |||
Token const * | kend, | |||
bool | full_match_only = true | |||
) | [inline] |
Definition at line 439 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA_tree_iterator< TKN >::lower, and ugdiss::TSA_tree_iterator< TKN >::upper.
ugdiss::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
TokenIndex const & | V, | |||
std::string const & | key | |||
) | [inline] |
Definition at line 368 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA_tree_iterator< TKN >::lower, and ugdiss::TSA_tree_iterator< TKN >::upper.
double ugdiss::TSA_tree_iterator< TKN >::approxOccurrenceCount | ( | int | p = -1 |
) | const [inline] |
Definition at line 173 of file ug_tsa_tree_iterator.h.
Referenced by Moses::bitext::Bitext< TKN >::agenda::job::check_sample_distribution(), dump(), getoccs(), lookup_phrases(), Moses::bitext::Bitext< TKN >::prep2(), and show().
size_t ugdiss::TSA_tree_iterator< TKN >::arrayByteSpanSize | ( | int | p = -1 |
) | const [inline] |
Definition at line 139 of file ug_tsa_tree_iterator.h.
Referenced by ugdiss::TSA_tree_iterator< TKN >::SortByApproximateCount::operator()().
double ugdiss::TSA_tree_iterator< TKN >::ca | ( | int | p = -1 |
) | const [inline] |
Definition at line 159 of file ug_tsa_tree_iterator.h.
bool ugdiss::TSA_tree_iterator< TSA_TYPE >::down | ( | ) | [inline, virtual] |
Definition at line 204 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA< TKN >::arrayEnd(), ugdiss::TSA< TKN >::arrayStart(), ugdiss::TSA< TKN >::corpus, ugdiss::TSA< TKN >::find_end(), ugdiss::TSA< TKN >::find_longer(), ugdiss::TSA_tree_iterator< TKN >::getToken(), ugdiss::TSA< TKN >::getUpperBound(), ugdiss::TSA_tree_iterator< TKN >::lower, ugdiss::next(), ugdiss::TSA< TKN >::readEntry(), ugdiss::TSA_tree_iterator< TKN >::root, ugdiss::TSA_tree_iterator< TKN >::size(), u(), ugdiss::TSA_tree_iterator< TKN >::up(), ugdiss::TSA_tree_iterator< TKN >::upper, and ugdiss::TSA< TKN >::upper_bound().
Referenced by dump().
bool ugdiss::TSA_tree_iterator< Token >::extend | ( | id_type | id | ) | [inline, virtual] |
Definition at line 460 of file ug_tsa_tree_iterator.h.
bool ugdiss::TSA_tree_iterator< Token >::extend | ( | Token const & | id | ) | [inline, virtual] |
Definition at line 469 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA< TKN >::corpus, ugdiss::TSA< TKN >::find_end(), ugdiss::TSA< TKN >::find_start(), ugdiss::TSA< TKN >::getLowerBound(), ugdiss::TSA_tree_iterator< TKN >::getToken(), ugdiss::TSA< TKN >::getUpperBound(), I, ugdiss::TSA_tree_iterator< TKN >::lower, ugdiss::TSA< TKN >::readEntry(), ugdiss::TSA_tree_iterator< TKN >::root, and ugdiss::TSA_tree_iterator< TKN >::upper.
Referenced by lookup_phrases(), and ugdiss::TSA_tree_iterator< TKN >::TSA_tree_iterator().
count_type ugdiss::TSA_tree_iterator< Token >::fillBitSet | ( | boost::dynamic_bitset< uint64_t > & | bitset | ) | const [inline] |
Definition at line 627 of file ug_tsa_tree_iterator.h.
bitvector & ugdiss::TSA_tree_iterator< Token >::filterSentences | ( | bitvector & | foo | ) | const [inline] |
Definition at line 888 of file ug_tsa_tree_iterator.h.
ushort ugdiss::TSA_tree_iterator< TKN >::getOffset | ( | int | p | ) | const |
uint64_t ugdiss::TSA_tree_iterator< Token >::getPid | ( | int | p = -1 |
) | const [inline] |
Definition at line 531 of file ug_tsa_tree_iterator.h.
References sid.
Referenced by Moses::bitext::Bitext< TKN >::agenda::job::check_sample_distribution(), lookup_phrases(), and Moses::bitext::Bitext< TKN >::prep2().
uint64_t ugdiss::TSA_tree_iterator< Token >::getSequenceId | ( | ) | const [inline] |
Definition at line 770 of file ug_tsa_tree_iterator.h.
References I, and ugdiss::TSA< TKN >::readEntry().
id_type ugdiss::TSA_tree_iterator< Token >::getSid | ( | ) | const [inline] |
Definition at line 517 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA< TKN >::endArray, ugdiss::TSA_tree_iterator< TKN >::lower, ugdiss::TSA< TKN >::readSid(), ugdiss::TSA_tree_iterator< TKN >::root, sid, ugdiss::TSA< TKN >::startArray, and ugdiss::TSA_tree_iterator< TKN >::upper.
Token const * ugdiss::TSA_tree_iterator< Token >::getToken | ( | int | p | ) | const [inline] |
Definition at line 575 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA< TKN >::corpus, ugdiss::TSA_tree_iterator< TKN >::lower, ugdiss::next(), NULL, and ugdiss::TSA_tree_iterator< TKN >::root.
Referenced by ugdiss::TSA_tree_iterator< TKN >::down(), ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA_tree_iterator< TKN >::markSequence(), ugdiss::TSA_tree_iterator< TKN >::match(), show(), ugdiss::TSA_tree_iterator< TKN >::str(), and ugdiss::TSA_tree_iterator< TKN >::TSA_tree_iterator().
size_t ugdiss::TSA_tree_iterator< TKN >::grow | ( | Token const * | snt, | |
bitvector const & | cov | |||
) | [inline] |
Definition at line 184 of file ug_tsa_tree_iterator.h.
size_t ugdiss::TSA_tree_iterator< TKN >::grow | ( | Token const * | t, | |
Token const * | stop | |||
) | [inline] |
Definition at line 178 of file ug_tsa_tree_iterator.h.
char const * ugdiss::TSA_tree_iterator< Token >::lower_bound | ( | int | p | ) | const [inline] |
Definition at line 548 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA_tree_iterator< TKN >::lower.
Referenced by getoccs().
count_type ugdiss::TSA_tree_iterator< Token >::markEndOfSequence | ( | Token const * | start, | |
Token const * | stop, | |||
boost::dynamic_bitset< uint64_t > & | dest | |||
) | const [inline] |
Definition at line 710 of file ug_tsa_tree_iterator.h.
count_type ugdiss::TSA_tree_iterator< Token >::markOccurrences | ( | std::vector< ushort > & | dest | ) | const [inline] |
Definition at line 680 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA< TKN >::corpus, ugdiss::TSA_tree_iterator< TKN >::lower, ugdiss::TSA< TKN >::readOffset(), ugdiss::TSA< TKN >::readSid(), ugdiss::TSA_tree_iterator< TKN >::root, sid, and ugdiss::TSA_tree_iterator< TKN >::upper.
count_type ugdiss::TSA_tree_iterator< Token >::markOccurrences | ( | boost::dynamic_bitset< uint64_t > & | bitset, | |
bool | markOnlyStartPosition = false | |||
) | const [inline] |
Definition at line 664 of file ug_tsa_tree_iterator.h.
count_type ugdiss::TSA_tree_iterator< Token >::markSentences | ( | boost::dynamic_bitset< uint64_t > & | bitset | ) | const [inline] |
Definition at line 637 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA< TKN >::corpus, ugdiss::TSA_tree_iterator< TKN >::lower, ugdiss::TSA< TKN >::readOffset(), ugdiss::TSA< TKN >::readSid(), ugdiss::TSA_tree_iterator< TKN >::root, sid, and ugdiss::TSA_tree_iterator< TKN >::upper.
count_type ugdiss::TSA_tree_iterator< Token >::markSequence | ( | Token const * | start, | |
Token const * | stop, | |||
bitvector & | dest | |||
) | const [inline] |
Definition at line 742 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA_tree_iterator< TKN >::getToken(), and ugdiss::TSA_tree_iterator< TKN >::size().
bool ugdiss::TSA_tree_iterator< Token >::match | ( | id_type | sid | ) | const [inline] |
Definition at line 853 of file ug_tsa_tree_iterator.h.
bool ugdiss::TSA_tree_iterator< Token >::match | ( | Token const * | start, | |
Token const * | stop | |||
) | const [inline] |
Definition at line 828 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA_tree_iterator< TKN >::getToken(), ugdiss::TSA_tree_iterator< TKN >::lower, and ugdiss::L2R_Token< T >::next().
bool ugdiss::TSA_tree_iterator< Token >::over | ( | ) | [inline, virtual] |
Definition at line 253 of file ug_tsa_tree_iterator.h.
Referenced by dump().
SPTR< std::vector< typename ttrack::Position > > ugdiss::TSA_tree_iterator< Token >::randomSample | ( | int | level, | |
size_t | N | |||
) | const [inline] |
Randomly selects up to N occurrences of the sequence.
Definition at line 912 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA< TKN >::aveIndexEntrySize(), I, ugdiss::TSA_tree_iterator< TKN >::lower, m, util::rand_excl(), ugdiss::TSA< TKN >::readEntry(), ugdiss::TSA_tree_iterator< TKN >::root, and ugdiss::TSA_tree_iterator< TKN >::upper.
size_t ugdiss::TSA_tree_iterator< Token >::rawCnt | ( | int | p = -1 |
) | const [inline] |
Definition at line 614 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA< TKN >::getCorpusSize(), ugdiss::TSA_tree_iterator< TKN >::lower, ugdiss::TSA< TKN >::rawCnt(), ugdiss::TSA_tree_iterator< TKN >::root, and ugdiss::TSA_tree_iterator< TKN >::upper.
void ugdiss::TSA_tree_iterator< TKN >::showBounds | ( | std::ostream & | out | ) | const [protected] |
size_t ugdiss::TSA_tree_iterator< Token >::size | ( | ) | const [inline] |
Definition at line 507 of file ug_tsa_tree_iterator.h.
Referenced by ugdiss::TSA_tree_iterator< TKN >::down(), dump(), ugdiss::TSA_tree_iterator< TKN >::markSequence(), ugdiss::TSA_tree_iterator< TKN >::SortByApproximateCount::operator()(), show(), ugdiss::TSA_tree_iterator< TKN >::str(), and ugdiss::TSA_tree_iterator< TKN >::TSA_tree_iterator().
size_t ugdiss::TSA_tree_iterator< Token >::sntCnt | ( | int | p = -1 |
) | const [inline] |
Definition at line 600 of file ug_tsa_tree_iterator.h.
std::string ugdiss::TSA_tree_iterator< Token >::str | ( | TokenIndex const * | V = NULL, | |
int | start = 0, | |||
int | stop = 0 | |||
) | const [inline] |
Definition at line 782 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA_tree_iterator< TKN >::getToken(), and ugdiss::TSA_tree_iterator< TKN >::size().
Referenced by show().
void ugdiss::TSA_tree_iterator< Token >::tfAndRoot | ( | bitvector const & | ref, | |
bitvector const & | snt, | |||
bitvector & | dest | |||
) | const [inline] |
A special auxiliary function for finding trees.
Definition at line 864 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA< TKN >::corpus, I, ugdiss::TSA_tree_iterator< TKN >::lower, ugdiss::TSA< TKN >::readEntry(), ugdiss::TSA_tree_iterator< TKN >::root, and ugdiss::TSA_tree_iterator< TKN >::upper.
bool ugdiss::TSA_tree_iterator< Token >::up | ( | ) | [inline, virtual] |
Definition at line 318 of file ug_tsa_tree_iterator.h.
References ugdiss::TSA_tree_iterator< TKN >::lower, and ugdiss::TSA_tree_iterator< TKN >::upper.
Referenced by ugdiss::TSA_tree_iterator< TKN >::down(), and dump().
char const * ugdiss::TSA_tree_iterator< Token >::upper_bound | ( | int | p | ) | const [inline] |
Definition at line 560 of file ug_tsa_tree_iterator.h.
Referenced by getoccs().
std::vector<char const*> ugdiss::TSA_tree_iterator< TKN >::lower [protected] |
Definition at line 50 of file ug_tsa_tree_iterator.h.
Referenced by ugdiss::TSA_tree_iterator< TKN >::down(), ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA_tree_iterator< TKN >::getSid(), ugdiss::TSA_tree_iterator< TKN >::getToken(), ugdiss::TSA_tree_iterator< TKN >::lower_bound(), ugdiss::TSA_tree_iterator< TKN >::markOccurrences(), ugdiss::TSA_tree_iterator< TKN >::markSentences(), ugdiss::TSA_tree_iterator< TKN >::match(), ugdiss::TSA_tree_iterator< TKN >::randomSample(), ugdiss::TSA_tree_iterator< TKN >::rawCnt(), ugdiss::TSA_tree_iterator< TKN >::tfAndRoot(), ugdiss::TSA_tree_iterator< TKN >::TSA_tree_iterator(), and ugdiss::TSA_tree_iterator< TKN >::up().
TSA<Token> const* ugdiss::TSA_tree_iterator< TKN >::root |
Definition at line 58 of file ug_tsa_tree_iterator.h.
Referenced by Moses::bitext::Bitext< TKN >::agenda::job::check_sample_distribution(), ugdiss::TSA_tree_iterator< TKN >::down(), ugdiss::TSA_tree_iterator< TKN >::extend(), getoccs(), ugdiss::TSA_tree_iterator< TKN >::getSid(), ugdiss::TSA_tree_iterator< TKN >::getToken(), ugdiss::TSA_tree_iterator< TKN >::markOccurrences(), ugdiss::TSA_tree_iterator< TKN >::markSentences(), Moses::bitext::Bitext< TKN >::prep2(), ugdiss::TSA_tree_iterator< TKN >::randomSample(), ugdiss::TSA_tree_iterator< TKN >::rawCnt(), ugdiss::TSA_tree_iterator< TKN >::tfAndRoot(), and ugdiss::TSA_tree_iterator< TKN >::TSA_tree_iterator().
std::vector<char const*> ugdiss::TSA_tree_iterator< TKN >::upper [protected] |
Definition at line 51 of file ug_tsa_tree_iterator.h.
Referenced by ugdiss::TSA_tree_iterator< TKN >::down(), ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA_tree_iterator< TKN >::getSid(), ugdiss::TSA_tree_iterator< TKN >::markOccurrences(), ugdiss::TSA_tree_iterator< TKN >::markSentences(), ugdiss::TSA_tree_iterator< TKN >::randomSample(), ugdiss::TSA_tree_iterator< TKN >::rawCnt(), ugdiss::TSA_tree_iterator< TKN >::tfAndRoot(), ugdiss::TSA_tree_iterator< TKN >::TSA_tree_iterator(), and ugdiss::TSA_tree_iterator< TKN >::up().