#include <ug_tsa_base.h>
Public Types | |
typedef TSA_tree_iterator< TKN > | tree_iterator |
typedef tsa::ArrayEntry | ArrayEntry |
typedef boost::shared_ptr < bitvector > | bitset_pointer |
typedef TKN | Token |
typedef BitSetCache< TSA< TKN > > | BSC_t |
Public Member Functions | |
virtual | ~TSA () |
char const * | arrayStart () const |
char const * | arrayEnd () const |
char const * | lower_bound (typename std::vector< TKN >::const_iterator const &keyStart, typename std::vector< TKN >::const_iterator const &keyStop) const |
char const * | lower_bound (TKN const *keyStart, TKN const *keyStop) const |
char const * | lower_bound (TKN const *keyStart, int keyLen) const |
char const * | upper_bound (typename std::vector< TKN >::const_iterator const &keyStart, typename std::vector< TKN >::const_iterator const &keyStop) const |
char const * | upper_bound (TKN const *keyStart, int keyLength) const |
void | dump (std::ostream &out, TokenIndex const &T) const |
count_type | fillBitSet (std::vector< TKN > const &phrase, bdBitset &dest) const |
count_type | fillBitSet (TKN const *key, size_t keyLen, bdBitset &dest) const |
count_type | setBits (char const *startRange, char const *endRange, boost::dynamic_bitset< uint64_t > &bs) const |
void | setTokenBits (char const *startRange, char const *endRange, size_t len, bitvector &bs) const |
virtual char const * | readSid (char const *p, char const *q, id_type &sid) const =0 |
virtual char const * | readSid (char const *p, char const *q,::uint64_t &sid) const =0 |
virtual char const * | readOffset (char const *p, char const *q, uint16_t &offset) const =0 |
virtual char const * | readOffset (char const *p, char const *q,::uint64_t &offset) const =0 |
count_type | sntCnt (char const *p, char const *const q) const |
count_type | rawCnt2 (TKN const *keyStart, size_t keyLen) const |
virtual count_type | rawCnt (char const *p, char const *const q) const =0 |
virtual void | getCounts (char const *p, char const *const q, count_type &sids, count_type &raw) const =0 |
std::string | suffixAt (char const *p, TokenIndex const *V=NULL, size_t maxlen=0) const |
std::string | suffixAt (ArrayEntry const &I, TokenIndex const *V=NULL, size_t maxlen=0) const |
tsa::ArrayEntry & | readEntry (char const *p, tsa::ArrayEntry &I) const |
char const * | dataEnd () const |
bool | sanityCheck1 () const |
::uint64_t | getSequenceId (typename std::vector< TKN >::const_iterator const &pstart, typename std::vector< TKN >::const_iterator const &pstop) const |
::uint64_t | getSequenceId (TKN const *t, ushort plen) const |
std::string | getSequence (::uint64_t pid, TokenIndex const &V) const |
std::vector< TKN > | getSequence (::uint64_t pid) const |
TKN const * | getSequenceStart (::uint64_t) const |
ushort | getSequenceLength (::uint64_t) const |
size_t | getCorpusSize () const |
Ttrack< TKN > const * | getCorpus () const |
bitset_pointer | getBitSet (TKN const *startKey, size_t keyLen) const |
find all instances of the tree described by [treeStart, treeEnd) | |
boost::shared_ptr< bitvector > | findTree (TKN const *treeStart, TKN const *treeEnd, bitvector const *filter) const |
size_t | markOccurrences (char const *lo, char const *up, size_t len, bitvector &bitset, bool markOnlyStartPosition) const |
bool | findBranches (TKN const *base, bitvector const &terminals, std::vector< tree_iterator > &dest) const |
double | aveIndexEntrySize () const |
SPTR< TSA_tree_iterator< TKN > > | find (TKN const *start, size_t len) const |
Public Attributes | |
boost::shared_ptr< BSC_t > | bsc |
Protected Member Functions | |
virtual char const * | index_jump (char const *startRange, char const *stopRange, float fraction) const =0 |
char const * | find_start (char const *lo, char const *const upX, TKN const *const refStart, int refLen, size_t d) const |
char const * | find_end (char const *lo, char const *const upX, TKN const *const refStart, int refLen, size_t d) const |
char const * | find_longer (char const *lo, char const *const upX, TKN const *const refStart, int refLen, size_t d) const |
virtual char const * | getLowerBound (id_type id) const =0 |
virtual char const * | getUpperBound (id_type id) const =0 |
Protected Attributes | |
boost::shared_ptr< Ttrack< TKN > const > | corpus |
char const * | startArray |
char const * | endArray |
size_t | corpusSize |
id_type | numTokens |
id_type | indexSize |
size_t | BitSetCachingThreshold |
Friends | |
class | TSA_tree_iterator< TKN > |
Token types (TKN) must provide a number of functions; see the class SimpleWordId (a simple example of a "core token base class") and the template class L2R_Token (a class derived from its template parameter, e.g. SimpleWordId, that handles the ordering of sequences). Both are declared/defined in ug_corpus_token.{h|cc}.
Definition at line 46 of file ug_tsa_base.h.
typedef tsa::ArrayEntry ugdiss::TSA< TKN >::ArrayEntry |
Definition at line 52 of file ug_tsa_base.h.
typedef boost::shared_ptr<bitvector> ugdiss::TSA< TKN >::bitset_pointer |
Definition at line 56 of file ug_tsa_base.h.
typedef BitSetCache<TSA<TKN> > ugdiss::TSA< TKN >::BSC_t |
Definition at line 58 of file ug_tsa_base.h.
typedef TKN ugdiss::TSA< TKN >::Token |
Definition at line 57 of file ug_tsa_base.h.
typedef TSA_tree_iterator<TKN> ugdiss::TSA< TKN >::tree_iterator |
Reimplemented in ugdiss::mmTSA< TOKEN >, ugdiss::imTSA< TOKEN >, and ugdiss::mmTSA< Token >.
Definition at line 49 of file ug_tsa_base.h.
virtual ugdiss::TSA< TKN >::~TSA | ( | ) | [inline, virtual] |
Definition at line 49 of file ug_tsa_base.h.
char const* ugdiss::TSA< TKN >::arrayEnd | ( | ) | const [inline] |
Definition at line 145 of file ug_tsa_base.h.
Referenced by ugdiss::TSA_tree_iterator< TKN >::down(), and ugdiss::TSA< TKN >::upper_bound().
char const* ugdiss::TSA< TKN >::arrayStart | ( | ) | const [inline] |
Definition at line 144 of file ug_tsa_base.h.
Referenced by ugdiss::TSA_tree_iterator< TKN >::down().
double ugdiss::TSA< TKN >::aveIndexEntrySize | ( | ) | const [inline] |
Definition at line 313 of file ug_tsa_base.h.
Referenced by ugdiss::TSA_tree_iterator< Token >::ca(), and ugdiss::TSA_tree_iterator< TKN >::randomSample().
char const* ugdiss::TSA< TKN >::dataEnd | ( | ) | const |
return pointer to the end of the data block
void ugdiss::TSA< TKN >::dump | ( | std::ostream & | out, | |
TokenIndex const & | T | |||
) | const |
dump all suffixes, in order, to /out/
count_type ugdiss::TSA< TKN >::fillBitSet | ( | TKN const * | key, | |
size_t | keyLen, | |||
bdBitset & | dest | |||
) | const [inline] |
fill the dynamic bitset with information as to which sentences the phrase occurs in
Definition at line 375 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpus, ugdiss::TSA< TKN >::lower_bound(), ugdiss::TSA< TKN >::setBits(), ugdiss::up(), and ugdiss::TSA< TKN >::upper_bound().
count_type ugdiss::TSA< TKN >::fillBitSet | ( | std::vector< TKN > const & | key, | |
bdBitset & | dest | |||
) | const [inline] |
fill the dynamic bit set with true for all sentences that contain /phrase/.
Definition at line 359 of file ug_tsa_base.h.
Referenced by ugdiss::TSA< TKN >::getBitSet().
SPTR<TSA_tree_iterator<TKN> > ugdiss::TSA< TKN >::find | ( | TKN const * | start, | |
size_t | len | |||
) | const [inline] |
Definition at line 321 of file ug_tsa_base.h.
char const * ugdiss::TSA< TKN >::find_end | ( | char const * | lo, | |
char const *const | upX, | |||
TKN const *const | refStart, | |||
int | refLen, | |||
size_t | d | |||
) | const [inline, protected] |
return the index position of the first item that is greater than [refStart,refStart+refLen) and does not include it as a prefix
return the upper bound (first entry beyond) of the token range matching [startKey,endKey)
Definition at line 490 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpus, I, ugdiss::TSA< TKN >::index_jump(), NULL, ugdiss::TSA< TKN >::readEntry(), and ugdiss::up().
Referenced by ugdiss::TSA_tree_iterator< TKN >::down(), ugdiss::TSA_tree_iterator< TKN >::extend(), and ugdiss::TSA< TKN >::upper_bound().
char const * ugdiss::TSA< TKN >::find_longer | ( | char const * | lo, | |
char const *const | upX, | |||
TKN const *const | refStart, | |||
int | refLen, | |||
size_t | d | |||
) | const [inline, protected] |
return the index position of the first item that is longer than [refStart,refStart+refLen) and includes it as a prefix
return the first entry that has the prefix [refStart,refStart+refLen) but continues on
Definition at line 525 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpus, I, ugdiss::TSA< TKN >::index_jump(), NULL, ugdiss::TSA< TKN >::readEntry(), and ugdiss::up().
Referenced by ugdiss::TSA_tree_iterator< TKN >::down().
char const * ugdiss::TSA< TKN >::find_start | ( | char const * | lo, | |
char const *const | upX, | |||
TKN const *const | refStart, | |||
int | refLen, | |||
size_t | d | |||
) | const [inline, protected] |
return the index position of the first item that is equal to or includes [refStart,refStart+refLen) as a prefix
return the lower bound (first matching entry) of the token range matching [startKey,endKey)
Definition at line 457 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpus, I, ugdiss::TSA< TKN >::index_jump(), NULL, ugdiss::TSA< TKN >::readEntry(), and ugdiss::up().
Referenced by ugdiss::TSA_tree_iterator< TKN >::extend(), and ugdiss::TSA< TKN >::lower_bound().
bool ugdiss::TSA< TKN >::findBranches | ( | TKN const * | base, | |
bitvector const & | terminals, | |||
std::vector< tree_iterator > & | dest | |||
) | const [inline] |
Definition at line 808 of file ug_tsa_base.h.
References k, sort(), and sorter.
boost::shared_ptr<bitvector> ugdiss::TSA< TKN >::findTree | ( | TKN const * | treeStart, | |
TKN const * | treeEnd, | |||
bitvector const * | filter | |||
) | const |
TSA< TKN >::bitset_pointer ugdiss::TSA< TKN >::getBitSet | ( | TKN const * | startKey, | |
size_t | keyLen | |||
) | const [inline] |
find all instances of the tree described by [treeStart, treeEnd)
Definition at line 764 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::bsc, ugdiss::TSA< TKN >::corpus, ugdiss::TSA< TKN >::fillBitSet(), and NULL.
Ttrack< TKN > const * ugdiss::TSA< TKN >::getCorpus | ( | ) | const [inline] |
Definition at line 738 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpus.
Referenced by fill(), ugdiss::BitSetCache< TSA >::get(), ugdiss::BitSetCache< TSA >::get2(), and nbest().
size_t ugdiss::TSA< TKN >::getCorpusSize | ( | ) | const [inline] |
Definition at line 728 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpusSize.
Referenced by ugdiss::TSA_tree_iterator< TKN >::rawCnt().
virtual void ugdiss::TSA< TKN >::getCounts | ( | char const * | p, | |
char const *const | q, | |||
count_type & | sids, | |||
count_type & | raw | |||
) | const [pure virtual] |
get both sentence and word counts.
Avoids having to go over the byte range representing the range of suffixes in question twice when dealing with memory-mapped suffix arrays.
Implemented in ugdiss::imTSA< TOKEN >, ugdiss::mmTSA< TOKEN >, and ugdiss::mmTSA< Token >.
virtual char const* ugdiss::TSA< TKN >::getLowerBound | ( | id_type | id | ) | const [protected, pure virtual] |
Returns a char const* pointing to the position in the data block where the first item starting with token /id/ is located.
Referenced by ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA< TKN >::lower_bound(), and ugdiss::TSA< TKN >::upper_bound().
std::vector< TKN > ugdiss::TSA< TKN >::getSequence | ( | ::uint64_t | pid | ) | const [inline] |
Return the phrase represented by phrase ID /pid/.
Definition at line 673 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpus.
std::string ugdiss::TSA< TKN >::getSequence | ( | ::uint64_t | pid, | |
TokenIndex const & | V | |||
) | const [inline] |
Return the phrase represented by phrase ID /pid/.
Definition at line 690 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::getSequenceLength(), and ugdiss::TSA< TKN >::getSequenceStart().
uint64_t ugdiss::TSA< TKN >::getSequenceId | ( | TKN const * | t, | |
ushort | plen | |||
) | const [inline] |
Definition at line 654 of file ug_tsa_base.h.
References I, ugdiss::TSA< TKN >::lower_bound(), and ugdiss::TSA< TKN >::readEntry().
uint64_t ugdiss::TSA< TKN >::getSequenceId | ( | typename std::vector< TKN >::const_iterator const & | pstart, | |
typename std::vector< TKN >::const_iterator const & | pstop | |||
) | const [inline] |
Return an ID that represents a given phrase. This should NEVER be 0! Structure of a phrase ID: leftmost 32 bits: sentence ID in the corpus; next 16 bits: offset from the start of the sentence; next 16 bits: length of the phrase.
Definition at line 643 of file ug_tsa_base.h.
ushort ugdiss::TSA< TKN >::getSequenceLength | ( | ::uint64_t | pid | ) | const [inline] |
Definition at line 718 of file ug_tsa_base.h.
Referenced by ugdiss::TSA< TKN >::getSequence().
TKN const * ugdiss::TSA< TKN >::getSequenceStart | ( | ::uint64_t | pid | ) | const [inline] |
Definition at line 707 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpus.
Referenced by ugdiss::TSA< TKN >::getSequence().
virtual char const* ugdiss::TSA< TKN >::getUpperBound | ( | id_type | id | ) | const [protected, pure virtual] |
Referenced by ugdiss::TSA_tree_iterator< TKN >::down(), ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA< TKN >::lower_bound(), and ugdiss::TSA< TKN >::upper_bound().
virtual char const* ugdiss::TSA< TKN >::index_jump | ( | char const * | startRange, | |
char const * | stopRange, | |||
float | fraction | |||
) | const [protected, pure virtual] |
Referenced by ugdiss::TSA< TKN >::find_end(), ugdiss::TSA< TKN >::find_longer(), and ugdiss::TSA< TKN >::find_start().
char const * ugdiss::TSA< TKN >::lower_bound | ( | TKN const * | keyStart, | |
int | keyLen | |||
) | const [inline] |
Definition at line 584 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::find_start(), ugdiss::TSA< TKN >::getLowerBound(), ugdiss::TSA< TKN >::getUpperBound(), and ugdiss::TSA< TKN >::startArray.
char const * ugdiss::TSA< TKN >::lower_bound | ( | TKN const * | keyStart, | |
TKN const * | keyStop | |||
) | const [inline] |
returns the start position in the byte array representing the tightly packed sorted list of corpus positions for the given search phrase
Definition at line 575 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::lower_bound().
char const * ugdiss::TSA< TKN >::lower_bound | ( | typename std::vector< TKN >::const_iterator const & | keyStart, | |
typename std::vector< TKN >::const_iterator const & | keyStop | |||
) | const [inline] |
Definition at line 558 of file ug_tsa_base.h.
Referenced by ugdiss::TSA< TKN >::fillBitSet(), ugdiss::BitSetCache< TSA >::get(), ugdiss::BitSetCache< TSA >::get2(), ugdiss::TSA< TKN >::getSequenceId(), ugdiss::TSA< TKN >::lower_bound(), nbest(), and ugdiss::TSA< TKN >::rawCnt2().
size_t ugdiss::TSA< TKN >::markOccurrences | ( | char const * | lo, | |
char const * | up, | |||
size_t | len, | |||
bitvector & | bitset, | |||
bool | markOnlyStartPosition | |||
) | const [inline] |
Definition at line 782 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpus, ugdiss::TSA< TKN >::readOffset(), ugdiss::TSA< TKN >::readSid(), and sid.
virtual count_type ugdiss::TSA< TKN >::rawCnt | ( | char const * | p, | |
char const *const | q | |||
) | const [pure virtual] |
Implemented in ugdiss::imTSA< TOKEN >, ugdiss::mmTSA< TOKEN >, and ugdiss::mmTSA< Token >.
Referenced by main(), ugdiss::TSA_tree_iterator< TKN >::rawCnt(), and ugdiss::TSA< TKN >::rawCnt2().
count_type ugdiss::TSA< TKN >::rawCnt2 | ( | TKN const * | keyStart, | |
size_t | keyLen | |||
) | const [inline] |
Definition at line 630 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::lower_bound(), ugdiss::TSA< TKN >::rawCnt(), ugdiss::up(), and ugdiss::TSA< TKN >::upper_bound().
tsa::ArrayEntry & ugdiss::TSA< TKN >::readEntry | ( | char const * | p, | |
tsa::ArrayEntry & | I | |||
) | const [inline] |
Definition at line 748 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpus, ugdiss::TSA< TKN >::endArray, ugdiss::TSA< TKN >::readOffset(), and ugdiss::TSA< TKN >::readSid().
Referenced by ugdiss::TSA_tree_iterator< TKN >::down(), ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA< TKN >::find_end(), ugdiss::TSA< TKN >::find_longer(), ugdiss::TSA< TKN >::find_start(), getoccs(), ugdiss::TSA_tree_iterator< TKN >::getSequenceId(), ugdiss::TSA< TKN >::getSequenceId(), nbest(), ugdiss::TSA_tree_iterator< TKN >::randomSample(), ugdiss::TSA< TKN >::setTokenBits(), and ugdiss::TSA_tree_iterator< TKN >::tfAndRoot().
virtual char const* ugdiss::TSA< TKN >::readOffset | ( | char const * | p, | |
char const * | q, | |||
::uint64_t & | offset | |||
) | const [pure virtual] |
Implemented in ugdiss::imTSA< TOKEN >, ugdiss::mmTSA< TOKEN >, and ugdiss::mmTSA< Token >.
virtual char const* ugdiss::TSA< TKN >::readOffset | ( | char const * | p, | |
char const * | q, | |||
uint16_t & | offset | |||
) | const [pure virtual] |
read the offset part of the index entry into /offset/
Implemented in ugdiss::imTSA< TOKEN >, ugdiss::mmTSA< TOKEN >, and ugdiss::mmTSA< Token >.
Referenced by ugdiss::TSA_tree_iterator< TKN >::markOccurrences(), ugdiss::TSA< TKN >::markOccurrences(), ugdiss::TSA_tree_iterator< TKN >::markSentences(), ugdiss::TSA< TKN >::readEntry(), ugdiss::TSA< TKN >::setBits(), and ugdiss::TSA< TKN >::sntCnt().
virtual char const* ugdiss::TSA< TKN >::readSid | ( | char const * | p, | |
char const * | q, | |||
::uint64_t & | sid | |||
) | const [pure virtual] |
Implemented in ugdiss::imTSA< TOKEN >, ugdiss::mmTSA< TOKEN >, and ugdiss::mmTSA< Token >.
virtual char const* ugdiss::TSA< TKN >::readSid | ( | char const * | p, | |
char const * | q, | |||
id_type & | sid | |||
) | const [pure virtual] |
read the sentence ID into /sid/
Implemented in ugdiss::imTSA< TOKEN >, ugdiss::mmTSA< TOKEN >, and ugdiss::mmTSA< Token >.
Referenced by ugdiss::TSA_tree_iterator< TKN >::getSid(), ugdiss::TSA_tree_iterator< TKN >::markOccurrences(), ugdiss::TSA< TKN >::markOccurrences(), ugdiss::TSA_tree_iterator< TKN >::markSentences(), ugdiss::TSA< TKN >::readEntry(), ugdiss::TSA< TKN >::setBits(), and ugdiss::TSA< TKN >::sntCnt().
bool ugdiss::TSA< TKN >::sanityCheck1 | ( | ) | const |
count_type ugdiss::TSA< TKN >::setBits | ( | char const * | startRange, | |
char const * | endRange, | |||
boost::dynamic_bitset< uint64_t > & | bs | |||
) | const [inline] |
Definition at line 390 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::readOffset(), ugdiss::TSA< TKN >::readSid(), and sid.
Referenced by ugdiss::TSA< TKN >::fillBitSet().
void ugdiss::TSA< TKN >::setTokenBits | ( | char const * | startRange, | |
char const * | endRange, | |||
size_t | len, | |||
bitvector & | bs | |||
) | const [inline] |
Definition at line 412 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::corpus, I, ugdiss::L2R_Token< T >::next(), ugdiss::TSA< TKN >::readEntry(), ugdiss::L2R_Token< T >::stop(), and stop.
count_type ugdiss::TSA< TKN >::sntCnt | ( | char const * | p, | |
char const *const | q | |||
) | const [inline] |
Reimplemented in ugdiss::imTSA< TOKEN >, ugdiss::mmTSA< TOKEN >, and ugdiss::mmTSA< Token >.
Definition at line 436 of file ug_tsa_base.h.
References MosesServer::check(), ugdiss::TSA< TKN >::corpus, ugdiss::TSA< TKN >::readOffset(), ugdiss::TSA< TKN >::readSid(), and sid.
std::string ugdiss::TSA< TKN >::suffixAt | ( | ArrayEntry const & | I, | |
TokenIndex const * | V = NULL, | |||
size_t | maxlen = 0 | |||
) | const |
std::string ugdiss::TSA< TKN >::suffixAt | ( | char const * | p, | |
TokenIndex const * | V = NULL, | |||
size_t | maxlen = 0 | |||
) | const |
char const * ugdiss::TSA< TKN >::upper_bound | ( | TKN const * | keyStart, | |
int | keyLength | |||
) | const [inline] |
returns the upper bound in the byte array representing the tightly packed sorted list of corpus positions for the given search phrase (i.e., points just beyond the range)
Definition at line 617 of file ug_tsa_base.h.
References ugdiss::TSA< TKN >::arrayEnd(), ugdiss::TSA< TKN >::find_end(), ugdiss::TSA< TKN >::getLowerBound(), and ugdiss::TSA< TKN >::getUpperBound().
char const * ugdiss::TSA< TKN >::upper_bound | ( | typename std::vector< TKN >::const_iterator const & | keyStart, | |
typename std::vector< TKN >::const_iterator const & | keyStop | |||
) | const [inline] |
Definition at line 600 of file ug_tsa_base.h.
Referenced by ugdiss::TSA_tree_iterator< TKN >::down(), ugdiss::TSA< TKN >::fillBitSet(), ugdiss::BitSetCache< TSA >::get(), ugdiss::BitSetCache< TSA >::get2(), nbest(), and ugdiss::TSA< TKN >::rawCnt2().
friend class TSA_tree_iterator< TKN > [friend] |
Definition at line 62 of file ug_tsa_base.h.
size_t ugdiss::TSA< TKN >::BitSetCachingThreshold [protected] |
Definition at line 92 of file ug_tsa_base.h.
boost::shared_ptr<BSC_t> ugdiss::TSA< TKN >::bsc |
boost::shared_ptr<Ttrack<TKN> const> ugdiss::TSA< TKN >::corpus [protected] |
Definition at line 65 of file ug_tsa_base.h.
Referenced by ugdiss::TSA_tree_iterator< Token >::ca(), ugdiss::TSA_tree_iterator< TKN >::down(), ugdiss::TSA_tree_iterator< TKN >::extend(), ugdiss::TSA< TKN >::fillBitSet(), ugdiss::TSA< TKN >::find_end(), ugdiss::TSA< TKN >::find_longer(), ugdiss::TSA< TKN >::find_start(), ugdiss::TSA< TKN >::getBitSet(), ugdiss::TSA< TKN >::getCorpus(), ugdiss::TSA< TKN >::getSequence(), ugdiss::TSA< TKN >::getSequenceStart(), ugdiss::TSA_tree_iterator< TKN >::getToken(), ugdiss::TSA_tree_iterator< TKN >::markOccurrences(), ugdiss::TSA< TKN >::markOccurrences(), ugdiss::TSA_tree_iterator< TKN >::markSentences(), ugdiss::TSA< TKN >::readEntry(), ugdiss::TSA< TKN >::setTokenBits(), ugdiss::TSA< TKN >::sntCnt(), and ugdiss::TSA_tree_iterator< TKN >::tfAndRoot().
size_t ugdiss::TSA< TKN >::corpusSize [protected] |
char const* ugdiss::TSA< TKN >::endArray [protected] |
Definition at line 67 of file ug_tsa_base.h.
Referenced by ugdiss::TSA< TOKEN >::arrayEnd(), ugdiss::TSA< TOKEN >::aveIndexEntrySize(), ugdiss::TSA_tree_iterator< TKN >::getSid(), and ugdiss::TSA< TKN >::readEntry().
id_type ugdiss::TSA< TKN >::indexSize [protected] |
Size (in number of tokens) of the corpus underlying the sequence array.
ATTENTION: This number may differ from corpus->numTokens(), namely when the suffix array is based on a subset of the sentences of /corpus/.
Definition at line 89 of file ug_tsa_base.h.
id_type ugdiss::TSA< TKN >::numTokens [protected] |
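Size (in number of sentences) of the corpus underlying the sequence array. NOTE(review): the member name numTokens suggests a token count rather than a sentence count; this description may have been attached to the wrong member — confirm against ug_tsa_base.h.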
ATTENTION: This number may differ from corpus->size(), namely when the suffix array is based on a subset of the sentences of /corpus/.
Definition at line 80 of file ug_tsa_base.h.
Referenced by ugdiss::TSA< TOKEN >::aveIndexEntrySize().
char const* ugdiss::TSA< TKN >::startArray [protected] |
Definition at line 66 of file ug_tsa_base.h.
Referenced by ugdiss::TSA< TOKEN >::arrayStart(), ugdiss::TSA< TOKEN >::aveIndexEntrySize(), ugdiss::TSA_tree_iterator< TKN >::getSid(), and ugdiss::TSA< TKN >::lower_bound().