#include <ug_im_tsa.h>
Classes | |
class | tree_iterator |
Public Member Functions | |
imTSA () | |
imTSA (boost::shared_ptr< Ttrack< TOKEN > const > c, bdBitset const *filt, std::ostream *log=NULL, size_t threads=0) | |
imTSA (imTSA< TOKEN > const &prior, boost::shared_ptr< imTtrack< TOKEN > const > const &crp, std::vector< id_type > const &newsids, size_t const vsize) | |
count_type | sntCnt (char const *p, char const *const q) const |
count_type | rawCnt (char const *p, char const *const q) const |
void | getCounts (char const *p, char const *const q, count_type &sids, count_type &raw) const |
char const * | readSid (char const *p, char const *q, id_type &sid) const |
char const * | readSid (char const *p, char const *q, ::uint64_t &sid) const |
char const * | readOffset (char const *p, char const *q, uint16_t &offset) const |
char const * | readOffset (char const *p, char const *q, ::uint64_t &offset) const |
void | sanityCheck () const |
void | save_as_mm_tsa (std::string fname) const |
Friends | |
class | tree_iterator |
Definition at line 57 of file ug_im_tsa.h.
sapt::imTSA< TOKEN >::imTSA | ( | ) | [inline] |
Definition at line 154 of file ug_im_tsa.h.
References sapt::TSA< TOKEN >::BitSetCachingThreshold, sapt::TSA< TOKEN >::corpusSize, sapt::TSA< TOKEN >::endArray, sapt::TSA< TOKEN >::indexSize, NULL, and sapt::TSA< TOKEN >::startArray.
sapt::imTSA< TOKEN >::imTSA | ( | boost::shared_ptr< Ttrack< TOKEN > const > | c, | |
bdBitset const * | filt, | |||
std::ostream * | log = NULL, | |||
size_t | threads = 0 | |||
) | [inline] |
Definition at line 168 of file ug_im_tsa.h.
References sapt::TSA< TOKEN >::corpus, sapt::TSA< TOKEN >::corpusSize, sapt::TSA< TOKEN >::endArray, index, sapt::TSA< TOKEN >::indexSize, k, m, sapt::TSA< TOKEN >::numTokens, sapt::ttrack::Position::offset, sapt::ttrack::Position::sid, sid, sorter, sapt::TSA< TOKEN >::startArray, stop, and util::WallTime().
sapt::imTSA< TOKEN >::imTSA | ( | imTSA< TOKEN > const & | prior, | |
boost::shared_ptr< imTtrack< TOKEN > const > const & | crp, | |||
std::vector< id_type > const & | newsids, | |||
size_t const | vsize | |||
) | [inline] |
Definition at line 408 of file ug_im_tsa.h.
References sapt::TSA< TOKEN >::corpus, sapt::TSA< TOKEN >::corpusSize, sapt::TSA< TOKEN >::endArray, index, k, n, sapt::TSA< TOKEN >::numTokens, sapt::ttrack::Position::offset, sapt::ttrack::Position::sid, sid, sort(), sorter, and sapt::TSA< TOKEN >::startArray.
void sapt::imTSA< TOKEN >::getCounts | ( | char const * | p, | |
char const *const | q, | |||
count_type & | sids, | |||
count_type & | raw | |||
) | const [inline, virtual] |
Get both sentence and word counts.
Avoids having to go over the byte range representing the range of suffixes in question twice when dealing with memory-mapped suffix arrays.
Implements sapt::TSA< TOKEN >.
Definition at line 360 of file ug_im_tsa.h.
References sapt::check(), sapt::TSA< TOKEN >::corpus, sapt::ttrack::Position::sid, and sid.
count_type sapt::imTSA< TOKEN >::rawCnt | ( | char const * | p, | |
char const *const | q | |||
) | const [inline, virtual] |
Implements sapt::TSA< TOKEN >.
Definition at line 350 of file ug_im_tsa.h.
char const * sapt::imTSA< TOKEN >::readOffset | ( | char const * | p, | |
char const * | q, | |||
::uint64_t & | offset | |||
) | const [inline, virtual] |
char const * sapt::imTSA< TOKEN >::readOffset | ( | char const * | p, | |
char const * | q, | |||
uint16_t & | offset | |||
) | const [inline, virtual] |
Read the offset part of the index entry into /offset/.
Implements sapt::TSA< TOKEN >.
Definition at line 328 of file ug_im_tsa.h.
char const * sapt::imTSA< TOKEN >::readSid | ( | char const * | p, | |
char const * | q, | |||
::uint64_t & | sid | |||
) | const [inline, virtual] |
char const * sapt::imTSA< TOKEN >::readSid | ( | char const * | p, | |
char const * | q, | |||
id_type & | sid | |||
) | const [inline, virtual] |
Read the sentence ID into /sid/.
Implements sapt::TSA< TOKEN >.
Definition at line 306 of file ug_im_tsa.h.
void sapt::imTSA< TOKEN >::sanityCheck | ( | ) | const |
void sapt::imTSA< TOKEN >::save_as_mm_tsa | ( | std::string | fname | ) | const [inline] |
Definition at line 380 of file ug_im_tsa.h.
References index, k, tpt::numwrite(), sid, and tpt::tightwrite().
Referenced by build_mmTSA().
count_type sapt::imTSA< TOKEN >::sntCnt | ( | char const * | p, | |
char const *const | q | |||
) | const |
friend class tree_iterator [friend] |