#include <ug_mm_tsa.h>
Public Types | |
typedef TSA< TOKEN >::tree_iterator | tree_iterator |
Public Member Functions | |
mmTSA () | |
mmTSA (std::string fname, Ttrack< TOKEN > const *c) | |
void | open (std::string fname, typename boost::shared_ptr< Ttrack< TOKEN > const > c) |
count_type | sntCnt (char const *p, char const *const q) const |
count_type | rawCnt (char const *p, char const *const q) const |
void | getCounts (char const *p, char const *const q, count_type &sids, count_type &raw) const |
char const * | readSid (char const *p, char const *q, id_type &sid) const |
char const * | readSid (char const *p, char const *q, ::uint64_t &sid) const |
char const * | readOffset (char const *p, char const *q, uint16_t &offset) const |
char const * | readOffset (char const *p, char const *q, ::uint64_t &offset) const |
void | sanityCheck () const |
Public Attributes | |
filepos_type const * | index |
Friends | |
class | TSA_tree_iterator< TOKEN > |
Definition at line 26 of file ug_mm_tsa.h.
typedef TSA< TOKEN >::tree_iterator ugdiss::mmTSA< TOKEN >::tree_iterator
ugdiss::mmTSA< TOKEN >::mmTSA ( ) [inline]
Definition at line 101 of file ug_mm_tsa.h.
References ugdiss::TSA< TOKEN >::BitSetCachingThreshold, ugdiss::TSA< TOKEN >::endArray, NULL, and ugdiss::TSA< TOKEN >::startArray.
ugdiss::mmTSA< TOKEN >::mmTSA ( std::string fname,
    Ttrack< TOKEN > const * c
) [inline]
Definition at line 112 of file ug_mm_tsa.h.
References ugdiss::mmTSA< TOKEN >::open().
void ugdiss::mmTSA< TOKEN >::getCounts ( char const * p,
    char const *const q,
    count_type & sids,
    count_type & raw
) const [inline, virtual]
Get both sentence and word counts.
Computing both counts together avoids having to scan the byte range that represents the suffixes in question twice when dealing with memory-mapped suffix arrays.
Implements ugdiss::TSA< TOKEN >.
Definition at line 241 of file ug_mm_tsa.h.
References MosesServer::check(), ugdiss::TSA< TOKEN >::corpus, sid, and ugdiss::tightread().
void ugdiss::mmTSA< TOKEN >::open ( std::string fname,
    typename boost::shared_ptr< Ttrack< TOKEN > const > c
) [inline]
Definition at line 122 of file ug_mm_tsa.h.
References ugdiss::TSA< TOKEN >::bsc, ugdiss::TSA< TOKEN >::corpus, ugdiss::TSA< TOKEN >::corpusSize, ugdiss::TSA< TOKEN >::endArray, ugdiss::mmTSA< TOKEN >::index, ugdiss::TSA< TOKEN >::indexSize, ugdiss::numread(), ugdiss::TSA< TOKEN >::numTokens, Moses::prime(), and ugdiss::TSA< TOKEN >::startArray.
Referenced by main(), ugdiss::mmTSA< TOKEN >::mmTSA(), Moses::bitext::mmBitext< TKN >::open(), Moses::bitext::imBitext< TKN >::open(), and open_bitext().
count_type ugdiss::mmTSA< TOKEN >::rawCnt ( char const * p,
    char const *const q
) const [inline, virtual]
Implements ugdiss::TSA< TOKEN >.
Definition at line 223 of file ug_mm_tsa.h.
References sid, and ugdiss::tightread().
char const * ugdiss::mmTSA< TOKEN >::readOffset ( char const * p,
    char const * q,
    ::uint64_t & offset
) const [inline, virtual]
Implements ugdiss::TSA< TOKEN >.
Definition at line 213 of file ug_mm_tsa.h.
References ugdiss::tightread().
char const * ugdiss::mmTSA< TOKEN >::readOffset ( char const * p,
    char const * q,
    uint16_t & offset
) const [inline, virtual]
Read the offset part of the index entry into `offset`.
Implements ugdiss::TSA< TOKEN >.
Definition at line 202 of file ug_mm_tsa.h.
References ugdiss::tightread().
char const * ugdiss::mmTSA< TOKEN >::readSid ( char const * p,
    char const * q,
    ::uint64_t & sid
) const [inline, virtual]
Implements ugdiss::TSA< TOKEN >.
Definition at line 191 of file ug_mm_tsa.h.
References ugdiss::tightread().
char const * ugdiss::mmTSA< TOKEN >::readSid ( char const * p,
    char const * q,
    id_type & sid
) const [inline, virtual]
Read the sentence ID into `sid`.
Implements ugdiss::TSA< TOKEN >.
Definition at line 181 of file ug_mm_tsa.h.
References ugdiss::tightread().
void ugdiss::mmTSA< TOKEN >::sanityCheck ( ) const
count_type ugdiss::mmTSA< TOKEN >::sntCnt ( char const * p,
    char const *const q
) const
friend class TSA_tree_iterator< TOKEN > [friend]
Definition at line 30 of file ug_mm_tsa.h.
filepos_type const* ugdiss::mmTSA< TOKEN >::index