#include <tpt_tokenindex.h>
Classes | |
class | CompFunc |
class | Entry |
Public Member Functions | |
TokenIndex (std::string unkToken="UNK") | |
void | open (std::string fname, std::string unkToken="UNK", bool dyna=false) |
void | close () |
id_type | operator[] (char const *w) const |
id_type | operator[] (std::string const &w) const |
char const *const | operator[] (id_type id) const |
char const *const | operator[] (id_type id) |
std::vector< char const * > | reverseIndex () const |
std::string | toString (std::vector< id_type > const &v) |
std::string | toString (std::vector< id_type > const &v) const |
std::string | toString (id_type const *start, id_type const *const stop) |
std::string | toString (id_type const *start, id_type const *const stop) const |
std::vector< id_type > | toIdSeq (std::string const &line) const |
bool | fillIdSeq (std::string const &line, std::vector< id_type > &v) const |
void | iniReverseIndex () |
id_type | getNumTokens () const |
id_type | getUnkId () const |
id_type | knownVocabSize () const |
id_type | totalVocabSize () const |
id_type | ksize () const |
id_type | tsize () const |
char const *const | getUnkToken () const |
void | write (std::string fname) |
bool | isDynamic () const |
bool | setDynamic (bool onoff) |
void | setUnkLabel (std::string unk) |
Public Attributes | |
bio::mapped_file_source | file |
Entry const * | startIdx |
Entry const * | endIdx |
CompFunc | comp |
Definition at line 28 of file tpt_tokenindex.h.
sapt::TokenIndex::TokenIndex | ( | std::string | unkToken = "UNK" |
) |
void sapt::TokenIndex::close | ( | ) |
Definition at line 77 of file tpt_tokenindex.cc.
References file.
Referenced by write_tokenindex().
bool sapt::TokenIndex::fillIdSeq | ( | std::string const & | line, | |
std::vector< id_type > & | v | |||
) | const |
id_type sapt::TokenIndex::getNumTokens | ( | ) | const |
Definition at line 294 of file tpt_tokenindex.cc.
id_type sapt::TokenIndex::getUnkId | ( | ) | const |
Definition at line 301 of file tpt_tokenindex.cc.
char const *const sapt::TokenIndex::getUnkToken | ( | ) | const |
Definition at line 308 of file tpt_tokenindex.cc.
void sapt::TokenIndex::iniReverseIndex | ( | ) |
Definition at line 174 of file tpt_tokenindex.cc.
References reverseIndex().
Referenced by main().
bool sapt::TokenIndex::isDynamic | ( | ) | const |
Definition at line 388 of file tpt_tokenindex.cc.
id_type sapt::TokenIndex::knownVocabSize | ( | ) | const |
Definition at line 316 of file tpt_tokenindex.cc.
Referenced by conservative_sort(), main(), and save_vocabs().
id_type sapt::TokenIndex::ksize | ( | ) | const |
Definition at line 323 of file tpt_tokenindex.cc.
Referenced by main(), Counter::operator()(), writeTable(), and writeTableHeader().
void sapt::TokenIndex::open | ( | std::string | fname, | |
std::string | unkToken = "UNK", | |
bool | dyna = false | |||
) |
char const *const sapt::TokenIndex::operator[] | ( | id_type | id | ) |
Definition at line 186 of file tpt_tokenindex.cc.
References reverseIndex().
char const *const sapt::TokenIndex::operator[] | ( | id_type | id | ) | const |
Definition at line 154 of file tpt_tokenindex.cc.
References reverseIndex().
id_type sapt::TokenIndex::operator[] | ( | std::string const & | w | ) | const |
id_type sapt::TokenIndex::operator[] | ( | char const * | w | ) | const |
Definition at line 97 of file tpt_tokenindex.cc.
References sapt::TokenIndex::CompFunc::base, comp, endIdx, sapt::TokenIndex::Entry::id, sapt::TokenIndex::Entry::offset, and startIdx.
vector< char const * > sapt::TokenIndex::reverseIndex | ( | ) | const |
Definition at line 134 of file tpt_tokenindex.cc.
References sapt::TokenIndex::CompFunc::base, comp, endIdx, NULL, and startIdx.
Referenced by iniReverseIndex(), main(), operator[](), and toString().
bool sapt::TokenIndex::setDynamic | ( | bool | onoff | ) |
Definition at line 395 of file tpt_tokenindex.cc.
References NULL.
Referenced by main(), and open_vocab().
void sapt::TokenIndex::setUnkLabel | ( | std::string | unk | ) |
std::vector<id_type> sapt::TokenIndex::toIdSeq | ( | std::string const & | line | ) | const |
string sapt::TokenIndex::toString | ( | id_type const * | start, | |
id_type const *const | stop | |||
) | const |
Definition at line 250 of file tpt_tokenindex.cc.
References reverseIndex().
string sapt::TokenIndex::toString | ( | id_type const * | start, | |
id_type const *const | stop | |||
) |
Definition at line 233 of file tpt_tokenindex.cc.
References reverseIndex().
std::string sapt::TokenIndex::toString | ( | std::vector< id_type > const & | v | ) | const |
std::string sapt::TokenIndex::toString | ( | std::vector< id_type > const & | v | ) |
id_type sapt::TokenIndex::totalVocabSize | ( | ) | const |
Definition at line 330 of file tpt_tokenindex.cc.
References tsize().
Referenced by conservative_sort(), ini_cnt_vec(), main(), remap(), and save_vocabs().
id_type sapt::TokenIndex::tsize | ( | ) | const |
Definition at line 335 of file tpt_tokenindex.cc.
References NULL.
Referenced by sapt::snt_adder< L2R_Token< SimpleWordId > >::operator()(), and totalVocabSize().
void sapt::TokenIndex::write | ( | std::string | fname | ) |
Entry const* sapt::TokenIndex::endIdx |
bio::mapped_file_source sapt::TokenIndex::file |
Entry const* sapt::TokenIndex::startIdx |