#include <tpt_tokenindex.h>
Classes | |
class | CompFunc |
class | Entry |
Public Member Functions | |
TokenIndex (std::string unkToken="UNK") | |
void | open (std::string fname, std::string unkToken="UNK", bool dyna=false) |
void | close () |
id_type | operator[] (char const *w) const |
id_type | operator[] (std::string const &w) const |
char const *const | operator[] (id_type id) const |
char const *const | operator[] (id_type id) |
std::vector< char const * > | reverseIndex () const |
std::string | toString (std::vector< id_type > const &v) |
std::string | toString (std::vector< id_type > const &v) const |
std::string | toString (id_type const *start, id_type const *const stop) |
std::string | toString (id_type const *start, id_type const *const stop) const |
std::vector< id_type > | toIdSeq (std::string const &line) const |
bool | fillIdSeq (std::string const &line, std::vector< id_type > &v) const |
void | iniReverseIndex () |
id_type | getNumTokens () const |
id_type | getUnkId () const |
id_type | knownVocabSize () const |
id_type | totalVocabSize () const |
id_type | ksize () const |
id_type | tsize () const |
char const *const | getUnkToken () const |
void | write (std::string fname) |
bool | isDynamic () const |
bool | setDynamic (bool onoff) |
void | setUnkLabel (std::string unk) |
Public Attributes | |
bio::mapped_file_source | file |
Entry const * | startIdx |
Entry const * | endIdx |
CompFunc | comp |
Definition at line 28 of file tpt_tokenindex.h.
ugdiss::TokenIndex::TokenIndex | ( | std::string | unkToken = "UNK" | ) |
void ugdiss::TokenIndex::close | ( | ) |
Definition at line 76 of file tpt_tokenindex.cc.
References file.
Referenced by write_tokenindex().
bool ugdiss::TokenIndex::fillIdSeq | ( | std::string const & | line, | std::vector< id_type > & | v | ) | const |
id_type ugdiss::TokenIndex::getNumTokens | ( | ) | const |
Definition at line 293 of file tpt_tokenindex.cc.
id_type ugdiss::TokenIndex::getUnkId | ( | ) | const |
Definition at line 300 of file tpt_tokenindex.cc.
char const *const ugdiss::TokenIndex::getUnkToken | ( | ) | const |
Definition at line 307 of file tpt_tokenindex.cc.
void ugdiss::TokenIndex::iniReverseIndex | ( | ) |
Definition at line 173 of file tpt_tokenindex.cc.
References reverseIndex().
Referenced by main().
bool ugdiss::TokenIndex::isDynamic | ( | ) | const |
Definition at line 387 of file tpt_tokenindex.cc.
id_type ugdiss::TokenIndex::knownVocabSize | ( | ) | const |
Definition at line 315 of file tpt_tokenindex.cc.
Referenced by conservative_sort(), main(), and save_vocabs().
id_type ugdiss::TokenIndex::ksize | ( | ) | const |
Definition at line 322 of file tpt_tokenindex.cc.
Referenced by main(), Counter::operator()(), writeTable(), and writeTableHeader().
void ugdiss::TokenIndex::open | ( | std::string | fname, | std::string | unkToken = "UNK", | bool | dyna = false | ) |
Referenced by main(), IBM1::open(), open_bitext(), and open_vocab().
char const *const ugdiss::TokenIndex::operator[] | ( | id_type | id | ) |
Definition at line 185 of file tpt_tokenindex.cc.
References reverseIndex().
char const *const ugdiss::TokenIndex::operator[] | ( | id_type | id | ) | const |
Definition at line 153 of file tpt_tokenindex.cc.
References reverseIndex().
id_type ugdiss::TokenIndex::operator[] | ( | std::string const & | w | ) | const |
id_type ugdiss::TokenIndex::operator[] | ( | char const * | w | ) | const |
Definition at line 96 of file tpt_tokenindex.cc.
References ugdiss::TokenIndex::CompFunc::base, comp, endIdx, ugdiss::TokenIndex::Entry::id, ugdiss::TokenIndex::Entry::offset, and startIdx.
vector< char const * > ugdiss::TokenIndex::reverseIndex | ( | ) | const |
Definition at line 133 of file tpt_tokenindex.cc.
References ugdiss::TokenIndex::CompFunc::base, comp, endIdx, NULL, and startIdx.
Referenced by iniReverseIndex(), main(), operator[](), and toString().
bool ugdiss::TokenIndex::setDynamic | ( | bool | onoff | ) |
Definition at line 394 of file tpt_tokenindex.cc.
References NULL.
Referenced by main(), open_bitext(), and open_vocab().
void ugdiss::TokenIndex::setUnkLabel | ( | std::string | unk | ) |
std::vector<id_type> ugdiss::TokenIndex::toIdSeq | ( | std::string const & | line | ) | const |
Definition at line 249 of file tpt_tokenindex.cc.
References reverseIndex().
Definition at line 232 of file tpt_tokenindex.cc.
References reverseIndex().
std::string ugdiss::TokenIndex::toString | ( | std::vector< id_type > const & | v | ) | const |
std::string ugdiss::TokenIndex::toString | ( | std::vector< id_type > const & | v | ) |
id_type ugdiss::TokenIndex::totalVocabSize | ( | ) | const |
Definition at line 329 of file tpt_tokenindex.cc.
References tsize().
Referenced by conservative_sort(), ini_cnt_vec(), main(), remap(), and save_vocabs().
id_type ugdiss::TokenIndex::tsize | ( | ) | const |
Definition at line 334 of file tpt_tokenindex.cc.
References NULL.
Referenced by Moses::bitext::snt_adder< L2R_Token< SimpleWordId > >::operator()(), and totalVocabSize().
void ugdiss::TokenIndex::write | ( | std::string | fname | ) |
Entry const* ugdiss::TokenIndex::endIdx |
bio::mapped_file_source ugdiss::TokenIndex::file |