#include <ug_bitext.h>
Classes | |
class | agenda |
Public Types | |
typedef TKN | Token |
typedef TSA< Token >::tree_iterator | iter |
typedef std::vector < PhrasePair< Token > > | vec_ppair |
typedef lru_cache::LRU_Cache < uint64_t, vec_ppair > | pplist_cache_t |
typedef TSA< Token > | tsa |
Public Member Functions | |
bool | find_trg_phr_bounds (PhraseExtractionRecord &rec) const |
Given the source phrase sid[start:stop], find the possible start (s1..s2) and end (e1..e2) points of the target phrase. | |
bool | find_trg_phr_bounds (size_t const sid, size_t const start, size_t const stop, size_t &s1, size_t &s2, size_t &e1, size_t &e2, int &po_fwd, int &po_bwd, std::vector< unsigned char > *core_alignment, bitvector *full_alignment, bool const flip) const |
SPTR< pstats > | prep2 (iter const &phrase, int max_sample=-1) const |
SPTR< pstats > | prep2 (ttasksptr const &ttask, iter const &phrase, bool const track_sids, int max_sample=-1) const |
virtual void | open (std::string const base, std::string const L1, std::string const L2)=0 |
SPTR< pstats > | lookup (iter const &phrase, int max_sample=-1) const |
void | prep (iter const &phrase) const |
SPTR< pstats > | lookup (ttasksptr const &ttask, iter const &phrase, int max_sample=-1) const |
void | prep (ttasksptr const &ttask, iter const &phrase, bool const track_sids) const |
void | setDefaultSampleSize (size_t const max_samples) |
size_t | getDefaultSampleSize () const |
std::string | toString (uint64_t pid, int isL2) const |
virtual size_t | revision () const |
SPTR< SentenceBias > | loadSentenceBias (std::string const &fname) const |
SPTR< DocumentBias > | SetupDocumentBias (std::string const &bserver, std::string const &text, std::ostream *log) const |
SPTR< DocumentBias > | SetupDocumentBias (std::map< std::string, float > context_weights, std::ostream *log) const |
void | mark_match (Token const *start, Token const *end, iter const &m, bitvector &check) const |
void | write_yawat_alignment (id_type const sid, iter const *m1, iter const *m2, std::ostream &out) const |
std::string | sid2docname (id_type const sid) const |
std::string | docid2name (id_type const sid) const |
int | docname2docid (std::string const &name) const |
std::vector< id_type > const * | sid2did () const |
int | sid2did (uint32_t sid) const |
Public Attributes | |
SPTR< Ttrack< char > > | Tx |
SPTR< Ttrack< Token > > | T1 |
SPTR< Ttrack< Token > > | T2 |
SPTR< TokenIndex > | V1 |
SPTR< TokenIndex > | V2 |
SPTR< TSA< Token > > | I1 |
SPTR< TSA< Token > > | I2 |
Protected Member Functions | |
Bitext (size_t const max_sample=1000, size_t const xnum_workers=16) | |
Bitext (Ttrack< Token > *const t1, Ttrack< Token > *const t2, Ttrack< char > *const tx, TokenIndex *const v1, TokenIndex *const v2, TSA< Token > *const i1, TSA< Token > *const i2, size_t const max_sample=1000, size_t const xnum_workers=16) | |
Protected Attributes | |
boost::shared_mutex | m_lock |
SPTR< agenda > | ag |
size_t | m_num_workers |
size_t | m_default_sample_size |
size_t | m_pstats_cache_threshold |
SPTR< pstats::cache_t > | m_cache1 |
SPTR< pstats::cache_t > | m_cache2 |
std::vector< std::string > | m_docname |
std::map< std::string, id_type > | m_docname2docid |
SPTR< std::vector< id_type > > | m_sid2docid |
pplist_cache_t | m_pplist_cache1 |
pplist_cache_t | m_pplist_cache2 |
Friends | |
class | BitextSampler |
class | Moses::Mmsapt |
Definition at line 105 of file ug_bitext.h.
typedef TSA<Token>::tree_iterator sapt::Bitext< TKN >::iter |
Definition at line 110 of file ug_bitext.h.
typedef lru_cache::LRU_Cache<uint64_t, vec_ppair> sapt::Bitext< TKN >::pplist_cache_t |
Definition at line 112 of file ug_bitext.h.
typedef TKN sapt::Bitext< TKN >::Token |
Definition at line 109 of file ug_bitext.h.
typedef TSA<Token> sapt::Bitext< TKN >::tsa |
Definition at line 113 of file ug_bitext.h.
typedef std::vector<PhrasePair<Token> > sapt::Bitext< TKN >::vec_ppair |
Definition at line 111 of file ug_bitext.h.
sapt::Bitext< Token >::Bitext | ( | size_t const | max_sample = 1000, | |
size_t const | xnum_workers = 16 | |||
) | [inline, protected] |
Definition at line 876 of file ug_bitext.h.
sapt::Bitext< Token >::Bitext | ( | Ttrack< Token > *const | t1, | |
Ttrack< Token > *const | t2, | |||
Ttrack< char > *const | tx, | |||
TokenIndex *const | v1, | |||
TokenIndex *const | v2, | |||
TSA< Token > *const | i1, | |||
TSA< Token > *const | i2, | |||
size_t const | max_sample = 1000, | |||
size_t const | xnum_workers = 16 | |||
) | [inline, protected] |
Definition at line 886 of file ug_bitext.h.
std::string sapt::Bitext< Token >::docid2name | ( | id_type const | sid | ) | const [inline] |
Definition at line 782 of file ug_bitext.h.
Referenced by print_evidence_list().
int sapt::Bitext< Token >::docname2docid | ( | std::string const & | name | ) | const [inline] |
Definition at line 771 of file ug_bitext.h.
bool sapt::Bitext< Token >::find_trg_phr_bounds | ( | size_t const | sid, | |
size_t const | start, | |||
size_t const | stop, | |||
size_t & | s1, | |||
size_t & | s2, | |||
size_t & | e1, | |||
size_t & | e2, | |||
int & | po_fwd, | |||
int & | po_bwd, | |||
std::vector< unsigned char > * | core_alignment, | |||
bitvector * | full_alignment, | |||
bool const | flip | |||
) | const [inline] |
Definition at line 936 of file ug_bitext.h.
bool sapt::Bitext< Token >::find_trg_phr_bounds | ( | PhraseExtractionRecord & | rec | ) | const [inline] |
size_t sapt::Bitext< Token >::getDefaultSampleSize | ( | ) | const [inline] |
Definition at line 856 of file ug_bitext.h.
SPTR< SentenceBias > sapt::Bitext< Token >::loadSentenceBias | ( | std::string const & | fname | ) | const [inline] |
Definition at line 823 of file ug_bitext.h.
SPTR< pstats > sapt::Bitext< Token >::lookup | ( | ttasksptr const & | ttask, | |
iter const & | phrase, | |||
int | max_sample = -1 | |||
) | const [inline] |
SPTR<pstats> sapt::Bitext< TKN >::lookup | ( | iter const & | phrase, | |
int | max_sample = -1 | |||
) | const |
void sapt::Bitext< Token >::mark_match | ( | Token const * | start, | |
Token const * | end, | |||
iter const & | m, | |||
bitvector & | check | |||
) | const [inline] |
Definition at line 1193 of file ug_bitext.h.
virtual void sapt::Bitext< TKN >::open | ( | std::string const | base, | |
std::string const | L1, | |||
std::string const | L2 | |||
) | [pure virtual] |
Implemented in sapt::imBitext< TKN >, and sapt::mmBitext< TKN >.
void sapt::Bitext< Token >::prep | ( | ttasksptr const & | ttask, | |
iter const & | phrase, | |||
bool const | track_sids | |||
) | const [inline] |
Definition at line 33 of file ug_bitext_moses.h.
References sapt::Bitext< TKN >::m_default_sample_size, and sapt::Bitext< TKN >::prep2().
void sapt::Bitext< Token >::prep | ( | iter const & | phrase | ) | const [inline] |
Definition at line 1076 of file ug_bitext.h.
SPTR< pstats > sapt::Bitext< Token >::prep2 | ( | ttasksptr const & | ttask, | |
iter const & | phrase, | |||
bool const | track_sids, | |||
int | max_sample = -1 | |||
) | const [inline] |
Definition at line 47 of file ug_bitext_moses.h.
References sapt::TSA_tree_iterator< TKN >::approxOccurrenceCount(), sapt::TSA_tree_iterator< TKN >::getPid(), I1, NULL, sapt::TSA_tree_iterator< TKN >::root, and UTIL_THROW_IF2.
SPTR< pstats > sapt::Bitext< Token >::prep2 | ( | iter const & | phrase, | |
int | max_sample = -1 | |||
) | const [inline] |
Definition at line 1091 of file ug_bitext.h.
Referenced by sapt::Bitext< TKN >::prep().
virtual size_t sapt::Bitext< TKN >::revision | ( | ) | const [inline, virtual] |
void sapt::Bitext< Token >::setDefaultSampleSize | ( | size_t const | max_samples | ) | [inline] |
Definition at line 863 of file ug_bitext.h.
SPTR< DocumentBias > sapt::Bitext< Token >::SetupDocumentBias | ( | std::map< std::string, float > | context_weights, | |
std::ostream * | log | |||
) | const [inline] |
Definition at line 1063 of file ug_bitext.h.
SPTR< DocumentBias > sapt::Bitext< Token >::SetupDocumentBias | ( | std::string const & | bserver, | |
std::string const & | text, | |||
std::ostream * | log | |||
) | const [inline] |
Definition at line 1049 of file ug_bitext.h.
int sapt::Bitext< Token >::sid2did | ( | uint32_t | sid | ) | const [inline] |
Definition at line 812 of file ug_bitext.h.
std::vector< id_type > const * sapt::Bitext< Token >::sid2did | ( | ) | const [inline] |
Definition at line 804 of file ug_bitext.h.
Referenced by main().
std::string sapt::Bitext< Token >::sid2docname | ( | id_type const | sid | ) | const [inline] |
Definition at line 793 of file ug_bitext.h.
Referenced by main().
std::string sapt::Bitext< Token >::toString | ( | uint64_t | pid, | |
int | isL2 | |||
) | const [inline] |
Definition at line 838 of file ug_bitext.h.
void sapt::Bitext< Token >::write_yawat_alignment | ( | id_type const | sid, | |
iter const * | m1, | |||
iter const * | m2, | |||
std::ostream & | out | |||
) | const [inline] |
Definition at line 1223 of file ug_bitext.h.
friend class BitextSampler [friend] |
Definition at line 108 of file ug_bitext.h.
friend class Moses::Mmsapt [friend] |
Definition at line 114 of file ug_bitext.h.
SPTR<agenda> sapt::Bitext< TKN >::ag [mutable, protected] |
Definition at line 118 of file ug_bitext.h.
SPTR<TSA<Token> > sapt::Bitext< TKN >::I1 |
SPTR<TSA<Token> > sapt::Bitext< TKN >::I2 |
SPTR<pstats::cache_t> sapt::Bitext< TKN >::m_cache1 [protected] |
Definition at line 124 of file ug_bitext.h.
SPTR<pstats::cache_t> sapt::Bitext< TKN >::m_cache2 [protected] |
Definition at line 124 of file ug_bitext.h.
size_t sapt::Bitext< TKN >::m_default_sample_size [protected] |
std::vector<std::string> sapt::Bitext< TKN >::m_docname [protected] |
Definition at line 126 of file ug_bitext.h.
std::map<std::string,id_type> sapt::Bitext< TKN >::m_docname2docid [protected] |
Definition at line 127 of file ug_bitext.h.
boost::shared_mutex sapt::Bitext< TKN >::m_lock [mutable, protected] |
Definition at line 116 of file ug_bitext.h.
size_t sapt::Bitext< TKN >::m_num_workers [protected] |
Definition at line 120 of file ug_bitext.h.
pplist_cache_t sapt::Bitext< TKN >::m_pplist_cache1 [mutable, protected] |
Definition at line 130 of file ug_bitext.h.
pplist_cache_t sapt::Bitext< TKN >::m_pplist_cache2 [mutable, protected] |
Definition at line 130 of file ug_bitext.h.
size_t sapt::Bitext< TKN >::m_pstats_cache_threshold [protected] |
Definition at line 123 of file ug_bitext.h.
SPTR<std::vector<id_type> > sapt::Bitext< TKN >::m_sid2docid [protected] |
SPTR<Ttrack<Token> > sapt::Bitext< TKN >::T1 |
Definition at line 135 of file ug_bitext.h.
Referenced by sapt::PScoreLex1< Token >::operator()(), sapt::PScoreLengthRatio< Token >::operator()(), and sapt::Bitext< TKN >::agenda::worker::worker().
SPTR<Ttrack<Token> > sapt::Bitext< TKN >::T2 |
Definition at line 136 of file ug_bitext.h.
Referenced by sapt::PScoreLex1< Token >::operator()(), and sapt::PScoreLengthRatio< Token >::operator()().
SPTR<Ttrack<char> > sapt::Bitext< TKN >::Tx |
Definition at line 134 of file ug_bitext.h.
SPTR<TokenIndex> sapt::Bitext< TKN >::V1 |
Definition at line 137 of file ug_bitext.h.
Referenced by main(), sapt::PScoreLex1< Token >::operator()(), show(), and show_pair().
SPTR<TokenIndex> sapt::Bitext< TKN >::V2 |
Definition at line 138 of file ug_bitext.h.
Referenced by main(), sapt::PScoreLex1< Token >::operator()(), and show_pair().