#include "mm/ug_bitext.h"
#include <boost/format.hpp>
#include <unicode/translit.h>
#include <unicode/utypes.h>
#include <unicode/unistr.h>
#include <unicode/uchar.h>
#include <unicode/utf8.h>
#include "moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h"
Go to the source code of this file.
Classes | |
struct | SinglePhrase |
struct | PhrasePair2 |
struct | PhrasePair2::stats_t |
struct | SortByPositionInCorpus |
struct | RowIndexSorter |
struct | ColIndexSorter |
class | npmi_scorer1< Token > |
class | Alnhyp |
struct | ahyp |
struct | AlnPoint |
class | AlnMatrix |
struct | alink |
Namespaces | |
namespace | stats |
Defines | |
#define | smooth 1 |
Typedefs | |
typedef L2R_Token< SimpleWordId > | Token |
typedef mmTtrack< Token > | ttrack_t |
typedef mmTSA< Token > | tsa_t |
typedef vector < Moses::bitext::PhrasePair < Token > > | pplist_t |
typedef pair< ushort, ushort > | span_t |
Functions | |
float | stats::pmi (size_t j, size_t m1, size_t m2, size_t N) |
float | stats::npmi (size_t j, size_t m1, size_t m2, size_t N) |
float | stats::mi (size_t j, size_t m1, size_t m2, size_t N) |
void | getoccs (tsa_t::tree_iterator const &m, vector< ttrack::Position > &occs) |
void | lookup_phrases (vector< id_type > const &snt, TokenIndex &V, ttrack_t const &T, tsa_t const &I, SinglePhrase::cache_t &cache, vector< vector< SPTR< SinglePhrase > > > &dest) |
size_t | lcs (string const a, string const b) |
size_t | mapstring (string const &utf8, UnicodeString &U, vector< int > &c2w, vector< int > *wlen=NULL) |
void | align_letters (UnicodeString const &A, vector< int > const &a2p, UnicodeString const &B, vector< int > const &b2p, vector< vector< int > > &W) |
void | map_back (vector< vector< int > > const &W, vector< vector< int > > &X, vector< uchar > const &aln) |
void | trymatch3 (vector< PhrasePair< Token > > const &tcands, UnicodeString const &T, size_t const tlen, vector< int > const &t2p, TokenIndex const &V2, vector< vector< int > > &X) |
void | minmatch_filter (vector< vector< int > > &X, vector< int > const &len1, vector< int > const &len2) |
void | trymatch2 (TokenIndex &V1, TokenIndex &V2, string const &source, string const &target, vector< PhrasePair< Token > > const *const tcands, vector< vector< int > > &X) |
bool | overlap (span_t const &a, span_t const &b) |
int | main (int argc, char *argv[]) |
Variables | |
TokenIndex | V1 |
TokenIndex | V2 |
boost::shared_ptr< ttrack_t > | T1 |
boost::shared_ptr< ttrack_t > | T2 |
tsa_t | I1 |
tsa_t | I2 |
mmBitext< Token > | BT |
float | lbop_level = .05 |
SinglePhrase::cache_t | cache1 |
SinglePhrase::cache_t | cache2 |
PhrasePair2::stats_t::cache_t | ppcache |
#define smooth 1 |
Definition at line 28 of file try-align2.cc.
typedef vector<Moses::bitext::PhrasePair<Token> > pplist_t |
Definition at line 19 of file try-align2.cc.
typedef pair<ushort,ushort> span_t |
Definition at line 20 of file try-align2.cc.
typedef L2R_Token<SimpleWordId> Token |
Definition at line 16 of file try-align2.cc.
typedef mmTSA<Token> tsa_t |
Definition at line 18 of file try-align2.cc.
typedef mmTtrack<Token> ttrack_t |
Definition at line 17 of file try-align2.cc.
void align_letters | ( | UnicodeString const & | A, | |
vector< int > const & | a2p, | |||
UnicodeString const & | B, | |||
vector< int > const & | b2p, | |||
vector< vector< int > > & | W | |||
) |
Definition at line 315 of file try-align2.cc.
Referenced by trymatch2(), and trymatch3().
void getoccs | ( | tsa_t::tree_iterator const & | m, | |
vector< ttrack::Position > & | occs | |||
) |
Definition at line 168 of file try-align2.cc.
References sapt::TSA_tree_iterator< TKN >::approxOccurrenceCount(), I, sapt::TSA_tree_iterator< TKN >::lower_bound(), sapt::TSA_tree_iterator< TKN >::root, sort(), stop, and sapt::TSA_tree_iterator< TKN >::upper_bound().
size_t lcs | ( | string const | a, | |
string const | b | |||
) |
Definition at line 270 of file try-align2.cc.
void lookup_phrases | ( | vector< id_type > const & | snt, | |
TokenIndex & | V, | |||
ttrack_t const & | T, | |||
tsa_t const & | I, | |||
SinglePhrase::cache_t & | cache, | |||
vector< vector< SPTR< SinglePhrase > > > & | dest | |||
) |
Definition at line 183 of file try-align2.cc.
References sapt::TSA_tree_iterator< TKN >::approxOccurrenceCount(), sapt::TSA_tree_iterator< TKN >::extend(), getoccs(), sapt::TSA_tree_iterator< TKN >::getPid(), k, and m.
int main | ( | int | argc, | |
char * | argv[] | |||
) |
void map_back | ( | vector< vector< int > > const & | W, | |
vector< vector< int > > & | X, | |||
vector< uchar > const & | aln | |||
) |
Definition at line 342 of file try-align2.cc.
References k.
Referenced by trymatch3().
size_t mapstring | ( | string const & | utf8, | |
UnicodeString & | U, | |||
vector< int > & | c2w, | |||
vector< int > * | wlen = NULL | |||
) |
Definition at line 294 of file try-align2.cc.
References k, and stringdist::strip_accents().
Referenced by trymatch2(), and trymatch3().
void minmatch_filter | ( | vector< vector< int > > & | X, | |
vector< int > const & | len1, | |||
vector< int > const & | len2 | |||
) |
Definition at line 386 of file try-align2.cc.
Referenced by trymatch2().
bool overlap | ( | span_t const & | a, | |
span_t const & | b | |||
) |
Definition at line 555 of file try-align2.cc.
Referenced by Rule::Prevalidate().
void trymatch2 | ( | TokenIndex & | V1, | |
TokenIndex & | V2, | |||
string const & | source, | |||
string const & | target, | |||
vector< PhrasePair< Token > > const *const | tcands, | |||
vector< vector< int > > & | X | |||
) |
Definition at line 436 of file try-align2.cc.
References align_letters(), mapstring(), minmatch_filter(), T, and trymatch3().
Referenced by main().
void trymatch3 | ( | vector< PhrasePair< Token > > const & | tcands, | |
UnicodeString const & | T, | |||
size_t const | tlen, | |||
vector< int > const & | t2p, | |||
TokenIndex const & | V2, | |||
vector< vector< int > > & | X | |||
) |
Definition at line 357 of file try-align2.cc.
References align_letters(), map_back(), mapstring(), and sapt::toString().
Referenced by trymatch2().
mmBitext<Token> BT |
Definition at line 25 of file try-align2.cc.
Referenced by main(), and npmi_scorer1< Token >::operator()().
SinglePhrase::cache_t cache1 |
Definition at line 152 of file try-align2.cc.
SinglePhrase::cache_t cache2 |
Definition at line 152 of file try-align2.cc.
tsa_t I1 |
Definition at line 24 of file try-align2.cc.
tsa_t I2 |
Definition at line 24 of file try-align2.cc.
float lbop_level = .05 |
Definition at line 27 of file try-align2.cc.
PhrasePair2::stats_t::cache_t ppcache |
Definition at line 153 of file try-align2.cc.
boost::shared_ptr<ttrack_t> T1 |
Definition at line 23 of file try-align2.cc.
boost::shared_ptr<ttrack_t> T2 |
Definition at line 23 of file try-align2.cc.
TokenIndex V1 |
Definition at line 22 of file try-align2.cc.
TokenIndex V2 |
Definition at line 22 of file try-align2.cc.