/disk4/html/www/moses/doxygen/mosesdecoder/moses/TranslationModel/UG/filter-pt.cc File Reference

#include <cstring>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <fstream>
#include <sstream>
#include <vector>
#include <iostream>
#include <set>
#include <boost/thread/tss.hpp>
#include <boost/thread.hpp>
#include <boost/unordered_map.hpp>
#include <boost/program_options.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/foreach.hpp>
#include <unistd.h>
#include "mm/ug_bitext.h"

Go to the source code of this file.

Classes

class  Cache
struct  SA
struct  PTEntry
struct  PfeComparer
struct  NlogSigThresholder

Typedefs

typedef sapt::L2R_Token
< sapt::SimpleWordId > 
Token
typedef sapt::mmTtrack< Token > ttrack_t
typedef sapt::mmTSA< Token > tsa_t
typedef sapt::TokenIndex tind_t
typedef size_t TextLenType
typedef boost::shared_ptr
< std::vector< TextLenType > > 
SentIdSet

Functions

void usage ()
std::ostream & operator<< (std::ostream &os, const PTEntry &pp)
void print (int a, int b, int c, int d, float p)
double fisher_exact (int cfe, int ce, int cf)
template<class setType >
void ordered_set_intersect (setType &out, const setType set_1, const setType set_2)
void lookup_phrase (SentIdSet &ids, const std::string &phrase, tsa_t &my_sa, tind_t &my_v, Cache &cache)
void lookup_multiple_phrases (SentIdSet &ids, std::vector< std::string > &phrases, tsa_t &my_sa, tind_t &my_v, const std::string &rule, Cache &cache)
void find_occurrences (SentIdSet &ids, const std::string &rule, tsa_t &my_sa, tind_t &my_v, Cache &cache)
void compute_cooc_stats_and_filter (std::vector< PTEntry * > &options)
void filter_thread (std::istream *in, std::ostream *out, int pfe_index)
int main (int argc, char *argv[])

Variables

const size_t MINIMUM_SIZE_TO_KEEP = 10000
const std::string SEPARATOR = " ||| "
const double ALPHA_PLUS_EPS = -1000.0
const double ALPHA_MINUS_EPS = -2000.0
int pfe_filter_limit = 0
bool print_cooc_counts = false
bool print_neglog_significance = false
double sig_filter_limit = 0
bool pef_filter_only = false
bool hierarchical = false
double p_111 = 0.0
size_t pt_lines = 0
size_t nremoved_sigfilter = 0
size_t nremoved_pfefilter = 0
int num_lines
boost::mutex in_mutex
boost::mutex out_mutex
boost::mutex err_mutex
std::vector< boost::shared_ptr
< SA > > 
e_sas
std::vector< boost::shared_ptr
< SA > > 
f_sas


Typedef Documentation

typedef boost::shared_ptr<std::vector<TextLenType> > SentIdSet

Definition at line 69 of file filter-pt.cc.

typedef size_t TextLenType

Definition at line 67 of file filter-pt.cc.

typedef sapt::TokenIndex tind_t

Definition at line 59 of file filter-pt.cc.

typedef sapt::L2R_Token<sapt::SimpleWordId> Token

Definition at line 56 of file filter-pt.cc.

typedef sapt::mmTSA<Token> tsa_t

Definition at line 58 of file filter-pt.cc.

typedef sapt::mmTtrack<Token> ttrack_t

Definition at line 57 of file filter-pt.cc.


Function Documentation

void compute_cooc_stats_and_filter ( std::vector< PTEntry * > &  options  ) 

Definition at line 379 of file filter-pt.cc.

References e_sas, f_sas, find_occurrences(), fisher_exact(), nremoved_pfefilter, nremoved_sigfilter, ordered_set_intersect(), pef_filter_only, pfe_filter_limit, and sig_filter_limit.

Referenced by filter_thread().

Here is the call graph for this function:

Here is the caller graph for this function:

void filter_thread ( std::istream *  in,
std::ostream *  out,
int  pfe_index 
)

Definition at line 435 of file filter-pt.cc.

References compute_cooc_stats_and_filter(), e_sas, err_mutex, PTEntry::f_phrase, f_sas, in_mutex, nremoved_pfefilter, nremoved_sigfilter, out_mutex, and pt_lines.

Referenced by main().

Here is the call graph for this function:

Here is the caller graph for this function:

void find_occurrences ( SentIdSet &  ids,
const std::string &  rule,
tsa_t &  my_sa,
tind_t &  my_v,
Cache &  cache 
)

Definition at line 344 of file filter-pt.cc.

References hierarchical, lookup_multiple_phrases(), and lookup_phrase().

Referenced by compute_cooc_stats_and_filter().

Here is the call graph for this function:

Here is the caller graph for this function:

double fisher_exact ( int  cfe,
int  ce,
int  cf 
)

Definition at line 244 of file filter-pt.cc.

References n, and num_lines.

Referenced by compute_cooc_stats_and_filter(), and main().

Here is the caller graph for this function:

void lookup_multiple_phrases ( SentIdSet &  ids,
std::vector< std::string > &  phrases,
tsa_t &  my_sa,
tind_t &  my_v,
const std::string &  rule,
Cache &  cache 
)
Definition at line 312 of file filter-pt.cc.

References lookup_phrase(), and ordered_set_intersect().

Referenced by find_occurrences().

Here is the call graph for this function:

Here is the caller graph for this function:

void lookup_phrase ( SentIdSet &  ids,
const std::string &  phrase,
tsa_t &  my_sa,
tind_t &  my_v,
Cache &  cache 
)
Definition at line 280 of file filter-pt.cc.

References sapt::TokenIndex::fillIdSeq(), Cache::get(), I, k, m, MINIMUM_SIZE_TO_KEEP, Cache::put(), sort(), stop, and unique().

Referenced by find_occurrences(), and lookup_multiple_phrases().

Here is the call graph for this function:

Here is the caller graph for this function:

int main ( int  argc,
char *  argv[] 
)

std::ostream& operator<< ( std::ostream &  os,
const PTEntry &  pp 
)

template<class setType >
void ordered_set_intersect ( setType &  out,
const setType  set_1,
const setType  set_2 
) [inline]

Definition at line 273 of file filter-pt.cc.

Referenced by compute_cooc_stats_and_filter(), and lookup_multiple_phrases().

Here is the caller graph for this function:

void print ( int  a,
int  b,
int  c,
int  d,
float  p 
)

Definition at line 235 of file filter-pt.cc.

void usage (  ) 

Definition at line 135 of file filter-pt.cc.

Referenced by main().

Here is the caller graph for this function:


Variable Documentation

const double ALPHA_MINUS_EPS = -2000.0

Definition at line 40 of file filter-pt.cc.

Referenced by main().

const double ALPHA_PLUS_EPS = -1000.0

Definition at line 39 of file filter-pt.cc.

Referenced by main().

std::vector<boost::shared_ptr<SA> > e_sas

Definition at line 130 of file filter-pt.cc.

Referenced by compute_cooc_stats_and_filter(), filter_thread(), and main().

boost::mutex err_mutex

Definition at line 65 of file filter-pt.cc.

Referenced by filter_thread().

std::vector<boost::shared_ptr<SA> > f_sas

Definition at line 131 of file filter-pt.cc.

Referenced by compute_cooc_stats_and_filter(), filter_thread(), and main().

bool hierarchical = false

Definition at line 49 of file filter-pt.cc.

Referenced by find_occurrences(), and main().

boost::mutex in_mutex

Definition at line 63 of file filter-pt.cc.

Referenced by filter_thread().

const size_t MINIMUM_SIZE_TO_KEEP = 10000

Definition at line 35 of file filter-pt.cc.

Referenced by lookup_phrase().

size_t nremoved_pfefilter = 0

Definition at line 54 of file filter-pt.cc.

Referenced by compute_cooc_stats_and_filter(), filter_thread(), and main().

size_t nremoved_sigfilter = 0

Definition at line 53 of file filter-pt.cc.

Referenced by compute_cooc_stats_and_filter(), filter_thread(), and main().

int num_lines

Definition at line 61 of file filter-pt.cc.

Referenced by fisher_exact(), and main().

boost::mutex out_mutex

Definition at line 64 of file filter-pt.cc.

Referenced by filter_thread().

double p_111 = 0.0

Definition at line 51 of file filter-pt.cc.

Referenced by main().

bool pef_filter_only = false

Definition at line 48 of file filter-pt.cc.

Referenced by compute_cooc_stats_and_filter(), and main().

int pfe_filter_limit = 0

Definition at line 43 of file filter-pt.cc.

Referenced by compute_cooc_stats_and_filter(), and main().

bool print_cooc_counts = false

Definition at line 44 of file filter-pt.cc.

Referenced by main(), and operator<<().

bool print_neglog_significance = false

Definition at line 45 of file filter-pt.cc.

Referenced by main(), and operator<<().

size_t pt_lines = 0

Definition at line 52 of file filter-pt.cc.

Referenced by filter_thread(), and main().

const std::string SEPARATOR = " ||| "

Definition at line 37 of file filter-pt.cc.

Referenced by PTEntry::PTEntry().

double sig_filter_limit = 0

Definition at line 46 of file filter-pt.cc.

Referenced by compute_cooc_stats_and_filter(), and main().


Generated on Thu Jul 6 00:31:33 2017 for Moses by  doxygen 1.5.9