/disk4/html/www/moses/doxygen/mosesdecoder/moses/TranslationModel/UG/mm/mtt-build.cc File Reference

#include <boost/algorithm/string/predicate.hpp>
#include <boost/program_options.hpp>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/parsers.hpp>
#include <boost/program_options/variables_map.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <string>
#include <sys/types.h>
#include <sys/wait.h>
#include "ug_conll_record.h"
#include "tpt_tokenindex.h"
#include "ug_mm_ttrack.h"
#include "tpt_pickler.h"
#include "ug_deptree.h"
#include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h"
#include "moses/TranslationModel/UG/mm/ug_im_tsa.h"

Go to the source code of this file.

Functions

void interpret_args (int ac, char *av[])
uchar rangeCheck (int p, int limit)
id_type get_id (TokenIndex const &T, string const &w)
void open_vocab (TokenIndex &T, string fname)
void ini_cnt_vec (TokenIndex const &T, vector< pair< string, size_t > > &v)
void write_tokenindex (string fname, TokenIndex &T, vector< id_type > const &n2o)
void init (int argc, char *argv[])
void fill_rec (Conll_Record &rec, vector< string > const &w)
void log_progress (size_t ctr)
size_t process_plain_input (ostream &out, vector< id_type > &s_index)
size_t process_tagged_input (ostream &out, vector< id_type > &s_index, vector< id_type > &p_index)
size_t numberize ()
void invert (vector< id_type > const &from, vector< id_type > &to)
void conservative_sort (TokenIndex const &V, vector< size_t > const &cnt, vector< id_type > &xmap)
void remap ()
void save_vocabs ()
template<typename Token >
void build_mmTSA (string infile, string outfile)
bool build_plaintext_tsas ()
void build_conll_tsas ()
int main (int argc, char *argv[])

Variables

int with_pfas
int with_dcas
int with_sfas
bool incremental = false
bool is_conll = false
bool quiet = false
string vocabBase
string baseName
string tmpFile
string mttFile
string UNK
TokenIndex SF
TokenIndex LM
TokenIndex PS
TokenIndex DT
vector< id_type > smap
vector< id_type > lmap
vector< id_type > pmap
vector< id_type > dmap


Function Documentation

void build_conll_tsas (  ) 

Definition at line 387 of file mtt-build.cc.

References baseName, mtt, tmpFile, with_dcas, with_pfas, and with_sfas.

Referenced by main().

Here is the caller graph for this function:

template<typename Token >
void build_mmTSA ( string  infile,
string  outfile 
) [inline]

Definition at line 363 of file mtt-build.cc.

References NULL, quiet, sapt::imTSA< TOKEN >::save_as_mm_tsa(), and T.

Here is the call graph for this function:

bool build_plaintext_tsas (  ) 

Definition at line 376 of file mtt-build.cc.

References baseName, tmpFile, with_pfas, and with_sfas.

Referenced by main().

Here is the caller graph for this function:

void conservative_sort ( TokenIndex const &  V,
vector< size_t > const &  cnt,
vector< id_type > &  xmap 
)

Definition at line 276 of file mtt-build.cc.

References sapt::TokenIndex::knownVocabSize(), sort(), sorter, and sapt::TokenIndex::totalVocabSize().

Referenced by remap().

Here is the call graph for this function:

Here is the caller graph for this function:

void fill_rec ( Conll_Record &  rec,
vector< string > const &  w 
)

Definition at line 131 of file mtt-build.cc.

References sapt::Conll_Record::dtype, get_id(), sapt::Conll_Record::lemma, sapt::Conll_Record::majpos, sapt::Conll_Record::minpos, sapt::Conll_Record::parent, rangeCheck(), and sapt::Conll_Record::sform.

Referenced by process_tagged_input().

Here is the call graph for this function:

Here is the caller graph for this function:

id_type get_id ( TokenIndex const &  T,
string const &  w 
)

void ini_cnt_vec ( TokenIndex const &  T,
vector< pair< string, size_t > > &  v 
)

Definition at line 91 of file mtt-build.cc.

References sapt::TokenIndex::totalVocabSize().

Here is the call graph for this function:

void init ( int  argc,
char *  argv[] 
)

void interpret_args ( int  ac,
char *  av[] 
)

Definition at line 26 of file mam2symal.cc.

void invert ( vector< id_type > const &  from,
vector< id_type > &  to 
)

Definition at line 266 of file mtt-build.cc.

Referenced by main(), and remap().

Here is the caller graph for this function:

void log_progress ( size_t  ctr  ) 

Definition at line 155 of file mtt-build.cc.

Referenced by process_plain_input(), and process_tagged_input().

Here is the caller graph for this function:

int main ( int  argc,
char *  argv[] 
)

size_t numberize (  ) 

Definition at line 224 of file mtt-build.cc.

References index, is_conll, tpt::numwrite(), process_plain_input(), process_tagged_input(), quiet, and tmpFile.

Referenced by main().

Here is the call graph for this function:

Here is the caller graph for this function:

void open_vocab ( TokenIndex &  T,
string  fname 
)

Definition at line 77 of file mtt-build.cc.

References incremental, sapt::TokenIndex::open(), sapt::TokenIndex::setDynamic(), sapt::TokenIndex::setUnkLabel(), and UNK.

Referenced by init().

Here is the call graph for this function:

Here is the caller graph for this function:

size_t process_plain_input ( ostream &  out,
vector< id_type > &  s_index 
)

Definition at line 170 of file mtt-build.cc.

References get_id(), log_progress(), tpt::numwrite(), and quiet.

Referenced by numberize().

Here is the call graph for this function:

Here is the caller graph for this function:

size_t process_tagged_input ( ostream &  out,
vector< id_type > &  s_index,
vector< id_type > &  p_index 
)

Definition at line 190 of file mtt-build.cc.

References fill_rec(), log_progress(), quiet, and starts_with().

Referenced by numberize().

Here is the call graph for this function:

Here is the caller graph for this function:

uchar rangeCheck ( int  p,
int  limit 
) [inline]

Definition at line 61 of file mtt-build.cc.

Referenced by fill_rec().

Here is the caller graph for this function:

void remap (  ) 

Definition at line 288 of file mtt-build.cc.

References conservative_sort(), dmap, invert(), is_conll, lmap, mtt, tpt::numread(), pmap, quiet, sform, smap, tmpFile, and sapt::TokenIndex::totalVocabSize().

Referenced by main().

Here is the call graph for this function:

Here is the caller graph for this function:

void save_vocabs (  ) 

Definition at line 343 of file mtt-build.cc.

References baseName, dmap, is_conll, sapt::TokenIndex::knownVocabSize(), lmap, pmap, smap, sapt::TokenIndex::totalVocabSize(), and write_tokenindex().

Referenced by main().

Here is the call graph for this function:

Here is the caller graph for this function:

void write_tokenindex ( string  fname,
TokenIndex &  T,
vector< id_type > const &  n2o 
)

Definition at line 102 of file mtt-build.cc.

References sapt::TokenIndex::close(), quiet, sort(), UNK, and sapt::write_tokenindex_to_disk().

Referenced by save_vocabs().

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

string baseName

Definition at line 48 of file mtt-build.cc.

Referenced by build_conll_tsas(), build_plaintext_tsas(), and save_vocabs().

vector<id_type> dmap

Definition at line 263 of file mtt-build.cc.

Referenced by remap(), and save_vocabs().

TokenIndex DT

Definition at line 57 of file mtt-build.cc.

bool incremental = false

Definition at line 43 of file mtt-build.cc.

Referenced by open_vocab().

bool is_conll = false

Definition at line 44 of file mtt-build.cc.

Referenced by init(), main(), numberize(), remap(), and save_vocabs().

TokenIndex LM

Definition at line 55 of file mtt-build.cc.

Referenced by Moses::LanguageModelDALM::Load().

vector<id_type> lmap

Definition at line 263 of file mtt-build.cc.

Referenced by remap(), and save_vocabs().

string mttFile

Definition at line 49 of file mtt-build.cc.

Referenced by main().

vector<id_type> pmap

Definition at line 263 of file mtt-build.cc.

Referenced by remap(), and save_vocabs().

TokenIndex PS

Definition at line 56 of file mtt-build.cc.

bool quiet = false

TokenIndex SF

Definition at line 54 of file mtt-build.cc.

vector<id_type> smap

Definition at line 263 of file mtt-build.cc.

Referenced by remap(), and save_vocabs().

string tmpFile

Definition at line 49 of file mtt-build.cc.

Referenced by build_conll_tsas(), build_plaintext_tsas(), main(), numberize(), and remap().

string UNK

Definition at line 52 of file mtt-build.cc.

Referenced by get_id(), open_vocab(), and write_tokenindex().

string vocabBase

Definition at line 47 of file mtt-build.cc.

Referenced by init().

int with_dcas

Definition at line 40 of file mtt-build.cc.

Referenced by build_conll_tsas().

int with_pfas

Definition at line 39 of file mtt-build.cc.

Referenced by build_conll_tsas(), and build_plaintext_tsas().

int with_sfas

Definition at line 41 of file mtt-build.cc.

Referenced by build_conll_tsas(), and build_plaintext_tsas().


Generated on Thu Jul 6 00:31:33 2017 for Moses by doxygen 1.5.9