#include <boost/algorithm/string/predicate.hpp>
#include <boost/program_options.hpp>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/parsers.hpp>
#include <boost/program_options/variables_map.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <string>
#include <sys/types.h>
#include <sys/wait.h>
#include "ug_conll_record.h"
#include "tpt_tokenindex.h"
#include "ug_mm_ttrack.h"
#include "tpt_pickler.h"
#include "ug_deptree.h"
#include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h"
#include "moses/TranslationModel/UG/mm/ug_im_tsa.h"
Go to the source code of this file.
Functions
void interpret_args (int ac, char *av[])
uchar rangeCheck (int p, int limit)
id_type get_id (TokenIndex const &T, string const &w)
void open_vocab (TokenIndex &T, string fname)
void ini_cnt_vec (TokenIndex const &T, vector< pair< string, size_t > > &v)
void write_tokenindex (string fname, TokenIndex &T, vector< id_type > const &n2o)
void init (int argc, char *argv[])
void fill_rec (Conll_Record &rec, vector< string > const &w)
void log_progress (size_t ctr)
size_t process_plain_input (ostream &out, vector< id_type > &s_index)
size_t process_tagged_input (ostream &out, vector< id_type > &s_index, vector< id_type > &p_index)
size_t numberize ()
void invert (vector< id_type > const &from, vector< id_type > &to)
void conservative_sort (TokenIndex const &V, vector< size_t > const &cnt, vector< id_type > &xmap)
void remap ()
void save_vocabs ()
template<typename Token> void build_mmTSA (string infile, string outfile)
bool build_plaintext_tsas ()
void build_conll_tsas ()
int main (int argc, char *argv[])
Variables
int with_pfas
int with_dcas
int with_sfas
bool incremental = false
bool is_conll = false
bool quiet = false
string vocabBase
string baseName
string tmpFile
string mttFile
string UNK
TokenIndex SF
TokenIndex LM
TokenIndex PS
TokenIndex DT
vector< id_type > smap
vector< id_type > lmap
vector< id_type > pmap
vector< id_type > dmap
void build_conll_tsas ()
template<typename Token> void build_mmTSA (string infile, string outfile) [inline]
Definition at line 363 of file mtt-build.cc.
References NULL, quiet, sapt::imTSA< TOKEN >::save_as_mm_tsa(), and T.
bool build_plaintext_tsas ()
void conservative_sort (TokenIndex const &V, vector< size_t > const &cnt, vector< id_type > &xmap)
Definition at line 276 of file mtt-build.cc.
References sapt::TokenIndex::knownVocabSize(), sort(), sorter, and sapt::TokenIndex::totalVocabSize().
Referenced by remap().
void fill_rec (Conll_Record &rec, vector< string > const &w)
Definition at line 131 of file mtt-build.cc.
References sapt::Conll_Record::dtype, get_id(), sapt::Conll_Record::lemma, sapt::Conll_Record::majpos, sapt::Conll_Record::minpos, sapt::Conll_Record::parent, rangeCheck(), and sapt::Conll_Record::sform.
Referenced by process_tagged_input().
id_type get_id (TokenIndex const &T, string const &w)
Definition at line 64 of file mtt-build.cc.
References UNK.
Referenced by fill_rec(), Moses::PhraseDictionaryMultiModel::GetPhraseCache(), Moses::PhraseDictionaryGroup::GetPhraseCache(), Moses::PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector(), process_plain_input(), and Moses::PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector().
void ini_cnt_vec (TokenIndex const &T, vector< pair< string, size_t > > &v)
Definition at line 91 of file mtt-build.cc.
References sapt::TokenIndex::totalVocabSize().
void init (int argc, char *argv[])
Definition at line 118 of file mtt-build.cc.
Referenced by sapt::BitSetCache< TSA >::BitSetCache(), BOOST_AUTO_TEST_CASE(), sapt::DocumentBias::init_from_json(), Moses::InputFileStream::InputFileStream(), inputfilestream::inputfilestream(), Moses::SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCountFile(), main(), Moses::InputFileStream::Open(), sapt::PScoreProvenance< Token >::PScoreProvenance(), sapt::PScoreRareness< Token >::PScoreRareness(), and MosesTuning::Viterbi().
void interpret_args (int ac, char *av[])
Definition at line 26 of file mam2symal.cc.
void invert (vector< id_type > const &from, vector< id_type > &to)
Definition at line 266 of file mtt-build.cc.
Referenced by main(), and remap().
void log_progress (size_t ctr)
Definition at line 155 of file mtt-build.cc.
Referenced by process_plain_input(), and process_tagged_input().
int main (int argc, char *argv[])
Definition at line 416 of file mtt-build.cc.
References build_conll_tsas(), build_plaintext_tsas(), init(), is_conll, sapt::TokenIndex::knownVocabSize(), mttFile, numberize(), quiet, remap(), save_vocabs(), tmpFile, and sapt::TokenIndex::totalVocabSize().
size_t numberize ()
Definition at line 224 of file mtt-build.cc.
References index, is_conll, tpt::numwrite(), process_plain_input(), process_tagged_input(), quiet, and tmpFile.
Referenced by main().
void open_vocab (TokenIndex &T, string fname)
Definition at line 77 of file mtt-build.cc.
References incremental, sapt::TokenIndex::open(), sapt::TokenIndex::setDynamic(), sapt::TokenIndex::setUnkLabel(), and UNK.
Referenced by init().
size_t process_plain_input (ostream &out, vector< id_type > &s_index)
Definition at line 170 of file mtt-build.cc.
References get_id(), log_progress(), tpt::numwrite(), and quiet.
Referenced by numberize().
size_t process_tagged_input (ostream &out, vector< id_type > &s_index, vector< id_type > &p_index)
Definition at line 190 of file mtt-build.cc.
References fill_rec(), log_progress(), quiet, and starts_with().
Referenced by numberize().
uchar rangeCheck (int p, int limit) [inline]
Definition at line 61 of file mtt-build.cc.
Referenced by fill_rec().
void remap ()
Definition at line 288 of file mtt-build.cc.
References conservative_sort(), dmap, invert(), is_conll, lmap, mtt, tpt::numread(), pmap, quiet, sform, smap, tmpFile, and sapt::TokenIndex::totalVocabSize().
Referenced by main().
void save_vocabs ()
Definition at line 343 of file mtt-build.cc.
References baseName, dmap, is_conll, sapt::TokenIndex::knownVocabSize(), lmap, pmap, smap, sapt::TokenIndex::totalVocabSize(), and write_tokenindex().
Referenced by main().
void write_tokenindex (string fname, TokenIndex &T, vector< id_type > const &n2o)
Definition at line 102 of file mtt-build.cc.
References sapt::TokenIndex::close(), quiet, sort(), UNK, and sapt::write_tokenindex_to_disk().
Referenced by save_vocabs().
string baseName
Definition at line 48 of file mtt-build.cc.
Referenced by build_conll_tsas(), build_plaintext_tsas(), and save_vocabs().
vector<id_type> dmap
Definition at line 57 of file mtt-build.cc.
bool incremental = false
bool is_conll = false
Definition at line 44 of file mtt-build.cc.
Referenced by init(), main(), numberize(), remap(), and save_vocabs().
vector<id_type> lmap
string mttFile
vector<id_type> pmap
Definition at line 56 of file mtt-build.cc.
bool quiet = false
Definition at line 45 of file mtt-build.cc.
Referenced by build_mmTSA(), main(), numberize(), process_plain_input(), process_tagged_input(), remap(), and write_tokenindex().
Definition at line 54 of file mtt-build.cc.
vector<id_type> smap
string tmpFile
Definition at line 49 of file mtt-build.cc.
Referenced by build_conll_tsas(), build_plaintext_tsas(), main(), numberize(), and remap().
string UNK
Definition at line 52 of file mtt-build.cc.
Referenced by get_id(), open_vocab(), and write_tokenindex().
string vocabBase
int with_dcas
int with_pfas
Definition at line 39 of file mtt-build.cc.
Referenced by build_conll_tsas(), and build_plaintext_tsas().
int with_sfas
Definition at line 41 of file mtt-build.cc.
Referenced by build_conll_tsas(), and build_plaintext_tsas().