#include <FuzzyMatchWrapper.h>
Public Member Functions | |
FuzzyMatchWrapper (const std::string &source, const std::string &target, const std::string &alignment) | |
std::string | Extract (long translationId, const std::string &dirNameStr) |
Protected Types | |
typedef std::map< WORD_ID, std::vector< int > > | WordIndex |
Protected Member Functions | |
void | load_corpus (const std::string &fileName, std::vector< std::vector< tmmt::WORD_ID > > &corpus) |
void | load_target (const std::string &fileName, std::vector< std::vector< tmmt::SentenceAlignment > > &corpus) |
void | load_alignment (const std::string &fileName, std::vector< std::vector< tmmt::SentenceAlignment > > &corpus) |
void | basic_fuzzy_match (std::vector< std::vector< tmmt::WORD_ID > > source, std::vector< std::vector< tmmt::WORD_ID > > input) |
unsigned int | compute_length (const std::vector< tmmt::WORD_ID > &sentence) |
unsigned int | letter_sed (WORD_ID aIdx, WORD_ID bIdx) |
unsigned int | sed (const std::vector< WORD_ID > &a, const std::vector< WORD_ID > &b, std::string &best_path, bool use_letter_sed) |
void | init_short_matches (WordIndex &wordIndex, long translationId, const std::vector< WORD_ID > &input) |
int | short_match_max_length (int input_length) |
void | add_short_matches (WordIndex &wordIndex, long translationId, std::vector< Match > &match, const std::vector< WORD_ID > &tm, int input_length, int best_cost) |
std::vector< Match > | prune_matches (const std::vector< Match > &match, int best_cost) |
int | parse_matches (std::vector< Match > &match, int input_length, int tm_length, int &best_cost) |
void | create_extract (int sentenceInd, int cost, const std::vector< WORD_ID > &sourceSentence, const std::vector< SentenceAlignment > &targets, const std::string &inputStr, const std::string &path, std::ofstream &outputFile) |
std::string | ExtractTM (WordIndex &wordIndex, long translationId, const std::string &inputPath) |
Vocabulary & | GetVocabulary () |
bool | GetLSEDCache (const std::pair< WORD_ID, WORD_ID > &key, unsigned int &value) const |
void | SetLSEDCache (const std::pair< WORD_ID, WORD_ID > &key, const unsigned int &value) |
Protected Attributes | |
std::vector< std::vector < tmmt::SentenceAlignment > > | targetAndAlignment |
tmmt::SuffixArray * | suffixArray |
int | basic_flag |
int | lsed_flag |
int | refined_flag |
int | length_filter_flag |
int | parse_flag |
int | min_match |
int | multiple_flag |
int | multiple_slack |
int | multiple_max |
std::map< std::pair< WORD_ID, WORD_ID >, unsigned int > | m_lsed |
Definition at line 28 of file FuzzyMatchWrapper.h.
typedef std::map< WORD_ID,std::vector< int > > tmmt::FuzzyMatchWrapper::WordIndex [protected] |
Definition at line 49 of file FuzzyMatchWrapper.h.
tmmt::FuzzyMatchWrapper::FuzzyMatchWrapper | ( | const std::string & | source, | |
const std::string & | target, | |||
const std::string & | alignment | |||
) |
Definition at line 23 of file FuzzyMatchWrapper.cpp.
References load_alignment(), load_target(), suffixArray, and targetAndAlignment.
void tmmt::FuzzyMatchWrapper::add_short_matches | ( | WordIndex & | wordIndex, | |
long | translationId, | |||
std::vector< Match > & | match, | |||
const std::vector< WORD_ID > & | tm, | |||
int | input_length, | |||
int | best_cost | |||
) | [protected] |
void tmmt::FuzzyMatchWrapper::basic_fuzzy_match | ( | std::vector< std::vector< tmmt::WORD_ID > > | source, | |
std::vector< std::vector< tmmt::WORD_ID > > | input | |||
) | [protected] |
Brute-force method: compares the input against every sentence in the corpus.
unsigned int tmmt::FuzzyMatchWrapper::compute_length | ( | const std::vector< tmmt::WORD_ID > & | sentence | ) | [protected] |
Utility function: computes the length of a sentence in characters (spaces do not count).
void tmmt::FuzzyMatchWrapper::create_extract | ( | int | sentenceInd, | |
int | cost, | |||
const std::vector< WORD_ID > & | sourceSentence, | |||
const std::vector< SentenceAlignment > & | targets, | |||
const std::string & | inputStr, | |||
const std::string & | path, | |||
std::ofstream & | outputFile | |||
) | [protected] |
std::string tmmt::FuzzyMatchWrapper::Extract | ( | long | translationId, | |
const std::string & | dirNameStr | |||
) |
Referenced by Moses::PhraseDictionaryFuzzyMatch::InitializeForInput().
std::string tmmt::FuzzyMatchWrapper::ExtractTM | ( | WordIndex & | wordIndex, | |
long | translationId, | |||
const std::string & | inputPath | |||
) | [protected] |
bool tmmt::FuzzyMatchWrapper::GetLSEDCache | ( | const std::pair< WORD_ID, WORD_ID > & | key, | |
unsigned int & | value | |||
) | const [protected] |
Definition at line 514 of file FuzzyMatchWrapper.cpp.
References m_lsed.
Referenced by letter_sed().
Vocabulary& tmmt::FuzzyMatchWrapper::GetVocabulary | ( | ) | [inline, protected] |
Definition at line 80 of file FuzzyMatchWrapper.h.
References tmmt::SuffixArray::GetVocabulary(), and suffixArray.
Referenced by letter_sed().
void tmmt::FuzzyMatchWrapper::init_short_matches | ( | WordIndex & | wordIndex, | |
long | translationId, | |||
const std::vector< WORD_ID > & | input | |||
) | [protected] |
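unsigned int tmmt::FuzzyMatchWrapper::letter_sed | ( | WORD_ID | aIdx, | |
WORD_ID | bIdx | |||
) | [protected] |
Definition at line 538 of file FuzzyMatchWrapper.cpp.
References GetLSEDCache(), GetVocabulary(), tmmt::Vocabulary::GetWord(), and SetLSEDCache().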
void tmmt::FuzzyMatchWrapper::load_alignment | ( | const std::string & | fileName, | |
std::vector< std::vector< tmmt::SentenceAlignment > > & | corpus | |||
) | [protected] |
void tmmt::FuzzyMatchWrapper::load_corpus | ( | const std::string & | fileName, | |
std::vector< std::vector< tmmt::WORD_ID > > & | corpus | |||
) | [protected] |
void tmmt::FuzzyMatchWrapper::load_target | ( | const std::string & | fileName, | |
std::vector< std::vector< tmmt::SentenceAlignment > > & | corpus | |||
) | [protected] |
int tmmt::FuzzyMatchWrapper::parse_matches | ( | std::vector< Match > & | match, | |
int | input_length, | |||
int | tm_length, | |||
int & | best_cost | |||
) | [protected] |
std::vector< Match > tmmt::FuzzyMatchWrapper::prune_matches | ( | const std::vector< Match > & | match, | |
int | best_cost | |||
) | [protected] |
unsigned int tmmt::FuzzyMatchWrapper::sed | ( | const std::vector< WORD_ID > & | a, | |
const std::vector< WORD_ID > & | b, | |||
std::string & | best_path, | |||
bool | use_letter_sed | |||
) | [protected] |
void tmmt::FuzzyMatchWrapper::SetLSEDCache | ( | const std::pair< WORD_ID, WORD_ID > & | key, | |
const unsigned int & | value | |||
) | [protected] |
Definition at line 528 of file FuzzyMatchWrapper.cpp.
References m_lsed.
Referenced by letter_sed().
int tmmt::FuzzyMatchWrapper::short_match_max_length | ( | int | input_length | ) | [protected] |
int tmmt::FuzzyMatchWrapper::basic_flag [protected] |
Definition at line 39 of file FuzzyMatchWrapper.h.
int tmmt::FuzzyMatchWrapper::length_filter_flag [protected] |
Definition at line 42 of file FuzzyMatchWrapper.h.
int tmmt::FuzzyMatchWrapper::lsed_flag [protected] |
Definition at line 40 of file FuzzyMatchWrapper.h.
std::map< std::pair< WORD_ID, WORD_ID >, unsigned int > tmmt::FuzzyMatchWrapper::m_lsed [protected] |
Definition at line 52 of file FuzzyMatchWrapper.h.
Referenced by GetLSEDCache(), and SetLSEDCache().
int tmmt::FuzzyMatchWrapper::min_match [protected] |
Definition at line 44 of file FuzzyMatchWrapper.h.
int tmmt::FuzzyMatchWrapper::multiple_flag [protected] |
Definition at line 45 of file FuzzyMatchWrapper.h.
int tmmt::FuzzyMatchWrapper::multiple_max [protected] |
Definition at line 47 of file FuzzyMatchWrapper.h.
int tmmt::FuzzyMatchWrapper::multiple_slack [protected] |
Definition at line 46 of file FuzzyMatchWrapper.h.
int tmmt::FuzzyMatchWrapper::parse_flag [protected] |
Definition at line 43 of file FuzzyMatchWrapper.h.
int tmmt::FuzzyMatchWrapper::refined_flag [protected] |
tmmt::SuffixArray* tmmt::FuzzyMatchWrapper::suffixArray [protected] |
Definition at line 38 of file FuzzyMatchWrapper.h.
Referenced by FuzzyMatchWrapper(), and GetVocabulary().
std::vector< std::vector< tmmt::SentenceAlignment > > tmmt::FuzzyMatchWrapper::targetAndAlignment [protected] |