MosesTraining Namespace Reference


Namespaces

namespace  Syntax

Classes

class  AlignmentElement
class  AlignmentPhrase
class  Domain
class  DomainFeature
class  SubsetDomainFeature
class  SparseSubsetDomainFeature
class  IndicatorDomainFeature
class  SparseIndicatorDomainFeature
class  RatioDomainFeature
class  SparseRatioDomainFeature
class  WordCount
class  Vocab
class  ExtractLex
class  ExtractTask
class  ExtractedRule
class  ExtractionPhrasePair
class  Hole
class  HoleSourceOrderer
class  HoleCollection
class  InternalStructFeature
class  InternalStructFeatureDense
class  InternalStructFeatureSparse
class  PhraseExtractionOptions
class  PhraseOrientation
class  PropertiesConsolidator
class  RuleExist
struct  RuleExtractionOptions
class  LexicalTable
struct  MaybeLog
class  ScoreFeatureArgumentException
struct  ScoreFeatureContext
class  ScoreFeature
class  ScoreFeatureManager
class  SentenceAlignment
class  SentenceAlignmentWithSyntax
class  PhraseAlignment
struct  SyntaxNode
class  SyntaxNodeCollection
class  Vocabulary
class  PhraseTable
class  TTable
class  DTable
class  XmlException

Typedefs

typedef std::vector< std::pair
< int, int > > 
Alignment
typedef pair< int, int > HPhraseVertex
typedef pair< HPhraseVertex, HPhraseVertex > HPhrase
typedef vector< HPhrase > HPhraseVector
typedef map< int, set< int > > HSentenceVertices
typedef std::vector< std::set
< size_t > > 
ALIGNMENT
typedef std::map< int,
std::set< int > > 
HSenteceVertices
typedef std::list< Hole > HoleList
typedef boost::shared_ptr< ScoreFeature > ScoreFeaturePtr
typedef Syntax::Tree< SyntaxNode > SyntaxTree
typedef std::string WORD
typedef unsigned int WORD_ID
typedef std::vector< WORD_ID > PHRASE
typedef unsigned int PHRASE_ID
typedef std::vector< std::pair
< PHRASE_ID, double > > 
PHRASEPROBVEC

Enumerations

enum  REO_MODEL_TYPE { REO_MSD, REO_MSLR, REO_MONO }
enum  REO_POS {
  LEFT, RIGHT, DLEFT, DRIGHT,
  UNKNOWN
}

Functions

void ReadAlignment (const std::string &s, Alignment &a)
void FlipAlignment (Alignment &a)
std::ostream & operator<< (std::ostream &out, const WordCount &obj)
REO_POS getOrientWordModel (SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool, int, int, int, int, int, int, int, bool(*)(int, int), bool(*)(int, int))
REO_POS getOrientPhraseModel (SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool, int, int, int, int, int, int, int, bool(*)(int, int), bool(*)(int, int), const HSentenceVertices &, const HSentenceVertices &)
REO_POS getOrientHierModel (SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool, int, int, int, int, int, int, int, bool(*)(int, int), bool(*)(int, int), const HSentenceVertices &, const HSentenceVertices &, const HSentenceVertices &, const HSentenceVertices &, REO_POS)
void insertVertex (HSentenceVertices &, int, int)
void insertPhraseVertices (HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, int, int, int, int)
string getOrientString (REO_POS, REO_MODEL_TYPE)
bool ge (int, int)
bool le (int, int)
bool lt (int, int)
bool isAligned (SentenceAlignmentWithSyntax &, int, int)
std::vector< float > orientationClassPriorsL2R (4, 0)
std::vector< float > orientationClassPriorsR2L (4, 0)
bool isNonTerminal (const std::string &word)
void addBoundaryWords (vector< string > &phrase)
std::vector< std::string > Tokenize (const std::string &str, const std::string &delimiters=" \t")
std::string Trim (const std::string &str, const std::string dropChars=" \t\n\r")
string ParseXmlTagAttribute (const string &tag, const string &attributeName)
void ParseXmlTagAttributes (const std::string &s, std::map< std::string, std::string > &attributes)
string TrimXml (const string &str)
bool isXmlTag (const string &tag)
string unescape (const string &str)
vector< string > TokenizeXml (const string &str)
bool ProcessAndStripXMLTags (string &line, SyntaxNodeCollection &nodeCollection, set< string > &labelCollection, map< string, int > &topLabelCollection, bool unescapeSpecialChars)
std::string ParseXmlTagAttribute (const std::string &tag, const std::string &attributeName)
std::string TrimXml (const std::string &str)
bool isXmlTag (const std::string &tag)
std::vector< std::string > TokenizeXml (const std::string &str)
bool ProcessAndStripXMLTags (std::string &line, SyntaxNodeCollection &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection, bool unescape=true)
std::string unescape (const std::string &str)

Variables

int sentenceOffset = 0
Vocabulary vcbT
Vocabulary vcbS
bool hierarchicalFlag = false
LexicalTable lexTable
bool inverseFlag = false
bool pcfgFlag = false
bool phraseOrientationFlag = false
bool treeFragmentsFlag = false
bool partsOfSpeechFlag = false
bool sourceSyntaxLabelsFlag = false
bool sourceSyntaxLabelCountsLHSFlag = false
bool targetSyntacticPreferencesFlag = false
bool unpairedExtractFormatFlag = false
bool conditionOnTargetLhsFlag = false
bool wordAlignmentFlag = true
bool goodTuringFlag = false
bool kneserNeyFlag = false
bool logProbFlag = false
int negLogProb = 1
bool lexFlag = true
bool unalignedFlag = false
bool unalignedFWFlag = false
bool crossedNonTerm = false
bool spanLength = false
bool ruleLength = false
bool nonTermContext = false
bool nonTermContextTarget = false
bool targetConstituentBoundariesFlag = false
int countOfCounts [COC_MAX+1]
int totalDistinct = 0
float minCount = 0
float minCountHierarchical = 0
bool phraseOrientationPriorsFlag = false
boost::unordered_map
< std::string, float > 
sourceLHSCounts
boost::unordered_map
< std::string,
boost::unordered_map
< std::string, float > * > 
targetLHSAndSourceLHSJointCounts
std::set< std::string > sourceLabelSet
std::map< std::string, size_t > sourceLabels
std::vector< std::string > sourceLabelsByIndex
std::set< std::string > partsOfSpeechSet
boost::unordered_map
< std::string, float > 
targetSyntacticPreferencesLHSCounts
boost::unordered_map
< std::string,
boost::unordered_map
< std::string, float > * > 
ruleTargetLHSAndTargetSyntacticPreferencesLHSJointCounts
std::set< std::string > targetSyntacticPreferencesLabelSet
std::map< std::string, size_t > targetSyntacticPreferencesLabels
std::vector< std::string > targetSyntacticPreferencesLabelsByIndex

Detailed Description

This contains extra features that can be added to the scorer. To add a new feature: 1. Implement a subclass of ScoreFeature. 2. Update ScoreFeatureManager.configure() to configure your feature, and usage() to display usage info. 3. Write unit tests (see ScoreFeatureTest.cpp) and regression tests.


Typedef Documentation

typedef std::vector< std::set<size_t> > MosesTraining::ALIGNMENT

Definition at line 32 of file ExtractionPhrasePair.h.

typedef std::vector<std::pair<int, int> > MosesTraining::Alignment

Definition at line 29 of file Alignment.h.

typedef std::list<Hole> MosesTraining::HoleList

Definition at line 103 of file Hole.h.

typedef std::pair< HPhraseVertex, HPhraseVertex > MosesTraining::HPhrase

Definition at line 34 of file extract-main.cpp.

typedef std::vector< HPhrase > MosesTraining::HPhraseVector

Definition at line 37 of file extract-main.cpp.

typedef std::pair< int, int > MosesTraining::HPhraseVertex

Definition at line 30 of file extract-main.cpp.

typedef std::map<int, std::set<int> > MosesTraining::HSenteceVertices

Definition at line 33 of file hierarchical.h.

typedef std::map< int, std::set< int > > MosesTraining::HSentenceVertices

Definition at line 41 of file extract-main.cpp.

typedef std::vector< WORD_ID > MosesTraining::PHRASE

Definition at line 33 of file tables-core.h.

typedef unsigned int MosesTraining::PHRASE_ID

Definition at line 34 of file tables-core.h.

typedef std::vector< std::pair< PHRASE_ID, double > > MosesTraining::PHRASEPROBVEC

Definition at line 49 of file tables-core.h.

typedef boost::shared_ptr<ScoreFeature> MosesTraining::ScoreFeaturePtr

Definition at line 102 of file ScoreFeature.h.

typedef Syntax::Tree<SyntaxNode> MosesTraining::SyntaxTree

Definition at line 10 of file SyntaxTree.h.

typedef std::string MosesTraining::WORD

Definition at line 18 of file tables-core.h.

typedef unsigned int MosesTraining::WORD_ID

Definition at line 19 of file tables-core.h.


Enumeration Type Documentation

enum MosesTraining::REO_MODEL_TYPE

Enumerator:
REO_MSD 
REO_MSLR 
REO_MONO 

Definition at line 27 of file PhraseExtractionOptions.h.

enum MosesTraining::REO_POS

Enumerator:
LEFT 
RIGHT 
DLEFT 
DRIGHT 
UNKNOWN 

Definition at line 28 of file PhraseExtractionOptions.h.


Function Documentation

void MosesTraining::addBoundaryWords ( vector< string > &  phrase  ) 

Definition at line 36 of file SentenceAlignment.cpp.

Referenced by MosesTraining::SentenceAlignment::processSourceSentence(), and MosesTraining::SentenceAlignment::processTargetSentence().

Here is the caller graph for this function:

void MosesTraining::FlipAlignment ( Alignment &  a  ) 

Definition at line 63 of file Alignment.cpp.

References swap().

Referenced by MosesTraining::Syntax::GHKM::ExtractGHKM::Main().

Here is the call graph for this function:

Here is the caller graph for this function:

bool MosesTraining::ge ( int  first,
int  second 
)

Definition at line 685 of file extract-main.cpp.

REO_POS MosesTraining::getOrientHierModel ( SentenceAlignmentWithSyntax &  sentence,
REO_MODEL_TYPE  modelType,
bool  connectedLeftTop,
bool  connectedRightTop,
int  startF,
int  endF,
int  startE,
int  endE,
int  countF,
int  zero,
int  unit,
bool(*)(int, int)  ge,
bool(*)(int, int)  lt,
const HSentenceVertices &  inBottomRight,
const HSentenceVertices &  inBottomLeft,
const HSentenceVertices &  outBottomRight,
const HSentenceVertices &  outBottomLeft,
REO_POS  phraseOrient 
)

Definition at line 617 of file extract-main.cpp.

References DLEFT, DRIGHT, LEFT, REO_MONO, REO_MSD, RIGHT, and UNKNOWN.

REO_POS MosesTraining::getOrientPhraseModel ( SentenceAlignmentWithSyntax &  sentence,
REO_MODEL_TYPE  modelType,
bool  connectedLeftTop,
bool  connectedRightTop,
int  startF,
int  endF,
int  startE,
int  endE,
int  countF,
int  zero,
int  unit,
bool(*)(int, int)  ge,
bool(*)(int, int)  lt,
const HSentenceVertices &  inBottomRight,
const HSentenceVertices &  inBottomLeft 
)

Definition at line 581 of file extract-main.cpp.

References DLEFT, DRIGHT, LEFT, REO_MONO, REO_MSD, RIGHT, and UNKNOWN.

string MosesTraining::getOrientString ( REO_POS  orient,
REO_MODEL_TYPE  modelType 
)

Definition at line 724 of file extract-main.cpp.

References DLEFT, DRIGHT, LEFT, REO_MONO, REO_MSD, REO_MSLR, RIGHT, and UNKNOWN.

REO_POS MosesTraining::getOrientWordModel ( SentenceAlignmentWithSyntax &  sentence,
REO_MODEL_TYPE  modelType,
bool  connectedLeftTop,
bool  connectedRightTop,
int  startF,
int  endF,
int  startE,
int  endE,
int  countF,
int  zero,
int  unit,
bool(*)(int, int)  ge,
bool(*)(int, int)  lt 
)

Definition at line 555 of file extract-main.cpp.

References DLEFT, DRIGHT, isAligned(), LEFT, REO_MONO, REO_MSD, RIGHT, and UNKNOWN.

Here is the call graph for this function:

void MosesTraining::insertPhraseVertices ( HSentenceVertices &  topLeft,
HSentenceVertices &  topRight,
HSentenceVertices &  bottomLeft,
HSentenceVertices &  bottomRight,
int  startF,
int  startE,
int  endF,
int  endE 
)

Definition at line 710 of file extract-main.cpp.

References insertVertex().

Here is the call graph for this function:

void MosesTraining::insertVertex ( HSentenceVertices &  corners,
int  x,
int  y 
)

Definition at line 700 of file extract-main.cpp.

Referenced by insertPhraseVertices().

Here is the caller graph for this function:

bool MosesTraining::isAligned ( SentenceAlignmentWithSyntax &  sentence,
int  fi,
int  ei 
)

Definition at line 669 of file extract-main.cpp.

References MosesTraining::SentenceAlignment::alignedToT, MosesTraining::SentenceAlignment::source, and MosesTraining::SentenceAlignment::target.

Referenced by getOrientWordModel().

Here is the caller graph for this function:

bool MosesTraining::isNonTerminal ( const std::string &  word  )  [inline]

Definition at line 42 of file score.h.

Referenced by MosesTraining::ExtractionPhrasePair::MatchesAlignment(), printSourcePhrase(), and printTargetPhrase().

Here is the caller graph for this function:

bool MosesTraining::isXmlTag ( const std::string &  tag  ) 

bool MosesTraining::isXmlTag ( const string &  tag  ) 

Check if the token is an XML tag, i.e. starts with "<"

Parameters:
tag token to be checked

Definition at line 141 of file XmlTree.cpp.

Referenced by ProcessAndStripXMLTags().

Here is the caller graph for this function:

bool MosesTraining::le ( int  first,
int  second 
)

Definition at line 690 of file extract-main.cpp.

bool MosesTraining::lt ( int  first,
int  second 
)

Definition at line 695 of file extract-main.cpp.

std::ostream& MosesTraining::operator<< ( std::ostream &  out,
const WordCount &  obj 
)

Definition at line 206 of file extract-lex-main.cpp.

References MosesTraining::WordCount::GetCount().

Here is the call graph for this function:

std::vector<float> MosesTraining::orientationClassPriorsL2R ( 4 ,
0 
)

Referenced by main(), and outputPhrasePair().

Here is the caller graph for this function:

std::vector<float> MosesTraining::orientationClassPriorsR2L ( 4 ,
0 
)

Referenced by main(), and outputPhrasePair().

Here is the caller graph for this function:

std::string MosesTraining::ParseXmlTagAttribute ( const std::string &  tag,
const std::string &  attributeName 
)

string MosesTraining::ParseXmlTagAttribute ( const string &  tag,
const string &  attributeName 
)

Definition at line 64 of file XmlTree.cpp.

Referenced by ProcessAndStripXMLTags().

Here is the caller graph for this function:

void MosesTraining::ParseXmlTagAttributes ( const std::string &  s,
std::map< std::string, std::string > &  attributes 
)

Definition at line 85 of file XmlTree.cpp.

References begin, and Trim().

Referenced by ProcessAndStripXMLTags().

Here is the call graph for this function:

Here is the caller graph for this function:

bool MosesTraining::ProcessAndStripXMLTags ( std::string &  line,
SyntaxNodeCollection &  tree,
std::set< std::string > &  labelCollection,
std::map< std::string, int > &  topLabelCollection,
bool  unescape = true 
)

bool MosesTraining::ProcessAndStripXMLTags ( string &  line,
SyntaxNodeCollection &  nodeCollection,
set< string > &  labelCollection,
map< string, int > &  topLabelCollection,
bool  unescapeSpecialChars 
)

Process a sentence with XML-style annotation of syntactic nodes.

Parameters:
line [in,out] in: sentence, out: sentence without the XML
nodeCollection [out] the collection of SyntaxNode objects for this sentence
labelCollection [out] label values are inserted into this set
topLabelCollection [out] top labels (key) and their counts (value) are inserted into this map
unescapeSpecialChars flag indicating whether XML special characters should be unescaped

Definition at line 259 of file XmlTree.cpp.

References MosesTraining::SyntaxNodeCollection::AddNode(), MosesTraining::SyntaxNode::attributes, MosesTraining::SyntaxNodeCollection::GetNodes(), isXmlTag(), MosesTraining::SyntaxNode::label, n, ParseXmlTagAttribute(), ParseXmlTagAttributes(), Tokenize(), TokenizeXml(), Trim(), TrimXml(), and unescape().

Referenced by MosesTraining::Syntax::XmlTreeParser::Parse(), MosesTraining::SentenceAlignmentWithSyntax::processSourceSentence(), and MosesTraining::SentenceAlignmentWithSyntax::processTargetSentence().

Here is the call graph for this function:

Here is the caller graph for this function:

void MosesTraining::ReadAlignment ( const std::string &  s,
Alignment &  a 
)

Definition at line 31 of file Alignment.cpp.

References begin, end, and src.

Referenced by MosesTraining::Syntax::GHKM::ExtractGHKM::Main().

Here is the caller graph for this function:

std::vector<std::string> MosesTraining::Tokenize ( const std::string &  str,
const std::string &  delimiters = " \t" 
) [inline]

Definition at line 36 of file XmlTree.cpp.

Referenced by MosesTraining::LexicalTable::load(), ProcessAndStripXMLTags(), and MosesTraining::ExtractionPhrasePair::UpdateVocabularyFromValueTokens().

Here is the caller graph for this function:

std::vector<std::string> MosesTraining::TokenizeXml ( const std::string &  str  ) 

vector<string> MosesTraining::TokenizeXml ( const string &  str  ) 

Split up the input character string into tokens made up of either XML tags or text. Example: this <b> is a </b> test . => (this ), (<b>), ( is a ), (</b>), ( test .)

Parameters:
str input string

Definition at line 209 of file XmlTree.cpp.

Referenced by ProcessAndStripXMLTags().

Here is the caller graph for this function:

std::string MosesTraining::Trim ( const std::string &  str,
const std::string  dropChars = " \t\n\r" 
)

Definition at line 57 of file XmlTree.cpp.

Referenced by ParseXmlTagAttributes(), and ProcessAndStripXMLTags().

Here is the caller graph for this function:

std::string MosesTraining::TrimXml ( const std::string &  str  ) 

string MosesTraining::TrimXml ( const string &  str  ) 

Remove "<" and ">" from XML tag

Parameters:
str xml token to be stripped

Definition at line 121 of file XmlTree.cpp.

Referenced by ProcessAndStripXMLTags().

Here is the caller graph for this function:

std::string MosesTraining::unescape ( const std::string &  str  ) 

string MosesTraining::unescape ( const string &  str  ) 

Unescape XML special characters.

Definition at line 149 of file XmlTree.cpp.

References end, and n.

Referenced by ProcessAndStripXMLTags().

Here is the caller graph for this function:


Variable Documentation

Definition at line 56 of file score-main.cpp.

Referenced by main(), printSourcePhrase(), and printTargetPhrase().

Definition at line 73 of file score-main.cpp.

Definition at line 66 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 58 of file score-main.cpp.

Definition at line 46 of file score-main.cpp.

Referenced by printSourcePhrase(), and printTargetPhrase().

Definition at line 59 of file score-main.cpp.

Definition at line 63 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 45 of file score-main.cpp.

Definition at line 60 of file score-main.cpp.

Definition at line 75 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 76 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 61 of file score-main.cpp.

Referenced by main().

Definition at line 69 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 70 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 51 of file score-main.cpp.

std::set<std::string> MosesTraining::partsOfSpeechSet

Definition at line 85 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 48 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 49 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 77 of file score-main.cpp.

Referenced by main().

Definition at line 68 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

boost::unordered_map<std::string, boost::unordered_map<std::string,float>* > MosesTraining::ruleTargetLHSAndTargetSyntacticPreferencesLHSJointCounts

Definition at line 88 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 68 of file extract-main.cpp.

Referenced by main().

std::map<std::string,size_t> MosesTraining::sourceLabels

std::vector<std::string> MosesTraining::sourceLabelsByIndex

Definition at line 83 of file score-main.cpp.

std::set<std::string> MosesTraining::sourceLabelSet

Definition at line 81 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

boost::unordered_map<std::string,float> MosesTraining::sourceLHSCounts

Definition at line 53 of file score-main.cpp.

Referenced by main().

Definition at line 52 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 67 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 71 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

boost::unordered_map<std::string, boost::unordered_map<std::string,float>* > MosesTraining::targetLHSAndSourceLHSJointCounts

Definition at line 80 of file score-main.cpp.

Referenced by main(), outputPhrasePair(), and writeLeftHandSideLabelCounts().

Definition at line 54 of file score-main.cpp.

Definition at line 90 of file score-main.cpp.

Definition at line 91 of file score-main.cpp.

Definition at line 89 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

boost::unordered_map<std::string,float> MosesTraining::targetSyntacticPreferencesLHSCounts

Definition at line 87 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 74 of file score-main.cpp.

Referenced by outputPhrasePair(), and writeCountOfCounts().

Definition at line 50 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 64 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 65 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().

Definition at line 55 of file score-main.cpp.

Referenced by main(), printSourcePhrase(), and printTargetPhrase().

Definition at line 57 of file score-main.cpp.

Referenced by main(), and outputPhrasePair().


Generated on Thu Jul 6 00:34:05 2017 for Moses by  doxygen 1.5.9