00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <sstream>
00024 #include "memory.h"
00025 #include "Word.h"
00026 #include "TypeDef.h"
00027 #include "StaticData.h"
00028
00029 using namespace std;
00030
00031 namespace Moses
00032 {
00033
00034
00035 int Word::Compare(const Word &targetWord, const Word &sourceWord)
00036 {
00037 if (targetWord.IsNonTerminal() != sourceWord.IsNonTerminal()) {
00038 return targetWord.IsNonTerminal() ? -1 : 1;
00039 }
00040
00041 for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++) {
00042 const Factor *targetFactor = targetWord[factorType]
00043 ,*sourceFactor = sourceWord[factorType];
00044
00045 if (targetFactor == NULL || sourceFactor == NULL)
00046 continue;
00047 if (targetFactor == sourceFactor)
00048 continue;
00049
00050 return (targetFactor<sourceFactor) ? -1 : +1;
00051 }
00052 return 0;
00053
00054 }
00055
00056 void Word::Merge(const Word &sourceWord)
00057 {
00058 for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
00059 const Factor *sourcefactor = sourceWord.m_factorArray[currFactor]
00060 ,*targetFactor = this ->m_factorArray[currFactor];
00061 if (targetFactor == NULL && sourcefactor != NULL) {
00062 m_factorArray[currFactor] = sourcefactor;
00063 }
00064 }
00065 }
00066
00067 std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlank) const
00068 {
00069 stringstream strme;
00070 CHECK(factorType.size() <= MAX_NUM_FACTORS);
00071 const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
00072 bool firstPass = true;
00073 for (unsigned int i = 0 ; i < factorType.size() ; i++) {
00074 const Factor *factor = m_factorArray[factorType[i]];
00075 if (factor != NULL) {
00076 if (firstPass) {
00077 firstPass = false;
00078 } else {
00079 strme << factorDelimiter;
00080 }
00081 strme << factor->GetString();
00082 }
00083 }
00084 if(endWithBlank) strme << " ";
00085 return strme.str();
00086 }
00087
00088 void Word::CreateFromString(FactorDirection direction
00089 , const std::vector<FactorType> &factorOrder
00090 , const std::string &str
00091 , bool isNonTerminal)
00092 {
00093 FactorCollection &factorCollection = FactorCollection::Instance();
00094
00095 vector<string> wordVec;
00096 Tokenize(wordVec, str, "|");
00097 CHECK(wordVec.size() == factorOrder.size());
00098
00099 const Factor *factor;
00100 for (size_t ind = 0; ind < wordVec.size(); ++ind) {
00101 FactorType factorType = factorOrder[ind];
00102 factor = factorCollection.AddFactor(direction, factorType, wordVec[ind]);
00103 m_factorArray[factorType] = factor;
00104 }
00105
00106
00107 m_isNonTerminal = isNonTerminal;
00108 }
00109
00110 void Word::CreateUnknownWord(const Word &sourceWord)
00111 {
00112 FactorCollection &factorCollection = FactorCollection::Instance();
00113
00114 for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
00115 FactorType factorType = static_cast<FactorType>(currFactor);
00116
00117 const Factor *sourceFactor = sourceWord[currFactor];
00118 if (sourceFactor == NULL)
00119 SetFactor(factorType, factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR));
00120 else
00121 SetFactor(factorType, factorCollection.AddFactor(Output, factorType, sourceFactor->GetString()));
00122 }
00123 m_isNonTerminal = sourceWord.IsNonTerminal();
00124 }
00125
// Macro invocation for class Word; TO_STRING_BODY is not defined in this file.
// NOTE(review): presumably expands to the definition of Word::ToString() --
// confirm against the macro's definition.
00126 TO_STRING_BODY(Word);
00127
00128
00129 ostream& operator<<(ostream& out, const Word& word)
00130 {
00131 stringstream strme;
00132
00133 const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
00134 bool firstPass = true;
00135 for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
00136 FactorType factorType = static_cast<FactorType>(currFactor);
00137 const Factor *factor = word.GetFactor(factorType);
00138 if (factor != NULL) {
00139 if (firstPass) {
00140 firstPass = false;
00141 } else {
00142 strme << factorDelimiter;
00143 }
00144 strme << *factor;
00145 }
00146 }
00147 out << strme.str() << " ";
00148 return out;
00149 }
00150
00151 }
00152