00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <sstream>
00024 #include "memory.h"
00025 #include "Word.h"
00026 #include "TypeDef.h"
00027 #include "StaticData.h"
00028 #include "util/exception.hh"
00029 #include "util/tokenize_piece.hh"
00030
00031 using namespace std;
00032
00033 namespace Moses
00034 {
00035
00036
00037 int Word::Compare(const Word &targetWord, const Word &sourceWord)
00038 {
00039 if (targetWord.IsNonTerminal() != sourceWord.IsNonTerminal()) {
00040 return targetWord.IsNonTerminal() ? -1 : 1;
00041 }
00042
00043 for (size_t factorType = 0 ; factorType < MAX_NUM_FACTORS ; factorType++) {
00044 const Factor *targetFactor = targetWord[factorType]
00045 ,*sourceFactor = sourceWord[factorType];
00046
00047 if (targetFactor == NULL || sourceFactor == NULL)
00048 continue;
00049 if (targetFactor == sourceFactor)
00050 continue;
00051
00052 return (targetFactor<sourceFactor) ? -1 : +1;
00053 }
00054 return 0;
00055
00056 }
00057
00058 void Word::Merge(const Word &sourceWord)
00059 {
00060 for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
00061 const Factor *sourcefactor = sourceWord.m_factorArray[currFactor]
00062 ,*targetFactor = this ->m_factorArray[currFactor];
00063 if (targetFactor == NULL && sourcefactor != NULL) {
00064 m_factorArray[currFactor] = sourcefactor;
00065 }
00066 }
00067 }
00068
00069 std::string Word::GetString(const vector<FactorType> factorType,bool endWithBlank) const
00070 {
00071 stringstream strme;
00072 CHECK(factorType.size() <= MAX_NUM_FACTORS);
00073 const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
00074 bool firstPass = true;
00075 for (unsigned int i = 0 ; i < factorType.size() ; i++) {
00076 const Factor *factor = m_factorArray[factorType[i]];
00077 if (factor != NULL) {
00078 if (firstPass) {
00079 firstPass = false;
00080 } else {
00081 strme << factorDelimiter;
00082 }
00083 strme << factor->GetString();
00084 }
00085 }
00086 if(endWithBlank) strme << " ";
00087 return strme.str();
00088 }
00089
00090 StringPiece Word::GetString(FactorType factorType) const
00091 {
00092 return m_factorArray[factorType]->GetString();
00093 }
00094
00095 class StrayFactorException : public util::Exception {};
00096
00097 void Word::CreateFromString(FactorDirection direction
00098 , const std::vector<FactorType> &factorOrder
00099 , const StringPiece &str
00100 , bool isNonTerminal)
00101 {
00102 FactorCollection &factorCollection = FactorCollection::Instance();
00103
00104 util::TokenIter<util::MultiCharacter> fit(str, StaticData::Instance().GetFactorDelimiter());
00105 for (size_t ind = 0; ind < factorOrder.size() && fit; ++ind, ++fit) {
00106 m_factorArray[factorOrder[ind]] = factorCollection.AddFactor(*fit);
00107 }
00108 UTIL_THROW_IF(fit, StrayFactorException, "You have configured " << factorOrder.size() << " factors but the word " << str << " contains factor delimiter " << StaticData::Instance().GetFactorDelimiter() << " too many times.");
00109
00110
00111 m_isNonTerminal = isNonTerminal;
00112 }
00113
00114 void Word::CreateUnknownWord(const Word &sourceWord)
00115 {
00116 FactorCollection &factorCollection = FactorCollection::Instance();
00117
00118 for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
00119 FactorType factorType = static_cast<FactorType>(currFactor);
00120
00121 const Factor *sourceFactor = sourceWord[currFactor];
00122 if (sourceFactor == NULL)
00123 SetFactor(factorType, factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR));
00124 else
00125 SetFactor(factorType, factorCollection.AddFactor(Output, factorType, sourceFactor->GetString()));
00126 }
00127 m_isNonTerminal = sourceWord.IsNonTerminal();
00128 }
00129
00130 TO_STRING_BODY(Word);
00131
00132
00133 ostream& operator<<(ostream& out, const Word& word)
00134 {
00135 stringstream strme;
00136
00137 const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
00138 bool firstPass = true;
00139 for (unsigned int currFactor = 0 ; currFactor < MAX_NUM_FACTORS ; currFactor++) {
00140 FactorType factorType = static_cast<FactorType>(currFactor);
00141 const Factor *factor = word.GetFactor(factorType);
00142 if (factor != NULL) {
00143 if (firstPass) {
00144 firstPass = false;
00145 } else {
00146 strme << factorDelimiter;
00147 }
00148 strme << *factor;
00149 }
00150 }
00151 out << strme.str() << " ";
00152 return out;
00153 }
00154
00155 }
00156