00001 #include "SpanLengthPhraseProperty.h"
00002 #include "moses/Util.h"
00003 #include "util/exception.hh"
00004
00005 using namespace std;
00006
00007 namespace Moses
00008 {
00009 SpanLengthPhraseProperty::SpanLengthPhraseProperty()
00010 {
00011 }
00012
00013 void SpanLengthPhraseProperty::ProcessValue(const std::string &value)
00014 {
00015 vector<string> toks;
00016 Tokenize(toks, value);
00017
00018 set< vector<string> > indices;
00019
00020 for (size_t i = 0; i < toks.size(); ++i) {
00021 const string &span = toks[i];
00022
00023
00024 vector<string> toks;
00025 Tokenize<string>(toks, span, ",");
00026 UTIL_THROW_IF2(toks.size() != 1 && toks.size() != 3, "Incorrect format for SpanLength: " << span);
00027
00028 if (toks.size() == 1) {
00029 float count = Scan<float>(toks[0]);
00030 Populate(indices, count);
00031
00032 indices.clear();
00033 } else {
00034 indices.insert(toks);
00035 }
00036 }
00037
00038
00039 CalcTotals(m_source);
00040 CalcTotals(m_target);
00041 }
00042
00043 void SpanLengthPhraseProperty::Populate(const set< vector<string> > &indices, float count)
00044 {
00045 set< vector<string> >::const_iterator iter;
00046 for (iter = indices.begin(); iter != indices.end(); ++iter) {
00047 const vector<string> &toksStr = *iter;
00048 vector<size_t> toks = Scan<size_t>(toksStr);
00049 UTIL_THROW_IF2(toks.size() != 3, "Incorrect format for SpanLength. Size is " << toks.size());
00050
00051 Populate(toks, count);
00052 }
00053 }
00054
00055 void SpanLengthPhraseProperty::Populate(const std::vector<size_t> &toks, float count)
00056 {
00057 size_t ntInd = toks[0];
00058 size_t sourceLength = toks[1];
00059 size_t targetLength = toks[2];
00060 if (ntInd >= m_source.size() ) {
00061 m_source.resize(ntInd + 1);
00062 m_target.resize(ntInd + 1);
00063 }
00064
00065 Map &sourceMap = m_source[ntInd].first;
00066 Map &targetMap = m_target[ntInd].first;
00067 Populate(sourceMap, sourceLength, count);
00068 Populate(targetMap, targetLength, count);
00069 }
00070
00071 void SpanLengthPhraseProperty::Populate(Map &map, size_t span, float count)
00072 {
00073 Map::iterator iter;
00074 iter = map.find(span);
00075 if (iter != map.end()) {
00076 float &value = iter->second;
00077 value += count;
00078 } else {
00079 map[span] = count;
00080 }
00081 }
00082
00083 void SpanLengthPhraseProperty::CalcTotals(Vec &vec)
00084 {
00085 for (size_t i = 0; i < vec.size(); ++i) {
00086 float total = 0;
00087
00088 const Map &map = vec[i].first;
00089 Map::const_iterator iter;
00090 for (iter = map.begin(); iter != map.end(); ++iter) {
00091 float count = iter->second;
00092 total += count;
00093 }
00094
00095 vec[i].second = total;
00096 }
00097 }
00098
00099 float SpanLengthPhraseProperty::GetProb(size_t ntInd, size_t sourceWidth, float smoothing) const
00100 {
00101 float count;
00102
00103 const std::pair<Map, float> &data = m_source[ntInd];
00104 const Map &map = data.first;
00105
00106 if (map.size() == 0) {
00107
00108 return 1.0f;
00109 }
00110
00111 Map::const_iterator iter = map.find(sourceWidth);
00112 if (iter == map.end()) {
00113 count = 0;
00114 } else {
00115 count = iter->second;
00116 }
00117 count += smoothing;
00118
00119 float total = data.second + smoothing * (float) map.size();
00120 float ret = count / total;
00121 return ret;
00122 }
00123
00124 }