00001 #include <stdexcept>
00002
00003 #include "util/exception.hh"
00004
00005 #include "FeatureFunction.h"
00006 #include "moses/Hypothesis.h"
00007 #include "moses/Manager.h"
00008 #include "moses/TranslationOption.h"
00009 #include "moses/TranslationTask.h"
00010 #include "moses/Util.h"
00011 #include "moses/FF/DistortionScoreProducer.h"
00012
00013 #include <boost/foreach.hpp>
00014
00015 using namespace std;
00016
00017 namespace Moses
00018 {
00019
00020 multiset<string> FeatureFunction::description_counts;
00021
00022 std::vector<FeatureFunction*> FeatureFunction::s_staticColl;
00023
00024 FeatureFunction &FeatureFunction::FindFeatureFunction(const std::string& name)
00025 {
00026 for (size_t i = 0; i < s_staticColl.size(); ++i) {
00027 FeatureFunction &ff = *s_staticColl[i];
00028 if (ff.GetScoreProducerDescription() == name) {
00029 return ff;
00030 }
00031 }
00032
00033 throw "Unknown feature " + name;
00034 }
00035
00036 void FeatureFunction::Destroy()
00037 {
00038 RemoveAllInColl(s_staticColl);
00039 }
00040
00041 void FeatureFunction::SetupAll(TranslationTask const& ttask)
00042 {
00043 BOOST_FOREACH(FeatureFunction* ff, s_staticColl)
00044 ff->Setup(ttask);
00045 }
00046
00047 FeatureFunction::
00048 FeatureFunction(const std::string& line, bool registerNow)
00049 : m_tuneable(true)
00050 , m_requireSortingAfterSourceContext(false)
00051 , m_verbosity(std::numeric_limits<std::size_t>::max())
00052 , m_numScoreComponents(1)
00053 , m_index(0)
00054 {
00055 m_numTuneableComponents = m_numScoreComponents;
00056 ParseLine(line);
00057
00058
00059 }
00060
00061 FeatureFunction::FeatureFunction(size_t numScoreComponents, const std::string& line, bool registerNow)
00062 : m_tuneable(true)
00063 , m_requireSortingAfterSourceContext(false)
00064 , m_verbosity(std::numeric_limits<std::size_t>::max())
00065 , m_numScoreComponents(numScoreComponents)
00066 , m_index(0)
00067 {
00068 m_numTuneableComponents = m_numScoreComponents;
00069 ParseLine(line);
00070
00071
00072 }
00073
00074 void
00075 FeatureFunction::
00076 Register(FeatureFunction* ff)
00077 {
00078 ScoreComponentCollection::RegisterScoreProducer(ff);
00079 s_staticColl.push_back(ff);
00080 }
00081
00082 FeatureFunction::~FeatureFunction() {}
00083
00084 void FeatureFunction::ParseLine(const std::string &line)
00085 {
00086 vector<string> toks = Tokenize(line);
00087 UTIL_THROW_IF2(toks.empty(), "Empty line");
00088
00089 string nameStub = toks[0];
00090
00091 set<string> keys;
00092
00093 for (size_t i = 1; i < toks.size(); ++i) {
00094 vector<string> args = TokenizeFirstOnly(toks[i], "=");
00095 UTIL_THROW_IF2(args.size() != 2,
00096 "Incorrect format for feature function arg: " << toks[i]);
00097
00098 pair<set<string>::iterator,bool> ret = keys.insert(args[0]);
00099 UTIL_THROW_IF2(!ret.second, "Duplicate key in line " << line);
00100
00101 if (args[0] == "num-features") {
00102 m_numScoreComponents = Scan<size_t>(args[1]);
00103 m_numTuneableComponents = m_numScoreComponents;
00104 } else if (args[0] == "name") {
00105 m_description = args[1];
00106 } else {
00107 m_args.push_back(args);
00108 }
00109 }
00110
00111
00112 if (m_description == "") {
00113 size_t index = description_counts.count(nameStub);
00114
00115 string descr = SPrint(nameStub) + SPrint(index);
00116
00117 description_counts.insert(nameStub);
00118 m_description = descr;
00119 }
00120
00121 }
00122
00123 void FeatureFunction::SetParameter(const std::string& key, const std::string& value)
00124 {
00125 if (key == "tuneable") {
00126 m_tuneable = Scan<bool>(value);
00127 } else if (key == "tuneable-components") {
00128 UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription()
00129 << ": tuneable-components cannot be set if tuneable=false");
00130 SetTuneableComponents(value);
00131 } else if (key == "require-sorting-after-source-context") {
00132 m_requireSortingAfterSourceContext = Scan<bool>(value);
00133 } else if (key == "verbosity") {
00134 m_verbosity = Scan<size_t>(value);
00135 } else if (key == "filterable") {
00136 } else {
00137 UTIL_THROW2(GetScoreProducerDescription() << ": Unknown argument " << key << "=" << value);
00138 }
00139 }
00140
00141 void FeatureFunction::ReadParameters()
00142 {
00143 while (!m_args.empty()) {
00144 const vector<string> &args = m_args[0];
00145 SetParameter(args[0], args[1]);
00146
00147 m_args.erase(m_args.begin());
00148 }
00149 }
00150
00151 std::vector<float> FeatureFunction::DefaultWeights() const
00152 {
00153 return std::vector<float>(this->m_numScoreComponents,1.0);
00154
00155 }
00156
00157 void FeatureFunction::SetTuneableComponents(const std::string& value)
00158 {
00159 std::vector<std::string> toks = Tokenize(value,",");
00160 UTIL_THROW_IF2(toks.empty(), GetScoreProducerDescription()
00161 << ": Empty tuneable-components");
00162 UTIL_THROW_IF2(toks.size()!=m_numScoreComponents, GetScoreProducerDescription()
00163 << ": tuneable-components value has to be a comma-separated list of "
00164 << m_numScoreComponents << " boolean values");
00165
00166 m_tuneableComponents.resize(m_numScoreComponents);
00167 m_numTuneableComponents = m_numScoreComponents;
00168
00169 for (size_t i = 0; i < toks.size(); ++i) {
00170 m_tuneableComponents[i] = Scan<bool>(toks[i]);
00171 if (!m_tuneableComponents[i]) {
00172 --m_numTuneableComponents;
00173 }
00174 }
00175 }
00176
00177
00178
00179
00180
00181
00182
00183
00184 void
00185 FeatureFunction
00186 ::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
00187 {
00188 CleanUpAfterSentenceProcessing(*(ttask->GetSource().get()));
00189 }
00190
00191 size_t
00192 FeatureFunction
00193 ::GetIndex() const
00194 {
00195 return m_index;
00196 }
00197
00198
00200
00201 size_t
00202 FeatureFunction
00203 ::SetIndex(size_t const idx)
00204 {
00205 m_index = idx;
00206 return this->GetNumScoreComponents() + idx;
00207 }
00208
00209 }
00210