#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/FFState.h"
#include "moses/FF/InternalTree.h"
#include "moses/Word.h"

#include <boost/thread/tss.hpp>
#include <boost/array.hpp>

#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#endif
00014
00015
00016
00017
00018
00019
00020 namespace nplm
00021 {
00022 class neuralTM;
00023 }
00024
00025 namespace Moses
00026 {
00027
00028 namespace rdlm
00029 {
00030
00031
00032
00033
00034 class ThreadLocal
00035 {
00036 public:
00037 std::vector<int> ancestor_heads;
00038 std::vector<int> ancestor_labels;
00039 std::vector<int> ngram;
00040 std::vector<int> heads;
00041 std::vector<int> labels;
00042 std::vector<int> heads_output;
00043 std::vector<int> labels_output;
00044 std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack;
00045 nplm::neuralTM* lm_head;
00046 nplm::neuralTM* lm_label;
00047
00048 ThreadLocal(nplm::neuralTM *lm_head_base_instance_, nplm::neuralTM *lm_label_base_instance_, bool normalizeHeadLM, bool normalizeLabelLM, int cacheSize);
00049 ~ThreadLocal();
00050 };
00051 }
00052
00053 class RDLMState : public TreeState
00054 {
00055 float m_approx_head;
00056 float m_approx_label;
00057 size_t m_hash;
00058 public:
00059 RDLMState(TreePointer tree, float approx_head, float approx_label, size_t hash)
00060 : TreeState(tree)
00061 , m_approx_head(approx_head)
00062 , m_approx_label(approx_label)
00063 , m_hash(hash)
00064 {}
00065
00066 float GetApproximateScoreHead() const {
00067 return m_approx_head;
00068 }
00069
00070 float GetApproximateScoreLabel() const {
00071 return m_approx_label;
00072 }
00073
00074 size_t GetHash() const {
00075 return m_hash;
00076 }
00077
00078 int Compare(const FFState& other) const {
00079 if (m_hash == static_cast<const RDLMState*>(&other)->GetHash()) return 0;
00080 else if (m_hash > static_cast<const RDLMState*>(&other)->GetHash()) return 1;
00081 else return -1;
00082 }
00083 };
00084
00085 class RDLM : public StatefulFeatureFunction
00086 {
00087 typedef std::map<InternalTree*,TreePointer> TreePointerMap;
00088
00089 nplm::neuralTM* lm_head_base_instance_;
00090 nplm::neuralTM* lm_label_base_instance_;
00091
00092 mutable boost::thread_specific_ptr<rdlm::ThreadLocal> thread_objects_backend_;
00093
00094 std::string m_glueSymbolString;
00095 Word dummy_head;
00096 Word m_glueSymbol;
00097 Word m_startSymbol;
00098 Word m_endSymbol;
00099 Word m_endTag;
00100 std::string m_path_head_lm;
00101 std::string m_path_label_lm;
00102 bool m_isPretermBackoff;
00103 size_t m_context_left;
00104 size_t m_context_right;
00105 size_t m_context_up;
00106 bool m_premultiply;
00107 bool m_rerank;
00108 bool m_normalizeHeadLM;
00109 bool m_normalizeLabelLM;
00110 bool m_sharedVocab;
00111 std::string m_debugPath;
00112 int m_binarized;
00113 int m_cacheSize;
00114
00115 size_t offset_up_head;
00116 size_t offset_up_label;
00117
00118 size_t size_head;
00119 size_t size_label;
00120 std::vector<int> static_label_null;
00121 std::vector<int> static_head_null;
00122 int static_dummy_head;
00123 int static_start_head;
00124 int static_start_label;
00125 int static_stop_head;
00126 int static_stop_label;
00127 int static_head_head;
00128 int static_head_label;
00129 int static_root_head;
00130 int static_root_label;
00131
00132 int static_head_label_output;
00133 int static_stop_label_output;
00134 int static_start_label_output;
00135
00136 FactorType m_factorType;
00137
00138 static const int LABEL_INPUT = 0;
00139 static const int LABEL_OUTPUT = 1;
00140 static const int HEAD_INPUT = 2;
00141 static const int HEAD_OUTPUT = 3;
00142 mutable std::vector<int> factor2id_label_input;
00143 mutable std::vector<int> factor2id_label_output;
00144 mutable std::vector<int> factor2id_head_input;
00145 mutable std::vector<int> factor2id_head_output;
00146
00147 #ifdef WITH_THREADS
00148
00149 mutable boost::shared_mutex m_accessLock;
00150 #endif
00151
00152 public:
00153 RDLM(const std::string &line)
00154 : StatefulFeatureFunction(2, line)
00155 , m_glueSymbolString("Q")
00156 , m_isPretermBackoff(true)
00157 , m_context_left(3)
00158 , m_context_right(0)
00159 , m_context_up(2)
00160 , m_premultiply(true)
00161 , m_rerank(false)
00162 , m_normalizeHeadLM(false)
00163 , m_normalizeLabelLM(false)
00164 , m_sharedVocab(false)
00165 , m_binarized(0)
00166 , m_cacheSize(1000000)
00167 , m_factorType(0) {
00168 ReadParameters();
00169 std::vector<FactorType> factors;
00170 factors.push_back(0);
00171 dummy_head.CreateFromString(Output, factors, "<dummy_head>", false);
00172 m_glueSymbol.CreateFromString(Output, factors, m_glueSymbolString, true);
00173 m_startSymbol.CreateFromString(Output, factors, "SSTART", true);
00174 m_endSymbol.CreateFromString(Output, factors, "SEND", true);
00175 m_endTag.CreateFromString(Output, factors, "</s>", false);
00176 }
00177
00178 ~RDLM();
00179
00180 virtual const FFState* EmptyHypothesisState(const InputType &input) const {
00181 return new RDLMState(TreePointer(), 0, 0, 0);
00182 }
00183
00184 void Score(InternalTree* root, const TreePointerMap & back_pointers, boost::array<float,4> &score, size_t &boundary_hash, rdlm::ThreadLocal &thread_objects, int num_virtual = 0, int rescoring_levels = 0) const;
00185 bool GetHead(InternalTree* root, const TreePointerMap & back_pointers, std::pair<int,int> & IDs) const;
00186 void GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & back_pointers, int reached_end, rdlm::ThreadLocal &thread_objects) const;
00187 void GetIDs(const Word & head, const Word & preterminal, std::pair<int,int> & IDs) const;
00188 int Factor2ID(const Factor * const factor, int model_type) const;
00189 void ScoreFile(std::string &path);
00190 void PrintInfo(std::vector<int> &ngram, nplm::neuralTM* lm) const;
00191
00192 TreePointerMap AssociateLeafNTs(InternalTree* root, const std::vector<TreePointer> &previous) const;
00193
00194 bool IsUseable(const FactorMask &mask) const {
00195 return true;
00196 }
00197
00198 void SetParameter(const std::string& key, const std::string& value);
00199
00200 FFState* EvaluateWhenApplied(
00201 const Hypothesis& cur_hypo,
00202 const FFState* prev_state,
00203 ScoreComponentCollection* accumulator) const {
00204 UTIL_THROW(util::Exception, "Not implemented");
00205 };
00206 FFState* EvaluateWhenApplied(
00207 const ChartHypothesis& ,
00208 int ,
00209 ScoreComponentCollection* accumulator) const;
00210
00211 void Load(AllOptions::ptr const& opts);
00212
00213
00214 class UnbinarizedChildren
00215 {
00216 private:
00217 std::vector<TreePointer>::const_iterator iter;
00218 std::vector<TreePointer>::const_iterator _begin;
00219 bool _ended;
00220 InternalTree* current;
00221 const TreePointerMap & back_pointers;
00222 bool binarized;
00223 std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > &stack;
00224
00225 public:
00226 UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary, std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > & persistent_stack):
00227 current(root),
00228 back_pointers(pointers),
00229 binarized(binary),
00230 stack(persistent_stack) {
00231 stack.resize(0);
00232 _ended = current->GetChildren().empty();
00233 iter = current->GetChildren().begin();
00234
00235 while (binarized && !(*iter)->GetLabel().GetString(0).empty() && (*iter)->GetLabel().GetString(0).data()[0] == '^') {
00236 stack.push_back(std::make_pair(current, iter));
00237
00238 if ((*iter)->IsLeafNT()) {
00239 current = back_pointers.find(iter->get())->second.get();
00240 } else {
00241 current = iter->get();
00242 }
00243 iter = current->GetChildren().begin();
00244 }
00245 _begin = iter;
00246 }
00247
00248 std::vector<TreePointer>::const_iterator begin() const {
00249 return _begin;
00250 }
00251 bool ended() const {
00252 return _ended;
00253 }
00254
00255 std::vector<TreePointer>::const_iterator operator++() {
00256 iter++;
00257 if (iter == current->GetChildren().end()) {
00258 while (!stack.empty()) {
00259 std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> & active = stack.back();
00260 current = active.first;
00261 iter = ++active.second;
00262 stack.pop_back();
00263 if (iter != current->GetChildren().end()) {
00264 break;
00265 }
00266 }
00267 if (iter == current->GetChildren().end()) {
00268 _ended = true;
00269 return iter;
00270 }
00271 }
00272
00273 while (binarized && !(*iter)->GetLabel().GetString(0).empty() && (*iter)->GetLabel().GetString(0).data()[0] == '^') {
00274 stack.push_back(std::make_pair(current, iter));
00275
00276 if ((*iter)->IsLeafNT()) {
00277 current = back_pointers.find(iter->get())->second.get();
00278 } else {
00279 current = iter->get();
00280 }
00281 iter = current->GetChildren().begin();
00282 }
00283 return iter;
00284 }
00285 };
00286
00287 };
00288
00289 }