00001 #ifndef moses_WordCoocTable_h 00002 #define moses_WordCoocTable_h 00003 00004 #include "moses/TranslationModel/DynSAInclude/vocab.h" 00005 #include "moses/TranslationModel/DynSAInclude/types.h" 00006 #include "moses/TranslationModel/DynSAInclude/utils.h" 00007 #include "moses/InputFileStream.h" 00008 #include "moses/FactorTypeSet.h" 00009 #include "moses/TargetPhrase.h" 00010 #include <boost/dynamic_bitset.hpp> 00011 #include <map> 00012 00013 namespace Moses 00014 { 00015 00016 #ifndef bitvector 00017 typedef boost::dynamic_bitset<uint64_t> bitvector; 00018 #endif 00019 00020 00025 class WordCoocTable 00026 { 00027 typedef std::map<wordID_t,uint32_t> my_map_t; 00028 std::vector<my_map_t> m_cooc; 00029 std::vector<uint32_t> m_marg1; 00030 std::vector<uint32_t> m_marg2; 00031 public: 00032 WordCoocTable(); 00033 WordCoocTable(wordID_t const VocabSize1, wordID_t const VocabSize2); 00034 uint32_t GetJoint(size_t const a, size_t const b) const; 00035 uint32_t GetMarg1(size_t const x) const; 00036 uint32_t GetMarg2(size_t const x) const; 00037 float pfwd(size_t const a, size_t const b) const; 00038 float pbwd(size_t const a, size_t const b) const; 00039 void 00040 Count(size_t const a, size_t const b); 00041 00042 template<typename idvec, typename alnvec> 00043 void 00044 Count(idvec const& s1, idvec const& s2, alnvec const& aln, 00045 wordID_t const NULL1, wordID_t const NULL2); 00046 00047 }; 00048 00049 template<typename idvec, typename alnvec> 00050 void 00051 WordCoocTable:: 00052 Count(idvec const& s1, idvec const& s2, alnvec const& aln, 00053 wordID_t const NULL1, wordID_t const NULL2) 00054 { 00055 boost::dynamic_bitset<uint64_t> check1(s1.size()), check2(s2.size()); 00056 check1.set(); 00057 check2.set(); 00058 for (size_t i = 0; i < aln.size(); i += 2) { 00059 Count(s1[aln[i]], s2[aln[i+1]]); 00060 check1.reset(aln[i]); 00061 check2.reset(aln[i+1]); 00062 } 00063 for (size_t i = check1.find_first(); i < check1.size(); i = check1.find_next(i)) 00064 Count(s1[i], NULL2); 00065 for (size_t i = check2.find_first(); i < check2.size(); i = check2.find_next(i)) 00066 Count(NULL1, s2[i]); 00067 } 00068 00069 } 00070 #endif