00001
00002 #include "ug_bitext_jstats.h"
00003 namespace sapt
00004 {
00005
00006 uint32_t jstats::rcnt() const { return my_rcnt; }
00007 float jstats::wcnt() const { return my_wcnt; }
00008 float jstats::bcnt() const { return my_bcnt; }
00009 uint32_t jstats::cnt2() const { return my_cnt2; }
00010
00011
00012 bool jstats::valid() { return my_wcnt >= 0; }
00013 void jstats::validate() { if (my_wcnt < 0) my_wcnt *= -1; }
00014 void jstats::invalidate() { if (my_wcnt > 0) my_wcnt *= -1; }
00015
00016 jstats::
00017 jstats()
00018 : my_rcnt(0), my_cnt2(0), my_wcnt(0), my_bcnt(0)
00019 {
00020 for (int i = 0; i <= LRModel::NONE; ++i)
00021 ofwd[i] = obwd[i] = 0;
00022 my_aln.reserve(1);
00023 }
00024
00025 jstats::
00026 jstats(jstats const& other)
00027 {
00028 my_rcnt = other.rcnt();
00029 my_wcnt = other.wcnt();
00030 my_bcnt = other.bcnt();
00031 my_aln = other.aln();
00032 sids = other.sids;
00033 indoc = other.indoc;
00034 for (int i = 0; i <= LRModel::NONE; i++)
00035 {
00036 ofwd[i] = other.ofwd[i];
00037 obwd[i] = other.obwd[i];
00038 }
00039 }
00040
00041 uint32_t
00042 jstats::
00043 dcnt_fwd(PhraseOrientation const idx) const
00044 {
00045 assert(idx <= LRModel::NONE);
00046 return ofwd[idx];
00047 }
00048
00049 uint32_t
00050 jstats::
00051 dcnt_bwd(PhraseOrientation const idx) const
00052 {
00053 assert(idx <= LRModel::NONE);
00054 return obwd[idx];
00055 }
00056
00057 size_t
00058 jstats::
00059 add(float w, float b, std::vector<unsigned char> const& a, uint32_t const cnt2,
00060 uint32_t fwd_orient, uint32_t bwd_orient, int const docid,
00061 uint32_t const sid, bool const track_sid)
00062 {
00063 boost::lock_guard<boost::mutex> lk(this->lock);
00064 my_cnt2 = cnt2;
00065 my_rcnt += 1;
00066 my_wcnt += w;
00067 my_bcnt += b;
00068 if (a.size())
00069 {
00070 size_t i = 0;
00071 while (i < my_aln.size() && my_aln[i].second != a) ++i;
00072 if (i == my_aln.size())
00073 my_aln.push_back(std::pair<size_t,std::vector<unsigned char> >(1,a));
00074 else
00075 my_aln[i].first++;
00076 if (my_aln[i].first > my_aln[i/2].first)
00077 push_heap(my_aln.begin(),my_aln.begin()+i+1);
00078 }
00079 ++ofwd[fwd_orient];
00080 ++obwd[bwd_orient];
00081
00082 if (track_sid)
00083 {
00084 if (!sids)
00085 sids.reset(new std::vector<uint32_t>);
00086 sids->push_back(sid);
00087 }
00088 if (docid >= 0)
00089 {
00090
00091 ++indoc[docid];
00092 }
00093 return my_rcnt;
00094 }
00095
00096 std::vector<std::pair<size_t, std::vector<unsigned char> > > const&
00097 jstats::
00098 aln() const
00099 { return my_aln; }
00100
00101 }