00001 #pragma once
00002
00003 #include <fstream>
00004 #include <string>
00005 #include <vector>
00006
00007 #include <boost/shared_ptr.hpp>
00008
00009 #include "Ngram.h"
00010 #include "Reference.h"
00011 #include "ScopedVector.h"
00012 #include "ScoreData.h"
00013 #include "StatisticsBasedScorer.h"
00014 #include "Types.h"
00015
00016 namespace MosesTuning
00017 {
00018
/// N-gram order used for the BLEU statistics in this header.
/// BleuScorer::NumberOfScores() reports 2 * kBleuNgramOrder + 1 entries and
/// BleuScorer::getReferenceLength() reads index 2 * kBleuNgramOrder.
const std::size_t kBleuNgramOrder = 4;
00020
00024 class BleuScorer: public StatisticsBasedScorer
00025 {
00026 public:
00027 enum ReferenceLengthType {
00028 AVERAGE,
00029 CLOSEST,
00030 SHORTEST
00031 };
00032
00033 explicit BleuScorer(const std::string& config = "");
00034 ~BleuScorer();
00035
00036 static std::vector<float> ScoreNbestList(const std::string& scoreFile, const std::string& featureFile);
00037
00038 virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
00039 virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
00040 virtual statscore_t calculateScore(const std::vector<ScoreStatsType>& comps) const;
00041 virtual std::size_t NumberOfScores() const {
00042 return 2 * kBleuNgramOrder + 1;
00043 }
00044
00045 void CalcBleuStats(const Reference& ref, const std::string& text, ScoreStats& entry) const;
00046
00047 int CalcReferenceLength(const Reference& ref, std::size_t length) const;
00048
00049 ReferenceLengthType GetReferenceLengthType() const {
00050 return m_ref_length_type;
00051 }
00052
00053 void SetReferenceLengthType(ReferenceLengthType type) {
00054 m_ref_length_type = type;
00055 }
00056
00057 const std::vector<Reference*>& GetReferences() const {
00058 return m_references.get();
00059 }
00060
00061 virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const {
00062 return totals[kBleuNgramOrder*2];
00063 }
00064
00068 size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n, bool is_testing=false) const;
00069
00070 void DumpCounts(std::ostream* os, const NgramCounts& counts) const;
00071
00072
00073 bool OpenReferenceStream(std::istream* is, std::size_t file_id);
00074
00075 void ProcessReferenceLine(const std::string& line, Reference* ref) const;
00076
00077 bool GetNextReferenceFromStreams(std::vector<boost::shared_ptr<std::ifstream> >& referenceStreams, Reference& ref) const;
00078
00079
00080 protected:
00081 ReferenceLengthType m_ref_length_type;
00082
00083
00084 ScopedVector<Reference> m_references;
00085
00086
00087 BleuScorer(const std::string& name, const std::string& config): StatisticsBasedScorer(name,config) {}
00088
00089
00090 BleuScorer(const BleuScorer&);
00091 BleuScorer& operator=(const BleuScorer&);
00092 };
00093
/**
 * Sentence-level BLEU with smoothing, computed from a statistics vector.
 *
 * @param stats     per-sentence statistics; NOTE(review): presumably laid
 *                  out as BleuScorer produces them (2 * kBleuNgramOrder + 1
 *                  entries) -- confirm against the definition.
 * @param smoothing smoothing constant, 1.0 by default.
 * @param smoothBP  whether smoothing is also applied to the brevity
 *                  penalty (presumed from the name); off by default.
 * @return the smoothed sentence-level score.
 */
float smoothedSentenceBleu(const std::vector<float>& stats,
                           float smoothing = 1.0,
                           bool smoothBP = false);
00099
/**
 * Sentence-level BLEU computed from a sentence's statistics together with
 * background statistics.
 *
 * @param sent statistics of the sentence being scored.
 * @param bg   background statistics; NOTE(review): presumably accumulated
 *             corpus-level counts with the same layout as @p sent --
 *             confirm against the definition.
 * @return the resulting score.
 */
float sentenceLevelBackgroundBleu(const std::vector<float>& sent,
                                  const std::vector<float>& bg);
00104
00105 }
00106