00001
00002
00003
00004
00005
00006
00007
00008
00009 #include <iostream>
00010 #include "StatisticsBasedScorer.h"
00011
00012 using namespace std;
00013
00014 namespace MosesTuning
00015 {
00016
00017
00018 StatisticsBasedScorer::StatisticsBasedScorer(const string& name, const string& config)
00019 : Scorer(name,config)
00020 {
00021
00022 static string KEY_TYPE = "regtype";
00023 static string KEY_WINDOW = "regwin";
00024 static string KEY_CASE = "case";
00025 static string TYPE_NONE = "none";
00026 static string TYPE_AVERAGE = "average";
00027 static string TYPE_MINIMUM = "min";
00028 static string TRUE = "true";
00029 static string FALSE = "false";
00030
00031 string type = getConfig(KEY_TYPE,TYPE_NONE);
00032 if (type == TYPE_NONE) {
00033 m_regularization_type = NONE;
00034 } else if (type == TYPE_AVERAGE) {
00035 m_regularization_type = AVERAGE;
00036 } else if (type == TYPE_MINIMUM) {
00037 m_regularization_type = MINIMUM;
00038 } else {
00039 throw runtime_error("Unknown scorer regularisation strategy: " + type);
00040 }
00041
00042
00043 const string& window = getConfig(KEY_WINDOW, "0");
00044 m_regularization_window = atoi(window.c_str());
00045
00046
00047 const string& preserve_case = getConfig(KEY_CASE,TRUE);
00048 if (preserve_case == TRUE) {
00049 m_enable_preserve_case = true;
00050 } else if (preserve_case == FALSE) {
00051 m_enable_preserve_case = false;
00052 }
00053
00054 }
00055
00056 void StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
00057 statscores_t& scores) const
00058 {
00059 if (!m_score_data) {
00060 throw runtime_error("Score data not loaded");
00061 }
00062
00063 if (m_score_data->size() == 0) {
00064 throw runtime_error("Score data is empty");
00065 }
00066 if (candidates.size() == 0) {
00067 throw runtime_error("No candidates supplied");
00068 }
00069 int numCounts = m_score_data->get(0,candidates[0]).size();
00070 vector<ScoreStatsType> totals(numCounts);
00071 for (size_t i = 0; i < candidates.size(); ++i) {
00072 ScoreStats stats = m_score_data->get(i,candidates[i]);
00073 if (stats.size() != totals.size()) {
00074 stringstream msg;
00075 msg << "Statistics for (" << "," << candidates[i] << ") have incorrect "
00076 << "number of fields. Found: " << stats.size() << " Expected: "
00077 << totals.size();
00078 throw runtime_error(msg.str());
00079 }
00080 for (size_t k = 0; k < totals.size(); ++k) {
00081 totals[k] += stats.get(k);
00082 }
00083 }
00084 scores.push_back(calculateScore(totals));
00085
00086 candidates_t last_candidates(candidates);
00087
00088 for (size_t i = 0; i < diffs.size(); ++i) {
00089 for (size_t j = 0; j < diffs[i].size(); ++j) {
00090 size_t sid = diffs[i][j].first;
00091 size_t nid = diffs[i][j].second;
00092 size_t last_nid = last_candidates[sid];
00093 for (size_t k = 0; k < totals.size(); ++k) {
00094 int diff = m_score_data->get(sid,nid).get(k)
00095 - m_score_data->get(sid,last_nid).get(k);
00096 totals[k] += diff;
00097 }
00098 last_candidates[sid] = nid;
00099 }
00100 scores.push_back(calculateScore(totals));
00101 }
00102
00103
00104
00105 if (m_regularization_type == NONE || m_regularization_window <= 0) {
00106
00107 return;
00108 }
00109
00110
00111 statscores_t raw_scores(scores);
00112 for (size_t i = 0; i < scores.size(); ++i) {
00113 size_t start = 0;
00114 if (i >= m_regularization_window) {
00115 start = i - m_regularization_window;
00116 }
00117 const size_t end = min(scores.size(), i + m_regularization_window + 1);
00118 if (m_regularization_type == AVERAGE) {
00119 scores[i] = score_average(raw_scores,start,end);
00120 } else {
00121 scores[i] = score_min(raw_scores,start,end);
00122 }
00123 }
00124 }
00125
00126 }
00127