00001 #include "Optimizer.h"
00002
00003 #include <cmath>
00004 #include "util/check.hh"
00005 #include <vector>
00006 #include <limits>
00007 #include <map>
00008 #include <cfloat>
00009 #include <iostream>
00010 #include <stdint.h>
00011
00012 #include "Point.h"
00013 #include "Util.h"
00014
00015 using namespace std;
00016
00017 static const float MIN_FLOAT = -1.0 * numeric_limits<float>::max();
00018 static const float MAX_FLOAT = numeric_limits<float>::max();
00019
00020 namespace {
00021
00025 inline float intersect(float m1, float b1, float m2, float b2)
00026 {
00027 float isect = (b2 - b1) / (m1 - m2);
00028 if (!isfinite(isect)) {
00029 isect = MAX_FLOAT;
00030 }
00031 return isect;
00032 }
00033
00034 }
00035
00036 namespace MosesTuning
00037 {
00038
00039
00040 Optimizer::Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<bool>& pos, const vector<parameter_t>& start, unsigned int nrandom)
00041 : m_scorer(NULL), m_feature_data(), m_num_random_directions(nrandom), m_positive(pos)
00042 {
00043
00044 Point::m_pdim = Pd;
00045
00046 CHECK(start.size() == Pd);
00047 Point::m_dim = i2O.size();
00048 Point::m_opt_indices = i2O;
00049 if (Point::m_pdim > Point::m_dim) {
00050 for (unsigned int i = 0; i < Point::m_pdim; i++) {
00051 unsigned int j = 0;
00052 while (j < Point::m_dim && i != i2O[j])
00053 j++;
00054
00055
00056
00057 if (j == Point::m_dim)
00058 Point::m_fixed_weights[i] = start[i];
00059 }
00060 }
00061 }
00062
00063 Optimizer::~Optimizer() {}
00064
00065 statscore_t Optimizer::GetStatScore(const Point& param) const
00066 {
00067 vector<unsigned> bests;
00068 Get1bests(param, bests);
00069 statscore_t score = GetStatScore(bests);
00070 return score;
00071 }
00072
00073 map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap, float newt, const pair<unsigned,unsigned>& newdiff)
00074 {
00075 map<float,diff_t>::iterator it = thresholdmap.find(newt);
00076 if (it != thresholdmap.end()) {
00077
00078 if (it->second.back().first == newdiff.first)
00079
00080 it->second.back().second = newdiff.second;
00081 else
00082 it->second.push_back(newdiff);
00083 } else {
00084
00085 pair<map<float,diff_t>::iterator, bool> ins = thresholdmap.insert(threshold(newt, diff_t(1, newdiff)));
00086 CHECK(ins.second);
00087 it = ins.first;
00088 }
00089 return it;
00090 }
00091
00092 statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, Point& bestpoint) const
00093 {
00094
00095 float min_int = 0.0001;
00096
00097
00098
00099 map<float,diff_t> thresholdmap;
00100 thresholdmap[MIN_FLOAT] = diff_t();
00101 vector<unsigned> first1best;
00102 for (unsigned int S = 0; S < size(); S++) {
00103 map<float,diff_t >::iterator previnserted = thresholdmap.begin();
00104
00105
00106
00107 multimap<float, unsigned> gradient;
00108 vector<float> f0;
00109 f0.resize(m_feature_data->get(S).size());
00110 for (unsigned j = 0; j < m_feature_data->get(S).size(); j++) {
00111
00112 gradient.insert(pair<float, unsigned>(direction * (m_feature_data->get(S,j)), j));
00113
00114 f0[j] = origin * m_feature_data->get(S, j);
00115 }
00116
00117
00118
00119
00120
00121 multimap<float,unsigned>::iterator gradientit = gradient.begin();
00122 multimap<float,unsigned>::iterator highest_f0 = gradient.begin();
00123
00124 float smallest = gradientit->first;
00125
00126
00127 gradientit++;
00128 while (gradientit != gradient.end() && gradientit->first == smallest) {
00129
00130
00131 if (f0[gradientit->second] > f0[highest_f0->second])
00132 highest_f0 = gradientit;
00133 gradientit++;
00134 }
00135
00136 gradientit = highest_f0;
00137 first1best.push_back(highest_f0->second);
00138
00139
00140
00141 while (gradientit != gradient.end()) {
00142 map<float,unsigned>::iterator leftmost = gradientit;
00143 float m = gradientit->first;
00144 float b = f0[gradientit->second];
00145 multimap<float,unsigned>::iterator gradientit2 = gradientit;
00146 gradientit2++;
00147 float leftmostx = MAX_FLOAT;
00148 for (; gradientit2 != gradient.end(); gradientit2++) {
00149
00150
00151
00152 float curintersect;
00153 if (m != gradientit2->first) {
00154 curintersect = intersect(m, b, gradientit2->first, f0[gradientit2->second]);
00155
00156 if (curintersect<=leftmostx) {
00157
00158
00159
00160 leftmostx = curintersect;
00161 leftmost = gradientit2;
00162 }
00163 }
00164 }
00165 if (leftmost == gradientit) {
00166
00167
00168
00169
00170 CHECK(abs(leftmost->first-gradient.rbegin()->first) < 0.0001);
00171
00172 break;
00173 }
00174
00175
00176 pair<unsigned,unsigned> newd(S, leftmost->second);
00177
00178 if (leftmostx-previnserted->first < min_int) {
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188 map<float,diff_t>::iterator tit = thresholdmap.find(leftmostx);
00189 if (tit == previnserted) {
00190
00191 CHECK(previnserted->second.back().first == newd.first);
00192 previnserted->second.back()=newd;
00193
00194 } else {
00195
00196 if (tit == thresholdmap.end()) {
00197 thresholdmap[leftmostx]=previnserted->second;
00198 thresholdmap.erase(previnserted);
00199 previnserted = thresholdmap.find(leftmostx);
00200 previnserted->second.back()=newd;
00201
00202 } else {
00203
00204 tit->second.insert(tit->second.end(),previnserted->second.begin(),previnserted->second.end());
00205 CHECK(tit->second.back().first == newd.first);
00206 tit->second.back()=newd;
00207 thresholdmap.erase(previnserted);
00208 previnserted = tit;
00209 }
00210 }
00211
00212 CHECK(previnserted != thresholdmap.end());
00213 } else {
00214 previnserted = AddThreshold(thresholdmap, leftmostx, newd);
00215 }
00216 gradientit = leftmost;
00217 }
00218 }
00219
00220
00221
00222
00223 map<float,diff_t >::iterator thrit;
00224 if (verboselevel() > 6) {
00225 cerr << "Thresholds:(" << thresholdmap.size() << ")" << endl;
00226 for (thrit = thresholdmap.begin(); thrit != thresholdmap.end(); thrit++) {
00227 cerr << "x: " << thrit->first << " diffs";
00228 for (size_t j = 0; j < thrit->second.size(); ++j) {
00229 cerr << " " <<thrit->second[j].first << "," << thrit->second[j].second;
00230 }
00231 cerr << endl;
00232 }
00233 }
00234
00235
00236 thrit = thresholdmap.begin();
00237 ++thrit;
00238 diffs_t diffs;
00239 for (; thrit != thresholdmap.end(); thrit++)
00240 diffs.push_back(thrit->second);
00241 vector<statscore_t> scores = GetIncStatScore(first1best, diffs);
00242
00243 thrit = thresholdmap.begin();
00244 statscore_t bestscore = MIN_FLOAT;
00245 float bestx = MIN_FLOAT;
00246
00247
00248 CHECK(scores.size() == thresholdmap.size());
00249 for (unsigned int sc = 0; sc != scores.size(); sc++) {
00250
00251
00252
00253 Point respoint = origin + direction * thrit->first;
00254 bool is_valid = true;
00255 for (unsigned int k=0; k < respoint.getdim(); k++) {
00256 if (m_positive[k] && respoint[k] <= 0.0)
00257 is_valid = false;
00258 }
00259
00260 if (is_valid && scores[sc] > bestscore) {
00261
00262
00263
00264 bestscore = scores[sc];
00265
00266
00267
00268
00269
00270
00271 float leftx = thrit->first;
00272 if (thrit == thresholdmap.begin()) {
00273 leftx = MIN_FLOAT;
00274 }
00275 ++thrit;
00276 float rightx = MAX_FLOAT;
00277 if (thrit != thresholdmap.end()) {
00278 rightx = thrit->first;
00279 }
00280 --thrit;
00281
00282 if (leftx == MIN_FLOAT) {
00283 bestx = rightx-1000;
00284 } else if (rightx == MAX_FLOAT) {
00285 bestx = leftx + 0.1;
00286 } else {
00287 bestx = 0.5 * (rightx + leftx);
00288 }
00289
00290 }
00291 ++thrit;
00292 }
00293
00294 if (abs(bestx) < 0.00015) {
00295
00296
00297 bestx = 0.0;
00298
00299
00300
00301 if (verboselevel() > 4)
00302 cerr << "best point on line at origin" << endl;
00303 }
00304 if (verboselevel() > 3) {
00305
00306 }
00307 bestpoint = direction * bestx + origin;
00308 bestpoint.SetScore(bestscore);
00309 return bestscore;
00310 }
00311
00312 void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
00313 {
00314 CHECK(m_feature_data);
00315 bests.clear();
00316 bests.resize(size());
00317
00318 for (unsigned i = 0; i < size(); i++) {
00319 float bestfs = MIN_FLOAT;
00320 unsigned idx = 0;
00321 unsigned j;
00322 for (j = 0; j < m_feature_data->get(i).size(); j++) {
00323 float curfs = P * m_feature_data->get(i, j);
00324 if (curfs > bestfs) {
00325 bestfs = curfs;
00326 idx = j;
00327 }
00328 }
00329 bests[i]=idx;
00330 }
00331
00332 }
00333
00334 statscore_t Optimizer::Run(Point& P) const
00335 {
00336 if (!m_feature_data) {
00337 cerr << "error trying to optimize without Features loaded" << endl;
00338 exit(2);
00339 }
00340 if (!m_scorer) {
00341 cerr << "error trying to optimize without a Scorer loaded" << endl;
00342 exit(2);
00343 }
00344 if (m_scorer->getReferenceSize() != m_feature_data->size()) {
00345 cerr << "error length mismatch between feature file and score file" << endl;
00346 exit(2);
00347 }
00348
00349 P.SetScore(GetStatScore(P));
00350
00351 if (verboselevel () > 2) {
00352 cerr << "Starting point: " << P << " => " << P.GetScore() << endl;
00353 }
00354 statscore_t score = TrueRun(P);
00355
00356
00357 P.SetScore(score);
00358 if (verboselevel() > 2) {
00359 cerr << "Ending point: " << P << " => " << score << endl;
00360 }
00361 return score;
00362 }
00363
00364
00365 vector<statscore_t> Optimizer::GetIncStatScore(const vector<unsigned>& thefirst, const vector<vector <pair<unsigned,unsigned> > >& thediffs) const
00366 {
00367 CHECK(m_scorer);
00368
00369 vector<statscore_t> theres;
00370
00371 m_scorer->score(thefirst, thediffs, theres);
00372 return theres;
00373 }
00374
00375
00376 statscore_t SimpleOptimizer::TrueRun(Point& P) const
00377 {
00378 statscore_t prevscore = 0;
00379 statscore_t bestscore = MIN_FLOAT;
00380 Point best;
00381
00382
00383
00384 if (P.GetScore() > bestscore) {
00385 bestscore = P.GetScore();
00386 best = P;
00387 }
00388
00389 int nrun = 0;
00390 do {
00391 ++nrun;
00392 if (verboselevel() > 2 && nrun > 1)
00393 cerr << "last diff=" << bestscore-prevscore << " nrun " << nrun << endl;
00394 prevscore = bestscore;
00395
00396 Point linebest;
00397
00398 for (unsigned int d = 0; d < Point::getdim() + m_num_random_directions; d++) {
00399 if (verboselevel() > 4) {
00400
00401 cerr << "starting point: " << P << " => " << prevscore << endl;
00402 }
00403 Point direction;
00404 if (d < Point::getdim()) {
00405 for (unsigned int i = 0; i < Point::getdim(); i++)
00406 direction[i]=0.0;
00407 direction[d]=1.0;
00408 }
00409 else {
00410 direction.Randomize();
00411 }
00412 statscore_t curscore = LineOptimize(P, direction, linebest);
00413 if (verboselevel() > 5) {
00414 cerr << "direction: " << d << " => " << curscore << endl;
00415 cerr << "\tending point: "<< linebest << " => " << curscore << endl;
00416 }
00417 if (curscore > bestscore) {
00418 bestscore = curscore;
00419 best = linebest;
00420 if (verboselevel() > 3) {
00421 cerr << "new best dir:" << d << " (" << nrun << ")" << endl;
00422 cerr << "new best Point " << best << " => " << curscore << endl;
00423 }
00424 }
00425 }
00426 P = best;
00427 if (verboselevel() > 3)
00428 cerr << nrun << "\t" << P << endl;
00429 } while (bestscore-prevscore > kEPS);
00430
00431 if (verboselevel() > 2) {
00432 cerr << "end Powell Algo, nrun=" << nrun << endl;
00433 cerr << "last diff=" << bestscore-prevscore << endl;
00434 cerr << "\t" << P << endl;
00435 }
00436 return bestscore;
00437 }
00438
00439 statscore_t RandomDirectionOptimizer::TrueRun(Point& P) const
00440 {
00441 statscore_t prevscore = P.GetScore();
00442
00443
00444 unsigned int nrun = 0;
00445 unsigned int nrun_no_change = 0;
00446 for (; nrun_no_change < m_num_random_directions; nrun++, nrun_no_change++)
00447 {
00448
00449 Point direction;
00450 direction.Randomize();
00451
00452
00453 statscore_t score = LineOptimize(P, direction, P);
00454 if (verboselevel() > 4) {
00455 cerr << "direction: " << direction << " => " << score;
00456 cerr << " (" << (score-prevscore) << ")" << endl;
00457 cerr << "\tending point: " << P << " => " << score << endl;
00458 }
00459
00460 if (score-prevscore > kEPS)
00461 nrun_no_change = 0;
00462 prevscore = score;
00463 }
00464
00465 if (verboselevel() > 2) {
00466 cerr << "end Powell Algo, nrun=" << nrun << endl;
00467 }
00468 return prevscore;
00469 }
00470
00471
00472 statscore_t RandomOptimizer::TrueRun(Point& P) const
00473 {
00474 P.Randomize();
00475 statscore_t score = GetStatScore(P);
00476 P.SetScore(score);
00477 return score;
00478 }
00479
00480 }