00001 #include "Optimizer.h"
00002
00003 #include <cmath>
00004 #include "util/check.hh"
00005 #include <vector>
00006 #include <limits>
00007 #include <map>
00008 #include <cfloat>
00009 #include <iostream>
00010 #include <stdint.h>
00011
00012 #include "Point.h"
00013 #include "Util.h"
00014
00015 using namespace std;
00016
00017 static const float MIN_FLOAT = -1.0 * numeric_limits<float>::max();
00018 static const float MAX_FLOAT = numeric_limits<float>::max();
00019
00020 namespace
00021 {
00022
00026 inline float intersect(float m1, float b1, float m2, float b2)
00027 {
00028 float isect = (b2 - b1) / (m1 - m2);
00029 if (!isfinite(isect)) {
00030 isect = MAX_FLOAT;
00031 }
00032 return isect;
00033 }
00034
00035 }
00036
00037 namespace MosesTuning
00038 {
00039
00040
00041 Optimizer::Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<bool>& pos, const vector<parameter_t>& start, unsigned int nrandom)
00042 : m_scorer(NULL), m_feature_data(), m_num_random_directions(nrandom), m_positive(pos)
00043 {
00044
00045 Point::m_pdim = Pd;
00046
00047 CHECK(start.size() == Pd);
00048 Point::m_dim = i2O.size();
00049 Point::m_opt_indices = i2O;
00050 if (Point::m_pdim > Point::m_dim) {
00051 for (unsigned int i = 0; i < Point::m_pdim; i++) {
00052 unsigned int j = 0;
00053 while (j < Point::m_dim && i != i2O[j])
00054 j++;
00055
00056
00057
00058 if (j == Point::m_dim)
00059 Point::m_fixed_weights[i] = start[i];
00060 }
00061 }
00062 }
00063
00064 Optimizer::~Optimizer() {}
00065
00066 statscore_t Optimizer::GetStatScore(const Point& param) const
00067 {
00068 vector<unsigned> bests;
00069 Get1bests(param, bests);
00070 statscore_t score = GetStatScore(bests);
00071 return score;
00072 }
00073
00074 map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap, float newt, const pair<unsigned,unsigned>& newdiff)
00075 {
00076 map<float,diff_t>::iterator it = thresholdmap.find(newt);
00077 if (it != thresholdmap.end()) {
00078
00079 if (it->second.back().first == newdiff.first)
00080
00081 it->second.back().second = newdiff.second;
00082 else
00083 it->second.push_back(newdiff);
00084 } else {
00085
00086 pair<map<float,diff_t>::iterator, bool> ins = thresholdmap.insert(threshold(newt, diff_t(1, newdiff)));
00087 CHECK(ins.second);
00088 it = ins.first;
00089 }
00090 return it;
00091 }
00092
00093 statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, Point& bestpoint) const
00094 {
00095
00096 float min_int = 0.0001;
00097
00098
00099
00100 map<float,diff_t> thresholdmap;
00101 thresholdmap[MIN_FLOAT] = diff_t();
00102 vector<unsigned> first1best;
00103 for (unsigned int S = 0; S < size(); S++) {
00104 map<float,diff_t >::iterator previnserted = thresholdmap.begin();
00105
00106
00107
00108 multimap<float, unsigned> gradient;
00109 vector<float> f0;
00110 f0.resize(m_feature_data->get(S).size());
00111 for (unsigned j = 0; j < m_feature_data->get(S).size(); j++) {
00112
00113 gradient.insert(pair<float, unsigned>(direction * (m_feature_data->get(S,j)), j));
00114
00115 f0[j] = origin * m_feature_data->get(S, j);
00116 }
00117
00118
00119
00120
00121
00122 multimap<float,unsigned>::iterator gradientit = gradient.begin();
00123 multimap<float,unsigned>::iterator highest_f0 = gradient.begin();
00124
00125 float smallest = gradientit->first;
00126
00127
00128 gradientit++;
00129 while (gradientit != gradient.end() && gradientit->first == smallest) {
00130
00131
00132 if (f0[gradientit->second] > f0[highest_f0->second])
00133 highest_f0 = gradientit;
00134 gradientit++;
00135 }
00136
00137 gradientit = highest_f0;
00138 first1best.push_back(highest_f0->second);
00139
00140
00141
00142 while (gradientit != gradient.end()) {
00143 map<float,unsigned>::iterator leftmost = gradientit;
00144 float m = gradientit->first;
00145 float b = f0[gradientit->second];
00146 multimap<float,unsigned>::iterator gradientit2 = gradientit;
00147 gradientit2++;
00148 float leftmostx = MAX_FLOAT;
00149 for (; gradientit2 != gradient.end(); gradientit2++) {
00150
00151
00152
00153 float curintersect;
00154 if (m != gradientit2->first) {
00155 curintersect = intersect(m, b, gradientit2->first, f0[gradientit2->second]);
00156
00157 if (curintersect<=leftmostx) {
00158
00159
00160
00161 leftmostx = curintersect;
00162 leftmost = gradientit2;
00163 }
00164 }
00165 }
00166 if (leftmost == gradientit) {
00167
00168
00169
00170
00171 CHECK(abs(leftmost->first-gradient.rbegin()->first) < 0.0001);
00172
00173 break;
00174 }
00175
00176
00177 pair<unsigned,unsigned> newd(S, leftmost->second);
00178
00179 if (leftmostx-previnserted->first < min_int) {
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189 map<float,diff_t>::iterator tit = thresholdmap.find(leftmostx);
00190 if (tit == previnserted) {
00191
00192 CHECK(previnserted->second.back().first == newd.first);
00193 previnserted->second.back()=newd;
00194
00195 } else {
00196
00197 if (tit == thresholdmap.end()) {
00198 thresholdmap[leftmostx]=previnserted->second;
00199 thresholdmap.erase(previnserted);
00200 previnserted = thresholdmap.find(leftmostx);
00201 previnserted->second.back()=newd;
00202
00203 } else {
00204
00205 tit->second.insert(tit->second.end(),previnserted->second.begin(),previnserted->second.end());
00206 CHECK(tit->second.back().first == newd.first);
00207 tit->second.back()=newd;
00208 thresholdmap.erase(previnserted);
00209 previnserted = tit;
00210 }
00211 }
00212
00213 CHECK(previnserted != thresholdmap.end());
00214 } else {
00215 previnserted = AddThreshold(thresholdmap, leftmostx, newd);
00216 }
00217 gradientit = leftmost;
00218 }
00219 }
00220
00221
00222
00223
00224 map<float,diff_t >::iterator thrit;
00225 if (verboselevel() > 6) {
00226 cerr << "Thresholds:(" << thresholdmap.size() << ")" << endl;
00227 for (thrit = thresholdmap.begin(); thrit != thresholdmap.end(); thrit++) {
00228 cerr << "x: " << thrit->first << " diffs";
00229 for (size_t j = 0; j < thrit->second.size(); ++j) {
00230 cerr << " " <<thrit->second[j].first << "," << thrit->second[j].second;
00231 }
00232 cerr << endl;
00233 }
00234 }
00235
00236
00237 thrit = thresholdmap.begin();
00238 ++thrit;
00239 diffs_t diffs;
00240 for (; thrit != thresholdmap.end(); thrit++)
00241 diffs.push_back(thrit->second);
00242 vector<statscore_t> scores = GetIncStatScore(first1best, diffs);
00243
00244 thrit = thresholdmap.begin();
00245 statscore_t bestscore = MIN_FLOAT;
00246 float bestx = MIN_FLOAT;
00247
00248
00249 CHECK(scores.size() == thresholdmap.size());
00250 for (unsigned int sc = 0; sc != scores.size(); sc++) {
00251
00252
00253
00254 Point respoint = origin + direction * thrit->first;
00255 bool is_valid = true;
00256 for (unsigned int k=0; k < respoint.getdim(); k++) {
00257 if (m_positive[k] && respoint[k] <= 0.0)
00258 is_valid = false;
00259 }
00260
00261 if (is_valid && scores[sc] > bestscore) {
00262
00263
00264
00265 bestscore = scores[sc];
00266
00267
00268
00269
00270
00271
00272 float leftx = thrit->first;
00273 if (thrit == thresholdmap.begin()) {
00274 leftx = MIN_FLOAT;
00275 }
00276 ++thrit;
00277 float rightx = MAX_FLOAT;
00278 if (thrit != thresholdmap.end()) {
00279 rightx = thrit->first;
00280 }
00281 --thrit;
00282
00283 if (leftx == MIN_FLOAT) {
00284 bestx = rightx-1000;
00285 } else if (rightx == MAX_FLOAT) {
00286 bestx = leftx + 0.1;
00287 } else {
00288 bestx = 0.5 * (rightx + leftx);
00289 }
00290
00291 }
00292 ++thrit;
00293 }
00294
00295 if (abs(bestx) < 0.00015) {
00296
00297
00298 bestx = 0.0;
00299
00300
00301
00302 if (verboselevel() > 4)
00303 cerr << "best point on line at origin" << endl;
00304 }
00305 if (verboselevel() > 3) {
00306
00307 }
00308 bestpoint = direction * bestx + origin;
00309 bestpoint.SetScore(bestscore);
00310 return bestscore;
00311 }
00312
00313 void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
00314 {
00315 CHECK(m_feature_data);
00316 bests.clear();
00317 bests.resize(size());
00318
00319 for (unsigned i = 0; i < size(); i++) {
00320 float bestfs = MIN_FLOAT;
00321 unsigned idx = 0;
00322 unsigned j;
00323 for (j = 0; j < m_feature_data->get(i).size(); j++) {
00324 float curfs = P * m_feature_data->get(i, j);
00325 if (curfs > bestfs) {
00326 bestfs = curfs;
00327 idx = j;
00328 }
00329 }
00330 bests[i]=idx;
00331 }
00332
00333 }
00334
00335 statscore_t Optimizer::Run(Point& P) const
00336 {
00337 if (!m_feature_data) {
00338 cerr << "error trying to optimize without Features loaded" << endl;
00339 exit(2);
00340 }
00341 if (!m_scorer) {
00342 cerr << "error trying to optimize without a Scorer loaded" << endl;
00343 exit(2);
00344 }
00345 if (m_scorer->getReferenceSize() != m_feature_data->size()) {
00346 cerr << "error length mismatch between feature file and score file" << endl;
00347 exit(2);
00348 }
00349
00350 P.SetScore(GetStatScore(P));
00351
00352 if (verboselevel () > 2) {
00353 cerr << "Starting point: " << P << " => " << P.GetScore() << endl;
00354 }
00355 statscore_t score = TrueRun(P);
00356
00357
00358 P.SetScore(score);
00359 if (verboselevel() > 2) {
00360 cerr << "Ending point: " << P << " => " << score << endl;
00361 }
00362 return score;
00363 }
00364
00365
00366 vector<statscore_t> Optimizer::GetIncStatScore(const vector<unsigned>& thefirst, const vector<vector <pair<unsigned,unsigned> > >& thediffs) const
00367 {
00368 CHECK(m_scorer);
00369
00370 vector<statscore_t> theres;
00371
00372 m_scorer->score(thefirst, thediffs, theres);
00373 return theres;
00374 }
00375
00376
00377 statscore_t SimpleOptimizer::TrueRun(Point& P) const
00378 {
00379 statscore_t prevscore = 0;
00380 statscore_t bestscore = MIN_FLOAT;
00381 Point best;
00382
00383
00384
00385 if (P.GetScore() > bestscore) {
00386 bestscore = P.GetScore();
00387 best = P;
00388 }
00389
00390 int nrun = 0;
00391 do {
00392 ++nrun;
00393 if (verboselevel() > 2 && nrun > 1)
00394 cerr << "last diff=" << bestscore-prevscore << " nrun " << nrun << endl;
00395 prevscore = bestscore;
00396
00397 Point linebest;
00398
00399 for (unsigned int d = 0; d < Point::getdim() + m_num_random_directions; d++) {
00400 if (verboselevel() > 4) {
00401
00402 cerr << "starting point: " << P << " => " << prevscore << endl;
00403 }
00404 Point direction;
00405 if (d < Point::getdim()) {
00406 for (unsigned int i = 0; i < Point::getdim(); i++)
00407 direction[i]=0.0;
00408 direction[d]=1.0;
00409 } else {
00410 direction.Randomize();
00411 }
00412 statscore_t curscore = LineOptimize(P, direction, linebest);
00413 if (verboselevel() > 5) {
00414 cerr << "direction: " << d << " => " << curscore << endl;
00415 cerr << "\tending point: "<< linebest << " => " << curscore << endl;
00416 }
00417 if (curscore > bestscore) {
00418 bestscore = curscore;
00419 best = linebest;
00420 if (verboselevel() > 3) {
00421 cerr << "new best dir:" << d << " (" << nrun << ")" << endl;
00422 cerr << "new best Point " << best << " => " << curscore << endl;
00423 }
00424 }
00425 }
00426 P = best;
00427 if (verboselevel() > 3)
00428 cerr << nrun << "\t" << P << endl;
00429 } while (bestscore-prevscore > kEPS);
00430
00431 if (verboselevel() > 2) {
00432 cerr << "end Powell Algo, nrun=" << nrun << endl;
00433 cerr << "last diff=" << bestscore-prevscore << endl;
00434 cerr << "\t" << P << endl;
00435 }
00436 return bestscore;
00437 }
00438
00439 statscore_t RandomDirectionOptimizer::TrueRun(Point& P) const
00440 {
00441 statscore_t prevscore = P.GetScore();
00442
00443
00444 unsigned int nrun = 0;
00445 unsigned int nrun_no_change = 0;
00446 for (; nrun_no_change < m_num_random_directions; nrun++, nrun_no_change++) {
00447
00448 Point direction;
00449 direction.Randomize();
00450
00451
00452 statscore_t score = LineOptimize(P, direction, P);
00453 if (verboselevel() > 4) {
00454 cerr << "direction: " << direction << " => " << score;
00455 cerr << " (" << (score-prevscore) << ")" << endl;
00456 cerr << "\tending point: " << P << " => " << score << endl;
00457 }
00458
00459 if (score-prevscore > kEPS)
00460 nrun_no_change = 0;
00461 prevscore = score;
00462 }
00463
00464 if (verboselevel() > 2) {
00465 cerr << "end Powell Algo, nrun=" << nrun << endl;
00466 }
00467 return prevscore;
00468 }
00469
00470
00471 statscore_t RandomOptimizer::TrueRun(Point& P) const
00472 {
00473 P.Randomize();
00474 statscore_t score = GetStatScore(P);
00475 P.SetScore(score);
00476 return score;
00477 }
00478
00479 }