00001 #include "Optimiser.h"
00002 #include "Hildreth.h"
00003 #include "moses/StaticData.h"
00004
00005 using namespace Moses;
00006 using namespace std;
00007
00008 namespace Mira
00009 {
00010
// Core MIRA update against oracle translations: for each input sentence i,
// build one constraint per hypothesis j (oracle minus hypothesis), pass all
// collected constraints to the Hildreth QP solver, and add the resulting
// alpha-weighted feature-difference vectors into weightUpdate.
//
// Returns 0 if an update was applied, 1 if no constraint was violated
// (weightUpdate is then left untouched).
size_t MiraOptimiser::updateWeights(
  ScoreComponentCollection& weightUpdate,
  const vector<vector<ScoreComponentCollection> >& featureValues,
  const vector<vector<float> >& losses,
  const vector<vector<float> >& bleuScores,
  const vector<vector<float> >& modelScores,
  const vector<ScoreComponentCollection>& oracleFeatureValues,
  const vector<float> oracleBleuScores,
  const vector<float> oracleModelScores,
  float learning_rate,
  size_t rank,
  size_t epoch)
{
  // One entry per constraint: the oracle-minus-hypothesis feature difference
  // and the corresponding loss-minus-margin ("violation") value.
  vector<ScoreComponentCollection> featureValueDiffs;
  vector<float> lossMinusModelScoreDiffs;
  vector<float> all_losses;

  ScoreComponentCollection max_batch_featureValueDiff;

  // Tolerance below which a positive violation is not counted as violated.
  float epsilon = 0.0001;
  int violatedConstraintsBefore = 0;
  float oldDistanceFromOptimum = 0;

  // Iterate over input sentences (one when online, several when batched).
  for (size_t i = 0; i < featureValues.size(); ++i) {

    // Iterate over the hypothesis translations of one input sentence.
    for (size_t j = 0; j < featureValues[i].size(); ++j) {
      ScoreComponentCollection featureValueDiff = oracleFeatureValues[i];
      featureValueDiff.MinusEquals(featureValues[i][j]);

      // Skip the constraint if oracle and hypothesis have identical features
      // (the constraint could never be satisfied by a weight change).
      if (featureValueDiff.GetL1Norm() == 0) {
        cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
        continue;
      }

      float loss = losses[i][j];

      // Check the constraint modelScoreDiff >= loss; diff holds the amount
      // by which it is violated (0 when satisfied).
      bool violated = false;

      float modelScoreDiff = oracleModelScores[i] - modelScores[i][j];
      float diff = 0;

      if (loss > modelScoreDiff)
        diff = loss - modelScoreDiff;
      cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
      if (diff > epsilon)
        violated = true;

      // Optionally squash margin and loss through a scaled sigmoid
      // (both mapped into (-m_sigmoidParam, m_sigmoidParam)) and recompute
      // the violation on the normalised values.
      if (m_normaliseMargin) {
        modelScoreDiff = (2*m_sigmoidParam/(1 + exp(-modelScoreDiff))) - m_sigmoidParam;
        loss = (2*m_sigmoidParam/(1 + exp(-loss))) - m_sigmoidParam;
        diff = 0;
        if (loss > modelScoreDiff) {
          diff = loss - modelScoreDiff;
        }
        cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
      }

      // Optionally scale the violation by the oracle BLEU score.
      if (m_scale_margin) {
        diff *= oracleBleuScores[i];
        cerr << "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << oracleBleuScores[i] << endl;
      }

      // Record the constraint; violation bookkeeping only counts
      // constraints that exceeded epsilon BEFORE normalisation/scaling.
      featureValueDiffs.push_back(featureValueDiff);
      lossMinusModelScoreDiffs.push_back(diff);
      all_losses.push_back(loss);
      if (violated) {
        ++violatedConstraintsBefore;
        oldDistanceFromOptimum += diff;
      }
    }
  }

  // Run the optimisation: compute one alpha per constraint with Hildreth's
  // algorithm (slack-constrained variant when m_slack != 0).
  vector<float> alphas;
  ScoreComponentCollection summedUpdate;
  if (violatedConstraintsBefore > 0) {
    cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " <<
         featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl;
    if (m_slack != 0) {
      alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack);
    } else {
      alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs);
    }

    // Accumulate the dual solution: sum alpha_k * featureValueDiff_k.
    for (size_t k = 0; k < featureValueDiffs.size(); ++k) {
      float alpha = alphas[k];
      cerr << "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl;
      ScoreComponentCollection update(featureValueDiffs[k]);
      update.MultiplyEquals(alpha);

      summedUpdate.PlusEquals(update);
    }
  } else {
    cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl;
    // Nothing to do; report "no update" to the caller.
    return 1;
  }

  // Apply the learning rate to the summed update.
  if (learning_rate != 1) {
    cerr << "Rank " << rank << ", epoch " << epoch << ", apply learning rate " << learning_rate << " to update." << endl;
    summedUpdate.MultiplyEquals(learning_rate);
  }

  // Optionally scale the whole update by the oracle BLEU score
  // (only applies when the batch holds a single oracle).
  if (oracleBleuScores.size() == 1) {
    if (m_scale_update) {
      cerr << "Rank " << rank << ", epoch " << epoch << ", scaling summed update with oracle bleu score " << oracleBleuScores[0] << endl;
      summedUpdate.MultiplyEquals(oracleBleuScores[0]);
    }
  }

  // Hand the accumulated update back to the caller.
  weightUpdate.PlusEquals(summedUpdate);

  return 0;
}
00153
// Hope/fear MIRA update: for each input sentence, build one constraint per
// hope/fear hypothesis pair (hope minus fear), hand all constraints to the
// Hildreth QP solver, and accumulate the alpha-weighted feature differences
// into weightUpdate.
//
// updatePosition: if != -1, only the sentence at that batch position
// contributes constraints.
// Returns 0 if an update was applied, 1 if no constraint was violated.
size_t MiraOptimiser::updateWeightsHopeFear(
  Moses::ScoreComponentCollection& weightUpdate,
  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
  const std::vector<std::vector<float> >& bleuScoresHope,
  const std::vector<std::vector<float> >& bleuScoresFear,
  const std::vector<std::vector<float> >& modelScoresHope,
  const std::vector<std::vector<float> >& modelScoresFear,
  float learning_rate,
  size_t rank,
  size_t epoch,
  int updatePosition)
{
  // One entry per constraint (hope/fear pair).
  vector<ScoreComponentCollection> featureValueDiffs;
  vector<float> lossMinusModelScoreDiffs;
  vector<float> modelScoreDiffs;
  vector<float> all_losses;

  ScoreComponentCollection max_batch_featureValueDiff;

  // Tolerance below which a positive violation is not counted as violated.
  float epsilon = 0.0001;
  int violatedConstraintsBefore = 0;
  float oldDistanceFromOptimum = 0;

  // Iterate over input sentences (one when online, several when batched).
  for (size_t i = 0; i < featureValuesHope.size(); ++i) {
    // Restrict the update to a single batch position if requested.
    // NOTE(review): i (size_t) is compared to updatePosition (int); the
    // != -1 guard keeps updatePosition non-negative here, so the mixed
    // signedness is benign.
    if (updatePosition != -1) {
      if (i < updatePosition)
        continue;
      else if (i > updatePosition)
        break;
    }

    // Iterate over the hypothesis translations of one input sentence.
    for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
      ScoreComponentCollection featureValueDiff = featureValuesHope[i][j];
      featureValueDiff.MinusEquals(featureValuesFear[i][j]);

      // Skip the constraint if hope and fear have identical features.
      if (featureValueDiff.GetL1Norm() == 0) {
        cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
        continue;
      }

      // Loss is the BLEU gap between the hope and fear hypotheses.
      float loss = bleuScoresHope[i][j] - bleuScoresFear[i][j];

      // Check the constraint modelScoreDiff >= loss; diff holds the
      // violation amount (0 when satisfied).
      bool violated = false;

      float modelScoreDiff = modelScoresHope[i][j] - modelScoresFear[i][j];
      float diff = 0;
      if (loss > modelScoreDiff)
        diff = loss - modelScoreDiff;
      cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;

      if (diff > epsilon)
        violated = true;

      // Optionally squash margin and loss through a scaled sigmoid and
      // recompute the violation on the normalised values.
      if (m_normaliseMargin) {
        modelScoreDiff = (2*m_sigmoidParam/(1 + exp(-modelScoreDiff))) - m_sigmoidParam;
        loss = (2*m_sigmoidParam/(1 + exp(-loss))) - m_sigmoidParam;
        diff = 0;
        if (loss > modelScoreDiff) {
          diff = loss - modelScoreDiff;
        }
        cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
      }

      // Optionally scale the violation by the hope (oracle) BLEU score.
      if (m_scale_margin) {
        diff *= bleuScoresHope[i][j];
        cerr << "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << bleuScoresHope[i][j] << endl;
      }

      featureValueDiffs.push_back(featureValueDiff);
      lossMinusModelScoreDiffs.push_back(diff);
      modelScoreDiffs.push_back(modelScoreDiff);
      all_losses.push_back(loss);
      if (violated) {
        ++violatedConstraintsBefore;
        oldDistanceFromOptimum += diff;
      }
    }
  }

  // Run the optimisation: compute one alpha per constraint with Hildreth's
  // algorithm (slack-constrained variant when m_slack != 0).
  vector<float> alphas;
  ScoreComponentCollection summedUpdate;
  if (violatedConstraintsBefore > 0) {
    cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " <<
         featureValueDiffs.size() << " (of which violated: " << violatedConstraintsBefore << ")" << endl;
    if (m_slack != 0) {
      alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs, m_slack);
    } else {
      alphas = Hildreth::optimise(featureValueDiffs, lossMinusModelScoreDiffs);
    }

    // Accumulate the dual solution: sum alpha_k * featureValueDiff_k
    // over constraints with non-zero alpha.
    for (size_t k = 0; k < featureValueDiffs.size(); ++k) {
      float alpha = alphas[k];
      cerr << "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl;
      if (alpha != 0) {
        // Optional boosting when the model preferred fear over hope
        // (modelScoreDiff <= 0).
        if (m_boost && modelScoreDiffs[k] <= 0) {
          // NOTE(review): the boost factor reads bleuScoresHope[0][0]
          // (first hypothesis of the first sentence) for EVERY constraint
          // k, unlike the per-pair BLEU used elsewhere in this function —
          // presumably only meaningful for batch size 1; confirm with
          // callers.
          float factor = min(1.5, log2(bleuScoresHope[0][0]));
          factor = min(3.0f, factor);
          alpha = alpha * factor;
          cerr << "Rank " << rank << ", epoch " << epoch << ", apply boosting factor " << factor << " to update." << endl;
        }

        ScoreComponentCollection update(featureValueDiffs[k]);
        update.MultiplyEquals(alpha);

        summedUpdate.PlusEquals(update);
      }
    }
  } else {
    cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl;
    // Nothing to do; report "no update" to the caller.
    return 1;
  }

  // Apply the learning rate to the summed update.
  if (learning_rate != 1) {
    cerr << "Rank " << rank << ", epoch " << epoch << ", apply learning rate " << learning_rate << " to update." << endl;
    summedUpdate.MultiplyEquals(learning_rate);
  }

  // Optionally scale the whole update by the hope BLEU score
  // (only applies when the batch holds a single sentence).
  if (featureValuesHope.size() == 1) {
    if (m_scale_update) {
      cerr << "Rank " << rank << ", epoch " << epoch << ", scaling summed update with oracle bleu score " << bleuScoresHope[0][0] << endl;
      summedUpdate.MultiplyEquals(bleuScoresHope[0][0]);
    }
  }

  // Hand the accumulated update back to the caller.
  weightUpdate.PlusEquals(summedUpdate);

  return 0;
}
00315
00316 size_t MiraOptimiser::updateWeightsAnalytically(
00317 ScoreComponentCollection& weightUpdate,
00318 ScoreComponentCollection& featureValuesHope,
00319 ScoreComponentCollection& featureValuesFear,
00320 float bleuScoreHope,
00321 float bleuScoreFear,
00322 float modelScoreHope,
00323 float modelScoreFear,
00324 float learning_rate,
00325 size_t rank,
00326 size_t epoch)
00327 {
00328
00329 float epsilon = 0.0001;
00330 float oldDistanceFromOptimum = 0;
00331 bool constraintViolatedBefore = false;
00332
00333
00334
00335 ScoreComponentCollection featureValueDiff = featureValuesHope;
00336 featureValueDiff.MinusEquals(featureValuesFear);
00337 if (featureValueDiff.GetL1Norm() == 0) {
00338 cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
00339 return 1;
00340 }
00341
00342
00343
00344 float modelScoreDiff = modelScoreHope - modelScoreFear;
00345 float loss = bleuScoreHope - bleuScoreFear;
00346 float diff = 0;
00347 if (loss > modelScoreDiff)
00348 diff = loss - modelScoreDiff;
00349 cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
00350
00351 if (m_normaliseMargin) {
00352 modelScoreDiff = (2*m_sigmoidParam/(1 + exp(-modelScoreDiff))) - m_sigmoidParam;
00353 loss = (2*m_sigmoidParam/(1 + exp(-loss))) - m_sigmoidParam;
00354 if (loss > modelScoreDiff)
00355 diff = loss - modelScoreDiff;
00356 cerr << "Rank " << rank << ", epoch " << epoch << ", normalised constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;
00357 }
00358
00359 if (m_scale_margin) {
00360 diff *= bleuScoreHope;
00361 cerr << "Rank " << rank << ", epoch " << epoch << ", scaling margin with oracle bleu score " << bleuScoreHope << endl;
00362 }
00363 if (m_scale_margin_precision) {
00364 diff *= (1+m_precision);
00365 cerr << "Rank " << rank << ", epoch " << epoch << ", scaling margin with 1+precision: " << (1+m_precision) << endl;
00366 }
00367
00368 if (diff > epsilon) {
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378 oldDistanceFromOptimum += diff;
00379 constraintViolatedBefore = true;
00380
00381
00382
00383
00384 float squaredNorm = featureValueDiff.GetL2Norm() * featureValueDiff.GetL2Norm();
00385
00386 float alpha = diff / squaredNorm;
00387 cerr << "Rank " << rank << ", epoch " << epoch << ", unclipped alpha: " << alpha << endl;
00388 if (m_slack > 0 ) {
00389 if (alpha > m_slack) {
00390 alpha = m_slack;
00391 } else if (alpha < m_slack*(-1)) {
00392 alpha = m_slack*(-1);
00393 }
00394 }
00395
00396
00397 if (learning_rate != 1)
00398 alpha = alpha * learning_rate;
00399
00400 if (m_scale_update) {
00401 cerr << "Rank " << rank << ", epoch " << epoch << ", scaling update with oracle bleu score " << bleuScoreHope << endl;
00402 alpha *= bleuScoreHope;
00403 }
00404 if (m_scale_update_precision) {
00405 cerr << "Rank " << rank << ", epoch " << epoch << ", scaling update with 1+precision: " << (1+m_precision) << endl;
00406 alpha *= (1+m_precision);
00407 }
00408
00409 cerr << "Rank " << rank << ", epoch " << epoch << ", clipped/scaled alpha: " << alpha << endl;
00410
00411
00412 if (m_boost && modelScoreDiff <= 0) {
00413
00414 float factor = min(1.5, log2(bleuScoreHope));
00415 factor = min(3.0f, factor);
00416 alpha = alpha * factor;
00417 cerr << "Rank " << rank << ", epoch " << epoch << ", boosted alpha: " << alpha << endl;
00418 }
00419
00420 featureValueDiff.MultiplyEquals(alpha);
00421 weightUpdate.PlusEquals(featureValueDiff);
00422
00423 }
00424
00425 if (!constraintViolatedBefore) {
00426
00427 cerr << "Rank " << rank << ", epoch " << epoch << ", constraint already satisfied" << endl;
00428 return 1;
00429 }
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454 return 0;
00455 }
00456
// Selective hope/fear MIRA update: instead of one constraint per hope/fear
// pair, the feature difference of each pair is split into per-feature
// constraints (one per non-zero core feature and one per non-zero sparse
// feature), with the pair's violation divided evenly across them. Sparse
// features with a positive difference ("hope" features) get a 1.1x weighted
// share of the violation. All per-feature constraints go to the Hildreth
// solver and the alpha-weighted single-feature vectors are summed into
// weightUpdate.
//
// updatePosition: if != -1, only the sentence at that batch position is used.
// Returns 0 if an update was applied, 1 if no constraint was violated.
size_t MiraOptimiser::updateWeightsHopeFearSelective(
  Moses::ScoreComponentCollection& weightUpdate,
  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
  const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
  const std::vector<std::vector<float> >& bleuScoresHope,
  const std::vector<std::vector<float> >& bleuScoresFear,
  const std::vector<std::vector<float> >& modelScoresHope,
  const std::vector<std::vector<float> >& modelScoresFear,
  float learning_rate,
  size_t rank,
  size_t epoch,
  int updatePosition)
{
  // One entry per single-feature constraint (parallel vectors; equality
  // of sizes is asserted below).
  vector<ScoreComponentCollection> nonZeroFeatures;
  vector<float> lossMinusModelScoreDiffs;

  // Tolerance below which a positive violation is not counted as violated.
  float epsilon = 0.0001;
  int violatedConstraintsBefore = 0;

  // Iterate over input sentences (one when online, several when batched).
  for (size_t i = 0; i < featureValuesHope.size(); ++i) {
    // Restrict the update to a single batch position if requested.
    if (updatePosition != -1) {
      if (i < updatePosition)
        continue;
      else if (i > updatePosition)
        break;
    }

    // Iterate over the hypothesis translations of one input sentence.
    for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
      ScoreComponentCollection featureValueDiff = featureValuesHope[i][j];
      featureValueDiff.MinusEquals(featureValuesFear[i][j]);
      // Skip the pair if hope and fear have identical features.
      if (featureValueDiff.GetL1Norm() == 0) {
        cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
        continue;
      }

      // Check the pair's constraint modelScoreDiff >= loss; diff is the
      // violation amount to be distributed over its non-zero features.
      float loss = bleuScoresHope[i][j] - bleuScoresFear[i][j];
      float modelScoreDiff = modelScoresHope[i][j] - modelScoresFear[i][j];
      float diff = 0;
      if (loss > modelScoreDiff)
        diff = loss - modelScoreDiff;
      if (diff > epsilon)
        ++violatedConstraintsBefore;
      cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << loss << " (current violation: " << diff << ")" << endl;

      // Collect one single-feature vector per non-zero CORE feature.
      // NOTE(review): the inner loop index i shadows the outer sentence
      // index i; harmless here (the outer i is not used inside), but a
      // rename would improve readability.
      FVector features = featureValueDiff.GetScoresVector();
      size_t n_core = 0, n_sparse = 0, n_sparse_hope = 0, n_sparse_fear = 0;
      for (size_t i=0; i<features.coreSize(); ++i) {
        if (features[i] != 0.0) {
          ++n_core;
          ScoreComponentCollection f;
          f.Assign(i, features[i]);
          nonZeroFeatures.push_back(f);
        }
      }

      // Collect non-zero SPARSE features, separated by the sign of the
      // difference: positive --> hope side, negative --> fear side.
      vector<ScoreComponentCollection> nonZeroFeaturesHope;
      vector<ScoreComponentCollection> nonZeroFeaturesFear;
      for (FVector::iterator i = features.begin(); i != features.end(); ++i) {
        if (i->second != 0.0) {
          ScoreComponentCollection f;
          f.Assign((i->first).name(), i->second);
          cerr << "Rank " << rank << ", epoch " << epoch << ", f: " << f << endl;

          if (i->second > 0.0) {
            ++n_sparse_hope;
            nonZeroFeaturesHope.push_back(f);
          } else {
            ++n_sparse_fear;
            nonZeroFeaturesFear.push_back(f);
          }
        }
      }

      // Distribute the pair's violation evenly over its n non-zero
      // features; sparse hope features receive a 1.1x share.
      // (Core vectors were already pushed above, so losses are pushed in
      // the same order: core first, then hope, then fear.)
      float n = n_core + n_sparse_hope + n_sparse_fear;
      for (size_t i=0; i<n_core; ++i)
        lossMinusModelScoreDiffs.push_back(diff/n);
      for (size_t i=0; i<n_sparse_hope; ++i) {
        nonZeroFeatures.push_back(nonZeroFeaturesHope[i]);
        lossMinusModelScoreDiffs.push_back((diff/n)*1.1);
      }
      for (size_t i=0; i<n_sparse_fear; ++i) {
        nonZeroFeatures.push_back(nonZeroFeaturesFear[i]);
        lossMinusModelScoreDiffs.push_back(diff/n);
      }
      cerr << "Rank " << rank << ", epoch " << epoch << ", core diff: " << diff/n << endl;
      cerr << "Rank " << rank << ", epoch " << epoch << ", hope diff: " << ((diff/n)*1.1) << endl;
      cerr << "Rank " << rank << ", epoch " << epoch << ", fear diff: " << diff/n << endl;
    }
  }

  // Every single-feature constraint must carry exactly one violation share.
  assert(nonZeroFeatures.size() == lossMinusModelScoreDiffs.size());

  // Run the optimisation: compute one alpha per single-feature constraint.
  vector<float> alphas;
  ScoreComponentCollection summedUpdate;
  if (violatedConstraintsBefore > 0) {
    cerr << "Rank " << rank << ", epoch " << epoch << ", number of constraints passed to optimizer: " << nonZeroFeatures.size() << endl;
    alphas = Hildreth::optimise(nonZeroFeatures, lossMinusModelScoreDiffs, m_slack);

    // Accumulate the dual solution: sum alpha_k * feature_k over
    // constraints with non-zero alpha.
    for (size_t k = 0; k < nonZeroFeatures.size(); ++k) {
      float alpha = alphas[k];
      cerr << "Rank " << rank << ", epoch " << epoch << ", alpha: " << alpha << endl;
      if (alpha != 0) {
        ScoreComponentCollection update(nonZeroFeatures[k]);
        update.MultiplyEquals(alpha);

        summedUpdate.PlusEquals(update);
      }
    }
  } else {
    cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl;
    // Nothing to do; report "no update" to the caller.
    return 1;
  }

  // Apply the learning rate to the summed update.
  if (learning_rate != 1) {
    cerr << "Rank " << rank << ", epoch " << epoch << ", apply learning rate " << learning_rate << " to update." << endl;
    summedUpdate.MultiplyEquals(learning_rate);
  }

  // Optionally scale the whole update by the hope BLEU score
  // (only applies when the batch holds a single sentence).
  if (featureValuesHope.size() == 1) {
    if (m_scale_update) {
      cerr << "Rank " << rank << ", epoch " << epoch << ", scaling summed update with oracle bleu score " << bleuScoresHope[0][0] << endl;
      summedUpdate.MultiplyEquals(bleuScoresHope[0][0]);
    }
  }

  // Hand the accumulated update back to the caller.
  weightUpdate.PlusEquals(summedUpdate);
  return 0;
}
00600
00601 size_t MiraOptimiser::updateWeightsHopeFearSummed(
00602 Moses::ScoreComponentCollection& weightUpdate,
00603 const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
00604 const std::vector< std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
00605 const std::vector<std::vector<float> >& bleuScoresHope,
00606 const std::vector<std::vector<float> >& bleuScoresFear,
00607 const std::vector<std::vector<float> >& modelScoresHope,
00608 const std::vector<std::vector<float> >& modelScoresFear,
00609 float learning_rate,
00610 size_t rank,
00611 size_t epoch,
00612 bool rescaleSlack,
00613 bool makePairs)
00614 {
00615
00616
00617 ScoreComponentCollection averagedFeatureDiffs;
00618 float averagedViolations = 0;
00619
00620
00621 float epsilon = 0.0001;
00622 int violatedConstraintsBefore = 0;
00623
00624 if (!makePairs) {
00625 ScoreComponentCollection featureValueDiff;
00626 float lossHope = 0, lossFear = 0, modelScoreHope = 0, modelScoreFear = 0, hopeCount = 0, fearCount = 0;
00627
00628 for (size_t i = 0; i < featureValuesHope.size(); ++i) {
00629 for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
00630 featureValueDiff.PlusEquals(featureValuesHope[i][j]);
00631 lossHope += bleuScoresHope[i][j];
00632 modelScoreHope += modelScoresHope[i][j];
00633 ++hopeCount;
00634 }
00635 }
00636 lossHope /= hopeCount;
00637 modelScoreHope /= hopeCount;
00638
00639
00640 for (size_t i = 0; i < featureValuesFear.size(); ++i) {
00641 for (size_t j = 0; j < featureValuesFear[i].size(); ++j) {
00642 featureValueDiff.MinusEquals(featureValuesFear[i][j]);
00643 lossFear += bleuScoresFear[i][j];
00644 modelScoreFear += modelScoresFear[i][j];
00645 ++fearCount;
00646 }
00647 }
00648 lossFear /= fearCount;
00649 modelScoreFear /= fearCount;
00650
00651 if (featureValueDiff.GetL1Norm() == 0) {
00652 cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
00653 cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl;
00654 return 1;
00655 }
00656
00657
00658 float lossDiff = lossHope - lossFear;
00659 float modelScoreDiff = modelScoreHope - modelScoreFear;
00660 float diff = 0;
00661 if (lossDiff > modelScoreDiff)
00662 diff = lossDiff - modelScoreDiff;
00663 if (diff > epsilon)
00664 ++violatedConstraintsBefore;
00665 cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << lossDiff << " (current violation: " <<\
00666 diff << ")" << endl;
00667
00668
00669 averagedFeatureDiffs = featureValueDiff;
00670 averagedViolations = diff;
00671 } else {
00672
00673 for (size_t i = 0; i < featureValuesHope.size(); ++i) {
00674
00675 for (size_t j = 0; j < featureValuesHope[i].size(); ++j) {
00676 ScoreComponentCollection featureValueDiff = featureValuesHope[i][j];
00677 featureValueDiff.MinusEquals(featureValuesFear[i][j]);
00678 if (featureValueDiff.GetL1Norm() == 0) {
00679 cerr << "Rank " << rank << ", epoch " << epoch << ", features equal --> skip" << endl;
00680 continue;
00681 }
00682
00683
00684 float lossDiff = bleuScoresHope[i][j] - bleuScoresFear[i][j];
00685 float modelScoreDiff = modelScoresHope[i][j] - modelScoresFear[i][j];
00686 if (rescaleSlack) {
00687 cerr << "Rank " << rank << ", epoch " << epoch << ", modelScoreDiff scaled by lossDiff: " << modelScoreDiff << " --> " << modelScoreDiff*lossDiff << endl;
00688 modelScoreDiff *= lossDiff;
00689 }
00690 float diff = 0;
00691 if (lossDiff > modelScoreDiff)
00692 diff = lossDiff - modelScoreDiff;
00693 if (diff > epsilon)
00694 ++violatedConstraintsBefore;
00695 cerr << "Rank " << rank << ", epoch " << epoch << ", constraint: " << modelScoreDiff << " >= " << lossDiff << " (current violation: " << diff << ")" << endl;
00696
00697
00698 if (rescaleSlack) {
00699 averagedFeatureDiffs.MultiplyEquals(lossDiff);
00700 cerr << "Rank " << rank << ", epoch " << epoch << ", featureValueDiff scaled by lossDiff." << endl;
00701 }
00702 averagedFeatureDiffs.PlusEquals(featureValueDiff);
00703 averagedViolations += diff;
00704 }
00705 }
00706 }
00707
00708
00709 if (!makePairs) {
00710 averagedFeatureDiffs.DivideEquals(featureValuesHope[0].size());
00711 } else {
00712 averagedFeatureDiffs.DivideEquals(featureValuesHope[0].size());
00713 averagedViolations /= featureValuesHope[0].size();
00714 }
00715
00716 cerr << "Rank " << rank << ", epoch " << epoch << ", averaged violations: " << averagedViolations << endl;
00717
00718 if (violatedConstraintsBefore > 0) {
00719
00720
00721
00722
00723 float squaredNorm = averagedFeatureDiffs.GetL2Norm() * averagedFeatureDiffs.GetL2Norm();
00724 float alpha = averagedViolations / squaredNorm;
00725 cerr << "Rank " << rank << ", epoch " << epoch << ", unclipped alpha: " << alpha << endl;
00726 if (m_slack > 0 ) {
00727 if (alpha > m_slack) {
00728 alpha = m_slack;
00729 } else if (alpha < m_slack*(-1)) {
00730 alpha = m_slack*(-1);
00731 }
00732 }
00733 cerr << "Rank " << rank << ", epoch " << epoch << ", clipped alpha: " << alpha << endl;
00734
00735
00736 averagedFeatureDiffs.MultiplyEquals(alpha);
00737 weightUpdate.PlusEquals(averagedFeatureDiffs);
00738 return 0;
00739 } else {
00740 cerr << "Rank " << rank << ", epoch " << epoch << ", no constraint violated for this batch" << endl;
00741 return 1;
00742 }
00743 }
00744
00745 }
00746