00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #define BOOST_TEST_MODULE BackwardTest
00020 #include <boost/test/unit_test.hpp>
00021
00022 #include "lm/config.hh"
00023 #include "lm/left.hh"
00024 #include "lm/model.hh"
00025 #include "lm/state.hh"
00026
00027 #include "moses/Sentence.h"
00028 #include "moses/TypeDef.h"
00029
00030 #include "moses/StaticData.h"
00031 #include "moses/parameters/AllOptions.h"
00032
00033
00034 #include "moses/LM/Backward.h"
00035 #include "moses/LM/BackwardLMState.h"
00036 #include "moses/Util.h"
00037
00038 #include "lm/state.hh"
00039 #include "lm/left.hh"
00040
00041 #include <vector>
00042
00043 using namespace Moses;
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059 namespace Moses
00060 {
00061
00062
// Wraps BOOST_CHECK_CLOSE, casting all three arguments to double so that
// operands of mixed arithmetic types (e.g. a double reference value and a
// float score) can be compared. `tol` is forwarded unchanged as
// BOOST_CHECK_CLOSE's tolerance argument.
//
// The expansion deliberately carries no trailing semicolon: the call site
// supplies it, so `SLOPPY_CHECK_CLOSE(a, b, t);` is exactly one statement and
// is safe inside an unbraced if/else.
#define SLOPPY_CHECK_CLOSE(ref, value, tol) \
  BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol))
00064
00065 AllOptions::ptr DefaultOptions(new AllOptions);
00066
00067 class BackwardLanguageModelTest
00068 {
00069
00070 public:
00071 BackwardLanguageModelTest() :
00072 dummyInput(new Sentence(DefaultOptions)),
00073 backwardLM(
00074 static_cast< BackwardLanguageModel<lm::ngram::ProbingModel> * >(
00075 ConstructBackwardLM(
00076 "LM1=1.0",
00077 boost::unit_test::framework::master_test_suite().argv[1],
00078 0,
00079 false)
00080 )
00081 ) {
00082
00083 }
00084
00085 ~BackwardLanguageModelTest() {
00086 delete dummyInput;
00087 delete backwardLM;
00088 }
00089
00090 void testEmptyHypothesis() {
00091 FFState *ffState = const_cast< FFState * >(backwardLM->EmptyHypothesisState( *dummyInput ));
00092
00093 BOOST_CHECK( ffState != NULL );
00094
00095 delete ffState;
00096 }
00097
00098 void testCalcScore() {
00099
00100 double p_the = -1.383059;
00101 double p_licenses = -2.360783;
00102 double p_for = -1.661813;
00103 double p_most = -2.360783;
00104
00105
00106 double p_the_licenses = -0.9625873;
00107 double p_licenses_for = -1.661557;
00108 double p_for_most = -0.4526253;
00109
00110
00111 double p_the_licenses_for = p_the_licenses + p_licenses_for;
00112
00113
00114
00115 {
00116 Phrase phrase;
00117 BOOST_CHECK( phrase.GetSize() == 0 );
00118
00119 std::vector<FactorType> outputFactorOrder;
00120 outputFactorOrder.push_back(0);
00121
00122 phrase.CreateFromString(
00123 Input,
00124 outputFactorOrder,
00125 "the",
00126 NULL);
00127
00128 BOOST_CHECK( phrase.GetSize() == 1 );
00129
00130 float fullScore;
00131 float ngramScore;
00132 size_t oovCount;
00133 backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
00134
00135 BOOST_CHECK( oovCount == 0 );
00136 SLOPPY_CHECK_CLOSE( TransformLMScore(p_the), fullScore, 0.01);
00137 SLOPPY_CHECK_CLOSE( TransformLMScore( 0.0 ), ngramScore, 0.01);
00138 }
00139
00140
00141 {
00142 Phrase phrase;
00143 BOOST_CHECK( phrase.GetSize() == 0 );
00144
00145 std::vector<FactorType> outputFactorOrder;
00146 outputFactorOrder.push_back(0);
00147
00148 phrase.CreateFromString(
00149 Input,
00150 outputFactorOrder,
00151 "the licenses",
00152 NULL);
00153
00154 BOOST_CHECK( phrase.GetSize() == 2 );
00155
00156 float fullScore;
00157 float ngramScore;
00158 size_t oovCount;
00159 backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
00160
00161 BOOST_CHECK( oovCount == 0 );
00162 SLOPPY_CHECK_CLOSE( TransformLMScore(p_licenses + p_the_licenses), fullScore, 0.01);
00163
00164 BOOST_CHECK_GT(0.0001, ngramScore);
00165 BOOST_CHECK_LT(-0.0001, ngramScore);
00166 }
00167
00168
00169 {
00170 Phrase phrase;
00171 BOOST_CHECK( phrase.GetSize() == 0 );
00172
00173 std::vector<FactorType> outputFactorOrder;
00174 outputFactorOrder.push_back(0);
00175
00176 phrase.CreateFromString(
00177 Input,
00178 outputFactorOrder,
00179 "the licenses for",
00180 NULL);
00181
00182 BOOST_CHECK( phrase.GetSize() == 3 );
00183
00184 float fullScore;
00185 float ngramScore;
00186 size_t oovCount;
00187 backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
00188
00189 BOOST_CHECK( oovCount == 0 );
00190 SLOPPY_CHECK_CLOSE( TransformLMScore( p_the_licenses_for ), ngramScore, 0.01);
00191 SLOPPY_CHECK_CLOSE( TransformLMScore(p_for + p_licenses_for + p_the_licenses), fullScore, 0.01);
00192 }
00193
00194
00195 {
00196 Phrase phrase;
00197 BOOST_CHECK( phrase.GetSize() == 0 );
00198
00199 std::vector<FactorType> outputFactorOrder;
00200 outputFactorOrder.push_back(0);
00201
00202 phrase.CreateFromString(
00203 Input,
00204 outputFactorOrder,
00205 "the licenses for most",
00206 NULL);
00207
00208 BOOST_CHECK( phrase.GetSize() == 4 );
00209
00210 float fullScore;
00211 float ngramScore;
00212 size_t oovCount;
00213 backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
00214
00215 BOOST_CHECK( oovCount == 0 );
00216 SLOPPY_CHECK_CLOSE( TransformLMScore( p_the_licenses + p_licenses_for ), ngramScore, 0.01);
00217 SLOPPY_CHECK_CLOSE( TransformLMScore(p_most + p_for_most + p_licenses_for + p_the_licenses), fullScore, 0.01);
00218 }
00219
00220 }
00221
00222 void testEvaluate() {
00223
00224 FFState *nextState;
00225 FFState *prevState = const_cast< FFState * >(backwardLM->EmptyHypothesisState( *dummyInput ));
00226
00227 double p_most = -2.360783;
00228 double p_for = -1.661813;
00229 double p_licenses = -2.360783;
00230 double p_the = -1.383059;
00231 double p_eos = -1.457693;
00232
00233 double p_most_for = -0.4526253;
00234 double p_for_licenses = -1.661557;
00235 double p_licenses_the = -0.9625873;
00236 double p_the_eos = -1.940311;
00237
00238
00239
00240 {
00241 Phrase phrase;
00242 BOOST_CHECK( phrase.GetSize() == 0 );
00243
00244 std::vector<FactorType> outputFactorOrder;
00245 outputFactorOrder.push_back(0);
00246
00247 phrase.CreateFromString(
00248 Input,
00249 outputFactorOrder,
00250 "the",
00251 NULL);
00252
00253 BOOST_CHECK( phrase.GetSize() == 1 );
00254
00255 float score;
00256 nextState = backwardLM->Evaluate(phrase, prevState, score);
00257
00258
00259 SLOPPY_CHECK_CLOSE( (p_the + p_the_eos - p_eos), score, 0.01);
00260
00261 delete prevState;
00262 prevState = nextState;
00263
00264 }
00265
00266
00267 {
00268 Phrase phrase;
00269 BOOST_CHECK( phrase.GetSize() == 0 );
00270
00271 std::vector<FactorType> outputFactorOrder;
00272 outputFactorOrder.push_back(0);
00273
00274 phrase.CreateFromString(
00275 Input,
00276 outputFactorOrder,
00277 "licenses",
00278 NULL);
00279
00280 BOOST_CHECK( phrase.GetSize() == 1 );
00281
00282 float score;
00283 nextState = backwardLM->Evaluate(phrase, prevState, score);
00284
00285
00286 SLOPPY_CHECK_CLOSE( (p_licenses + p_licenses_the - p_the), score, 0.01);
00287
00288 delete prevState;
00289 prevState = nextState;
00290
00291 }
00292
00293
00294 {
00295 Phrase phrase;
00296 BOOST_CHECK( phrase.GetSize() == 0 );
00297
00298 std::vector<FactorType> outputFactorOrder;
00299 outputFactorOrder.push_back(0);
00300
00301 phrase.CreateFromString(
00302 Input,
00303 outputFactorOrder,
00304 "for",
00305 NULL);
00306
00307 BOOST_CHECK( phrase.GetSize() == 1 );
00308
00309 float score;
00310 nextState = backwardLM->Evaluate(phrase, prevState, score);
00311
00312
00313 SLOPPY_CHECK_CLOSE( (p_for + p_for_licenses - p_licenses), score, 0.01);
00314
00315 delete prevState;
00316 prevState = nextState;
00317
00318 }
00319
00320
00321 {
00322 Phrase phrase;
00323 BOOST_CHECK( phrase.GetSize() == 0 );
00324
00325 std::vector<FactorType> outputFactorOrder;
00326 outputFactorOrder.push_back(0);
00327
00328 phrase.CreateFromString(
00329 Input,
00330 outputFactorOrder,
00331 "most",
00332 NULL);
00333
00334 BOOST_CHECK( phrase.GetSize() == 1 );
00335
00336 float score;
00337 nextState = backwardLM->Evaluate(phrase, prevState, score);
00338
00339
00340 SLOPPY_CHECK_CLOSE( (p_most + p_most_for - p_for), score, 0.01);
00341
00342 delete prevState;
00343 prevState = nextState;
00344
00345 }
00346
00347 delete prevState;
00348 }
00349
00350 private:
00351 const Sentence *dummyInput;
00352 BackwardLanguageModel<lm::ngram::ProbingModel> *backwardLM;
00353
00354 };
00355
00356
00357 }
00358
00359 const char *FileLocation()
00360 {
00361 if (boost::unit_test::framework::master_test_suite().argc < 2) {
00362 BOOST_FAIL("Jamfile must specify arpa file for this test, but did not");
00363 }
00364 return boost::unit_test::framework::master_test_suite().argv[1];
00365 }
00366
00367 BOOST_AUTO_TEST_CASE(ProbingAll)
00368 {
00369
00370 BackwardLanguageModelTest test;
00371 test.testEmptyHypothesis();
00372 test.testCalcScore();
00373 test.testEvaluate();
00374
00375 }