00001 #include "moses/PP/TargetPreferencesPhraseProperty.h"
00002 #include <iostream>
00003 #include <cstdio>
00004 #include <cstdlib>
00005 #include <sstream>
00006 #include <string>
00007 #include <queue>
00008 #include <assert.h>
00009 #include <limits>
00010
00011 namespace Moses
00012 {
00013
00014 void TargetPreferencesPhraseProperty::ProcessValue(const std::string &value)
00015 {
00016 std::istringstream tokenizer(value);
00017
00018 if (! (tokenizer >> m_nNTs)) {
00019 UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of non-terminals. Flawed property?");
00020 }
00021 assert( m_nNTs > 0 );
00022
00023 if (! (tokenizer >> m_totalCount)) {
00024 UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read overall rule count. Flawed property?");
00025 }
00026 assert( m_totalCount > 0.0 );
00027
00028
00029
00030
00031 std::priority_queue<float> ruleLabelledCountsPQ;
00032
00033 while (tokenizer.peek() != EOF) {
00034 try {
00035
00036 TargetPreferencesPhrasePropertyItem item;
00037 size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
00038
00039 if (m_nNTs == 1) {
00040
00041 item.m_labelsRHSCount = m_totalCount;
00042
00043 } else {
00044
00045 for (size_t i=0; i<m_nNTs-1; ++i) {
00046 size_t labelRHS;
00047 if (! (tokenizer >> labelRHS) ) {
00048 UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side label index. Flawed property?");
00049 }
00050 item.m_labelsRHS.push_back(labelRHS);
00051 }
00052
00053 if (! (tokenizer >> item.m_labelsRHSCount)) {
00054 UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read right-hand side count. Flawed property?");
00055 }
00056
00057 if (! (tokenizer >> numberOfLHSsGivenRHS)) {
00058 UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read number of left-hand sides. Flawed property?");
00059 }
00060 }
00061
00062 for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) {
00063 size_t labelLHS;
00064 if (! (tokenizer >> labelLHS)) {
00065 UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read left-hand side label index. Flawed property?");
00066 }
00067 float ruleLabelledCount;
00068 if (! (tokenizer >> ruleLabelledCount)) {
00069 UTIL_THROW2("TargetPreferencesPhraseProperty: Not able to read count. Flawed property?");
00070 }
00071 item.m_labelsLHSList.push_back( std::make_pair(labelLHS,ruleLabelledCount) );
00072 ruleLabelledCountsPQ.push(ruleLabelledCount);
00073 }
00074
00075 m_labelItems.push_back(item);
00076
00077 } catch (const std::exception &e) {
00078 UTIL_THROW2("TargetPreferencesPhraseProperty: Read error. Flawed property?");
00079 }
00080 }
00081
00082
00083 const size_t N=50;
00084
00085 if (ruleLabelledCountsPQ.size() > N) {
00086
00087 float topNRuleLabelledCount = std::numeric_limits<int>::max();
00088 for (size_t i=0; !ruleLabelledCountsPQ.empty() && i<N; ++i) {
00089 topNRuleLabelledCount = ruleLabelledCountsPQ.top();
00090 ruleLabelledCountsPQ.pop();
00091 }
00092
00093 size_t nKept=0;
00094 std::list<TargetPreferencesPhrasePropertyItem>::iterator itemIter=m_labelItems.begin();
00095 while (itemIter!=m_labelItems.end()) {
00096 if (itemIter->m_labelsRHSCount < topNRuleLabelledCount) {
00097 itemIter = m_labelItems.erase(itemIter);
00098 } else {
00099 std::list< std::pair<size_t,float> >::iterator itemLHSIter=(itemIter->m_labelsLHSList).begin();
00100 while (itemLHSIter!=(itemIter->m_labelsLHSList).end()) {
00101 if (itemLHSIter->second < topNRuleLabelledCount) {
00102 itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter);
00103 } else {
00104 if (nKept >= N) {
00105 itemLHSIter = (itemIter->m_labelsLHSList).erase(itemLHSIter,(itemIter->m_labelsLHSList).end());
00106 } else {
00107 ++nKept;
00108 ++itemLHSIter;
00109 }
00110 }
00111 }
00112 if ((itemIter->m_labelsLHSList).empty()) {
00113 itemIter = m_labelItems.erase(itemIter);
00114 } else {
00115 ++itemIter;
00116 }
00117 }
00118 }
00119 }
00120 };
00121
00122 }
00123