00001
00002
00003 #include <boost/program_options.hpp>
00004 #include <iomanip>
00005
00006 #include "tpt_typedefs.h"
00007 #include "ug_mm_ttrack.h"
00008 #include "tpt_tokenindex.h"
00009 #include "ug_deptree.h"
00010 #include "ug_corpus_token.h"
00011
00012 using namespace std;
00013 using namespace sapt;
00014 namespace po = boost::program_options;
00015
00016 string bname,mtt,mct;
00017 vector<string> range;
00018
00019 typedef L2R_Token<Conll_Sform> Token;
00020
00021 TokenIndex SF,LM,PS,DT;
00022 mmTtrack<Token> MTT;
00023 mmTtrack<SimpleWordId> MCT;
00024 bool sform;
00025 bool have_mtt, have_mct;
00026 bool with_sids;
00027 bool with_positions;
00028 void
00029 interpret_args(int ac, char* av[])
00030 {
00031 po::variables_map vm;
00032 po::options_description o("Options");
00033 o.add_options()
00034 ("help,h", "print this message")
00035 ("numbers,n", po::bool_switch(&with_sids), "print sentence ids as first token")
00036 ("sform,s", po::bool_switch(&sform), "sform only")
00037 ("with-positions,p", po::bool_switch(&with_positions), "show word positions")
00038 ;
00039
00040 po::options_description h("Hidden Options");
00041 h.add_options()
00042 ("bname", po::value<string>(&bname), "base name")
00043 ("range", po::value<vector<string> >(&range), "range")
00044 ;
00045 po::positional_options_description a;
00046 a.add("bname",1);
00047 a.add("range",-1);
00048
00049 po::store(po::command_line_parser(ac,av)
00050 .options(h.add(o))
00051 .positional(a)
00052 .run(),vm);
00053 po::notify(vm);
00054 if (vm.count("help") || bname.empty())
00055 {
00056 cout << "usage:\n\t"
00057 << av[0] << " track name [<range>]\n"
00058 << endl;
00059 cout << o << endl;
00060 exit(0);
00061 }
00062 mtt = bname+".mtt";
00063 mct = bname+".mct";
00064 }
00065
00066 void
00067 printRangeMTT(size_t start, size_t stop)
00068 {
00069 for (;start < stop; start++)
00070 {
00071 size_t i = 0;
00072 Token const* s = MTT.sntStart(start);
00073 Token const* e = MTT.sntEnd(start);
00074 if (with_sids) cout << start << " ";
00075 for (Token const* t = s; t < e; ++t)
00076 {
00077 #if 0
00078 uchar const* x = reinterpret_cast<uchar const*>(t);
00079 cout << *reinterpret_cast<id_type const*>(x) << " ";
00080 cout << *reinterpret_cast<id_type const*>(x+4) << " ";
00081 cout << int(*(x+8)) << " ";
00082 cout << int(*(x+9)) << " ";
00083 cout << *reinterpret_cast<short const*>(x+10) << endl;
00084 #endif
00085 if (!sform)
00086 {
00087 cout << setw(2) << right << ++i << " ";
00088 cout << setw(30) << right << SF[t->sform] << " ";
00089 cout << setw(4) << right << PS[t->majpos] << " ";
00090 cout << setw(4) << right << PS[t->minpos] << " ";
00091 cout << setw(30) << left << LM[t->lemma] << " ";
00092 cout << i+t->parent << " ";
00093 cout << DT[t->dtype] << endl;
00094 }
00095 else
00096 {
00097 if (with_positions) cout << t-s << ":";
00098 cout << SF[t->id()] << " ";
00099 }
00100 }
00101 cout << endl;
00102 }
00103 }
00104
00105 void
00106 printRangeMCT(size_t start, size_t stop)
00107 {
00108 for (;start < stop; start++)
00109 {
00110 SimpleWordId const* s = MCT.sntStart(start);
00111 SimpleWordId const* t = s;
00112 SimpleWordId const* e = MCT.sntEnd(start);
00113 if (with_sids) cout << start << " ";
00114 while (t < e)
00115 {
00116 if (with_positions) cout << t-s << ":";
00117 cout << SF[(t++)->id()] << " ";
00118 }
00119 cout << endl;
00120 }
00121 }
00122
00123 int
00124 main(int argc, char*argv[])
00125 {
00126 interpret_args(argc,argv);
00127 have_mtt = !access(mtt.c_str(),F_OK);
00128 have_mct = !have_mtt && !access(mct.c_str(),F_OK);
00129 if (!have_mtt && !have_mct)
00130 {
00131 cerr << "FATAL ERROR: neither " << mtt << " nor " << mct << " exit." << endl;
00132 exit(1);
00133 }
00134 if (have_mtt)
00135 {
00136 SF.open(bname+".tdx.sfo"); SF.iniReverseIndex();
00137 LM.open(bname+".tdx.lem"); LM.iniReverseIndex();
00138 PS.open(bname+".tdx.pos"); PS.iniReverseIndex();
00139 DT.open(bname+".tdx.drl"); DT.iniReverseIndex();
00140 MTT.open(mtt);
00141 }
00142 else
00143 {
00144 sform = true;
00145 SF.open(bname+".tdx"); SF.iniReverseIndex();
00146 MCT.open(mct);
00147 }
00148
00149 if (!range.size())
00150 have_mtt ? printRangeMTT(0, MTT.size()) : printRangeMCT(0, MCT.size());
00151 else
00152 {
00153 for (size_t i = 0; i < range.size(); i++)
00154 {
00155 istringstream buf(range[i]);
00156 size_t first,last; uchar c;
00157 buf>>first;
00158 if (buf.peek() == '-') buf>>c>>last;
00159 else last = first;
00160 if (have_mtt && last < MTT.size())
00161 printRangeMTT(first,last+1);
00162 else if (last < MCT.size())
00163 printRangeMCT(first,last+1);
00164 }
00165 }
00166 }