00001 #pragma once 00002 00003 #include "util/string_piece.hh" 00004 #include "util/tokenize_piece.hh" 00005 #include "util/file_piece.hh" 00006 #include <vector> 00007 #include <cstdlib> //atof 00008 #include "util/string_piece.hh" //Tokenization and work with StringPiece 00009 #include "util/tokenize_piece.hh" 00010 #include <vector> 00011 00012 namespace Moses 00013 { 00014 00015 //Struct for holding processed line 00016 struct line_text { 00017 StringPiece source_phrase; 00018 StringPiece target_phrase; 00019 StringPiece prob; 00020 StringPiece word_align; 00021 StringPiece counts; 00022 StringPiece sparse_score; 00023 StringPiece property; 00024 std::string property_to_be_binarized; 00025 }; 00026 00027 //Struct for holding processed line 00028 struct target_text { 00029 std::vector<unsigned int> target_phrase; 00030 std::vector<float> prob; 00031 std::vector<size_t> word_align_term; 00032 std::vector<size_t> word_align_non_term; 00033 std::vector<char> counts; 00034 std::vector<char> sparse_score; 00035 std::vector<char> property; 00036 00037 /* 00038 void Reset() 00039 { 00040 target_phrase.clear(); 00041 prob.clear(); 00042 word_all1.clear(); 00043 counts.clear(); 00044 sparse_score.clear(); 00045 property.clear(); 00046 } 00047 */ 00048 }; 00049 00050 //Ask if it's better to have it receive a pointer to a line_text struct 00051 line_text splitLine(const StringPiece &textin, bool scfg); 00052 void reformatSCFG(line_text &output); 00053 00054 std::vector<unsigned char> splitWordAll1(const StringPiece &textin); 00055 00056 } 00057