00001 #include "FileHandler.h"
00002 #include <cstdio>
00003
00004
00005
// On WIN32 builds other than MinGW, route popen/pclose to the
// underscore-prefixed _popen/_pclose functions.
#if defined(WIN32) && !defined(__MINGW32__)
#define popen(command, mode) _popen(command, mode)
#define pclose(stream) _pclose(stream)
#endif
00010
00011 namespace Moses
00012 {
00013
00014
00015 const std::string FileHandler::kStdInDescriptor = "___stdin___";
00016 const std::string FileHandler::kStdOutDescriptor = "___stdout___";
00017
00018 const FileExtension FileHandler::kGzipped = ".gz";
00019 const FileExtension FileHandler::kBzipped2 = ".bz2";
00020
00021 const std::string FileHandler::kCatCommand = "cat";
00022 const std::string FileHandler::kGzipCommand = "gzip -f";
00023 const std::string FileHandler::kGunzipCommand = "gunzip -f";
00024 const std::string FileHandler::kBzip2Command = "bzip2 -f";
00025 const std::string FileHandler::kBunzip2Command = "bunzip2 -f";
00026
00027 FileHandler::FileHandler(const std::string & path, std::ios_base::openmode flags, bool )
00028 : std::fstream((const char*) NULL), path_(path), flags_(flags), buffer_(NULL), fp_(NULL)
00029 {
00030 if( !(flags^(std::ios::in|std::ios::out)) ) {
00031 fprintf(stderr, "ERROR: FileHandler does not support bidirectional files (%s).\n", path_.c_str());
00032 exit(EXIT_FAILURE);
00033 } else {
00034 bool ret = setStreamBuffer(flags & std::ios::in);
00035 UTIL_THROW_IF2(!ret, "Unable to set stream buffer");
00036 }
00037 this->precision(32);
00038 }
00039
00040 FileHandler::~FileHandler()
00041 {
00042 #ifndef NO_PIPES
00043 if( fp_ != 0 )
00044 pclose(fp_);
00045 #endif
00046 if( path_ != FileHandler::kStdInDescriptor &&
00047 path_ != FileHandler::kStdOutDescriptor )
00048 delete buffer_;
00049 if( this->is_open() )
00050 this->close();
00051 }
00052
00053 fdstreambuf * FileHandler::openCompressedFile(const char * cmd)
00054 {
00055
00056
00057 const char * p_type = (flags_ & std::ios::in ? "r" : "w");
00058 #ifndef NO_PIPES
00059 fp_ = popen(cmd, p_type);
00060 #else
00061 fp_ = NULL;
00062 #endif
00063 if( fp_ == NULL ) {
00064
00065 perror("openCompressedFile: ");
00066 exit(EXIT_FAILURE);
00067 }
00068
00069 return new fdstreambuf(fileno(fp_));
00070 }
00071
00072 bool FileHandler::setStreamBuffer(bool checkExists)
00073 {
00074
00075 if (path_ == FileHandler::kStdInDescriptor) {
00076 UTIL_THROW_IF2((flags_ & std::ios::in) == 0,
00077 "Incorrect flags: " << flags_);
00078 std::streambuf* sb = std::cin.rdbuf();
00079 buffer_ = sb;
00080 } else if (path_ == FileHandler::kStdOutDescriptor) {
00081 UTIL_THROW_IF2((flags_ & std::ios::out) == 0,
00082 "Incorrect flags: " << flags_);
00083 std::streambuf* sb = std::cout.rdbuf();
00084 buffer_ = sb;
00085 } else {
00086
00087 if( checkExists && ! fileExists() ) {
00088 fprintf(stderr, "ERROR: Failed to find file at %s\n", path_.c_str());
00089 exit(EXIT_FAILURE);
00090 }
00091 std::string cmd = "";
00092 if( isCompressedFile(cmd) && (! cmd.empty()) ) {
00093 buffer_ = openCompressedFile(cmd.c_str());
00094 } else {
00095
00096 std::filebuf* fb = new std::filebuf();
00097 fb->open(path_.c_str(), flags_);
00098 buffer_ = fb;
00099 }
00100 }
00101 if (!buffer_) {
00102 fprintf(stderr, "ERROR:Failed to open file at %s\n", path_.c_str());
00103 exit(EXIT_FAILURE);
00104 }
00105 this->init(buffer_);
00106 return true;
00107 }
00108
00109
00110
00111
00112
00113 bool FileHandler::isCompressedFile(std::string & cmd)
00114 {
00115 bool compressed = false, isInput = (flags_ & std::ios::in);
00116 cmd = "";
00117 unsigned int len = path_.size();
00118 if( len > kGzipped.size()
00119 && path_.find(kGzipped) == len - kGzipped.size()) {
00120
00121 compressed = true;
00122
00123 cmd = (isInput ? "exec " + kGunzipCommand + "c "
00124 : "exec " + kGzipCommand + "c > ") + path_;
00125 } else if( len > kBzipped2.size() &&
00126 path_.find(kBzipped2) == len - kBzipped2.size()) {
00127
00128 compressed = true;
00129 cmd = (isInput ? "exec " + kBunzip2Command + "c "
00130 : "exec " + kBzip2Command + "c > ") + path_;
00131 }
00132 return compressed;
00133 }
00134
00135 bool FileHandler::fileExists()
00136 {
00137 bool exists = false;
00138 struct stat f_info;
00139 if( stat(path_.c_str(), &f_info) == 0 )
00140 exists = true;
00141 return( exists );
00142 }
00143
00144
00145
00146 bool FileHandler::getCompressionCmds(const std::string & filepath, std::string & compressionCmd,
00147 std::string & decompressionCmd,
00148 std::string & compressionSuffix)
00149 {
00150
00151 compressionCmd = kCatCommand;
00152 decompressionCmd = kCatCommand;
00153 if (filepath.length() > kGzipped.size() &&
00154 filepath.find(kGzipped) == filepath.length()
00155 - kGzipped.length()) {
00156 compressionCmd = kGzipCommand;
00157 decompressionCmd = kGunzipCommand;
00158 compressionSuffix = kGzipped;
00159 } else if (filepath.length() > kBzipped2.size() &&
00160 filepath.find(kBzipped2) == filepath.length()
00161 - kBzipped2.length() ) {
00162 compressionCmd = kBzip2Command;
00163 decompressionCmd = kBunzip2Command;
00164 compressionSuffix = kBzipped2;;
00165 }
00166 return (compressionCmd != kCatCommand && decompressionCmd != kCatCommand);
00167 }
00168
00169 bool FileHandler::reset()
00170 {
00171 #ifndef NO_PIPES
00172
00173 if (fp_ != 0) {
00174
00175 pclose(fp_);
00176 std::string cmd = "";
00177 if (isCompressedFile(cmd) && ! cmd.empty())
00178 buffer_ = openCompressedFile(cmd.c_str());
00179
00180 this->init(buffer_);
00181 } else
00182 #endif
00183 buffer_->pubseekoff(0, std::ios_base::beg);
00184 return true;
00185 }
00186 }