bes  Updated for version 3.20.5
DirectoryUtil.cc
1 // This file is part of the "NcML Module" project, a BES module designed
3 // to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // Please see the files COPYING and COPYRIGHT for more information on the GLPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
30 #include "config.h"
31 #include "DirectoryUtil.h"
32 
33 #include <cstring>
34 #include <cerrno>
35 #include <sstream>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39 
40 // libdap
41 #include "GNURegex.h"
42 
43 // bes
44 #include "BESDebug.h"
45 #include "BESForbiddenError.h"
46 #include "BESInternalError.h"
47 #include "TheBESKeys.h"
48 #include "BESNotFoundError.h"
49 #include "BESUtil.h"
50 
51 using std::string;
52 using std::vector;
53 
54 namespace agg_util {
/**
 * Scoped owner of a DIR* stream obtained from opendir().
 *
 * The constructor attempts the opendir(); the destructor calls closedir()
 * on the stream if the open succeeded. Callers must test fail() (or get()
 * against null) before using the stream.
 *
 * Instances cannot be copied: two objects holding the same DIR* would both
 * call closedir() on it.
 */
struct DirWrapper {
public:

    /**
     * Try to open fullDirPath as a directory stream.
     * @param fullDirPath path handed verbatim to opendir().
     * On failure get() is null and fail() is true. opendir() is the last
     * call made by this constructor, so errno can be inspected right after.
     */
    explicit DirWrapper(const std::string& fullDirPath) :
        _pDir(0), _fullPath(fullDirPath)
    {
        // if the user sees null after this, they can check the errno.
        _pDir = opendir(fullDirPath.c_str());
    }

    /** Close the stream if one was opened. */
    ~DirWrapper()
    {
        if (_pDir) {
            closedir(_pDir);
            _pDir = 0;
        }
    }

    /** @return true if opendir() did not yield a stream. */
    bool fail() const
    {
        return !_pDir;
    }

    /** @return the wrapped stream (null if the open failed); still owned by this object. */
    DIR*
    get() const
    {
        return _pDir;
    }

    // automatically closedir() if non-null on dtor.
    DIR* _pDir;
    // the path that was passed to opendir()
    std::string _fullPath;

private:
    // Declared but never defined: copying would lead to a double closedir().
    DirWrapper(const DirWrapper&);
    DirWrapper& operator=(const DirWrapper&);
};
92 
94 FileInfo::FileInfo(const std::string& path, const std::string& basename, bool isDir, time_t modTime) :
95  _path(path), _basename(basename), _fullPath("") // start empty, cached later
96  , _isDir(isDir), _modTime(modTime)
97 {
100 }
101 
102 FileInfo::~FileInfo()
103 {
104 }
105 
106 const std::string&
108 {
109  return _path;
110 }
111 
112 const std::string&
113 FileInfo::basename() const
114 {
115  return _basename;
116 }
117 
118 bool FileInfo::isDir() const
119 {
120  return _isDir;
121 }
122 
123 time_t FileInfo::modTime() const
124 {
125  return _modTime;
126 }
127 
128 std::string FileInfo::getModTimeAsString() const
129 {
130  // we'll just use UTC for the output...
131  struct tm* pTM = gmtime(&_modTime);
132  char buf[128];
133  // this should be "Year-Month-Day Hour:Minute:Second"
134  strftime(buf, 128, "%F %T", pTM);
135  return string(buf);
136 }
137 
138 const std::string&
140 {
141  if (_fullPath.empty()) {
142  _fullPath = _path + "/" + _basename;
143  }
144  return _fullPath;
145 }
146 
147 std::string FileInfo::toString() const
148 {
149  return "{FileInfo fullPath=" + getFullPath() + " isDir=" + ((isDir()) ? ("true") : ("false")) + " modTime=\""
150  + getModTimeAsString() + "\""
151  " }";
152 }
153 
155 
156 const string DirectoryUtil::_sDebugChannel = "agg_util";
157 
158 DirectoryUtil::DirectoryUtil() :
159  _rootDir("/"), _suffix("") // we start with no filter
160  , _pRegExp(0), _filteringModTimes(false), _newestModTime(0L)
161 {
162  // this can throw, but the class is completely constructed by this point.
163  setRootDir("/");
164 }
165 
166 DirectoryUtil::~DirectoryUtil()
167 {
168  clearRegExp();
169 }
170 
172 const std::string&
174 {
175  return _rootDir;
176 }
177 
183 void DirectoryUtil::setRootDir(const std::string& origRootDir, bool allowRelativePaths/*=false*/,
184  bool /*allowSymLinks=false*/)
185 {
186  if (!allowRelativePaths && hasRelativePath(origRootDir)) {
187  throw BESForbiddenError("can't use rootDir=" + origRootDir + " since it has a relative path (../)", __FILE__,
188  __LINE__);
189  }
190 
191  // Get the root without trailing slash, we'll add it.
192  _rootDir = origRootDir;
193  removeTrailingSlashes(_rootDir);
194  // If empty here, that means the actual filesystem root.
195 
196  // Use the BESUtil to test the path
197  // Since it assumes root is valid and strips preceding "/",
198  // we use "/" as the root path and the root path as the path
199  // to validate the root. This will throw if invalid.
200  BESUtil::check_path(_rootDir, "/", false); // not going to allow symlinks by default.
201 
202  // We should be good if we get here.
203 }
204 
205 void DirectoryUtil::setFilterSuffix(const std::string& suffix)
206 {
207  _suffix = suffix;
208 }
209 
210 void DirectoryUtil::setFilterRegExp(const std::string& regexp)
211 {
212  clearRegExp(); // avoid leaks
213  if (!regexp.empty()) {
214  _pRegExp = new libdap::Regex(regexp.c_str());
215  }
216 }
217 
219 {
220  delete _pRegExp;
221  _pRegExp = 0;
222 }
223 
225 {
226  _newestModTime = newestModTime;
227  _filteringModTimes = true;
228 }
229 
230 void DirectoryUtil::getListingForPath(const std::string& path, std::vector<FileInfo>* pRegularFiles,
231  std::vector<FileInfo>* pDirectories)
232 {
233  string pathToUse(path);
234  removePrecedingSlashes(pathToUse);
235  pathToUse = getRootDir() + "/" + pathToUse;
236  BESDEBUG(_sDebugChannel, "Attempting to get dir listing for path=\"" << pathToUse << "\"" << endl);
237 
238  // RAII, will closedir no matter how we leave function, including a throw
239  DirWrapper pDir(pathToUse);
240  if (pDir.fail()) {
241  throwErrorForOpendirFail(pathToUse);
242  }
243 
244  // Go through each entry and see if it's a directory or regular file and
245  // add it to the list.
246  struct dirent* pDirEnt = 0;
247  while ((pDirEnt = readdir(pDir.get())) != 0) {
248  string entryName = pDirEnt->d_name;
249  // Exclude ".", ".." and any dotfile dirs like ".svn".
250  if (!entryName.empty() && entryName[0] == '.') {
251  continue;
252  }
253 
254  // Figure out if it's a regular file or directory
255  string pathToEntry = pathToUse + "/" + entryName;
256  struct stat statBuf;
257  int statResult = stat(pathToEntry.c_str(), &statBuf);
258  if (statResult != 0) {
259  // If we can't stat the file for some reason, then ignore it
260  continue;
261  }
262 
263  // Use the passed in path for the entry since we
264  // want to make the locations be relative to the root
265  // for loading later.
266  if (pDirectories && S_ISDIR(statBuf.st_mode)) {
267  pDirectories->push_back(FileInfo(path, entryName, true, statBuf.st_mtime));
268  }
269  else if (pRegularFiles && S_ISREG(statBuf.st_mode)) {
270  FileInfo theFile(path, entryName, false, statBuf.st_mtime);
271  // match against the relative passed in path, not root full path
272  if (matchesAllFilters(theFile.getFullPath(), statBuf.st_mtime)) {
273  pRegularFiles->push_back(theFile);
274  }
275  }
276  }
277 }
278 
279 void DirectoryUtil::getListingForPathRecursive(const std::string& path, std::vector<FileInfo>* pRegularFiles,
280  std::vector<FileInfo>* pDirectories)
281 {
282  // Remove trailing slash to make it canonical
283  string canonicalPath = path;
284  removeTrailingSlashes(canonicalPath);
285 
286  // We use our own local vector of directories in order to recurse,
287  // then add them to the end of pDirectories if it exists.
288 
289  // First, get the current path's listing
290  vector<FileInfo> dirs;
291  dirs.reserve(16); // might as well start with a "few" to avoid grows.
292 
293  // Keep adding them to the user specified regular file list if desired,
294  // but keep track of dirs ourself.
295  getListingForPath(canonicalPath, pRegularFiles, &dirs);
296 
297  // If the caller wanted directories, append them all to the return
298  if (pDirectories) {
299  pDirectories->insert(pDirectories->end(), dirs.begin(), dirs.end());
300  }
301 
302  // Finally, recurse on each directory in dirs
303  for (vector<FileInfo>::const_iterator it = dirs.begin(); it != dirs.end(); ++it) {
304  string subPath = canonicalPath + "/" + it->basename();
305  BESDEBUG(_sDebugChannel, "DirectoryUtil: recursing down to directory subtree=\"" << subPath << "\"..." << endl);
306  // Pass down the caller's accumulated vector's to be filled in.
307  getListingForPathRecursive(subPath, pRegularFiles, pDirectories);
308  }
309 
310 }
311 
312 void DirectoryUtil::getListingOfRegularFilesRecursive(const std::string& path, std::vector<FileInfo>& rRegularFiles)
313 {
314  // call the other one, not accumulated the directories, only recursing into them.
315  getListingForPathRecursive(path, &rRegularFiles, 0);
316 }
317 
/**
 * Translate the errno left by a failed opendir() into a BES exception.
 * Always throws.
 *
 * @param fullPath the path that was passed to opendir(), used in messages
 * @throws BESForbiddenError for EACCES
 * @throws BESNotFoundError  for ELOOP, ENAMETOOLONG, ENOENT and ENOTDIR
 * @throws BESInternalError  for EMFILE, ENFILE and any other value
 */
void DirectoryUtil::throwErrorForOpendirFail(const string& fullPath)
{
    // Take a snapshot first: building the messages allocates memory, which
    // is allowed to change errno.
    const int err = errno;

    switch (err) {
    case EACCES: {
        string msg = "Permission denied for some directory in path=\"" + fullPath + "\"";
        throw BESForbiddenError(msg, __FILE__, __LINE__);
    }

    case ELOOP: {
        string msg = "A symlink loop was detected in path=\"" + fullPath + "\"";
        throw BESNotFoundError(msg, __FILE__, __LINE__); // closest I can figure...
    }

    case ENAMETOOLONG: {
        string msg = "A name in the path was too long. path=\"" + fullPath + "\"";
        throw BESNotFoundError(msg, __FILE__, __LINE__);
    }

    case ENOENT: {
        string msg = "Some part of the path was not found. path=\"" + fullPath + "\"";
        throw BESNotFoundError(msg, __FILE__, __LINE__);
    }

    case ENOTDIR: {
        string msg = "Some part of the path was not a directory. path=\"" + fullPath + "\"";
        throw BESNotFoundError(msg, __FILE__, __LINE__);
    }

    case EMFILE: // per-process descriptor limit
    case ENFILE: { // system-wide open file limit
        string msg = "Internal Error: Too many files are currently open!";
        throw BESInternalError(msg, __FILE__, __LINE__);
    }

    default: {
        // Keep the numeric code and its text so the failure can be diagnosed.
        std::ostringstream oss;
        oss << "An unknown errno was found after opendir() was called on path=\"" << fullPath << "\""
            << " (errno=" << err << ": " << strerror(err) << ")";
        string msg = oss.str();
        throw BESInternalError(msg, __FILE__, __LINE__);
    }
    }
}
363 
364 bool DirectoryUtil::matchesAllFilters(const std::string& path, time_t modTime) const
365 {
366  bool matches = true;
367  // Do the suffix first since it's fast
368  if (!_suffix.empty() && !matchesSuffix(path, _suffix)) {
369  matches = false;
370  }
371 
372  // Suffix matches and we have a regexp, check that
373  if (matches && _pRegExp) {
374  // match the full string, -1 on fail, num chars matching otherwise
375  int numCharsMatching = _pRegExp->match(path.c_str(), path.size(), 0);
376  matches = (numCharsMatching > 0); // TODO do we want to match the size()?
377  }
378 
379  if (matches && _filteringModTimes) {
380  matches = (modTime < _newestModTime);
381  }
382 
383  return matches;
384 }
385 
386 bool DirectoryUtil::hasRelativePath(const std::string& path)
387 {
388  return (path.find("..") != string::npos);
389 }
390 
392 {
393  if (!path.empty()) {
394  string::size_type pos = path.find_last_not_of("/");
395  if (pos != string::npos) {
396  path = path.substr(0, pos + 1);
397  }
398  }
399 }
400 
402 {
403  if (!path.empty()) {
404  string::size_type pos = path.find_first_not_of("/");
405  path = path.substr(pos, string::npos);
406  }
407 }
408 
409 void DirectoryUtil::printFileInfoList(const vector<FileInfo>& listing)
410 {
411  std::ostringstream oss;
412  printFileInfoList(oss, listing);
413  BESDEBUG(_sDebugChannel, oss.str() << endl);
414 }
415 
416 void DirectoryUtil::printFileInfoList(std::ostream& os, const vector<FileInfo>& listing)
417 {
418  for (vector<FileInfo>::const_iterator it = listing.begin(); it != listing.end(); ++it) {
419  os << it->toString() << endl;
420  }
421 }
422 
424 {
425  bool found;
426  string rootDir;
427  TheBESKeys::TheKeys()->get_value("BES.Catalog.catalog.RootDirectory", rootDir, found);
428  if (!found) {
429  TheBESKeys::TheKeys()->get_value("BES.Data.RootDirectory", rootDir, found);
430  }
431  if (!found) {
432  rootDir = "/";
433  }
434  return rootDir;
435 }
436 
437 bool DirectoryUtil::matchesSuffix(const std::string& filename, const std::string& suffix)
438 {
439  // see if the last suffix.size() characters match.
440  bool matches = (filename.find(suffix, filename.size() - suffix.size()) != string::npos);
441  return matches;
442 }
443 }
agg_util::DirectoryUtil::getListingOfRegularFilesRecursive
void getListingOfRegularFilesRecursive(const std::string &path, std::vector< FileInfo > &rRegularFiles)
Definition: DirectoryUtil.cc:312
agg_util::FileInfo
Definition: DirectoryUtil.h:46
agg_util::DirectoryUtil::removePrecedingSlashes
static void removePrecedingSlashes(std::string &path)
Definition: DirectoryUtil.cc:401
agg_util::DirectoryUtil::setRootDir
void setRootDir(const std::string &rootDir, bool allowRelativePaths=false, bool allowSymLinks=false)
Definition: DirectoryUtil.cc:183
BESNotFoundError
error thrown if the resource requested cannot be found
Definition: BESNotFoundError.h:40
agg_util::DirectoryUtil::setFilterRegExp
void setFilterRegExp(const std::string &regexp)
Definition: DirectoryUtil.cc:210
agg_util
Helper class for temporarily hijacking an existing dhi to load a DDX response for one particular file...
Definition: AggMemberDataset.cc:38
agg_util::DirectoryUtil::removeTrailingSlashes
static void removeTrailingSlashes(std::string &path)
Definition: DirectoryUtil.cc:391
TheBESKeys::TheKeys
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:61
agg_util::DirectoryUtil::setFilterSuffix
void setFilterSuffix(const std::string &suffix)
Definition: DirectoryUtil.cc:205
agg_util::FileInfo::FileInfo
FileInfo(const std::string &path, const std::string &basename, bool isDir, time_t modTime)
Definition: DirectoryUtil.cc:94
BESUtil::check_path
static void check_path(const string &path, const string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:251
BESForbiddenError
error thrown if the BES is not allowed to access the resource requested
Definition: BESForbiddenError.h:40
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43
TheBESKeys::get_value
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:420
agg_util::DirectoryUtil::getRootDir
const std::string & getRootDir() const
Definition: DirectoryUtil.cc:173
agg_util::FileInfo::path
const std::string & path() const
Definition: DirectoryUtil.cc:107
agg_util::FileInfo::getModTimeAsString
std::string getModTimeAsString() const
Definition: DirectoryUtil.cc:128
agg_util::FileInfo::getFullPath
const std::string & getFullPath() const
Definition: DirectoryUtil.cc:139
agg_util::DirectoryUtil::printFileInfoList
static void printFileInfoList(std::ostream &os, const std::vector< FileInfo > &listing)
Definition: DirectoryUtil.cc:416
agg_util::DirectoryUtil::setFilterModTimeOlderThan
void setFilterModTimeOlderThan(time_t newestModTime)
Definition: DirectoryUtil.cc:224
agg_util::DirectoryUtil::getBESRootDir
static std::string getBESRootDir()
Definition: DirectoryUtil.cc:423
agg_util::DirectoryUtil::getListingForPath
void getListingForPath(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
Definition: DirectoryUtil.cc:230
agg_util::DirectoryUtil::getListingForPathRecursive
void getListingForPathRecursive(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
Definition: DirectoryUtil.cc:279
agg_util::DirectoryUtil::clearRegExp
void clearRegExp()
Definition: DirectoryUtil.cc:218
agg_util::DirectoryUtil::hasRelativePath
static bool hasRelativePath(const std::string &path)
Definition: DirectoryUtil.cc:386