OpenTREP Logo  0.07.4
C++ Open Travel Request Parsing Library
opentrep-indexer.cpp
Go to the documentation of this file.
// STL
#include <cassert>
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <string>
// Boost (Extended STL)
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>
#include <boost/program_options.hpp>
// OpenTREP
#include <opentrep/OPENTREP_Service.hpp>
#include <opentrep/Location.hpp>
#include <opentrep/CityDetails.hpp>
#include <opentrep/DBType.hpp>
#include <opentrep/basic/BasConst_OPENTREP_Service.hpp>
#include <opentrep/basic/Utilities.hpp>
#include <opentrep/config/opentrep-paths.hpp>
20 
21 
// //////// Type definitions ///////
/** List of words, each word being held as a string. */
typedef std::vector<std::string> WordList_T;


// //////// Constants //////
/** Default file-path of the log file (see the --log option). */
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME ("opentrep-indexer.log");

/**
 * Default value of the --noniata option, i.e., whether the POR not
 * referenced by IATA are included (0 = only IATA-referenced POR,
 * 1 = all POR).
 *
 * NOTE(review): the definition was missing from this listing; the value
 * (false) has been restored from memory of the upstream sources -- confirm.
 */
const bool K_OPENTREP_DEFAULT_POR_INCLUDING = false;


// ///////// Parsing of Options & Configuration /////////
/**
 * Status which main() compares the result of readConfiguration() against,
 * in order to detect an early exit that is not an error. It must
 * therefore differ from 0, the regular return value of readConfiguration().
 *
 * NOTE(review): the definition was missing from this listing; the value
 * (99) has been restored from memory of the upstream sources -- confirm.
 */
const int K_OPENTREP_EARLY_RETURN_STATUS = 99;

52 int readConfiguration (int argc, char* argv[],
53  std::string& ioPORFilepath,
54  std::string& ioXapianDBFilepath,
55  std::string& ioSQLDBTypeString,
56  std::string& ioSQLDBConnectionString,
57  unsigned short& ioDeploymentNumber,
58  bool& ioIncludeNonIATAPOR,
59  bool& ioIndexPORInXapian,
60  bool& ioAddPORInDB,
61  std::string& ioLogFilename) {
62 
63  // Declare a group of options that will be allowed only on command line
64  boost::program_options::options_description generic ("Generic options");
65  generic.add_options()
66  ("prefix", "print installation prefix")
67  ("version,v", "print version string")
68  ("help,h", "produce help message");
69 
70  // Declare a group of options that will be allowed both on command
71  // line and in config file
72  boost::program_options::options_description config ("Configuration");
73  config.add_options()
74  ("porfile,p",
75  boost::program_options::value< std::string >(&ioPORFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_POR_FILEPATH),
76  "POR file-path (e.g., optd_por_public.csv)")
77  ("xapiandb,d",
78  boost::program_options::value< std::string >(&ioXapianDBFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH),
79  "Xapian database filepath (e.g., /tmp/opentrep/xapian_traveldb)")
80  ("sqldbtype,t",
81  boost::program_options::value< std::string >(&ioSQLDBTypeString)->default_value(OPENTREP::DEFAULT_OPENTREP_SQL_DB_TYPE),
82  "SQL database type (e.g., nodb for no SQL database, sqlite for SQLite, mysql for MariaDB/MySQL)")
83  ("sqldbconx,s",
84  boost::program_options::value< std::string >(&ioSQLDBConnectionString),
85  "SQL database connection string (e.g., ~/tmp/opentrep/sqlite_travel.db for SQLite, \"db=trep_trep user=trep password=trep\" for MariaDB/MySQL)")
86  ("deploymentnb,m",
87  boost::program_options::value<unsigned short>(&ioDeploymentNumber)->default_value(OPENTREP::DEFAULT_OPENTREP_DEPLOYMENT_NUMBER),
88  "Deployment number (from to N, where N=1 normally)")
89  ("noniata,n",
90  boost::program_options::value<bool>(&ioIncludeNonIATAPOR)->default_value(K_OPENTREP_DEFAULT_POR_INCLUDING),
91  "Whether or not to include POR not referenced by IATA (0 = only IATA-referenced POR, 1 = all POR are included)")
92  ("xapianindex,x",
93  boost::program_options::value<bool>(&ioIndexPORInXapian)->default_value(OPENTREP::DEFAULT_OPENTREP_INDEX_IN_XAPIAN),
94  "Whether or not to index the POR in Xapian (0 = do not touch the Xapian index, 1 = re-index all the POR in Xapian)")
95  ("dbadd,a",
96  boost::program_options::value<bool>(&ioAddPORInDB)->default_value(OPENTREP::DEFAULT_OPENTREP_ADD_IN_DB),
97  "Whether or not to add and index the POR in the SQL-based database (0 = do not touch the SQL-based database, 1 = add and re-index all the POR in the SQL-based database)")
98  ("log,l",
99  boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME),
100  "Filepath for the logs")
101  ;
102 
103  // Hidden options, will be allowed both on command line and
104  // in config file, but will not be shown to the user.
105  boost::program_options::options_description hidden ("Hidden options");
106  hidden.add_options()
107  ("copyright",
108  boost::program_options::value< std::vector<std::string> >(),
109  "Show the copyright (license)");
110 
111  boost::program_options::options_description cmdline_options;
112  cmdline_options.add(generic).add(config).add(hidden);
113 
114  boost::program_options::options_description config_file_options;
115  config_file_options.add(config).add(hidden);
116 
117  boost::program_options::options_description visible ("Allowed options");
118  visible.add(generic).add(config);
119 
120  boost::program_options::positional_options_description p;
121  p.add ("copyright", -1);
122 
123  boost::program_options::variables_map vm;
124  boost::program_options::
125  store (boost::program_options::command_line_parser (argc, argv).
126  options (cmdline_options).positional(p).run(), vm);
127 
128  std::ifstream ifs ("opentrep-indexer.cfg");
129  boost::program_options::store (parse_config_file (ifs, config_file_options),
130  vm);
131  boost::program_options::notify (vm);
132 
133  if (vm.count ("help")) {
134  std::cout << visible << std::endl;
136  }
137 
138  if (vm.count ("version")) {
139  std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl;
141  }
142 
143  if (vm.count ("prefix")) {
144  std::cout << "Installation prefix: " << PREFIXDIR << std::endl;
146  }
147 
148  if (vm.count ("porfile")) {
149  ioPORFilepath = vm["porfile"].as< std::string >();
150  std::cout << "POR file-path is: " << ioPORFilepath << std::endl;
151  }
152 
153  if (vm.count ("deploymentnb")) {
154  ioDeploymentNumber = vm["deploymentnb"].as< unsigned short >();
155  std::cout << "Deployment number " << ioDeploymentNumber << std::endl;
156  }
157 
158  if (vm.count ("xapiandb")) {
159  ioXapianDBFilepath = vm["xapiandb"].as< std::string >();
160  std::cout << "Xapian index/database filepath is: " << ioXapianDBFilepath
161  << ioDeploymentNumber << std::endl;
162  }
163 
164  // Parse the SQL database type, if any is given
165  if (vm.count ("sqldbtype")) {
166  ioSQLDBTypeString = vm["sqldbtype"].as< std::string >();
167  std::cout << "SQL database type is: " << ioSQLDBTypeString
168  << std::endl;
169  }
170 
182  const OPENTREP::DBType lDBType (ioSQLDBTypeString);
183  if (lDBType == OPENTREP::DBType::NODB) {
184  ioAddPORInDB = false;
185  ioSQLDBConnectionString = "";
186 
187  } else if (lDBType == OPENTREP::DBType::SQLITE3) {
188  ioAddPORInDB = true;
189  ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_SQLITE_DB_FILEPATH;
190 
191  } else if (lDBType == OPENTREP::DBType::MYSQL) {
192  ioAddPORInDB = true;
193  ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_MYSQL_CONN_STRING;
194  }
195 
196  // Set the SQL database connection string, if any is given
197  if (vm.count ("sqldbconx")) {
198  ioSQLDBConnectionString = vm["sqldbconx"].as< std::string >();
199  }
200 
201  // Reporting of the SQL database connection string
202  if (lDBType == OPENTREP::DBType::SQLITE3
203  || lDBType == OPENTREP::DBType::MYSQL) {
204  const std::string& lSQLDBConnString =
206  ioSQLDBConnectionString,
207  ioDeploymentNumber);
208  //
209  std::cout << "SQL database connection string is: " << lSQLDBConnString
210  << std::endl;
211  }
212 
213  std::cout << "Are non-IATA-referenced POR included? "
214  << ioIncludeNonIATAPOR << std::endl;
215 
216  std::cout << "Index the POR in Xapian? "
217  << ioIndexPORInXapian << std::endl;
218 
219  std::cout << "Add and re-index the POR in the SQL-based database? "
220  << ioAddPORInDB << std::endl;
221 
222  if (vm.count ("log")) {
223  ioLogFilename = vm["log"].as< std::string >();
224  std::cout << "Log filename is: " << ioLogFilename << std::endl;
225  }
226 
227  return 0;
228 }
229 
230 
231 // /////////////// M A I N /////////////////
232 int main (int argc, char* argv[]) {
233 
234  // Output log File
235  std::string lLogFilename;
236 
237  // File-path of POR (points of reference)
238  std::string lPORFilepathStr;
239 
240  // Xapian database name (directory of the index)
241  std::string lXapianDBNameStr;
242 
243  // SQL database type
244  std::string lSQLDBTypeStr;
245 
246  // SQL database connection string
247  std::string lSQLDBConnectionStr;
248 
249  // Deployment number/version
250  OPENTREP::DeploymentNumber_T lDeploymentNumber;
251 
252  // Whether or not to include non-IATA-referenced POR
253  OPENTREP::shouldIndexNonIATAPOR_T lIncludeNonIATAPOR;
254 
255  // Whether or not to index the POR in Xapian
256  OPENTREP::shouldIndexPORInXapian_T lShouldIndexPORInXapian;
257 
258  // Whether or not to insert the POR in the SQL database
259  OPENTREP::shouldAddPORInSQLDB_T lShouldAddPORInSQLDB;
260 
261  // Call the command-line option parser
262  const int lOptionParserStatus =
263  readConfiguration (argc, argv, lPORFilepathStr, lXapianDBNameStr,
264  lSQLDBTypeStr, lSQLDBConnectionStr, lDeploymentNumber,
265  lIncludeNonIATAPOR, lShouldIndexPORInXapian,
266  lShouldAddPORInSQLDB, lLogFilename);
267 
268  if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) {
269  return 0;
270  }
271 
272  // Set the log parameters
273  std::ofstream logOutputFile;
274  // open and clean the log outputfile
275  logOutputFile.open (lLogFilename.c_str());
276  logOutputFile.clear();
277 
278  //
279  std::cout << "Parsing and indexing the OpenTravelData POR data file (into "
280  << "Xapian and/or SQL databases) may take a few tens of minutes "
281  << "on some architectures (and a few minutes on fastest ones)..."
282  << std::endl;
283 
284  // Initialise the context
285  const OPENTREP::PORFilePath_T lPORFilepath (lPORFilepathStr);
286  const OPENTREP::TravelDBFilePath_T lXapianDBName (lXapianDBNameStr);
287  const OPENTREP::DBType lDBType (lSQLDBTypeStr);
288  const OPENTREP::SQLDBConnectionString_T lSQLDBConnStr (lSQLDBConnectionStr);
289  OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lPORFilepath,
290  lXapianDBName, lDBType,
291  lSQLDBConnStr, lDeploymentNumber,
292  lIncludeNonIATAPOR,
293  lShouldIndexPORInXapian,
294  lShouldAddPORInSQLDB);
295 
296  // Launch the indexation
297  const OPENTREP::NbOfDBEntries_T lNbOfEntries =
298  opentrepService.insertIntoDBAndXapian();
299 
300  // Close the Log outputFile
301  logOutputFile.close();
302 
303  //
304  std::cout << lNbOfEntries << " entries have been processed" << std::endl;
305 
306  return 0;
307 }
OPENTREP::shouldIndexNonIATAPOR_T
bool shouldIndexNonIATAPOR_T
Definition: OPENTREP_Types.hpp:136
OPENTREP::DEFAULT_OPENTREP_MYSQL_CONN_STRING
const std::string DEFAULT_OPENTREP_MYSQL_CONN_STRING
OPENTREP::DBType::MYSQL
Definition: DBType.hpp:22
OPENTREP::DBType::SQLITE3
Definition: DBType.hpp:21
OPENTREP::NbOfDBEntries_T
unsigned int NbOfDBEntries_T
Definition: OPENTREP_Types.hpp:680
OPENTREP_Service.hpp
OPENTREP::OPENTREP_Service
Interface for the OPENTREP Services.
Definition: OPENTREP_Service.hpp:25
OPENTREP::PORFilePath_T
Definition: OPENTREP_Types.hpp:37
CityDetails.hpp
OPENTREP::SQLDBConnectionString_T
Definition: OPENTREP_Types.hpp:56
OPENTREP::DBType::NODB
Definition: DBType.hpp:20
OPENTREP::parseAndDisplayConnectionString
std::string parseAndDisplayConnectionString(const DBType &iDBType, const std::string &iSQLDBConnStr, const DeploymentNumber_T &iDeploymentNumber)
Definition: Utilities.cpp:255
OPENTREP::DEFAULT_OPENTREP_INDEX_IN_XAPIAN
const bool DEFAULT_OPENTREP_INDEX_IN_XAPIAN
BasConst_OPENTREP_Service.hpp
OPENTREP::shouldAddPORInSQLDB_T
bool shouldAddPORInSQLDB_T
Definition: OPENTREP_Types.hpp:146
OPENTREP::DEFAULT_OPENTREP_SQLITE_DB_FILEPATH
const std::string DEFAULT_OPENTREP_SQLITE_DB_FILEPATH
OPENTREP::OPENTREP_Service::insertIntoDBAndXapian
NbOfDBEntries_T insertIntoDBAndXapian()
Definition: OPENTREP_Service.cpp:870
OPENTREP::DEFAULT_OPENTREP_ADD_IN_DB
const bool DEFAULT_OPENTREP_ADD_IN_DB
readConfiguration
int readConfiguration(int argc, char *argv[], std::string &ioPORFilepath, std::string &ioXapianDBFilepath, std::string &ioSQLDBTypeString, std::string &ioSQLDBConnectionString, unsigned short &ioDeploymentNumber, bool &ioIncludeNonIATAPOR, bool &ioIndexPORInXapian, bool &ioAddPORInDB, std::string &ioLogFilename)
Definition: opentrep-indexer.cpp:52
Location.hpp
K_OPENTREP_EARLY_RETURN_STATUS
const int K_OPENTREP_EARLY_RETURN_STATUS
Definition: opentrep-indexer.cpp:49
OPENTREP::DEFAULT_OPENTREP_POR_FILEPATH
const std::string DEFAULT_OPENTREP_POR_FILEPATH
K_OPENTREP_DEFAULT_POR_INCLUDING
const bool K_OPENTREP_DEFAULT_POR_INCLUDING
Definition: opentrep-indexer.cpp:44
DBType.hpp
main
int main(int argc, char *argv[])
Definition: opentrep-indexer.cpp:232
OPENTREP::DEFAULT_OPENTREP_SQL_DB_TYPE
const std::string DEFAULT_OPENTREP_SQL_DB_TYPE
K_OPENTREP_DEFAULT_LOG_FILENAME
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-indexer.log")
OPENTREP::DeploymentNumber_T
unsigned short DeploymentNumber_T
Definition: OPENTREP_Types.hpp:108
OPENTREP::TravelDBFilePath_T
Definition: OPENTREP_Types.hpp:46
OPENTREP::DEFAULT_OPENTREP_DEPLOYMENT_NUMBER
const unsigned short DEFAULT_OPENTREP_DEPLOYMENT_NUMBER
OPENTREP::shouldIndexPORInXapian_T
bool shouldIndexPORInXapian_T
Definition: OPENTREP_Types.hpp:141
OPENTREP::DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH
const std::string DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH
OPENTREP::DBType
Enumeration of database types.
Definition: DBType.hpp:17
Utilities.hpp
WordList_T
std::vector< std::string > WordList_T
Definition: opentrep-indexer.cpp:23