Alexandria  2.14.1
Please provide a description of the project.
AsciiReader.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012-2020 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
25 #include <fstream>
26 #include <set>
27 // The std regex library is not fully implemented in GCC 4.8. The following lines
28 // make use of the BOOST library and can be modified if GCC 4.9 will be used in
29 // the future.
30 // #include <regex>
31 #include <boost/regex.hpp>
32 using boost::regex;
33 using boost::regex_match;
34 #include <boost/algorithm/string.hpp>
35 
37 #include "Table/AsciiReader.h"
38 
39 #include "ReaderHelper.h"
40 #include "AsciiReaderHelper.h"
41 
42 namespace Euclid {
43 namespace Table {
44 
45 AsciiReader::AsciiReader(std::istream& stream) : AsciiReader(InstOrRefHolder<std::istream>::create(stream)) {
46 }
47 
48 AsciiReader::AsciiReader(const std::string& filename) : AsciiReader(create<std::ifstream>(filename)) {
49 }
50 
52  : m_stream_holder(std::move(stream_holder)) {
53 }
54 
56  if (m_reading_started) {
57  throw Elements::Exception() << "Changing comment indicator after reading "
58  << "has started is not allowed";
59  }
60  if (indicator.empty()) {
61  throw Elements::Exception() << "Empty string as comment indicator";
62  }
63  m_comment = indicator;
64  return *this;
65 }
66 
68  if (m_reading_started) {
69  throw Elements::Exception() << "Fixing the column names after reading "
70  << "has started is not allowed";
71  }
72 
73  m_column_names = std::move(column_names);
74 
75  std::set<std::string> set {};
76  regex whitespace {".*\\s.*"}; // Checks if input contains any whitespace characters
77  for (const auto& name : m_column_names) {
78  if (name.empty()) {
79  throw Elements::Exception() << "Empty string column names are not allowed";
80  }
81  if (regex_match(name, whitespace)) {
82  throw Elements::Exception() << "Column name '" << name << "' contains "
83  << "whitespace characters";
84  }
85  if (!set.insert(name).second) { // Check for duplicate names
86  throw Elements::Exception() << "Duplicate column name " << name;
87  }
88  }
91  throw Elements::Exception() << "Different number of column names and types";
92  }
93 
94  return *this;
95 }
96 
98  if (m_reading_started) {
99  throw Elements::Exception() << "Fixing the column types after reading "
100  << "has started is not allowed";
101  }
102 
103  m_column_types = std::move(column_types);
104 
107  throw Elements::Exception() << "Different number of column names and types";
108  }
109 
110  return *this;
111 }
112 
114  if (m_column_info != nullptr) {
115  return;
116  }
117  m_reading_started = true;
118 
119  auto& in = m_stream_holder->ref();
120 
121  size_t columns_number = countColumns(in, m_comment);
122  if (!m_column_names.empty() && m_column_names.size() != columns_number) {
123  throw Elements::Exception() << "Columns number in stream (" << columns_number
124  << ") does not match the column names number ("
125  << m_column_names.size() << ")";
126  }
127  if (!m_column_types.empty() && m_column_types.size() != columns_number) {
128  throw Elements::Exception() << "Columns number in stream (" << columns_number
129  << ") does not match the column types number ("
130  << m_column_types.size() << ")";
131  }
132 
133  auto auto_names = autoDetectColumnNames(in, m_comment, columns_number);
134  auto auto_desc = autoDetectColumnDescriptions(in, m_comment);
135 
136  std::vector<std::string> names {};
138  std::vector<std::string> units {};
139  std::vector<std::string> descriptions {};
140  for (size_t i=0; i<columns_number; ++i) {
141  if (m_column_names.empty()) {
142  names.emplace_back(auto_names[i]);
143  } else {
144  names.emplace_back(m_column_names[i]);
145  }
146  auto info = auto_desc.find(auto_names[i]);
147  if (info != auto_desc.end()) {
148  if (m_column_types.empty()) {
149  types.emplace_back(info->second.type);
150  } else {
151  types.emplace_back(m_column_types[i]);
152  }
153  units.emplace_back(info->second.unit);
154  descriptions.emplace_back(info->second.description);
155  } else {
156  if (m_column_types.empty()) {
157  types.emplace_back(typeid(std::string));
158  } else {
159  types.emplace_back(m_column_types[i]);
160  }
161  units.emplace_back("");
162  descriptions.emplace_back("");
163  }
164  }
165  m_column_info = createColumnInfo(names, types, units, descriptions);
166 
167 }
168 
169 
171  readColumnInfo();
172  return *m_column_info;
173 }
174 
/**
 * Returns the next line of the stream without consuming it.
 *
 * The current read position is recorded, one line is read, and the stream is
 * then asked to seek back to the recorded position.
 *
 * @param in the stream to peek into
 * @return the next line, without its line terminator
 */
static std::string _peekLine(std::istream& in) {
  std::string line;
  auto pos = in.tellg();
  getline(in, line);
  in.seekg(pos);
  return line;
}
182 
184  std::ostringstream comment;
185 
186  m_reading_started = true;
187  auto &in = m_stream_holder->ref();
188  while (in && _peekLine(in).compare(0, m_comment.size(), m_comment) == 0) {
189  std::string line;
190  getline(in, line);
191  line = line.substr(m_comment.size());
192  boost::trim(line);
193  comment << line << '\n';
194  }
195 
196  auto full_comment = comment.str();
197  boost::trim(full_comment);
198  return full_comment;
199 }
200 
202  readColumnInfo();
203  auto& in = m_stream_holder->ref();
204 
205  std::vector<Row> row_list;
206  regex column_separator {"\\s+"};
207  while(in && rows != 0) {
208  std::string line;
209  getline(in, line);
210  size_t comment_pos = line.find(m_comment);
211  if (comment_pos != std::string::npos) {
212  line = line.substr(0, comment_pos);
213  }
214  boost::trim(line);
215  if (!line.empty()) {
216  --rows;
217  boost::sregex_token_iterator i (line.begin(), line.end(), column_separator, -1);
218  boost::sregex_token_iterator j;
219  size_t count {0};
220  std::vector<Row::cell_type> values {};
221  while (i != j) {
222  if (count >= m_column_info->size()) {
223  throw Elements::Exception() << "Line with wrong number of cells: " << line;
224  }
225  values.push_back(convertToCellType(*i, m_column_info->getDescription(count).type));
226  ++count;
227  ++i;
228  }
229  row_list.push_back(Row{std::move(values), m_column_info});
230  }
231  }
232 
233  if (row_list.empty()) {
234  throw Elements::Exception() << "No more table rows left";
235  }
236  return Table{std::move(row_list)};
237 }
238 
239 void AsciiReader::skip(long rows) {
240  readColumnInfo();
241  auto& in = m_stream_holder->ref();
242 
243  while(in && rows != 0) {
244  std::string line;
245  getline(in, line);
246  size_t comment_pos = line.find(m_comment);
247  if (comment_pos != std::string::npos) {
248  line = line.substr(0, comment_pos);
249  }
250  boost::trim(line);
251  if (!line.empty()) {
252  --rows;
253  }
254  }
255 }
256 
258  return hasNextRow(m_stream_holder->ref(), m_comment);
259 }
260 
263 }
264 
265 } // Table namespace
266 } // Euclid namespace
267 
268 
269 
std::vector< std::type_index > m_column_types
Definition: AsciiReader.h:233
std::size_t countRemainingRows(std::istream &in, const std::string &comment)
AsciiReader & setCommentIndicator(const std::string &indicator)
Set the comment indicator.
Definition: AsciiReader.cpp:55
T empty(T... args)
std::shared_ptr< ColumnInfo > createColumnInfo(const std::vector< std::string > &names, const std::vector< std::type_index > &types, const std::vector< std::string > &units, const std::vector< std::string > &descriptions)
Creates a ColumnInfo object from the given names and types.
std::shared_ptr< ColumnInfo > m_column_info
Definition: AsciiReader.h:235
STL namespace.
T end(T... args)
Row::cell_type convertToCellType(const std::string &value, std::type_index type)
Converts the given value to a Row::cell_type of the given type.
STL class.
const ColumnInfo & getInfo() override
Returns the column information of the table.
T seekg(T... args)
STL class.
T push_back(T... args)
std::size_t rowsLeft() override
Implements the TableReader::rowsLeft() contract.
std::unique_ptr< InstOrRefHolder< std::istream > > m_stream_holder
Definition: AsciiReader.h:230
Table readImpl(long rows) override
Reads the next rows into a Table.
AsciiReader(std::istream &stream)
Constructs an AsciiReader which reads from the given stream.
Definition: AsciiReader.cpp:45
TableReader implementation for reading ASCII tables from streams.
Definition: AsciiReader.h:87
T str(T... args)
T move(T... args)
T tellg(T... args)
std::map< std::string, ColumnDescription > autoDetectColumnDescriptions(std::istream &in, const std::string &comment)
Reads the column descriptions of the given stream.
Represents one row of a Table.
Definition: Row.h:64
Represents a table.
Definition: Table.h:49
bool hasNextRow(std::istream &in, const std::string &comment)
T find(T... args)
T size(T... args)
STL class.
void skip(long rows) override
Implements the TableReader::skip() contract.
Provides information about the columns of a Table.
Definition: ColumnInfo.h:52
STL class.
AsciiReader & fixColumnTypes(std::vector< std::type_index > column_types)
Overrides the automatically detected column types.
Definition: AsciiReader.cpp:97
std::vector< std::string > m_column_names
Definition: AsciiReader.h:234
T begin(T... args)
std::string getComment() override
T substr(T... args)
static std::string _peekLine(std::istream &in)
bool hasMoreRows() override
Implements the TableReader::hasMoreRows() contract.
size_t countColumns(std::istream &in, const std::string &comment)
Returns the number of whitespace separated tokens of the first non commented line.
AsciiReader & fixColumnNames(std::vector< std::string > column_names)
Overrides the automatically detected column names.
Definition: AsciiReader.cpp:67
std::vector< std::string > autoDetectColumnNames(std::istream &in, const std::string &comment, size_t columns_number)
Reads the column names of the given stream.
T emplace_back(T... args)