/*
  CLAW - a C++ Library Absolutely Wonderful

  CLAW is a free library without any particular aim but being useful to
  anyone.

  Copyright (C) 2005-2011 Julien Jorge

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

  contact: julien.jorge@gamned.org
*/
/**
 * \file string_algorithm.tpp
 * \brief Implementation of the algorithms on strings.
 * \author Julien Jorge
 */
#include <claw/algorithm.hpp>
#include <claw/glob.hpp>

#include <cstddef>
#include <ios>
#include <iterator>
#include <sstream>
#include <string>
38 /*----------------------------------------------------------------------------*/
40 * \brief A portable version of std::getline( is, str, '\\n' ) that removes a
42 * \param is The stream in which we read.
43 * \param str The line read from the stream.
45 template<typename StreamType, typename StringType>
46 StreamType& claw::text::getline( StreamType& is, StringType& str )
48 std::getline( is, str );
51 if ( str[ str.size() - 1 ] == typename StringType::value_type('\r') )
52 str.erase( str.size() - 1 );
57 /*----------------------------------------------------------------------------*/
59 * \brief Remove characters at the begining of a string.
60 * \param str The string to modify.
61 * \param s The characters to remove.
63 template<typename StringType>
64 void claw::text::trim_left( StringType& str,
65 const typename StringType::value_type* const s )
67 typename StringType::size_type p = str.find_first_not_of(s);
69 if (p != StringType::npos)
73 /*----------------------------------------------------------------------------*/
75 * \brief Remove characters at the end of a string.
76 * \param str The string to modify.
77 * \param s The characters to remove.
79 template<typename StringType>
80 void claw::text::trim_right( StringType& str,
81 const typename StringType::value_type* const s )
83 typename StringType::size_type p = str.find_last_not_of(s);
85 if (p != StringType::npos)
86 str = str.substr( 0, p+1 );
89 /*----------------------------------------------------------------------------*/
91 * \brief Remove characters at the begining end at the end of a string.
92 * \param str The string to modify.
93 * \param s The characters to remove.
95 template<typename StringType>
96 void claw::text::trim( StringType& str,
97 const typename StringType::value_type* const s )
99 typename StringType::size_type first = str.find_first_not_of(s);
100 typename StringType::size_type last = str.find_last_not_of(s);
102 if (first != StringType::npos)
103 str = str.substr( first, last - first + 1 );
106 /*----------------------------------------------------------------------------*/
108 * \brief Squeeze successive characters of a string into one character.
109 * \param str The string to modify.
110 * \param s The characters to remove.
114 * std::string s("word aaa bbb abab");
115 * claw::squeeze( s, "ab" );
116 * std::cout << s << std::end; // result is "word a b abab"
119 template<typename StringType>
120 void claw::text::squeeze( StringType& str,
121 const typename StringType::value_type* const s )
123 typedef typename StringType::size_type size_type;
129 first = str.find_first_of(s, first);
131 if ( first != StringType::npos )
133 size_type last = str.find_first_not_of(str[first], first+1);
135 if ( last == StringType::npos )
136 str = str.substr(0, first+1);
137 else if ( last - first > 1 )
138 str = str.substr(0, first+1) + str.substr(last);
143 while ( (first != StringType::npos) && (first != str.length()) );
146 /*----------------------------------------------------------------------------*/
148 * \brief Replace a set of characters by other characters.
149 * \param str The string to modify.
150 * \param e1 The characters to remove.
151 * \param e2 The characters replacing the ones in \a e1.
153 * \return The number of replaced characters.
155 * Each character e1[i] will be replaced with e2[i]. If \a e1 is smaller than
156 * \a e2, the latter will be completed by repeating its last character.
160 * std::string s("word aaa bbb abab");
161 * claw::replace( s, "ab", "ba" );
162 * std::cout << s << std::end; // result is "word bbb aaa baba"
165 template<typename StringType>
166 std::size_t claw::text::replace
167 ( StringType& str, const StringType& e1, const StringType& e2 )
171 ( str.begin(), str.end(), e1.begin(), e1.end(), e2.begin(), e2.end() );
174 /*----------------------------------------------------------------------------*/
176 * \brief Test if the content of a string is immediately convertible to a type.
177 * \param str The string to test.
179 template<typename T, typename StringType>
180 bool claw::text::is_of_type( const StringType& str )
182 std::basic_istringstream< typename StringType::value_type,
183 typename StringType::traits_type,
184 typename StringType::allocator_type > iss(str);
195 /*----------------------------------------------------------------------------*/
197 * \brief Split a string into several substrings, according to a given
199 * \param sequence A sequence in which the substrings are added.
200 * \param str The string to split.
201 * \param sep The separator on which the string is splitted.
203 template<typename Sequence>
204 void claw::text::split
205 ( Sequence& sequence, const typename Sequence::value_type& str,
206 const typename Sequence::value_type::value_type sep )
208 split(sequence, str.begin(), str.end(), sep);
211 /*----------------------------------------------------------------------------*/
213 * \brief Split a string into several substrings, according to a given
215 * \param sequence A sequence in which the substrings are added.
216 * \param first Iterator on the beginning of the string to split.
217 * \param last Iterator just past the end of the string to split.
218 * \param sep The separator on which the string is splitted.
220 template<typename Sequence>
221 void claw::text::split
222 ( Sequence& sequence, typename Sequence::value_type::const_iterator first,
223 typename Sequence::value_type::const_iterator last,
224 const typename Sequence::value_type::value_type sep )
226 typedef typename Sequence::value_type string_type;
229 std::basic_istringstream< typename string_type::value_type,
230 typename string_type::traits_type,
231 typename string_type::allocator_type > iss( string_type(first, last) );
233 while ( std::getline(iss, line, sep) )
234 *std::insert_iterator<Sequence>(sequence, sequence.end()) = line;
237 /*----------------------------------------------------------------------------*/
239 * \brief Find escaped symbols in a sequence of characters and replace them by
240 * their c-equivalent.
242 * \param first Iterator on the beginning of the string to escape.
243 * \param last Iterator just past the end of the string to escape.
244 * \param out Iterator on the beginning of the output string.
245 * \pre \a out points on a range long enough to store the resulting string.
248 * std::string s("\\a\\t\\n\\r");
251 * claw::text::c_escape( s.begin(), s.end(), std::insert_iterator(r, r.end()) );
253 * if ( r == "\a\t\n\r" )
254 * std::cout << "It works!" << std::endl;
257 * \remark This method has not been tested with wide chars yet.
259 template<typename InputIterator, typename OutputIterator>
260 void claw::text::c_escape
261 ( InputIterator first, InputIterator last, OutputIterator out )
263 typedef typename std::iterator_traits<InputIterator>::value_type char_type;
264 typedef std::basic_string<char_type> string_type;
266 const string_type oct("01234567");
267 const string_type hex("0123456789ABCDEFabcdef");
271 for ( ; first!=last; ++out )
276 case 'a': *out = '\a'; ++first; break;
277 case 'b': *out = '\b'; ++first; break;
278 case 'f': *out = '\f'; ++first; break;
279 case 'n': *out = '\n'; ++first; break;
280 case 'r': *out = '\r'; ++first; break;
281 case 't': *out = '\t'; ++first; break;
282 case 'v': *out = '\v'; ++first; break;
287 const InputIterator e
288 ( find_first_not_of(first, last, oct.begin(), oct.end()) );
290 std::basic_istringstream<char_type> iss( string_type(first, e) );
291 iss >> std::oct >> v;
300 const InputIterator e
301 ( find_first_not_of(first, last, hex.begin(), hex.end()) );
303 std::basic_istringstream<char_type> iss( string_type(first, e) );
304 iss >> std::hex >> v;
309 default: *out = *first; ++first;
314 else if ( *first == '\\' )
326 /*----------------------------------------------------------------------------*/
328 * \brief Check if a string matches a given pattern.
329 * \param pattern The pattern.
330 * \param text The text to check.
331 * \param any_sequence A value representing any sequence of values, empty or
333 * \param zero_or_one A value representing any value or no value.
334 * \param any A value representing any value.
336 template<typename StringType>
337 bool claw::text::glob_match
338 ( const StringType& pattern, const StringType& text,
339 const typename StringType::value_type any_sequence,
340 const typename StringType::value_type zero_or_one,
341 const typename StringType::value_type any )
343 return claw::glob_match
344 ( pattern.begin(), pattern.end(), text.begin(), text.end(), any_sequence,
348 /*----------------------------------------------------------------------------*/
350 * \brief Check if a string may match a given pattern.
351 * \param pattern The pattern.
352 * \param text The text to check.
353 * \param any_sequence A value representing any sequence of values, empty or
355 * \param zero_or_one A value representing any value or no value.
356 * \param any A value representing any value.
358 template<typename StringType>
359 bool claw::text::glob_potential_match
360 ( const StringType& pattern, const StringType& text,
361 const typename StringType::value_type any_sequence,
362 const typename StringType::value_type zero_or_one,
363 const typename StringType::value_type any )
365 return claw::glob_potential_match
366 ( pattern.begin(), pattern.end(), text.begin(), text.end(), any_sequence,
368 } // glob_potential_match()