regex.hpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. // Boost string_algo library regex.hpp header file ---------------------------//
  2. // Copyright Pavol Droba 2002-2003.
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. // See http://www.boost.org/ for updates, documentation, and revision history.
  8. #ifndef BOOST_STRING_REGEX_HPP
  9. #define BOOST_STRING_REGEX_HPP
  10. #include <boost/algorithm/string/config.hpp>
  11. #include <boost/regex.hpp>
  12. #include <boost/range/iterator_range_core.hpp>
  13. #include <boost/range/begin.hpp>
  14. #include <boost/range/end.hpp>
  15. #include <boost/range/iterator.hpp>
  16. #include <boost/range/as_literal.hpp>
  17. #include <boost/algorithm/string/find_format.hpp>
  18. #include <boost/algorithm/string/regex_find_format.hpp>
  19. #include <boost/algorithm/string/formatter.hpp>
  20. #include <boost/algorithm/string/iter_find.hpp>
  21. /*! \file
  22. Defines regex variants of the algorithms.
  23. */
  24. namespace boost {
  25. namespace algorithm {
  26. // find_regex -----------------------------------------------//
  27. //! Find regex algorithm
  28. /*!
  29. Search for a substring matching the given regex in the input.
  30. \param Input A container which will be searched.
  31. \param Rx A regular expression
  32. \param Flags Regex options
  33. \return
  34. An \c iterator_range delimiting the match.
  35. Returned iterator is either \c RangeT::iterator or
  36. \c RangeT::const_iterator, depending on the constness of
  37. the input parameter.
  38. \note This function provides the strong exception-safety guarantee
  39. */
  40. template<
  41. typename RangeT,
  42. typename CharT,
  43. typename RegexTraitsT>
  44. inline iterator_range<
  45. BOOST_STRING_TYPENAME range_iterator<RangeT>::type >
  46. find_regex(
  47. RangeT& Input,
  48. const basic_regex<CharT, RegexTraitsT>& Rx,
  49. match_flag_type Flags=match_default )
  50. {
  51. iterator_range<BOOST_STRING_TYPENAME range_iterator<RangeT>::type> lit_input(::boost::as_literal(Input));
  52. return ::boost::algorithm::regex_finder(Rx,Flags)(
  53. ::boost::begin(lit_input), ::boost::end(lit_input) );
  54. }
  55. // replace_regex --------------------------------------------------------------------//
  56. //! Replace regex algorithm
  57. /*!
  58. Search for a substring matching given regex and format it with
  59. the specified format.
  60. The result is a modified copy of the input. It is returned as a sequence
  61. or copied to the output iterator.
  62. \param Output An output iterator to which the result will be copied
  63. \param Input An input string
  64. \param Rx A regular expression
  65. \param Format Regex format definition
  66. \param Flags Regex options
  67. \return An output iterator pointing just after the last inserted character or
  68. a modified copy of the input
  69. \note The second variant of this function provides the strong exception-safety guarantee
  70. */
  71. template<
  72. typename OutputIteratorT,
  73. typename RangeT,
  74. typename CharT,
  75. typename RegexTraitsT,
  76. typename FormatStringTraitsT, typename FormatStringAllocatorT >
  77. inline OutputIteratorT replace_regex_copy(
  78. OutputIteratorT Output,
  79. const RangeT& Input,
  80. const basic_regex<CharT, RegexTraitsT>& Rx,
  81. const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
  82. match_flag_type Flags=match_default | format_default )
  83. {
  84. return ::boost::algorithm::find_format_copy(
  85. Output,
  86. Input,
  87. ::boost::algorithm::regex_finder( Rx, Flags ),
  88. ::boost::algorithm::regex_formatter( Format, Flags ) );
  89. }
  90. //! Replace regex algorithm
  91. /*!
  92. \overload
  93. */
  94. template<
  95. typename SequenceT,
  96. typename CharT,
  97. typename RegexTraitsT,
  98. typename FormatStringTraitsT, typename FormatStringAllocatorT >
  99. inline SequenceT replace_regex_copy(
  100. const SequenceT& Input,
  101. const basic_regex<CharT, RegexTraitsT>& Rx,
  102. const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
  103. match_flag_type Flags=match_default | format_default )
  104. {
  105. return ::boost::algorithm::find_format_copy(
  106. Input,
  107. ::boost::algorithm::regex_finder( Rx, Flags ),
  108. ::boost::algorithm::regex_formatter( Format, Flags ) );
  109. }
  110. //! Replace regex algorithm
  111. /*!
  112. Search for a substring matching given regex and format it with
  113. the specified format. The input string is modified in-place.
  114. \param Input An input string
  115. \param Rx A regular expression
  116. \param Format Regex format definition
  117. \param Flags Regex options
  118. */
  119. template<
  120. typename SequenceT,
  121. typename CharT,
  122. typename RegexTraitsT,
  123. typename FormatStringTraitsT, typename FormatStringAllocatorT >
  124. inline void replace_regex(
  125. SequenceT& Input,
  126. const basic_regex<CharT, RegexTraitsT>& Rx,
  127. const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
  128. match_flag_type Flags=match_default | format_default )
  129. {
  130. ::boost::algorithm::find_format(
  131. Input,
  132. ::boost::algorithm::regex_finder( Rx, Flags ),
  133. ::boost::algorithm::regex_formatter( Format, Flags ) );
  134. }
  135. // replace_all_regex --------------------------------------------------------------------//
  136. //! Replace all regex algorithm
  137. /*!
  138. Format all substrings, matching given regex, with the specified format.
  139. The result is a modified copy of the input. It is returned as a sequence
  140. or copied to the output iterator.
  141. \param Output An output iterator to which the result will be copied
  142. \param Input An input string
  143. \param Rx A regular expression
  144. \param Format Regex format definition
  145. \param Flags Regex options
  146. \return An output iterator pointing just after the last inserted character or
  147. a modified copy of the input
  148. \note The second variant of this function provides the strong exception-safety guarantee
  149. */
  150. template<
  151. typename OutputIteratorT,
  152. typename RangeT,
  153. typename CharT,
  154. typename RegexTraitsT,
  155. typename FormatStringTraitsT, typename FormatStringAllocatorT >
  156. inline OutputIteratorT replace_all_regex_copy(
  157. OutputIteratorT Output,
  158. const RangeT& Input,
  159. const basic_regex<CharT, RegexTraitsT>& Rx,
  160. const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
  161. match_flag_type Flags=match_default | format_default )
  162. {
  163. return ::boost::algorithm::find_format_all_copy(
  164. Output,
  165. Input,
  166. ::boost::algorithm::regex_finder( Rx, Flags ),
  167. ::boost::algorithm::regex_formatter( Format, Flags ) );
  168. }
  169. //! Replace all regex algorithm
  170. /*!
  171. \overload
  172. */
  173. template<
  174. typename SequenceT,
  175. typename CharT,
  176. typename RegexTraitsT,
  177. typename FormatStringTraitsT, typename FormatStringAllocatorT >
  178. inline SequenceT replace_all_regex_copy(
  179. const SequenceT& Input,
  180. const basic_regex<CharT, RegexTraitsT>& Rx,
  181. const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
  182. match_flag_type Flags=match_default | format_default )
  183. {
  184. return ::boost::algorithm::find_format_all_copy(
  185. Input,
  186. ::boost::algorithm::regex_finder( Rx, Flags ),
  187. ::boost::algorithm::regex_formatter( Format, Flags ) );
  188. }
  189. //! Replace all regex algorithm
  190. /*!
  191. Format all substrings, matching given regex, with the specified format.
  192. The input string is modified in-place.
  193. \param Input An input string
  194. \param Rx A regular expression
  195. \param Format Regex format definition
  196. \param Flags Regex options
  197. */
  198. template<
  199. typename SequenceT,
  200. typename CharT,
  201. typename RegexTraitsT,
  202. typename FormatStringTraitsT, typename FormatStringAllocatorT >
  203. inline void replace_all_regex(
  204. SequenceT& Input,
  205. const basic_regex<CharT, RegexTraitsT>& Rx,
  206. const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
  207. match_flag_type Flags=match_default | format_default )
  208. {
  209. ::boost::algorithm::find_format_all(
  210. Input,
  211. ::boost::algorithm::regex_finder( Rx, Flags ),
  212. ::boost::algorithm::regex_formatter( Format, Flags ) );
  213. }
  214. // erase_regex --------------------------------------------------------------------//
  215. //! Erase regex algorithm
  216. /*!
  217. Remove a substring matching given regex from the input.
  218. The result is a modified copy of the input. It is returned as a sequence
  219. or copied to the output iterator.
  220. \param Output An output iterator to which the result will be copied
  221. \param Input An input string
  222. \param Rx A regular expression
  223. \param Flags Regex options
  224. \return An output iterator pointing just after the last inserted character or
  225. a modified copy of the input
  226. \note The second variant of this function provides the strong exception-safety guarantee
  227. */
  228. template<
  229. typename OutputIteratorT,
  230. typename RangeT,
  231. typename CharT,
  232. typename RegexTraitsT >
  233. inline OutputIteratorT erase_regex_copy(
  234. OutputIteratorT Output,
  235. const RangeT& Input,
  236. const basic_regex<CharT, RegexTraitsT>& Rx,
  237. match_flag_type Flags=match_default )
  238. {
  239. return ::boost::algorithm::find_format_copy(
  240. Output,
  241. Input,
  242. ::boost::algorithm::regex_finder( Rx, Flags ),
  243. ::boost::algorithm::empty_formatter( Input ) );
  244. }
  245. //! Erase regex algorithm
  246. /*!
  247. \overload
  248. */
  249. template<
  250. typename SequenceT,
  251. typename CharT,
  252. typename RegexTraitsT >
  253. inline SequenceT erase_regex_copy(
  254. const SequenceT& Input,
  255. const basic_regex<CharT, RegexTraitsT>& Rx,
  256. match_flag_type Flags=match_default )
  257. {
  258. return ::boost::algorithm::find_format_copy(
  259. Input,
  260. ::boost::algorithm::regex_finder( Rx, Flags ),
  261. ::boost::algorithm::empty_formatter( Input ) );
  262. }
  263. //! Erase regex algorithm
  264. /*!
  265. Remove a substring matching given regex from the input.
  266. The input string is modified in-place.
  267. \param Input An input string
  268. \param Rx A regular expression
  269. \param Flags Regex options
  270. */
  271. template<
  272. typename SequenceT,
  273. typename CharT,
  274. typename RegexTraitsT >
  275. inline void erase_regex(
  276. SequenceT& Input,
  277. const basic_regex<CharT, RegexTraitsT>& Rx,
  278. match_flag_type Flags=match_default )
  279. {
  280. ::boost::algorithm::find_format(
  281. Input,
  282. ::boost::algorithm::regex_finder( Rx, Flags ),
  283. ::boost::algorithm::empty_formatter( Input ) );
  284. }
  285. // erase_all_regex --------------------------------------------------------------------//
  286. //! Erase all regex algorithm
  287. /*!
  288. Erase all substrings, matching given regex, from the input.
  289. The result is a modified copy of the input. It is returned as a sequence
  290. or copied to the output iterator.
  291. \param Output An output iterator to which the result will be copied
  292. \param Input An input string
  293. \param Rx A regular expression
  294. \param Flags Regex options
  295. \return An output iterator pointing just after the last inserted character or
  296. a modified copy of the input
  297. \note The second variant of this function provides the strong exception-safety guarantee
  298. */
  299. template<
  300. typename OutputIteratorT,
  301. typename RangeT,
  302. typename CharT,
  303. typename RegexTraitsT >
  304. inline OutputIteratorT erase_all_regex_copy(
  305. OutputIteratorT Output,
  306. const RangeT& Input,
  307. const basic_regex<CharT, RegexTraitsT>& Rx,
  308. match_flag_type Flags=match_default )
  309. {
  310. return ::boost::algorithm::find_format_all_copy(
  311. Output,
  312. Input,
  313. ::boost::algorithm::regex_finder( Rx, Flags ),
  314. ::boost::algorithm::empty_formatter( Input ) );
  315. }
  316. //! Erase all regex algorithm
  317. /*!
  318. \overload
  319. */
  320. template<
  321. typename SequenceT,
  322. typename CharT,
  323. typename RegexTraitsT >
  324. inline SequenceT erase_all_regex_copy(
  325. const SequenceT& Input,
  326. const basic_regex<CharT, RegexTraitsT>& Rx,
  327. match_flag_type Flags=match_default )
  328. {
  329. return ::boost::algorithm::find_format_all_copy(
  330. Input,
  331. ::boost::algorithm::regex_finder( Rx, Flags ),
  332. ::boost::algorithm::empty_formatter( Input ) );
  333. }
  334. //! Erase all regex algorithm
  335. /*!
  336. Erase all substrings, matching given regex, from the input.
  337. The input string is modified in-place.
  338. \param Input An input string
  339. \param Rx A regular expression
  340. \param Flags Regex options
  341. */
  342. template<
  343. typename SequenceT,
  344. typename CharT,
  345. typename RegexTraitsT>
  346. inline void erase_all_regex(
  347. SequenceT& Input,
  348. const basic_regex<CharT, RegexTraitsT>& Rx,
  349. match_flag_type Flags=match_default )
  350. {
  351. ::boost::algorithm::find_format_all(
  352. Input,
  353. ::boost::algorithm::regex_finder( Rx, Flags ),
  354. ::boost::algorithm::empty_formatter( Input ) );
  355. }
  356. // find_all_regex ------------------------------------------------------------------//
  357. //! Find all regex algorithm
  358. /*!
  359. This algorithm finds all substrings matching the give regex
  360. in the input.
  361. Each part is copied and added as a new element to the output container.
  362. Thus the result container must be able to hold copies
  363. of the matches (in a compatible structure like std::string) or
  364. a reference to it (e.g. using the iterator range class).
  365. Examples of such a container are \c std::vector<std::string>
  366. or \c std::list<boost::iterator_range<std::string::iterator>>
  367. \param Result A container that can hold copies of references to the substrings.
  368. \param Input A container which will be searched.
  369. \param Rx A regular expression
  370. \param Flags Regex options
  371. \return A reference to the result
  372. \note Prior content of the result will be overwritten.
  373. \note This function provides the strong exception-safety guarantee
  374. */
  375. template<
  376. typename SequenceSequenceT,
  377. typename RangeT,
  378. typename CharT,
  379. typename RegexTraitsT >
  380. inline SequenceSequenceT& find_all_regex(
  381. SequenceSequenceT& Result,
  382. const RangeT& Input,
  383. const basic_regex<CharT, RegexTraitsT>& Rx,
  384. match_flag_type Flags=match_default )
  385. {
  386. return ::boost::algorithm::iter_find(
  387. Result,
  388. Input,
  389. ::boost::algorithm::regex_finder(Rx,Flags) );
  390. }
  391. // split_regex ------------------------------------------------------------------//
  392. //! Split regex algorithm
  393. /*!
  394. Tokenize expression. This function is equivalent to C strtok. Input
  395. sequence is split into tokens, separated by separators. Separator
  396. is an every match of the given regex.
  397. Each part is copied and added as a new element to the output container.
  398. Thus the result container must be able to hold copies
  399. of the matches (in a compatible structure like std::string) or
  400. a reference to it (e.g. using the iterator range class).
  401. Examples of such a container are \c std::vector<std::string>
  402. or \c std::list<boost::iterator_range<std::string::iterator>>
  403. \param Result A container that can hold copies of references to the substrings.
  404. \param Input A container which will be searched.
  405. \param Rx A regular expression
  406. \param Flags Regex options
  407. \return A reference to the result
  408. \note Prior content of the result will be overwritten.
  409. \note This function provides the strong exception-safety guarantee
  410. */
  411. template<
  412. typename SequenceSequenceT,
  413. typename RangeT,
  414. typename CharT,
  415. typename RegexTraitsT >
  416. inline SequenceSequenceT& split_regex(
  417. SequenceSequenceT& Result,
  418. const RangeT& Input,
  419. const basic_regex<CharT, RegexTraitsT>& Rx,
  420. match_flag_type Flags=match_default )
  421. {
  422. return ::boost::algorithm::iter_split(
  423. Result,
  424. Input,
  425. ::boost::algorithm::regex_finder(Rx,Flags) );
  426. }
  427. // join_if ------------------------------------------------------------------//
  428. #ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
  429. //! Conditional join algorithm
  430. /*!
  431. This algorithm joins all strings in a 'list' into one long string.
  432. Segments are concatenated by given separator. Only segments that
  433. match the given regular expression will be added to the result
  434. This is a specialization of join_if algorithm.
  435. \param Input A container that holds the input strings. It must be a container-of-containers.
  436. \param Separator A string that will separate the joined segments.
  437. \param Rx A regular expression
  438. \param Flags Regex options
  439. \return Concatenated string.
  440. \note This function provides the strong exception-safety guarantee
  441. */
  442. template<
  443. typename SequenceSequenceT,
  444. typename Range1T,
  445. typename CharT,
  446. typename RegexTraitsT >
  447. inline typename range_value<SequenceSequenceT>::type
  448. join_if(
  449. const SequenceSequenceT& Input,
  450. const Range1T& Separator,
  451. const basic_regex<CharT, RegexTraitsT>& Rx,
  452. match_flag_type Flags=match_default )
  453. {
  454. // Define working types
  455. typedef typename range_value<SequenceSequenceT>::type ResultT;
  456. typedef typename range_const_iterator<SequenceSequenceT>::type InputIteratorT;
  457. // Parse input
  458. InputIteratorT itBegin=::boost::begin(Input);
  459. InputIteratorT itEnd=::boost::end(Input);
  460. // Construct container to hold the result
  461. ResultT Result;
  462. // Roll to the first element that will be added
  463. while(
  464. itBegin!=itEnd &&
  465. !::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)) ++itBegin;
  466. // Add this element
  467. if(itBegin!=itEnd)
  468. {
  469. detail::insert(Result, ::boost::end(Result), *itBegin);
  470. ++itBegin;
  471. }
  472. for(;itBegin!=itEnd; ++itBegin)
  473. {
  474. if(::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags))
  475. {
  476. // Add separator
  477. detail::insert(Result, ::boost::end(Result), ::boost::as_literal(Separator));
  478. // Add element
  479. detail::insert(Result, ::boost::end(Result), *itBegin);
  480. }
  481. }
  482. return Result;
  483. }
  484. #else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
  485. //! Conditional join algorithm
  486. /*!
  487. This algorithm joins all strings in a 'list' into one long string.
  488. Segments are concatenated by given separator. Only segments that
  489. match the given regular expression will be added to the result
  490. This is a specialization of join_if algorithm.
  491. \param Input A container that holds the input strings. It must be a container-of-containers.
  492. \param Separator A string that will separate the joined segments.
  493. \param Rx A regular expression
  494. \param Flags Regex options
  495. \return Concatenated string.
  496. \note This function provides the strong exception-safety guarantee
  497. */
  498. template<
  499. typename SequenceSequenceT,
  500. typename Range1T,
  501. typename CharT,
  502. typename RegexTraitsT >
  503. inline typename range_value<SequenceSequenceT>::type
  504. join_if_regex(
  505. const SequenceSequenceT& Input,
  506. const Range1T& Separator,
  507. const basic_regex<CharT, RegexTraitsT>& Rx,
  508. match_flag_type Flags=match_default )
  509. {
  510. // Define working types
  511. typedef typename range_value<SequenceSequenceT>::type ResultT;
  512. typedef typename range_const_iterator<SequenceSequenceT>::type InputIteratorT;
  513. // Parse input
  514. InputIteratorT itBegin=::boost::begin(Input);
  515. InputIteratorT itEnd=::boost::end(Input);
  516. // Construct container to hold the result
  517. ResultT Result;
  518. // Roll to the first element that will be added
  519. while(
  520. itBegin!=itEnd &&
  521. !::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)) ++itBegin;
  522. // Add this element
  523. if(itBegin!=itEnd)
  524. {
  525. detail::insert(Result, ::boost::end(Result), *itBegin);
  526. ++itBegin;
  527. }
  528. for(;itBegin!=itEnd; ++itBegin)
  529. {
  530. if(::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags))
  531. {
  532. // Add separator
  533. detail::insert(Result, ::boost::end(Result), ::boost::as_literal(Separator));
  534. // Add element
  535. detail::insert(Result, ::boost::end(Result), *itBegin);
  536. }
  537. }
  538. return Result;
  539. }
  540. #endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
  541. } // namespace algorithm
  542. // pull names into the boost namespace
  543. using algorithm::find_regex;
  544. using algorithm::replace_regex;
  545. using algorithm::replace_regex_copy;
  546. using algorithm::replace_all_regex;
  547. using algorithm::replace_all_regex_copy;
  548. using algorithm::erase_regex;
  549. using algorithm::erase_regex_copy;
  550. using algorithm::erase_all_regex;
  551. using algorithm::erase_all_regex_copy;
  552. using algorithm::find_all_regex;
  553. using algorithm::split_regex;
  554. #ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
  555. using algorithm::join_if;
  556. #else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
  557. using algorithm::join_if_regex;
  558. #endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
  559. } // namespace boost
  560. #endif // BOOST_STRING_REGEX_HPP