You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

regex.tcc 16KB

3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672
  1. // class template regex -*- C++ -*-
  2. // Copyright (C) 2013-2020 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /**
  21. * @file bits/regex.tcc
  22. * This is an internal header file, included by other library headers.
  23. * Do not attempt to use it directly. @headername{regex}
  24. */
  25. namespace std _GLIBCXX_VISIBILITY(default)
  26. {
  27. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  28. namespace __detail
  29. {
  30. /// @cond undocumented
  31. // Result of merging regex_match and regex_search.
  32. //
  33. // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
  34. // the other one if possible, for test purpose).
  35. //
  36. // That __match_mode is true means regex_match, else regex_search.
  37. template<typename _BiIter, typename _Alloc,
  38. typename _CharT, typename _TraitsT,
  39. _RegexExecutorPolicy __policy,
  40. bool __match_mode>
  41. bool
  42. __regex_algo_impl(_BiIter __s,
  43. _BiIter __e,
  44. match_results<_BiIter, _Alloc>& __m,
  45. const basic_regex<_CharT, _TraitsT>& __re,
  46. regex_constants::match_flag_type __flags)
  47. {
  48. if (__re._M_automaton == nullptr)
  49. return false;
  50. typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
  51. __m._M_begin = __s;
  52. __m._M_resize(__re._M_automaton->_M_sub_count());
  53. bool __ret;
  54. if ((__re.flags() & regex_constants::__polynomial)
  55. || (__policy == _RegexExecutorPolicy::_S_alternate
  56. && !__re._M_automaton->_M_has_backref))
  57. {
  58. _Executor<_BiIter, _Alloc, _TraitsT, false>
  59. __executor(__s, __e, __m, __re, __flags);
  60. if (__match_mode)
  61. __ret = __executor._M_match();
  62. else
  63. __ret = __executor._M_search();
  64. }
  65. else
  66. {
  67. _Executor<_BiIter, _Alloc, _TraitsT, true>
  68. __executor(__s, __e, __m, __re, __flags);
  69. if (__match_mode)
  70. __ret = __executor._M_match();
  71. else
  72. __ret = __executor._M_search();
  73. }
  74. if (__ret)
  75. {
  76. for (auto& __it : __res)
  77. if (!__it.matched)
  78. __it.first = __it.second = __e;
  79. auto& __pre = __m._M_prefix();
  80. auto& __suf = __m._M_suffix();
  81. if (__match_mode)
  82. {
  83. __pre.matched = false;
  84. __pre.first = __s;
  85. __pre.second = __s;
  86. __suf.matched = false;
  87. __suf.first = __e;
  88. __suf.second = __e;
  89. }
  90. else
  91. {
  92. __pre.first = __s;
  93. __pre.second = __res[0].first;
  94. __pre.matched = (__pre.first != __pre.second);
  95. __suf.first = __res[0].second;
  96. __suf.second = __e;
  97. __suf.matched = (__suf.first != __suf.second);
  98. }
  99. }
  100. else
  101. {
  102. __m._M_establish_failed_match(__e);
  103. }
  104. return __ret;
  105. }
  106. /// @endcond
  107. } // namespace __detail
  108. /// @cond
  109. template<typename _Ch_type>
  110. template<typename _Fwd_iter>
  111. typename regex_traits<_Ch_type>::string_type
  112. regex_traits<_Ch_type>::
  113. lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
  114. {
  115. typedef std::ctype<char_type> __ctype_type;
  116. const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
  117. static const char* __collatenames[] =
  118. {
  119. "NUL",
  120. "SOH",
  121. "STX",
  122. "ETX",
  123. "EOT",
  124. "ENQ",
  125. "ACK",
  126. "alert",
  127. "backspace",
  128. "tab",
  129. "newline",
  130. "vertical-tab",
  131. "form-feed",
  132. "carriage-return",
  133. "SO",
  134. "SI",
  135. "DLE",
  136. "DC1",
  137. "DC2",
  138. "DC3",
  139. "DC4",
  140. "NAK",
  141. "SYN",
  142. "ETB",
  143. "CAN",
  144. "EM",
  145. "SUB",
  146. "ESC",
  147. "IS4",
  148. "IS3",
  149. "IS2",
  150. "IS1",
  151. "space",
  152. "exclamation-mark",
  153. "quotation-mark",
  154. "number-sign",
  155. "dollar-sign",
  156. "percent-sign",
  157. "ampersand",
  158. "apostrophe",
  159. "left-parenthesis",
  160. "right-parenthesis",
  161. "asterisk",
  162. "plus-sign",
  163. "comma",
  164. "hyphen",
  165. "period",
  166. "slash",
  167. "zero",
  168. "one",
  169. "two",
  170. "three",
  171. "four",
  172. "five",
  173. "six",
  174. "seven",
  175. "eight",
  176. "nine",
  177. "colon",
  178. "semicolon",
  179. "less-than-sign",
  180. "equals-sign",
  181. "greater-than-sign",
  182. "question-mark",
  183. "commercial-at",
  184. "A",
  185. "B",
  186. "C",
  187. "D",
  188. "E",
  189. "F",
  190. "G",
  191. "H",
  192. "I",
  193. "J",
  194. "K",
  195. "L",
  196. "M",
  197. "N",
  198. "O",
  199. "P",
  200. "Q",
  201. "R",
  202. "S",
  203. "T",
  204. "U",
  205. "V",
  206. "W",
  207. "X",
  208. "Y",
  209. "Z",
  210. "left-square-bracket",
  211. "backslash",
  212. "right-square-bracket",
  213. "circumflex",
  214. "underscore",
  215. "grave-accent",
  216. "a",
  217. "b",
  218. "c",
  219. "d",
  220. "e",
  221. "f",
  222. "g",
  223. "h",
  224. "i",
  225. "j",
  226. "k",
  227. "l",
  228. "m",
  229. "n",
  230. "o",
  231. "p",
  232. "q",
  233. "r",
  234. "s",
  235. "t",
  236. "u",
  237. "v",
  238. "w",
  239. "x",
  240. "y",
  241. "z",
  242. "left-curly-bracket",
  243. "vertical-line",
  244. "right-curly-bracket",
  245. "tilde",
  246. "DEL",
  247. };
  248. string __s;
  249. for (; __first != __last; ++__first)
  250. __s += __fctyp.narrow(*__first, 0);
  251. for (const auto& __it : __collatenames)
  252. if (__s == __it)
  253. return string_type(1, __fctyp.widen(
  254. static_cast<char>(&__it - __collatenames)));
  255. // TODO Add digraph support:
  256. // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
  257. return string_type();
  258. }
  259. template<typename _Ch_type>
  260. template<typename _Fwd_iter>
  261. typename regex_traits<_Ch_type>::char_class_type
  262. regex_traits<_Ch_type>::
  263. lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
  264. {
  265. typedef std::ctype<char_type> __ctype_type;
  266. const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
  267. // Mappings from class name to class mask.
  268. static const pair<const char*, char_class_type> __classnames[] =
  269. {
  270. {"d", ctype_base::digit},
  271. {"w", {ctype_base::alnum, _RegexMask::_S_under}},
  272. {"s", ctype_base::space},
  273. {"alnum", ctype_base::alnum},
  274. {"alpha", ctype_base::alpha},
  275. {"blank", ctype_base::blank},
  276. {"cntrl", ctype_base::cntrl},
  277. {"digit", ctype_base::digit},
  278. {"graph", ctype_base::graph},
  279. {"lower", ctype_base::lower},
  280. {"print", ctype_base::print},
  281. {"punct", ctype_base::punct},
  282. {"space", ctype_base::space},
  283. {"upper", ctype_base::upper},
  284. {"xdigit", ctype_base::xdigit},
  285. };
  286. string __s;
  287. for (; __first != __last; ++__first)
  288. __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
  289. for (const auto& __it : __classnames)
  290. if (__s == __it.first)
  291. {
  292. if (__icase
  293. && ((__it.second
  294. & (ctype_base::lower | ctype_base::upper)) != 0))
  295. return ctype_base::alpha;
  296. return __it.second;
  297. }
  298. return 0;
  299. }
  300. template<typename _Ch_type>
  301. bool
  302. regex_traits<_Ch_type>::
  303. isctype(_Ch_type __c, char_class_type __f) const
  304. {
  305. typedef std::ctype<char_type> __ctype_type;
  306. const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
  307. return __fctyp.is(__f._M_base, __c)
  308. // [[:w:]]
  309. || ((__f._M_extended & _RegexMask::_S_under)
  310. && __c == __fctyp.widen('_'));
  311. }
  312. template<typename _Ch_type>
  313. int
  314. regex_traits<_Ch_type>::
  315. value(_Ch_type __ch, int __radix) const
  316. {
  317. std::basic_istringstream<char_type> __is(string_type(1, __ch));
  318. long __v;
  319. if (__radix == 8)
  320. __is >> std::oct;
  321. else if (__radix == 16)
  322. __is >> std::hex;
  323. __is >> __v;
  324. return __is.fail() ? -1 : __v;
  325. }
  326. template<typename _Bi_iter, typename _Alloc>
  327. template<typename _Out_iter>
  328. _Out_iter
  329. match_results<_Bi_iter, _Alloc>::
  330. format(_Out_iter __out,
  331. const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
  332. const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
  333. match_flag_type __flags) const
  334. {
  335. __glibcxx_assert( ready() );
  336. regex_traits<char_type> __traits;
  337. typedef std::ctype<char_type> __ctype_type;
  338. const __ctype_type&
  339. __fctyp(use_facet<__ctype_type>(__traits.getloc()));
  340. auto __output = [&](size_t __idx)
  341. {
  342. auto& __sub = (*this)[__idx];
  343. if (__sub.matched)
  344. __out = std::copy(__sub.first, __sub.second, __out);
  345. };
  346. if (__flags & regex_constants::format_sed)
  347. {
  348. bool __escaping = false;
  349. for (; __fmt_first != __fmt_last; __fmt_first++)
  350. {
  351. if (__escaping)
  352. {
  353. __escaping = false;
  354. if (__fctyp.is(__ctype_type::digit, *__fmt_first))
  355. __output(__traits.value(*__fmt_first, 10));
  356. else
  357. *__out++ = *__fmt_first;
  358. continue;
  359. }
  360. if (*__fmt_first == '\\')
  361. {
  362. __escaping = true;
  363. continue;
  364. }
  365. if (*__fmt_first == '&')
  366. {
  367. __output(0);
  368. continue;
  369. }
  370. *__out++ = *__fmt_first;
  371. }
  372. if (__escaping)
  373. *__out++ = '\\';
  374. }
  375. else
  376. {
  377. while (1)
  378. {
  379. auto __next = std::find(__fmt_first, __fmt_last, '$');
  380. if (__next == __fmt_last)
  381. break;
  382. __out = std::copy(__fmt_first, __next, __out);
  383. auto __eat = [&](char __ch) -> bool
  384. {
  385. if (*__next == __ch)
  386. {
  387. ++__next;
  388. return true;
  389. }
  390. return false;
  391. };
  392. if (++__next == __fmt_last)
  393. *__out++ = '$';
  394. else if (__eat('$'))
  395. *__out++ = '$';
  396. else if (__eat('&'))
  397. __output(0);
  398. else if (__eat('`'))
  399. {
  400. auto& __sub = _M_prefix();
  401. if (__sub.matched)
  402. __out = std::copy(__sub.first, __sub.second, __out);
  403. }
  404. else if (__eat('\''))
  405. {
  406. auto& __sub = _M_suffix();
  407. if (__sub.matched)
  408. __out = std::copy(__sub.first, __sub.second, __out);
  409. }
  410. else if (__fctyp.is(__ctype_type::digit, *__next))
  411. {
  412. long __num = __traits.value(*__next, 10);
  413. if (++__next != __fmt_last
  414. && __fctyp.is(__ctype_type::digit, *__next))
  415. {
  416. __num *= 10;
  417. __num += __traits.value(*__next++, 10);
  418. }
  419. if (0 <= __num && __num < this->size())
  420. __output(__num);
  421. }
  422. else
  423. *__out++ = '$';
  424. __fmt_first = __next;
  425. }
  426. __out = std::copy(__fmt_first, __fmt_last, __out);
  427. }
  428. return __out;
  429. }
  // Copy [__first, __last) to __out, replacing every match of __e with the
  // expansion of the null-terminated format string __fmt.
  //
  // __flags is passed to both the search and the formatting step and
  // additionally controls:
  //   - format_no_copy:    text that is not part of a match is dropped,
  //   - format_first_only: only the first match is replaced.
  // Returns the advanced output iterator.
  template<typename _Out_iter, typename _Bi_iter,
           typename _Rx_traits, typename _Ch_type>
    _Out_iter
    regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
                  const basic_regex<_Ch_type, _Rx_traits>& __e,
                  const _Ch_type* __fmt,
                  regex_constants::match_flag_type __flags)
    {
      typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;

      // Evaluated once; unmatched text is copied unless format_no_copy.
      const bool __copy_unmatched
        = !(__flags & regex_constants::format_no_copy);

      _IterT __i(__first, __last, __e, __flags);
      _IterT __end;
      if (__i == __end)
        {
          // No match at all: the input passes through unchanged.
          if (__copy_unmatched)
            __out = std::copy(__first, __last, __out);
          return __out;
        }

      const auto __len = char_traits<_Ch_type>::length(__fmt);

      // Suffix of the most recent match.  Deliberately not called __last,
      // which would shadow the function parameter.
      sub_match<_Bi_iter> __tail;
      for (; __i != __end; ++__i)
        {
          if (__copy_unmatched)
            __out = std::copy(__i->prefix().first, __i->prefix().second,
                              __out);
          __out = __i->format(__out, __fmt, __fmt + __len, __flags);
          __tail = __i->suffix();
          if (__flags & regex_constants::format_first_only)
            break;
        }

      // Text following the last replaced match.
      if (__copy_unmatched)
        __out = std::copy(__tail.first, __tail.second, __out);
      return __out;
    }
  465. template<typename _Bi_iter,
  466. typename _Ch_type,
  467. typename _Rx_traits>
  468. bool
  469. regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  470. operator==(const regex_iterator& __rhs) const noexcept
  471. {
  472. if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
  473. return true;
  474. return _M_pregex == __rhs._M_pregex
  475. && _M_begin == __rhs._M_begin
  476. && _M_end == __rhs._M_end
  477. && _M_flags == __rhs._M_flags
  478. && _M_match[0] == __rhs._M_match[0];
  479. }
  480. template<typename _Bi_iter,
  481. typename _Ch_type,
  482. typename _Rx_traits>
  483. regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
  484. regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  485. operator++()
  486. {
  487. // In all cases in which the call to regex_search returns true,
  488. // match.prefix().first shall be equal to the previous value of
  489. // match[0].second, and for each index i in the half-open range
  490. // [0, match.size()) for which match[i].matched is true,
  491. // match[i].position() shall return distance(begin, match[i].first).
  492. // [28.12.1.4.5]
  493. if (_M_match[0].matched)
  494. {
  495. auto __start = _M_match[0].second;
  496. auto __prefix_first = _M_match[0].second;
  497. if (_M_match[0].first == _M_match[0].second)
  498. {
  499. if (__start == _M_end)
  500. {
  501. _M_pregex = nullptr;
  502. return *this;
  503. }
  504. else
  505. {
  506. if (regex_search(__start, _M_end, _M_match, *_M_pregex,
  507. _M_flags
  508. | regex_constants::match_not_null
  509. | regex_constants::match_continuous))
  510. {
  511. __glibcxx_assert(_M_match[0].matched);
  512. auto& __prefix = _M_match._M_prefix();
  513. __prefix.first = __prefix_first;
  514. __prefix.matched = __prefix.first != __prefix.second;
  515. // [28.12.1.4.5]
  516. _M_match._M_begin = _M_begin;
  517. return *this;
  518. }
  519. else
  520. ++__start;
  521. }
  522. }
  523. _M_flags |= regex_constants::match_prev_avail;
  524. if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
  525. {
  526. __glibcxx_assert(_M_match[0].matched);
  527. auto& __prefix = _M_match._M_prefix();
  528. __prefix.first = __prefix_first;
  529. __prefix.matched = __prefix.first != __prefix.second;
  530. // [28.12.1.4.5]
  531. _M_match._M_begin = _M_begin;
  532. }
  533. else
  534. _M_pregex = nullptr;
  535. }
  536. return *this;
  537. }
  538. template<typename _Bi_iter,
  539. typename _Ch_type,
  540. typename _Rx_traits>
  541. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
  542. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  543. operator=(const regex_token_iterator& __rhs)
  544. {
  545. _M_position = __rhs._M_position;
  546. _M_subs = __rhs._M_subs;
  547. _M_n = __rhs._M_n;
  548. _M_suffix = __rhs._M_suffix;
  549. _M_has_m1 = __rhs._M_has_m1;
  550. _M_normalize_result();
  551. return *this;
  552. }
  553. template<typename _Bi_iter,
  554. typename _Ch_type,
  555. typename _Rx_traits>
  556. bool
  557. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  558. operator==(const regex_token_iterator& __rhs) const
  559. {
  560. if (_M_end_of_seq() && __rhs._M_end_of_seq())
  561. return true;
  562. if (_M_suffix.matched && __rhs._M_suffix.matched
  563. && _M_suffix == __rhs._M_suffix)
  564. return true;
  565. if (_M_end_of_seq() || _M_suffix.matched
  566. || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
  567. return false;
  568. return _M_position == __rhs._M_position
  569. && _M_n == __rhs._M_n
  570. && _M_subs == __rhs._M_subs;
  571. }
  572. template<typename _Bi_iter,
  573. typename _Ch_type,
  574. typename _Rx_traits>
  575. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
  576. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  577. operator++()
  578. {
  579. _Position __prev = _M_position;
  580. if (_M_suffix.matched)
  581. *this = regex_token_iterator();
  582. else if (_M_n + 1 < _M_subs.size())
  583. {
  584. _M_n++;
  585. _M_result = &_M_current_match();
  586. }
  587. else
  588. {
  589. _M_n = 0;
  590. ++_M_position;
  591. if (_M_position != _Position())
  592. _M_result = &_M_current_match();
  593. else if (_M_has_m1 && __prev->suffix().length() != 0)
  594. {
  595. _M_suffix.matched = true;
  596. _M_suffix.first = __prev->suffix().first;
  597. _M_suffix.second = __prev->suffix().second;
  598. _M_result = &_M_suffix;
  599. }
  600. else
  601. *this = regex_token_iterator();
  602. }
  603. return *this;
  604. }
  605. template<typename _Bi_iter,
  606. typename _Ch_type,
  607. typename _Rx_traits>
  608. void
  609. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  610. _M_init(_Bi_iter __a, _Bi_iter __b)
  611. {
  612. _M_has_m1 = false;
  613. for (auto __it : _M_subs)
  614. if (__it == -1)
  615. {
  616. _M_has_m1 = true;
  617. break;
  618. }
  619. if (_M_position != _Position())
  620. _M_result = &_M_current_match();
  621. else if (_M_has_m1)
  622. {
  623. _M_suffix.matched = true;
  624. _M_suffix.first = __a;
  625. _M_suffix.second = __b;
  626. _M_result = &_M_suffix;
  627. }
  628. else
  629. _M_result = nullptr;
  630. }
  631. /// @endcond
  632. _GLIBCXX_END_NAMESPACE_VERSION
  633. } // namespace