You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

regex_constants.h 14KB

3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. // class template regex -*- C++ -*-
  2. // Copyright (C) 2010-2020 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /**
  21. * @file bits/regex_constants.h
  22. * @brief Constant definitions for the std regex library.
  23. *
  24. * This is an internal header file, included by other library headers.
  25. * Do not attempt to use it directly. @headername{regex}
  26. */
  27. namespace std _GLIBCXX_VISIBILITY(default)
  28. {
  29. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  30. /**
  31. * @defgroup regex Regular Expressions
  32. *
  33. * A facility for performing regular expression pattern matching.
  34. * @{
  35. */
  36. /**
  37. * @namespace std::regex_constants
  38. * @brief ISO C++ 2011 namespace for options and flags used with std::regex
  39. */
  40. namespace regex_constants
  41. {
  42. /**
  43. * @name 5.1 Regular Expression Syntax Options
  44. */
  45. //@{
  /**
   * Bit positions of the individual syntax options.  Each public
   * syntax_option_type constant below is defined as 1 shifted left by the
   * corresponding enumerator.  _S_syntax_last is the final enumerator and
   * therefore equals the number of option bits.
   */
  enum __syntax_option
  {
    _S_icase,
    _S_nosubs,
    _S_optimize,
    _S_collate,
    _S_ECMAScript,
    _S_basic,
    _S_extended,
    _S_awk,
    _S_grep,
    _S_egrep,
    _S_polynomial,
    _S_syntax_last
  };

  /**
   * @brief This is a bitmask type indicating how to interpret the regex.
   *
   * The @c syntax_option_type is implementation defined but it is valid to
   * perform bitwise operations on these values and expect the right thing to
   * happen.
   *
   * A valid value of type syntax_option_type shall have exactly one of the
   * elements @c ECMAScript, @c basic, @c extended, @c awk, @c grep, @c egrep
   * %set.
   */
  enum syntax_option_type : unsigned int { };

  /**
   * Specifies that the matching of regular expressions against a character
   * sequence shall be performed without regard to case.
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type icase =
    static_cast<syntax_option_type>(1 << _S_icase);

  /**
   * Specifies that when a regular expression is matched against a character
   * container sequence, no sub-expression matches are to be stored in the
   * supplied match_results structure.
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type nosubs =
    static_cast<syntax_option_type>(1 << _S_nosubs);

  /**
   * Specifies that the regular expression engine should pay more attention to
   * the speed with which regular expressions are matched, and less to the
   * speed with which regular expression objects are constructed. Otherwise
   * it has no detectable effect on the program output.
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type optimize =
    static_cast<syntax_option_type>(1 << _S_optimize);

  /**
   * Specifies that character ranges of the form [a-b] should be locale
   * sensitive.
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type collate =
    static_cast<syntax_option_type>(1 << _S_collate);

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by ECMAScript in ECMA-262 [Ecma International, ECMAScript
   * Language Specification, Standard Ecma-262, third edition, 1999], as
   * modified in section [28.13]. This grammar is similar to that defined
   * in the PERL scripting language but extended with elements found in the
   * POSIX regular expression grammar.
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type ECMAScript =
    static_cast<syntax_option_type>(1 << _S_ECMAScript);

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by POSIX basic regular expressions in IEEE Std 1003.1-2001,
   * Portable Operating System Interface (POSIX), Base Definitions and
   * Headers, Section 9, Regular Expressions [IEEE, Information Technology --
   * Portable Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type basic =
    static_cast<syntax_option_type>(1 << _S_basic);

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by POSIX extended regular expressions in IEEE Std 1003.1-2001,
   * Portable Operating System Interface (POSIX), Base Definitions and
   * Headers, Section 9, Regular Expressions.
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type extended =
    static_cast<syntax_option_type>(1 << _S_extended);

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by POSIX utility awk in IEEE Std 1003.1-2001. This option is
   * identical to syntax_option_type extended, except that C-style escape
   * sequences are supported. These sequences are:
   * \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\&apos;, &apos;,
   * and \\ddd (where ddd is one, two, or three octal digits).
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type awk =
    static_cast<syntax_option_type>(1 << _S_awk);

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by POSIX utility grep in IEEE Std 1003.1-2001. This option is
   * identical to syntax_option_type basic, except that newlines are treated
   * as whitespace.
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type grep =
    static_cast<syntax_option_type>(1 << _S_grep);

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by POSIX utility grep when given the -E option in
   * IEEE Std 1003.1-2001. This option is identical to syntax_option_type
   * extended, except that newlines are treated as whitespace.
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type egrep =
    static_cast<syntax_option_type>(1 << _S_egrep);

  /**
   * Extension: Ensure both space complexity of compiled regex and
   * time complexity execution are not exponential.
   * If specified in a regex with back-references, the exception
   * regex_constants::error_complexity will be thrown.
   */
  _GLIBCXX17_INLINE constexpr syntax_option_type __polynomial =
    static_cast<syntax_option_type>(1 << _S_polynomial);

  // Bitmask operators.  Each one converts to the underlying unsigned int,
  // applies the built-in operator and converts back; none of them can throw.

  /// Bitwise AND: the options set in both @a __a and @a __b.
  constexpr inline syntax_option_type
  operator&(syntax_option_type __a, syntax_option_type __b) noexcept
  {
    return static_cast<syntax_option_type>(static_cast<unsigned int>(__a)
                                           & static_cast<unsigned int>(__b));
  }

  /// Bitwise OR: the options set in either @a __a or @a __b.
  constexpr inline syntax_option_type
  operator|(syntax_option_type __a, syntax_option_type __b) noexcept
  {
    return static_cast<syntax_option_type>(static_cast<unsigned int>(__a)
                                           | static_cast<unsigned int>(__b));
  }

  /// Bitwise XOR: the options set in exactly one of @a __a and @a __b.
  constexpr inline syntax_option_type
  operator^(syntax_option_type __a, syntax_option_type __b) noexcept
  {
    return static_cast<syntax_option_type>(static_cast<unsigned int>(__a)
                                           ^ static_cast<unsigned int>(__b));
  }

  /// Bitwise complement of every bit of the underlying unsigned int.
  constexpr inline syntax_option_type
  operator~(syntax_option_type __a) noexcept
  { return static_cast<syntax_option_type>(~static_cast<unsigned int>(__a)); }

  /// Stores @c __a & __b in @a __a and returns a reference to @a __a.
  _GLIBCXX14_CONSTEXPR
  inline syntax_option_type&
  operator&=(syntax_option_type& __a, syntax_option_type __b) noexcept
  { return __a = __a & __b; }

  /// Stores @c __a | __b in @a __a and returns a reference to @a __a.
  _GLIBCXX14_CONSTEXPR
  inline syntax_option_type&
  operator|=(syntax_option_type& __a, syntax_option_type __b) noexcept
  { return __a = __a | __b; }

  /// Stores @c __a ^ __b in @a __a and returns a reference to @a __a.
  _GLIBCXX14_CONSTEXPR
  inline syntax_option_type&
  operator^=(syntax_option_type& __a, syntax_option_type __b) noexcept
  { return __a = __a ^ __b; }
  191. //@}
  192. /**
  193. * @name 5.2 Matching Rules
  194. *
  195. * Matching a regular expression against a sequence of characters [first,
  196. * last) proceeds according to the rules of the grammar specified for the
  197. * regular expression object, modified according to the effects listed
  198. * below for any bitmask elements set.
  199. *
  200. */
  201. //@{
  /**
   * Bit positions of the individual match and format flags.  Each non-default
   * match_flag_type constant below is defined as 1 shifted left by the
   * corresponding enumerator.  _S_match_flag_last is the final enumerator and
   * therefore equals the number of flag bits.
   */
  enum __match_flag
  {
    _S_not_bol,
    _S_not_eol,
    _S_not_bow,
    _S_not_eow,
    _S_any,
    _S_not_null,
    _S_continuous,
    _S_prev_avail,
    _S_sed,
    _S_no_copy,
    _S_first_only,
    _S_match_flag_last
  };

  /**
   * @brief This is a bitmask type indicating regex matching rules.
   *
   * The @c match_flag_type is implementation defined but it is valid to
   * perform bitwise operations on these values and expect the right thing to
   * happen.
   */
  enum match_flag_type : unsigned int { };

  /**
   * The default matching rules.
   */
  _GLIBCXX17_INLINE constexpr match_flag_type match_default =
    static_cast<match_flag_type>(0);

  /**
   * The first character in the sequence [first, last) is treated as though it
   * is not at the beginning of a line, so the character (^) in the regular
   * expression shall not match [first, first).
   */
  _GLIBCXX17_INLINE constexpr match_flag_type match_not_bol =
    static_cast<match_flag_type>(1 << _S_not_bol);

  /**
   * The last character in the sequence [first, last) is treated as though it
   * is not at the end of a line, so the character ($) in the regular
   * expression shall not match [last, last).
   */
  _GLIBCXX17_INLINE constexpr match_flag_type match_not_eol =
    static_cast<match_flag_type>(1 << _S_not_eol);

  /**
   * The expression \\b is not matched against the sub-sequence
   * [first,first).
   */
  _GLIBCXX17_INLINE constexpr match_flag_type match_not_bow =
    static_cast<match_flag_type>(1 << _S_not_bow);

  /**
   * The expression \\b should not be matched against the sub-sequence
   * [last,last).
   */
  _GLIBCXX17_INLINE constexpr match_flag_type match_not_eow =
    static_cast<match_flag_type>(1 << _S_not_eow);

  /**
   * If more than one match is possible then any match is an acceptable
   * result.
   */
  _GLIBCXX17_INLINE constexpr match_flag_type match_any =
    static_cast<match_flag_type>(1 << _S_any);

  /**
   * The expression does not match an empty sequence.
   */
  _GLIBCXX17_INLINE constexpr match_flag_type match_not_null =
    static_cast<match_flag_type>(1 << _S_not_null);

  /**
   * The expression only matches a sub-sequence that begins at first.
   */
  _GLIBCXX17_INLINE constexpr match_flag_type match_continuous =
    static_cast<match_flag_type>(1 << _S_continuous);

  /**
   * --first is a valid iterator position. When this flag is set then the
   * flags match_not_bol and match_not_bow are ignored by the regular
   * expression algorithms 28.11 and iterators 28.12.
   */
  _GLIBCXX17_INLINE constexpr match_flag_type match_prev_avail =
    static_cast<match_flag_type>(1 << _S_prev_avail);

  /**
   * When a regular expression match is to be replaced by a new string, the
   * new string is constructed using the rules used by the ECMAScript replace
   * function in ECMA-262 [Ecma International, ECMAScript Language
   * Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11
   * String.prototype.replace. In addition, during search and replace
   * operations all non-overlapping occurrences of the regular expression
   * are located and replaced, and sections of the input that did not match
   * the expression are copied unchanged to the output string.
   *
   * Format strings (from ECMA-262 [15.5.4.11]):
   * @li $$ The dollar-sign itself ($)
   * @li $& The matched substring.
   * @li $` The portion of @a string that precedes the matched substring.
   * This would be match_results::prefix().
   * @li $' The portion of @a string that follows the matched substring.
   * This would be match_results::suffix().
   * @li $n The nth capture, where n is in [1,9] and $n is not followed by a
   * decimal digit. If n <= match_results::size() and the nth capture
   * is undefined, use the empty string instead. If n >
   * match_results::size(), the result is implementation-defined.
   * @li $nn The nnth capture, where nn is a two-digit decimal number on
   * [01, 99]. If nn <= match_results::size() and the nnth capture is
   * undefined, use the empty string instead. If
   * nn > match_results::size(), the result is implementation-defined.
   */
  _GLIBCXX17_INLINE constexpr match_flag_type format_default =
    static_cast<match_flag_type>(0);

  /**
   * When a regular expression match is to be replaced by a new string, the
   * new string is constructed using the rules used by the POSIX sed utility
   * in IEEE Std 1003.1-2001 [IEEE, Information Technology -- Portable
   * Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
   */
  _GLIBCXX17_INLINE constexpr match_flag_type format_sed =
    static_cast<match_flag_type>(1 << _S_sed);

  /**
   * During a search and replace operation, sections of the character
   * container sequence being searched that do not match the regular
   * expression shall not be copied to the output string.
   */
  _GLIBCXX17_INLINE constexpr match_flag_type format_no_copy =
    static_cast<match_flag_type>(1 << _S_no_copy);

  /**
   * When specified during a search and replace operation, only the first
   * occurrence of the regular expression shall be replaced.
   */
  _GLIBCXX17_INLINE constexpr match_flag_type format_first_only =
    static_cast<match_flag_type>(1 << _S_first_only);

  // Bitmask operators.  Each one converts to the underlying unsigned int,
  // applies the built-in operator and converts back; none of them can throw.

  /// Bitwise AND: the flags set in both @a __a and @a __b.
  constexpr inline match_flag_type
  operator&(match_flag_type __a, match_flag_type __b) noexcept
  {
    return static_cast<match_flag_type>(static_cast<unsigned int>(__a)
                                        & static_cast<unsigned int>(__b));
  }

  /// Bitwise OR: the flags set in either @a __a or @a __b.
  constexpr inline match_flag_type
  operator|(match_flag_type __a, match_flag_type __b) noexcept
  {
    return static_cast<match_flag_type>(static_cast<unsigned int>(__a)
                                        | static_cast<unsigned int>(__b));
  }

  /// Bitwise XOR: the flags set in exactly one of @a __a and @a __b.
  constexpr inline match_flag_type
  operator^(match_flag_type __a, match_flag_type __b) noexcept
  {
    return static_cast<match_flag_type>(static_cast<unsigned int>(__a)
                                        ^ static_cast<unsigned int>(__b));
  }

  /// Bitwise complement of every bit of the underlying unsigned int.
  constexpr inline match_flag_type
  operator~(match_flag_type __a) noexcept
  { return static_cast<match_flag_type>(~static_cast<unsigned int>(__a)); }

  /// Stores @c __a & __b in @a __a and returns a reference to @a __a.
  _GLIBCXX14_CONSTEXPR
  inline match_flag_type&
  operator&=(match_flag_type& __a, match_flag_type __b) noexcept
  { return __a = __a & __b; }

  /// Stores @c __a | __b in @a __a and returns a reference to @a __a.
  _GLIBCXX14_CONSTEXPR
  inline match_flag_type&
  operator|=(match_flag_type& __a, match_flag_type __b) noexcept
  { return __a = __a | __b; }

  /// Stores @c __a ^ __b in @a __a and returns a reference to @a __a.
  _GLIBCXX14_CONSTEXPR
  inline match_flag_type&
  operator^=(match_flag_type& __a, match_flag_type __b) noexcept
  { return __a = __a ^ __b; }
  358. //@}
  359. } // namespace regex_constants
  360. /* @} */ // group regex
  361. _GLIBCXX_END_NAMESPACE_VERSION
  362. } // namespace std