You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

233 lines
7.5KB

  1. // class template regex -*- C++ -*-
  2. // Copyright (C) 2013-2020 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /**
  21. * @file bits/regex_automaton.tcc
  22. * This is an internal header file, included by other library headers.
  23. * Do not attempt to use it directly. @headername{regex}
  24. */
  25. namespace std _GLIBCXX_VISIBILITY(default)
  26. {
  27. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  28. namespace __detail
  29. {
  30. #ifdef _GLIBCXX_DEBUG
  31. inline std::ostream&
  32. _State_base::_M_print(std::ostream& ostr) const
  33. {
  34. switch (_M_opcode)
  35. {
  36. case _S_opcode_alternative:
  37. case _S_opcode_repeat:
  38. ostr << "alt next=" << _M_next << " alt=" << _M_alt;
  39. break;
  40. case _S_opcode_subexpr_begin:
  41. ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
  42. break;
  43. case _S_opcode_subexpr_end:
  44. ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
  45. break;
  46. case _S_opcode_backref:
  47. ostr << "backref next=" << _M_next << " index=" << _M_backref_index;
  48. break;
  49. case _S_opcode_match:
  50. ostr << "match next=" << _M_next;
  51. break;
  52. case _S_opcode_accept:
  53. ostr << "accept next=" << _M_next;
  54. break;
  55. default:
  56. ostr << "unknown next=" << _M_next;
  57. break;
  58. }
  59. return ostr;
  60. }
  61. // Prints graphviz dot commands for state.
  62. inline std::ostream&
  63. _State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const
  64. {
  65. switch (_M_opcode)
  66. {
  67. case _S_opcode_alternative:
  68. case _S_opcode_repeat:
  69. __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
  70. << __id << " -> " << _M_next
  71. << " [label=\"next\", tailport=\"s\"];\n"
  72. << __id << " -> " << _M_alt
  73. << " [label=\"alt\", tailport=\"n\"];\n";
  74. break;
  75. case _S_opcode_backref:
  76. __ostr << __id << " [label=\"" << __id << "\\nBACKREF "
  77. << _M_subexpr << "\"];\n"
  78. << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
  79. break;
  80. case _S_opcode_line_begin_assertion:
  81. __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n"
  82. << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
  83. break;
  84. case _S_opcode_line_end_assertion:
  85. __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n"
  86. << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
  87. break;
  88. case _S_opcode_word_boundary:
  89. __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY "
  90. << _M_neg << "\"];\n"
  91. << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
  92. break;
  93. case _S_opcode_subexpr_lookahead:
  94. __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n"
  95. << __id << " -> " << _M_next
  96. << " [label=\"epsilon\", tailport=\"s\"];\n"
  97. << __id << " -> " << _M_alt
  98. << " [label=\"<assert>\", tailport=\"n\"];\n";
  99. break;
  100. case _S_opcode_subexpr_begin:
  101. __ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
  102. << _M_subexpr << "\"];\n"
  103. << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
  104. break;
  105. case _S_opcode_subexpr_end:
  106. __ostr << __id << " [label=\"" << __id << "\\nSEND "
  107. << _M_subexpr << "\"];\n"
  108. << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
  109. break;
  110. case _S_opcode_dummy:
  111. break;
  112. case _S_opcode_match:
  113. __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
  114. << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
  115. break;
  116. case _S_opcode_accept:
  117. __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
  118. break;
  119. default:
  120. _GLIBCXX_DEBUG_ASSERT(false);
  121. break;
  122. }
  123. return __ostr;
  124. }
  125. template<typename _TraitsT>
  126. std::ostream&
  127. _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const
  128. {
  129. __ostr << "digraph _Nfa {\n"
  130. " rankdir=LR;\n";
  131. for (size_t __i = 0; __i < this->size(); ++__i)
  132. (*this)[__i]._M_dot(__ostr, __i);
  133. __ostr << "}\n";
  134. return __ostr;
  135. }
  136. #endif
  137. template<typename _TraitsT>
  138. _StateIdT
  139. _NFA<_TraitsT>::_M_insert_backref(size_t __index)
  140. {
  141. if (this->_M_flags & regex_constants::__polynomial)
  142. __throw_regex_error(regex_constants::error_complexity,
  143. "Unexpected back-reference in polynomial mode.");
  144. // To figure out whether a backref is valid, a stack is used to store
  145. // unfinished sub-expressions. For example, when parsing
  146. // "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
  147. // sub expressions are parsed or partially parsed(in the stack), aka,
  148. // "(a..", "(b)" and "(c..").
  149. // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
  150. // time, "\\2" is valid, but "\\1" and "\\3" are not.
  151. if (__index >= _M_subexpr_count)
  152. __throw_regex_error(
  153. regex_constants::error_backref,
  154. "Back-reference index exceeds current sub-expression count.");
  155. for (auto __it : this->_M_paren_stack)
  156. if (__index == __it)
  157. __throw_regex_error(
  158. regex_constants::error_backref,
  159. "Back-reference referred to an opened sub-expression.");
  160. this->_M_has_backref = true;
  161. _StateT __tmp(_S_opcode_backref);
  162. __tmp._M_backref_index = __index;
  163. return _M_insert_state(std::move(__tmp));
  164. }
  165. template<typename _TraitsT>
  166. void
  167. _NFA<_TraitsT>::_M_eliminate_dummy()
  168. {
  169. for (auto& __it : *this)
  170. {
  171. while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode()
  172. == _S_opcode_dummy)
  173. __it._M_next = (*this)[__it._M_next]._M_next;
  174. if (__it._M_has_alt())
  175. while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode()
  176. == _S_opcode_dummy)
  177. __it._M_alt = (*this)[__it._M_alt]._M_next;
  178. }
  179. }
  180. // Just apply DFS on the sequence and re-link their links.
  181. template<typename _TraitsT>
  182. _StateSeq<_TraitsT>
  183. _StateSeq<_TraitsT>::_M_clone()
  184. {
  185. std::map<_StateIdT, _StateIdT> __m;
  186. std::stack<_StateIdT> __stack;
  187. __stack.push(_M_start);
  188. while (!__stack.empty())
  189. {
  190. auto __u = __stack.top();
  191. __stack.pop();
  192. auto __dup = _M_nfa[__u];
  193. // _M_insert_state() never return -1
  194. auto __id = _M_nfa._M_insert_state(std::move(__dup));
  195. __m[__u] = __id;
  196. if (__dup._M_has_alt())
  197. if (__dup._M_alt != _S_invalid_state_id
  198. && __m.count(__dup._M_alt) == 0)
  199. __stack.push(__dup._M_alt);
  200. if (__u == _M_end)
  201. continue;
  202. if (__dup._M_next != _S_invalid_state_id
  203. && __m.count(__dup._M_next) == 0)
  204. __stack.push(__dup._M_next);
  205. }
  206. for (auto __it : __m)
  207. {
  208. auto __v = __it.second;
  209. auto& __ref = _M_nfa[__v];
  210. if (__ref._M_next != _S_invalid_state_id)
  211. __ref._M_next = __m.find(__ref._M_next)->second;
  212. if (__ref._M_has_alt() && __ref._M_alt != _S_invalid_state_id)
  213. __ref._M_alt = __m.find(__ref._M_alt)->second;
  214. }
  215. return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]);
  216. }
  217. } // namespace __detail
  218. _GLIBCXX_END_NAMESPACE_VERSION
  219. } // namespace