You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

619 lines
18KB

  1. // wstring_convert implementation -*- C++ -*-
  2. // Copyright (C) 2015-2020 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /** @file bits/locale_conv.h
  21. * This is an internal header file, included by other library headers.
  22. * Do not attempt to use it directly. @headername{locale}
  23. */
  24. #ifndef _LOCALE_CONV_H
  25. #define _LOCALE_CONV_H 1
  26. #if __cplusplus < 201103L
  27. # include <bits/c++0x_warning.h>
  28. #else
  29. #include <streambuf>
  30. #include <bits/stringfwd.h>
  31. #include <bits/allocator.h>
  32. #include <bits/codecvt.h>
  33. #include <bits/unique_ptr.h>
  34. namespace std _GLIBCXX_VISIBILITY(default)
  35. {
  36. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  37. /**
  38. * @addtogroup locales
  39. * @{
  40. */
  41. template<typename _OutStr, typename _InChar, typename _Codecvt,
  42. typename _State, typename _Fn>
  43. bool
  44. __do_str_codecvt(const _InChar* __first, const _InChar* __last,
  45. _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
  46. size_t& __count, _Fn __fn)
  47. {
  48. if (__first == __last)
  49. {
  50. __outstr.clear();
  51. __count = 0;
  52. return true;
  53. }
  54. size_t __outchars = 0;
  55. auto __next = __first;
  56. const auto __maxlen = __cvt.max_length() + 1;
  57. codecvt_base::result __result;
  58. do
  59. {
  60. __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
  61. auto __outnext = &__outstr.front() + __outchars;
  62. auto const __outlast = &__outstr.back() + 1;
  63. __result = (__cvt.*__fn)(__state, __next, __last, __next,
  64. __outnext, __outlast, __outnext);
  65. __outchars = __outnext - &__outstr.front();
  66. }
  67. while (__result == codecvt_base::partial && __next != __last
  68. && (__outstr.size() - __outchars) < __maxlen);
  69. if (__result == codecvt_base::error)
  70. {
  71. __count = __next - __first;
  72. return false;
  73. }
  74. // The codecvt facet will only return noconv when the types are
  75. // the same, so avoid instantiating basic_string::assign otherwise
  76. if _GLIBCXX17_CONSTEXPR (is_same<typename _Codecvt::intern_type,
  77. typename _Codecvt::extern_type>())
  78. if (__result == codecvt_base::noconv)
  79. {
  80. __outstr.assign(__first, __last);
  81. __count = __last - __first;
  82. return true;
  83. }
  84. __outstr.resize(__outchars);
  85. __count = __next - __first;
  86. return true;
  87. }
  88. // Convert narrow character string to wide.
  89. template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  90. inline bool
  91. __str_codecvt_in(const char* __first, const char* __last,
  92. basic_string<_CharT, _Traits, _Alloc>& __outstr,
  93. const codecvt<_CharT, char, _State>& __cvt,
  94. _State& __state, size_t& __count)
  95. {
  96. using _Codecvt = codecvt<_CharT, char, _State>;
  97. using _ConvFn
  98. = codecvt_base::result
  99. (_Codecvt::*)(_State&, const char*, const char*, const char*&,
  100. _CharT*, _CharT*, _CharT*&) const;
  101. _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
  102. return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
  103. __count, __fn);
  104. }
  105. // As above, but with no __count parameter
  106. template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  107. inline bool
  108. __str_codecvt_in(const char* __first, const char* __last,
  109. basic_string<_CharT, _Traits, _Alloc>& __outstr,
  110. const codecvt<_CharT, char, _State>& __cvt)
  111. {
  112. _State __state = {};
  113. size_t __n;
  114. return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
  115. }
  116. // As above, but returns false for partial conversion
  117. template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  118. inline bool
  119. __str_codecvt_in_all(const char* __first, const char* __last,
  120. basic_string<_CharT, _Traits, _Alloc>& __outstr,
  121. const codecvt<_CharT, char, _State>& __cvt)
  122. {
  123. _State __state = {};
  124. size_t __n;
  125. return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n)
  126. && (__n == (__last - __first));
  127. }
  128. // Convert wide character string to narrow.
  129. template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  130. inline bool
  131. __str_codecvt_out(const _CharT* __first, const _CharT* __last,
  132. basic_string<char, _Traits, _Alloc>& __outstr,
  133. const codecvt<_CharT, char, _State>& __cvt,
  134. _State& __state, size_t& __count)
  135. {
  136. using _Codecvt = codecvt<_CharT, char, _State>;
  137. using _ConvFn
  138. = codecvt_base::result
  139. (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
  140. char*, char*, char*&) const;
  141. _ConvFn __fn = &codecvt<_CharT, char, _State>::out;
  142. return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
  143. __count, __fn);
  144. }
  145. // As above, but with no __count parameter
  146. template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  147. inline bool
  148. __str_codecvt_out(const _CharT* __first, const _CharT* __last,
  149. basic_string<char, _Traits, _Alloc>& __outstr,
  150. const codecvt<_CharT, char, _State>& __cvt)
  151. {
  152. _State __state = {};
  153. size_t __n;
  154. return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
  155. }
  156. // As above, but returns false for partial conversions
  157. template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  158. inline bool
  159. __str_codecvt_out_all(const _CharT* __first, const _CharT* __last,
  160. basic_string<char, _Traits, _Alloc>& __outstr,
  161. const codecvt<_CharT, char, _State>& __cvt)
  162. {
  163. _State __state = {};
  164. size_t __n;
  165. return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n)
  166. && (__n == (__last - __first));
  167. }
  168. #ifdef _GLIBCXX_USE_CHAR8_T
  169. // Convert wide character string to narrow.
  170. template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  171. inline bool
  172. __str_codecvt_out(const _CharT* __first, const _CharT* __last,
  173. basic_string<char8_t, _Traits, _Alloc>& __outstr,
  174. const codecvt<_CharT, char8_t, _State>& __cvt,
  175. _State& __state, size_t& __count)
  176. {
  177. using _Codecvt = codecvt<_CharT, char8_t, _State>;
  178. using _ConvFn
  179. = codecvt_base::result
  180. (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
  181. char8_t*, char8_t*, char8_t*&) const;
  182. _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out;
  183. return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
  184. __count, __fn);
  185. }
  186. template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  187. inline bool
  188. __str_codecvt_out(const _CharT* __first, const _CharT* __last,
  189. basic_string<char8_t, _Traits, _Alloc>& __outstr,
  190. const codecvt<_CharT, char8_t, _State>& __cvt)
  191. {
  192. _State __state = {};
  193. size_t __n;
  194. return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
  195. }
  196. #endif // _GLIBCXX_USE_CHAR8_T
  197. #ifdef _GLIBCXX_USE_WCHAR_T
  198. _GLIBCXX_BEGIN_NAMESPACE_CXX11
  199. /// String conversions
  200. template<typename _Codecvt, typename _Elem = wchar_t,
  201. typename _Wide_alloc = allocator<_Elem>,
  202. typename _Byte_alloc = allocator<char>>
  203. class wstring_convert
  204. {
  205. public:
  206. typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string;
  207. typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
  208. typedef typename _Codecvt::state_type state_type;
  209. typedef typename wide_string::traits_type::int_type int_type;
  210. /// Default constructor.
  211. wstring_convert() : _M_cvt(new _Codecvt()) { }
  212. /** Constructor.
  213. *
  214. * @param __pcvt The facet to use for conversions.
  215. *
  216. * Takes ownership of @p __pcvt and will delete it in the destructor.
  217. */
  218. explicit
  219. wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt)
  220. {
  221. if (!_M_cvt)
  222. __throw_logic_error("wstring_convert");
  223. }
  224. /** Construct with an initial converstion state.
  225. *
  226. * @param __pcvt The facet to use for conversions.
  227. * @param __state Initial conversion state.
  228. *
  229. * Takes ownership of @p __pcvt and will delete it in the destructor.
  230. * The object's conversion state will persist between conversions.
  231. */
  232. wstring_convert(_Codecvt* __pcvt, state_type __state)
  233. : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
  234. {
  235. if (!_M_cvt)
  236. __throw_logic_error("wstring_convert");
  237. }
  238. /** Construct with error strings.
  239. *
  240. * @param __byte_err A string to return on failed conversions.
  241. * @param __wide_err A wide string to return on failed conversions.
  242. */
  243. explicit
  244. wstring_convert(const byte_string& __byte_err,
  245. const wide_string& __wide_err = wide_string())
  246. : _M_cvt(new _Codecvt),
  247. _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
  248. _M_with_strings(true)
  249. {
  250. if (!_M_cvt)
  251. __throw_logic_error("wstring_convert");
  252. }
  253. ~wstring_convert() = default;
  254. // _GLIBCXX_RESOLVE_LIB_DEFECTS
  255. // 2176. Special members for wstring_convert and wbuffer_convert
  256. wstring_convert(const wstring_convert&) = delete;
  257. wstring_convert& operator=(const wstring_convert&) = delete;
  258. /// @{ Convert from bytes.
  259. wide_string
  260. from_bytes(char __byte)
  261. {
  262. char __bytes[2] = { __byte };
  263. return from_bytes(__bytes, __bytes+1);
  264. }
  265. wide_string
  266. from_bytes(const char* __ptr)
  267. { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
  268. wide_string
  269. from_bytes(const byte_string& __str)
  270. {
  271. auto __ptr = __str.data();
  272. return from_bytes(__ptr, __ptr + __str.size());
  273. }
  274. wide_string
  275. from_bytes(const char* __first, const char* __last)
  276. {
  277. if (!_M_with_cvtstate)
  278. _M_state = state_type();
  279. wide_string __out{ _M_wide_err_string.get_allocator() };
  280. if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
  281. _M_count))
  282. return __out;
  283. if (_M_with_strings)
  284. return _M_wide_err_string;
  285. __throw_range_error("wstring_convert::from_bytes");
  286. }
  287. /// @}
  288. /// @{ Convert to bytes.
  289. byte_string
  290. to_bytes(_Elem __wchar)
  291. {
  292. _Elem __wchars[2] = { __wchar };
  293. return to_bytes(__wchars, __wchars+1);
  294. }
  295. byte_string
  296. to_bytes(const _Elem* __ptr)
  297. {
  298. return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
  299. }
  300. byte_string
  301. to_bytes(const wide_string& __wstr)
  302. {
  303. auto __ptr = __wstr.data();
  304. return to_bytes(__ptr, __ptr + __wstr.size());
  305. }
  306. byte_string
  307. to_bytes(const _Elem* __first, const _Elem* __last)
  308. {
  309. if (!_M_with_cvtstate)
  310. _M_state = state_type();
  311. byte_string __out{ _M_byte_err_string.get_allocator() };
  312. if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
  313. _M_count))
  314. return __out;
  315. if (_M_with_strings)
  316. return _M_byte_err_string;
  317. __throw_range_error("wstring_convert::to_bytes");
  318. }
  319. /// @}
  320. // _GLIBCXX_RESOLVE_LIB_DEFECTS
  321. // 2174. wstring_convert::converted() should be noexcept
  322. /// The number of elements successfully converted in the last conversion.
  323. size_t converted() const noexcept { return _M_count; }
  324. /// The final conversion state of the last conversion.
  325. state_type state() const { return _M_state; }
  326. private:
  327. unique_ptr<_Codecvt> _M_cvt;
  328. byte_string _M_byte_err_string;
  329. wide_string _M_wide_err_string;
  330. state_type _M_state = state_type();
  331. size_t _M_count = 0;
  332. bool _M_with_cvtstate = false;
  333. bool _M_with_strings = false;
  334. };
  335. _GLIBCXX_END_NAMESPACE_CXX11
  336. /// Buffer conversions
  337. template<typename _Codecvt, typename _Elem = wchar_t,
  338. typename _Tr = char_traits<_Elem>>
  339. class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
  340. {
  341. typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
  342. public:
  343. typedef typename _Codecvt::state_type state_type;
  344. /// Default constructor.
  345. wbuffer_convert() : wbuffer_convert(nullptr) { }
  346. /** Constructor.
  347. *
  348. * @param __bytebuf The underlying byte stream buffer.
  349. * @param __pcvt The facet to use for conversions.
  350. * @param __state Initial conversion state.
  351. *
  352. * Takes ownership of @p __pcvt and will delete it in the destructor.
  353. */
  354. explicit
  355. wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
  356. state_type __state = state_type())
  357. : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
  358. {
  359. if (!_M_cvt)
  360. __throw_logic_error("wbuffer_convert");
  361. _M_always_noconv = _M_cvt->always_noconv();
  362. if (_M_buf)
  363. {
  364. this->setp(_M_put_area, _M_put_area + _S_buffer_length);
  365. this->setg(_M_get_area + _S_putback_length,
  366. _M_get_area + _S_putback_length,
  367. _M_get_area + _S_putback_length);
  368. }
  369. }
  370. ~wbuffer_convert() = default;
  371. // _GLIBCXX_RESOLVE_LIB_DEFECTS
  372. // 2176. Special members for wstring_convert and wbuffer_convert
  373. wbuffer_convert(const wbuffer_convert&) = delete;
  374. wbuffer_convert& operator=(const wbuffer_convert&) = delete;
  375. streambuf* rdbuf() const noexcept { return _M_buf; }
  376. streambuf*
  377. rdbuf(streambuf *__bytebuf) noexcept
  378. {
  379. auto __prev = _M_buf;
  380. _M_buf = __bytebuf;
  381. return __prev;
  382. }
  383. /// The conversion state following the last conversion.
  384. state_type state() const noexcept { return _M_state; }
  385. protected:
  386. int
  387. sync()
  388. { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }
  389. typename _Wide_streambuf::int_type
  390. overflow(typename _Wide_streambuf::int_type __out)
  391. {
  392. if (!_M_buf || !_M_conv_put())
  393. return _Tr::eof();
  394. else if (!_Tr::eq_int_type(__out, _Tr::eof()))
  395. return this->sputc(__out);
  396. return _Tr::not_eof(__out);
  397. }
  398. typename _Wide_streambuf::int_type
  399. underflow()
  400. {
  401. if (!_M_buf)
  402. return _Tr::eof();
  403. if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
  404. return _Tr::to_int_type(*this->gptr());
  405. else
  406. return _Tr::eof();
  407. }
  408. streamsize
  409. xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
  410. {
  411. if (!_M_buf || __n == 0)
  412. return 0;
  413. streamsize __done = 0;
  414. do
  415. {
  416. auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
  417. __n - __done);
  418. _Tr::copy(this->pptr(), __s + __done, __nn);
  419. this->pbump(__nn);
  420. __done += __nn;
  421. } while (__done < __n && _M_conv_put());
  422. return __done;
  423. }
  424. private:
  425. // fill the get area from converted contents of the byte stream buffer
  426. bool
  427. _M_conv_get()
  428. {
  429. const streamsize __pb1 = this->gptr() - this->eback();
  430. const streamsize __pb2 = _S_putback_length;
  431. const streamsize __npb = std::min(__pb1, __pb2);
  432. _Tr::move(_M_get_area + _S_putback_length - __npb,
  433. this->gptr() - __npb, __npb);
  434. streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
  435. __nbytes = std::min(__nbytes, _M_buf->in_avail());
  436. if (__nbytes < 1)
  437. __nbytes = 1;
  438. __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
  439. if (__nbytes < 1)
  440. return false;
  441. __nbytes += _M_unconv;
  442. // convert _M_get_buf into _M_get_area
  443. _Elem* __outbuf = _M_get_area + _S_putback_length;
  444. _Elem* __outnext = __outbuf;
  445. const char* __bnext = _M_get_buf;
  446. codecvt_base::result __result;
  447. if (_M_always_noconv)
  448. __result = codecvt_base::noconv;
  449. else
  450. {
  451. _Elem* __outend = _M_get_area + _S_buffer_length;
  452. __result = _M_cvt->in(_M_state,
  453. __bnext, __bnext + __nbytes, __bnext,
  454. __outbuf, __outend, __outnext);
  455. }
  456. if (__result == codecvt_base::noconv)
  457. {
  458. // cast is safe because noconv means _Elem is same type as char
  459. auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
  460. _Tr::copy(__outbuf, __get_buf, __nbytes);
  461. _M_unconv = 0;
  462. return true;
  463. }
  464. if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
  465. char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
  466. this->setg(__outbuf, __outbuf, __outnext);
  467. return __result != codecvt_base::error;
  468. }
  469. // unused
  470. bool
  471. _M_put(...)
  472. { return false; }
  473. bool
  474. _M_put(const char* __p, streamsize __n)
  475. {
  476. if (_M_buf->sputn(__p, __n) < __n)
  477. return false;
  478. return true;
  479. }
  480. // convert the put area and write to the byte stream buffer
  481. bool
  482. _M_conv_put()
  483. {
  484. _Elem* const __first = this->pbase();
  485. const _Elem* const __last = this->pptr();
  486. const streamsize __pending = __last - __first;
  487. if (_M_always_noconv)
  488. return _M_put(__first, __pending);
  489. char __outbuf[2 * _S_buffer_length];
  490. const _Elem* __next = __first;
  491. const _Elem* __start;
  492. do
  493. {
  494. __start = __next;
  495. char* __outnext = __outbuf;
  496. char* const __outlast = __outbuf + sizeof(__outbuf);
  497. auto __result = _M_cvt->out(_M_state, __next, __last, __next,
  498. __outnext, __outlast, __outnext);
  499. if (__result == codecvt_base::error)
  500. return false;
  501. else if (__result == codecvt_base::noconv)
  502. return _M_put(__next, __pending);
  503. if (!_M_put(__outbuf, __outnext - __outbuf))
  504. return false;
  505. }
  506. while (__next != __last && __next != __start);
  507. if (__next != __last)
  508. _Tr::move(__first, __next, __last - __next);
  509. this->pbump(__first - __next);
  510. return __next != __first;
  511. }
  512. streambuf* _M_buf;
  513. unique_ptr<_Codecvt> _M_cvt;
  514. state_type _M_state;
  515. static const streamsize _S_buffer_length = 32;
  516. static const streamsize _S_putback_length = 3;
  517. _Elem _M_put_area[_S_buffer_length];
  518. _Elem _M_get_area[_S_buffer_length];
  519. streamsize _M_unconv = 0;
  520. char _M_get_buf[_S_buffer_length-_S_putback_length];
  521. bool _M_always_noconv;
  522. };
  523. #endif // _GLIBCXX_USE_WCHAR_T
  524. /// @} group locales
  525. _GLIBCXX_END_NAMESPACE_VERSION
  526. } // namespace
  527. #endif // __cplusplus
  528. #endif /* _LOCALE_CONV_H */