|
- // wstring_convert implementation -*- C++ -*-
-
- // Copyright (C) 2015-2020 Free Software Foundation, Inc.
- //
- // This file is part of the GNU ISO C++ Library. This library is free
- // software; you can redistribute it and/or modify it under the
- // terms of the GNU General Public License as published by the
- // Free Software Foundation; either version 3, or (at your option)
- // any later version.
-
- // This library is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
-
- // Under Section 7 of GPL version 3, you are granted additional
- // permissions described in the GCC Runtime Library Exception, version
- // 3.1, as published by the Free Software Foundation.
-
- // You should have received a copy of the GNU General Public License and
- // a copy of the GCC Runtime Library Exception along with this program;
- // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- // <http://www.gnu.org/licenses/>.
-
- /** @file bits/locale_conv.h
- * This is an internal header file, included by other library headers.
- * Do not attempt to use it directly. @headername{locale}
- */
-
- #ifndef _LOCALE_CONV_H
- #define _LOCALE_CONV_H 1
-
- #if __cplusplus < 201103L
- # include <bits/c++0x_warning.h>
- #else
-
- #include <streambuf>
- #include <bits/stringfwd.h>
- #include <bits/allocator.h>
- #include <bits/codecvt.h>
- #include <bits/unique_ptr.h>
-
- namespace std _GLIBCXX_VISIBILITY(default)
- {
- _GLIBCXX_BEGIN_NAMESPACE_VERSION
-
- /**
- * @addtogroup locales
- * @{
- */
-
/**
 *  Common implementation of the string conversion helpers.
 *
 *  Converts the input range [__first, __last) by repeatedly invoking the
 *  member function @p __fn on the facet @p __cvt, storing the converted
 *  elements in @p __outstr.
 *
 *  @param __first   Start of the input range.
 *  @param __last    End of the input range.
 *  @param __outstr  String that receives the converted elements.
 *  @param __cvt     The facet performing the conversion.
 *  @param __state   Conversion state handed to the facet.
 *  @param __count   Set to the number of input elements consumed.
 *  @param __fn      Pointer to the facet member function to call.
 *  @return  False if the facet reported @c codecvt_base::error, otherwise
 *           true.
 */
template<typename _OutStr, typename _InChar, typename _Codecvt,
         typename _State, typename _Fn>
  bool
  __do_str_codecvt(const _InChar* __first, const _InChar* __last,
                   _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
                   size_t& __count, _Fn __fn)
  {
    // An empty input range yields an empty string.
    if (__first == __last)
      {
        __count = 0;
        __outstr.clear();
        return true;
      }

    // Output elements added to the buffer per input element still pending.
    const auto __per_elem = __cvt.max_length() + 1;

    const _InChar* __in = __first;  // next input element to convert
    size_t __written = 0;           // output elements produced so far
    codecvt_base::result __res;

    for (;;)
      {
        // Enlarge the buffer, then let the facet carry on writing just
        // after the output it has already produced.
        __outstr.resize(__outstr.size() + (__last - __in) * __per_elem);
        auto* const __obegin = &__outstr.front();
        auto* const __oend = &__outstr.back() + 1;
        auto* __ocur = __obegin + __written;

        __res = (__cvt.*__fn)(__state, __in, __last, __in,
                              __ocur, __oend, __ocur);
        __written = __ocur - __obegin;

        // Only go round again for a partial conversion that left input
        // unconsumed while fewer than __per_elem output slots remain.
        if (__res != codecvt_base::partial
            || __in == __last
            || (__outstr.size() - __written) >= __per_elem)
          break;
      }

    if (__res == codecvt_base::error)
      {
        __count = __in - __first;
        return false;
      }

    // The codecvt facet will only return noconv when the internal and
    // external types are the same, so basic_string::assign is not
    // instantiated for any other combination.
    if _GLIBCXX17_CONSTEXPR (is_same<typename _Codecvt::intern_type,
                                     typename _Codecvt::extern_type>::value)
      if (__res == codecvt_base::noconv)
        {
          // Nothing was written by the facet: the output is the input.
          __outstr.assign(__first, __last);
          __count = __last - __first;
          return true;
        }

    // Drop the unused tail of the buffer.
    __outstr.resize(__written);
    __count = __in - __first;
    return true;
  }
-
// Convert narrow character string to wide.
// Runs codecvt::in over [__first, __last) via __do_str_codecvt, which
// fills __outstr, updates __state and sets __count; its result is returned
// unchanged.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in(const char* __first, const char* __last,
                   basic_string<_CharT, _Traits, _Alloc>& __outstr,
                   const codecvt<_CharT, char, _State>& __cvt,
                   _State& __state, size_t& __count)
  {
    typedef codecvt<_CharT, char, _State> _Facet;
    // Exact signature of codecvt::in, spelled as a member of _Facet.
    typedef codecvt_base::result
      (_Facet::*_MemFn)(_State&, const char*, const char*, const char*&,
                        _CharT*, _CharT*, _CharT*&) const;

    const _MemFn __convert = &_Facet::in;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __convert);
  }
-
// Convert narrow character string to wide, with no __count parameter.
// A value-initialized local conversion state is used and the number of
// consumed input elements is discarded.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in(const char* __first, const char* __last,
                   basic_string<_CharT, _Traits, _Alloc>& __outstr,
                   const codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __ignored;
    _State __initial = {};
    return __str_codecvt_in(__first, __last, __outstr, __cvt,
                            __initial, __ignored);
  }
-
// Convert narrow character string to wide, returning false for a partial
// conversion: the result is true only if the conversion succeeded and
// every element of [__first, __last) was consumed.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in_all(const char* __first, const char* __last,
                       basic_string<_CharT, _Traits, _Alloc>& __outstr,
                       const codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __consumed;
    _State __initial = {};
    if (!__str_codecvt_in(__first, __last, __outstr, __cvt,
                          __initial, __consumed))
      return false;
    return __consumed == static_cast<size_t>(__last - __first);
  }
-
// Convert wide character string to narrow.
// Runs codecvt::out over [__first, __last) via __do_str_codecvt, which
// fills __outstr, updates __state and sets __count; its result is returned
// unchanged.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    basic_string<char, _Traits, _Alloc>& __outstr,
                    const codecvt<_CharT, char, _State>& __cvt,
                    _State& __state, size_t& __count)
  {
    typedef codecvt<_CharT, char, _State> _Facet;
    // Exact signature of codecvt::out, spelled as a member of _Facet.
    typedef codecvt_base::result
      (_Facet::*_MemFn)(_State&, const _CharT*, const _CharT*,
                        const _CharT*&, char*, char*, char*&) const;

    const _MemFn __convert = &_Facet::out;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __convert);
  }
-
// Convert wide character string to narrow, with no __count parameter.
// A value-initialized local conversion state is used and the number of
// consumed input elements is discarded.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    basic_string<char, _Traits, _Alloc>& __outstr,
                    const codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __ignored;
    _State __initial = {};
    return __str_codecvt_out(__first, __last, __outstr, __cvt,
                             __initial, __ignored);
  }
-
// Convert wide character string to narrow, returning false for a partial
// conversion: the result is true only if the conversion succeeded and
// every element of [__first, __last) was consumed.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out_all(const _CharT* __first, const _CharT* __last,
                        basic_string<char, _Traits, _Alloc>& __outstr,
                        const codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __consumed;
    _State __initial = {};
    if (!__str_codecvt_out(__first, __last, __outstr, __cvt,
                           __initial, __consumed))
      return false;
    return __consumed == static_cast<size_t>(__last - __first);
  }
-
- #ifdef _GLIBCXX_USE_CHAR8_T
-
- // Convert wide character string to narrow.
- template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
- inline bool
- __str_codecvt_out(const _CharT* __first, const _CharT* __last,
- basic_string<char8_t, _Traits, _Alloc>& __outstr,
- const codecvt<_CharT, char8_t, _State>& __cvt,
- _State& __state, size_t& __count)
- {
- using _Codecvt = codecvt<_CharT, char8_t, _State>;
- using _ConvFn
- = codecvt_base::result
- (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
- char8_t*, char8_t*, char8_t*&) const;
- _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out;
- return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
- __count, __fn);
- }
-
- template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
- inline bool
- __str_codecvt_out(const _CharT* __first, const _CharT* __last,
- basic_string<char8_t, _Traits, _Alloc>& __outstr,
- const codecvt<_CharT, char8_t, _State>& __cvt)
- {
- _State __state = {};
- size_t __n;
- return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
- }
-
- #endif // _GLIBCXX_USE_CHAR8_T
-
- #ifdef _GLIBCXX_USE_WCHAR_T
-
- _GLIBCXX_BEGIN_NAMESPACE_CXX11
-
/**
 *  @brief  String conversions.
 *
 *  Converts between a byte string (@c byte_string) and a string of
 *  @c _Elem (@c wide_string) using a codecvt facet owned by the object.
 *  Objects of this type cannot be copied.
 */
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Wide_alloc = allocator<_Elem>,
         typename _Byte_alloc = allocator<char>>
  class wstring_convert
  {
  public:
    using byte_string = basic_string<char, char_traits<char>, _Byte_alloc>;
    using wide_string = basic_string<_Elem, char_traits<_Elem>, _Wide_alloc>;
    using state_type = typename _Codecvt::state_type;
    using int_type = typename wide_string::traits_type::int_type;

    /// Default constructor: owns a newly allocated facet.
    wstring_convert()
    : _M_cvt(new _Codecvt())
    { }

    /** Constructor.
     *
     * @param  __pcvt The facet to use for conversions.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * Throws a logic error if @p __pcvt is null.
     */
    explicit
    wstring_convert(_Codecvt* __pcvt)
    : _M_cvt(__pcvt)
    {
      if (_M_cvt == nullptr)
        __throw_logic_error("wstring_convert");
    }

    /** Construct with an initial conversion state.
     *
     * @param  __pcvt The facet to use for conversions.
     * @param  __state Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * The object's conversion state will persist between conversions.
     * Throws a logic error if @p __pcvt is null.
     */
    wstring_convert(_Codecvt* __pcvt, state_type __state)
    : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
    {
      if (_M_cvt == nullptr)
        __throw_logic_error("wstring_convert");
    }

    /** Construct with error strings.
     *
     * @param  __byte_err A string to return on failed conversions.
     * @param  __wide_err A wide string to return on failed conversions.
     *
     * The object owns a newly allocated facet.
     */
    explicit
    wstring_convert(const byte_string& __byte_err,
                    const wide_string& __wide_err = wide_string())
    : _M_cvt(new _Codecvt),
      _M_byte_err_string(__byte_err),
      _M_wide_err_string(__wide_err),
      _M_with_strings(true)
    {
      if (_M_cvt == nullptr)
        __throw_logic_error("wstring_convert");
    }

    ~wstring_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wstring_convert(const wstring_convert&) = delete;
    wstring_convert& operator=(const wstring_convert&) = delete;

    /// @{ Convert from bytes.

    /// Convert a single byte.
    wide_string
    from_bytes(char __byte)
    {
      const char __single[2] = { __byte };
      return from_bytes(__single, __single + 1);
    }

    /// Convert a null-terminated byte sequence.
    wide_string
    from_bytes(const char* __ptr)
    {
      const size_t __len = char_traits<char>::length(__ptr);
      return from_bytes(__ptr, __ptr + __len);
    }

    /// Convert the contents of a byte string.
    wide_string
    from_bytes(const byte_string& __str)
    {
      const char* const __begin = __str.data();
      return from_bytes(__begin, __begin + __str.size());
    }

    /** Convert the byte range [__first, __last).
     *
     * On failure returns the wide error string if the object was
     * constructed with error strings, otherwise throws a range error.
     */
    wide_string
    from_bytes(const char* __first, const char* __last)
    {
      // Start from a fresh state unless one was supplied at construction.
      if (!_M_with_cvtstate)
        _M_state = state_type();

      wide_string __result{ _M_wide_err_string.get_allocator() };
      const bool __ok = __str_codecvt_in(__first, __last, __result,
                                         *_M_cvt, _M_state, _M_count);
      if (__ok)
        return __result;
      if (!_M_with_strings)
        __throw_range_error("wstring_convert::from_bytes");
      return _M_wide_err_string;
    }
    /// @}

    /// @{ Convert to bytes.

    /// Convert a single wide element.
    byte_string
    to_bytes(_Elem __wchar)
    {
      const _Elem __single[2] = { __wchar };
      return to_bytes(__single, __single + 1);
    }

    /// Convert a null-terminated wide sequence.
    byte_string
    to_bytes(const _Elem* __ptr)
    {
      const size_t __len = wide_string::traits_type::length(__ptr);
      return to_bytes(__ptr, __ptr + __len);
    }

    /// Convert the contents of a wide string.
    byte_string
    to_bytes(const wide_string& __wstr)
    {
      const _Elem* const __begin = __wstr.data();
      return to_bytes(__begin, __begin + __wstr.size());
    }

    /** Convert the wide range [__first, __last).
     *
     * On failure returns the byte error string if the object was
     * constructed with error strings, otherwise throws a range error.
     */
    byte_string
    to_bytes(const _Elem* __first, const _Elem* __last)
    {
      // Start from a fresh state unless one was supplied at construction.
      if (!_M_with_cvtstate)
        _M_state = state_type();

      byte_string __result{ _M_byte_err_string.get_allocator() };
      const bool __ok = __str_codecvt_out(__first, __last, __result,
                                          *_M_cvt, _M_state, _M_count);
      if (__ok)
        return __result;
      if (!_M_with_strings)
        __throw_range_error("wstring_convert::to_bytes");
      return _M_byte_err_string;
    }
    /// @}

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2174. wstring_convert::converted() should be noexcept
    /// The number of elements successfully converted in the last conversion.
    size_t
    converted() const noexcept
    { return _M_count; }

    /// The final conversion state of the last conversion.
    state_type
    state() const
    { return _M_state; }

  private:
    unique_ptr<_Codecvt>  _M_cvt;               // the owned facet
    byte_string           _M_byte_err_string;   // returned by failed to_bytes
    wide_string           _M_wide_err_string;   // returned by failed from_bytes
    state_type            _M_state = state_type();
    size_t                _M_count = 0;         // set by each conversion
    bool                  _M_with_cvtstate = false; // state given to ctor
    bool                  _M_with_strings = false;  // error strings given
  };
-
- _GLIBCXX_END_NAMESPACE_CXX11
-
/**
 *  @brief  Buffer conversions.
 *
 *  A stream buffer of @c _Elem layered on top of a byte stream buffer.
 *  Characters written to this buffer are converted with the facet's
 *  @c out member and written to the byte stream buffer; bytes read from
 *  the byte stream buffer are converted with the facet's @c in member.
 *  Objects of this type cannot be copied.
 */
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Tr = char_traits<_Elem>>
  class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
  {
    typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;

  public:
    typedef typename _Codecvt::state_type state_type;

    /// Default constructor: no byte stream buffer is attached.
    wbuffer_convert() : wbuffer_convert(nullptr) { }

    /** Constructor.
     *
     * @param  __bytebuf The underlying byte stream buffer.
     * @param  __pcvt    The facet to use for conversions.
     * @param  __state   Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * Throws a logic error if @p __pcvt is null.  The put and get areas
     * are only set up when @p __bytebuf is not null.
     */
    explicit
    wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
                    state_type __state = state_type())
    : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
    {
      if (!_M_cvt)
        __throw_logic_error("wbuffer_convert");

      _M_always_noconv = _M_cvt->always_noconv();

      if (_M_buf)
        {
          this->setp(_M_put_area, _M_put_area + _S_buffer_length);
          // Empty get area, positioned after the space kept for putback.
          this->setg(_M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length);
        }
    }

    ~wbuffer_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wbuffer_convert(const wbuffer_convert&) = delete;
    wbuffer_convert& operator=(const wbuffer_convert&) = delete;

    /// The underlying byte stream buffer.
    streambuf* rdbuf() const noexcept { return _M_buf; }

    /// Replace the underlying byte stream buffer, returning the previous
    /// one.  The put and get areas are left untouched.
    streambuf*
    rdbuf(streambuf *__bytebuf) noexcept
    {
      auto __prev = _M_buf;
      _M_buf = __bytebuf;
      return __prev;
    }

    /// The conversion state following the last conversion.
    state_type state() const noexcept { return _M_state; }

  protected:
    /// Convert and write the put area, then synchronize the byte stream
    /// buffer.  Returns 0 only if every step succeeded, otherwise -1.
    int
    sync()
    { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }

    /// Convert and write the put area to make room, then store @p __out
    /// unless it is the end-of-file value.
    typename _Wide_streambuf::int_type
    overflow(typename _Wide_streambuf::int_type __out)
    {
      if (!_M_buf || !_M_conv_put())
        return _Tr::eof();
      else if (!_Tr::eq_int_type(__out, _Tr::eof()))
        return this->sputc(__out);
      return _Tr::not_eof(__out);
    }

    /// Return the next available character, refilling the get area from
    /// the byte stream buffer when it is exhausted.
    typename _Wide_streambuf::int_type
    underflow()
    {
      if (!_M_buf)
        return _Tr::eof();

      if (this->gptr() < this->egptr() || _M_conv_get())
        return _Tr::to_int_type(*this->gptr());
      else
        return _Tr::eof();
    }

    /// Write @p __n characters from @p __s, converting and writing the
    /// put area each time it fills up.  Returns the number of characters
    /// accepted.
    streamsize
    xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
    {
      if (!_M_buf || __n == 0)
        return 0;
      streamsize __done = 0;
      do
        {
          auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
                                           __n - __done);
          _Tr::copy(this->pptr(), __s + __done, __nn);
          this->pbump(__nn);
          __done += __nn;
        } while (__done < __n && _M_conv_put());
      return __done;
    }

  private:
    // Fill the get area from converted contents of the byte stream buffer.
    // Returns false if no bytes could be read or the facet reported an
    // error.
    bool
    _M_conv_get()
    {
      // Keep up to _S_putback_length already-read characters just before
      // the start of the refilled area.
      const streamsize __pb1 = this->gptr() - this->eback();
      const streamsize __pb2 = _S_putback_length;
      const streamsize __npb = std::min(__pb1, __pb2);

      _Tr::move(_M_get_area + _S_putback_length - __npb,
                this->gptr() - __npb, __npb);

      // Read behind any bytes left unconverted by the previous call; ask
      // for at least one byte even when none is reported as available.
      streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
      __nbytes = std::min(__nbytes, _M_buf->in_avail());
      if (__nbytes < 1)
        __nbytes = 1;
      __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
      if (__nbytes < 1)
        return false;
      __nbytes += _M_unconv;

      // convert _M_get_buf into _M_get_area

      _Elem* __outbuf = _M_get_area + _S_putback_length;
      _Elem* __outnext = __outbuf;
      const char* __bnext = _M_get_buf;

      codecvt_base::result __result;
      if (_M_always_noconv)
        __result = codecvt_base::noconv;
      else
        {
          _Elem* __outend = _M_get_area + _S_buffer_length;

          __result = _M_cvt->in(_M_state,
                                __bnext, __bnext + __nbytes, __bnext,
                                __outbuf, __outend, __outnext);
        }

      if (__result == codecvt_base::noconv)
        {
          // cast is safe because noconv means _Elem is same type as char
          auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
          _Tr::copy(__outbuf, __get_buf, __nbytes);
          _M_unconv = 0;
          // Make the copied characters readable.  Without this the get
          // pointers keep describing an empty area and the data just
          // copied is never consumed correctly.
          this->setg(__outbuf, __outbuf, __outbuf + __nbytes);
          return true;
        }

      // Move bytes the facet did not consume to the front of _M_get_buf
      // so that the next call converts them first.
      if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
        char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);

      this->setg(__outbuf, __outbuf, __outnext);

      return __result != codecvt_base::error;
    }

    // Selected by overload resolution when the characters to write are
    // not char; nothing is written and failure is reported.
    bool
    _M_put(...)
    { return false; }

    // Write __n bytes to the byte stream buffer; false on a short write.
    bool
    _M_put(const char* __p, streamsize __n)
    {
      if (_M_buf->sputn(__p, __n) < __n)
        return false;
      return true;
    }

    // Convert the put area and write to the byte stream buffer.
    // Characters that were written are removed from the put area.
    bool
    _M_conv_put()
    {
      _Elem* const __first = this->pbase();
      const _Elem* const __last = this->pptr();
      const streamsize __pending = __last - __first;

      if (_M_always_noconv)
        {
          if (!_M_put(__first, __pending))
            return false;
          // Everything pending has been written: rewind the put pointer,
          // otherwise the same characters would be written again and the
          // put area would never regain any free space.
          this->pbump(__first - __last);
          return true;
        }

      char __outbuf[2 * _S_buffer_length];

      const _Elem* __next = __first;
      const _Elem* __start;
      do
        {
          __start = __next;
          char* __outnext = __outbuf;
          char* const __outlast = __outbuf + sizeof(__outbuf);
          auto __result = _M_cvt->out(_M_state, __next, __last, __next,
                                      __outnext, __outlast, __outnext);
          if (__result == codecvt_base::error)
            return false;
          else if (__result == codecvt_base::noconv)
            {
              // Write exactly the characters that are still pending from
              // __next onwards, then empty the put area.
              if (!_M_put(__next, __last - __next))
                return false;
              this->pbump(__first - __last);
              return true;
            }

          if (!_M_put(__outbuf, __outnext - __outbuf))
            return false;
        }
      // Stop once everything is converted or no progress was made.
      while (__next != __last && __next != __start);

      // Keep characters that could not be converted at the start of the
      // put area.
      if (__next != __last)
        _Tr::move(__first, __next, __last - __next);

      this->pbump(__first - __next);
      return __next != __first;
    }

    streambuf*            _M_buf;   // underlying byte stream buffer
    unique_ptr<_Codecvt>  _M_cvt;   // the owned facet
    state_type            _M_state; // conversion state

    static const streamsize _S_buffer_length = 32;
    static const streamsize _S_putback_length = 3;
    _Elem                 _M_put_area[_S_buffer_length];
    _Elem                 _M_get_area[_S_buffer_length];
    streamsize            _M_unconv = 0;  // unconverted bytes in _M_get_buf
    char                  _M_get_buf[_S_buffer_length-_S_putback_length];
    bool                  _M_always_noconv; // result of always_noconv()
  };
-
- #endif // _GLIBCXX_USE_WCHAR_T
-
- /// @} group locales
-
- _GLIBCXX_END_NAMESPACE_VERSION
- } // namespace
-
- #endif // __cplusplus
-
- #endif /* _LOCALE_CONV_H */
|