|
- // Locale support (codecvt) -*- C++ -*-
-
- // Copyright (C) 2000-2020 Free Software Foundation, Inc.
- //
- // This file is part of the GNU ISO C++ Library. This library is free
- // software; you can redistribute it and/or modify it under the
- // terms of the GNU General Public License as published by the
- // Free Software Foundation; either version 3, or (at your option)
- // any later version.
-
- // This library is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
-
- // Under Section 7 of GPL version 3, you are granted additional
- // permissions described in the GCC Runtime Library Exception, version
- // 3.1, as published by the Free Software Foundation.
-
- // You should have received a copy of the GNU General Public License and
- // a copy of the GCC Runtime Library Exception along with this program;
- // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- // <http://www.gnu.org/licenses/>.
-
- //
- // ISO C++ 14882: 22.2.1.5 Template class codecvt
- //
-
- // Written by Benjamin Kosnik <bkoz@redhat.com>
-
- /** @file ext/codecvt_specializations.h
- * This file is a GNU extension to the Standard C++ Library.
- */
-
- #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
- #define _EXT_CODECVT_SPECIALIZATIONS_H 1
-
- #include <bits/c++config.h>
- #include <locale>
- #include <iconv.h>
-
- namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
- {
- _GLIBCXX_BEGIN_NAMESPACE_VERSION
- _GLIBCXX_BEGIN_NAMESPACE_CXX11
-
- /// Extension to use iconv for dealing with character encodings.
- // This includes conversions and comparisons between various character
- // sets. This object encapsulates data that may need to be shared between
- // char_traits, codecvt and ctype.
- class encoding_state
- {
- public:
- // Types:
- // NB: A conversion descriptor subsumes and enhances the
- // functionality of a simple state type such as mbstate_t.
- typedef iconv_t descriptor_type;
-
- protected:
- // Name of internal character set encoding.
- std::string _M_int_enc;
-
- // Name of external character set encoding.
- std::string _M_ext_enc;
-
- // Conversion descriptor between external encoding to internal encoding.
- descriptor_type _M_in_desc;
-
- // Conversion descriptor between internal encoding to external encoding.
- descriptor_type _M_out_desc;
-
- // The byte-order marker for the external encoding, if necessary.
- int _M_ext_bom;
-
- // The byte-order marker for the internal encoding, if necessary.
- int _M_int_bom;
-
- // Number of external bytes needed to construct one complete
- // character in the internal encoding.
- // NB: -1 indicates variable, or stateful, encodings.
- int _M_bytes;
-
- public:
- explicit
- encoding_state()
- : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
- { }
-
- explicit
- encoding_state(const char* __int, const char* __ext,
- int __ibom = 0, int __ebom = 0, int __bytes = 1)
- : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
- _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
- { init(); }
-
- // 21.1.2 traits typedefs
- // p4
- // typedef STATE_T state_type
- // requires: state_type shall meet the requirements of
- // CopyConstructible types (20.1.3)
- // NB: This does not preserve the actual state of the conversion
- // descriptor member, but it does duplicate the encoding
- // information.
- encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
- { construct(__obj); }
-
- // Need assignment operator as well.
- encoding_state&
- operator=(const encoding_state& __obj)
- {
- construct(__obj);
- return *this;
- }
-
- ~encoding_state()
- { destroy(); }
-
- bool
- good() const throw()
- {
- const descriptor_type __err = (iconv_t)(-1);
- bool __test = _M_in_desc && _M_in_desc != __err;
- __test &= _M_out_desc && _M_out_desc != __err;
- return __test;
- }
-
- int
- character_ratio() const
- { return _M_bytes; }
-
- const std::string
- internal_encoding() const
- { return _M_int_enc; }
-
- int
- internal_bom() const
- { return _M_int_bom; }
-
- const std::string
- external_encoding() const
- { return _M_ext_enc; }
-
- int
- external_bom() const
- { return _M_ext_bom; }
-
- const descriptor_type&
- in_descriptor() const
- { return _M_in_desc; }
-
- const descriptor_type&
- out_descriptor() const
- { return _M_out_desc; }
-
- protected:
- void
- init()
- {
- const descriptor_type __err = (iconv_t)(-1);
- const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
- if (!_M_in_desc && __have_encodings)
- {
- _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
- if (_M_in_desc == __err)
- std::__throw_runtime_error(__N("encoding_state::_M_init "
- "creating iconv input descriptor failed"));
- }
- if (!_M_out_desc && __have_encodings)
- {
- _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
- if (_M_out_desc == __err)
- std::__throw_runtime_error(__N("encoding_state::_M_init "
- "creating iconv output descriptor failed"));
- }
- }
-
- void
- construct(const encoding_state& __obj)
- {
- destroy();
- _M_int_enc = __obj._M_int_enc;
- _M_ext_enc = __obj._M_ext_enc;
- _M_ext_bom = __obj._M_ext_bom;
- _M_int_bom = __obj._M_int_bom;
- _M_bytes = __obj._M_bytes;
- init();
- }
-
- void
- destroy() throw()
- {
- const descriptor_type __err = (iconv_t)(-1);
- if (_M_in_desc && _M_in_desc != __err)
- {
- iconv_close(_M_in_desc);
- _M_in_desc = 0;
- }
- if (_M_out_desc && _M_out_desc != __err)
- {
- iconv_close(_M_out_desc);
- _M_out_desc = 0;
- }
- }
- };
-
- /// encoding_char_traits
- // Custom traits type with encoding_state for the state type, and the
- // associated fpos<encoding_state> for the position type, all other
- // bits equivalent to the required char_traits instantiations.
- template<typename _CharT>
- struct encoding_char_traits
- : public std::char_traits<_CharT>
- {
- typedef encoding_state state_type;
- typedef typename std::fpos<state_type> pos_type;
- };
-
- _GLIBCXX_END_NAMESPACE_CXX11
- _GLIBCXX_END_NAMESPACE_VERSION
- } // namespace
-
-
- namespace std _GLIBCXX_VISIBILITY(default)
- {
- _GLIBCXX_BEGIN_NAMESPACE_VERSION
-
- using __gnu_cxx::encoding_state;
-
- /// codecvt<InternT, _ExternT, encoding_state> specialization.
- // This partial specialization takes advantage of iconv to provide
- // code conversions between a large number of character encodings.
- template<typename _InternT, typename _ExternT>
- class codecvt<_InternT, _ExternT, encoding_state>
- : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
- {
- public:
- // Types:
- typedef codecvt_base::result result;
- typedef _InternT intern_type;
- typedef _ExternT extern_type;
- typedef __gnu_cxx::encoding_state state_type;
- typedef state_type::descriptor_type descriptor_type;
-
- // Data Members:
- static locale::id id;
-
- explicit
- codecvt(size_t __refs = 0)
- : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
- { }
-
- explicit
- codecvt(state_type& __enc, size_t __refs = 0)
- : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
- { }
-
- protected:
- virtual
- ~codecvt() { }
-
- virtual result
- do_out(state_type& __state, const intern_type* __from,
- const intern_type* __from_end, const intern_type*& __from_next,
- extern_type* __to, extern_type* __to_end,
- extern_type*& __to_next) const;
-
- virtual result
- do_unshift(state_type& __state, extern_type* __to,
- extern_type* __to_end, extern_type*& __to_next) const;
-
- virtual result
- do_in(state_type& __state, const extern_type* __from,
- const extern_type* __from_end, const extern_type*& __from_next,
- intern_type* __to, intern_type* __to_end,
- intern_type*& __to_next) const;
-
- virtual int
- do_encoding() const throw();
-
- virtual bool
- do_always_noconv() const throw();
-
- virtual int
- do_length(state_type&, const extern_type* __from,
- const extern_type* __end, size_t __max) const;
-
- virtual int
- do_max_length() const throw();
- };
-
- template<typename _InternT, typename _ExternT>
- locale::id
- codecvt<_InternT, _ExternT, encoding_state>::id;
-
- // This adaptor works around the signature problems of the second
- // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2
- // uses 'char**', which matches the POSIX 1003.1-2001 standard.
- // Using this adaptor, g++ will do the work for us.
- template<typename _Tp>
- inline size_t
- __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
- iconv_t __cd, char** __inbuf, size_t* __inbytes,
- char** __outbuf, size_t* __outbytes)
- { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); }
-
- template<typename _InternT, typename _ExternT>
- codecvt_base::result
- codecvt<_InternT, _ExternT, encoding_state>::
- do_out(state_type& __state, const intern_type* __from,
- const intern_type* __from_end, const intern_type*& __from_next,
- extern_type* __to, extern_type* __to_end,
- extern_type*& __to_next) const
- {
- result __ret = codecvt_base::error;
- if (__state.good())
- {
- const descriptor_type& __desc = __state.out_descriptor();
- const size_t __fmultiple = sizeof(intern_type);
- size_t __fbytes = __fmultiple * (__from_end - __from);
- const size_t __tmultiple = sizeof(extern_type);
- size_t __tbytes = __tmultiple * (__to_end - __to);
-
- // Argument list for iconv specifies a byte sequence. Thus,
- // all to/from arrays must be brutally casted to char*.
- char* __cto = reinterpret_cast<char*>(__to);
- char* __cfrom;
- size_t __conv;
-
- // Some encodings need a byte order marker as the first item
- // in the byte stream, to designate endian-ness. The default
- // value for the byte order marker is NULL, so if this is
- // the case, it's not necessary and we can just go on our
- // merry way.
- int __int_bom = __state.internal_bom();
- if (__int_bom)
- {
- size_t __size = __from_end - __from;
- intern_type* __cfixed = static_cast<intern_type*>
- (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
- __cfixed[0] = static_cast<intern_type>(__int_bom);
- char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
- __cfrom = reinterpret_cast<char*>(__cfixed);
- __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
- &__fbytes, &__cto, &__tbytes);
- }
- else
- {
- intern_type* __cfixed = const_cast<intern_type*>(__from);
- __cfrom = reinterpret_cast<char*>(__cfixed);
- __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
- &__cto, &__tbytes);
- }
-
- if (__conv != size_t(-1))
- {
- __from_next = reinterpret_cast<const intern_type*>(__cfrom);
- __to_next = reinterpret_cast<extern_type*>(__cto);
- __ret = codecvt_base::ok;
- }
- else
- {
- if (__fbytes < __fmultiple * (__from_end - __from))
- {
- __from_next = reinterpret_cast<const intern_type*>(__cfrom);
- __to_next = reinterpret_cast<extern_type*>(__cto);
- __ret = codecvt_base::partial;
- }
- else
- __ret = codecvt_base::error;
- }
- }
- return __ret;
- }
-
- template<typename _InternT, typename _ExternT>
- codecvt_base::result
- codecvt<_InternT, _ExternT, encoding_state>::
- do_unshift(state_type& __state, extern_type* __to,
- extern_type* __to_end, extern_type*& __to_next) const
- {
- result __ret = codecvt_base::error;
- if (__state.good())
- {
- const descriptor_type& __desc = __state.in_descriptor();
- const size_t __tmultiple = sizeof(intern_type);
- size_t __tlen = __tmultiple * (__to_end - __to);
-
- // Argument list for iconv specifies a byte sequence. Thus,
- // all to/from arrays must be brutally casted to char*.
- char* __cto = reinterpret_cast<char*>(__to);
- size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
- &__cto, &__tlen);
-
- if (__conv != size_t(-1))
- {
- __to_next = reinterpret_cast<extern_type*>(__cto);
- if (__tlen == __tmultiple * (__to_end - __to))
- __ret = codecvt_base::noconv;
- else if (__tlen == 0)
- __ret = codecvt_base::ok;
- else
- __ret = codecvt_base::partial;
- }
- else
- __ret = codecvt_base::error;
- }
- return __ret;
- }
-
- template<typename _InternT, typename _ExternT>
- codecvt_base::result
- codecvt<_InternT, _ExternT, encoding_state>::
- do_in(state_type& __state, const extern_type* __from,
- const extern_type* __from_end, const extern_type*& __from_next,
- intern_type* __to, intern_type* __to_end,
- intern_type*& __to_next) const
- {
- result __ret = codecvt_base::error;
- if (__state.good())
- {
- const descriptor_type& __desc = __state.in_descriptor();
- const size_t __fmultiple = sizeof(extern_type);
- size_t __flen = __fmultiple * (__from_end - __from);
- const size_t __tmultiple = sizeof(intern_type);
- size_t __tlen = __tmultiple * (__to_end - __to);
-
- // Argument list for iconv specifies a byte sequence. Thus,
- // all to/from arrays must be brutally casted to char*.
- char* __cto = reinterpret_cast<char*>(__to);
- char* __cfrom;
- size_t __conv;
-
- // Some encodings need a byte order marker as the first item
- // in the byte stream, to designate endian-ness. The default
- // value for the byte order marker is NULL, so if this is
- // the case, it's not necessary and we can just go on our
- // merry way.
- int __ext_bom = __state.external_bom();
- if (__ext_bom)
- {
- size_t __size = __from_end - __from;
- extern_type* __cfixed = static_cast<extern_type*>
- (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
- __cfixed[0] = static_cast<extern_type>(__ext_bom);
- char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
- __cfrom = reinterpret_cast<char*>(__cfixed);
- __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
- &__flen, &__cto, &__tlen);
- }
- else
- {
- extern_type* __cfixed = const_cast<extern_type*>(__from);
- __cfrom = reinterpret_cast<char*>(__cfixed);
- __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
- &__flen, &__cto, &__tlen);
- }
-
-
- if (__conv != size_t(-1))
- {
- __from_next = reinterpret_cast<const extern_type*>(__cfrom);
- __to_next = reinterpret_cast<intern_type*>(__cto);
- __ret = codecvt_base::ok;
- }
- else
- {
- if (__flen < static_cast<size_t>(__from_end - __from))
- {
- __from_next = reinterpret_cast<const extern_type*>(__cfrom);
- __to_next = reinterpret_cast<intern_type*>(__cto);
- __ret = codecvt_base::partial;
- }
- else
- __ret = codecvt_base::error;
- }
- }
- return __ret;
- }
-
- template<typename _InternT, typename _ExternT>
- int
- codecvt<_InternT, _ExternT, encoding_state>::
- do_encoding() const throw()
- {
- int __ret = 0;
- if (sizeof(_ExternT) <= sizeof(_InternT))
- __ret = sizeof(_InternT) / sizeof(_ExternT);
- return __ret;
- }
-
- template<typename _InternT, typename _ExternT>
- bool
- codecvt<_InternT, _ExternT, encoding_state>::
- do_always_noconv() const throw()
- { return false; }
-
- template<typename _InternT, typename _ExternT>
- int
- codecvt<_InternT, _ExternT, encoding_state>::
- do_length(state_type&, const extern_type* __from,
- const extern_type* __end, size_t __max) const
- { return std::min(__max, static_cast<size_t>(__end - __from)); }
-
- // _GLIBCXX_RESOLVE_LIB_DEFECTS
- // 74. Garbled text for codecvt::do_max_length
- template<typename _InternT, typename _ExternT>
- int
- codecvt<_InternT, _ExternT, encoding_state>::
- do_max_length() const throw()
- { return 1; }
-
- _GLIBCXX_END_NAMESPACE_VERSION
- } // namespace
-
- #endif
|