Warning: mysqli::__construct(): (HY000/1203): User howardkn already has more than 'max_user_connections' active connections in D:\Inetpub\vhosts\howardknight.net\al.howardknight.net\includes\artfuncs.php on line 21
Failed to connect to MySQL: (1203) User howardkn already has more than 'max_user_connections' active connections
Warning: mysqli::query(): Couldn't fetch mysqli in D:\Inetpub\vhosts\howardknight.net\al.howardknight.net\index.php on line 66
Article <vavsr7$14n11$1@raubtier-asyl.eternal-september.org>
Deutsch   English   Français   Italiano  
<vavsr7$14n11$1@raubtier-asyl.eternal-september.org>

View for Bookmarking (what is this?)
Look up another Usenet article

Path: news.eternal-september.org!eternal-september.org!news.eternal-september.org!raubtier-asyl.eternal-september.org!.POSTED!not-for-mail
From: Bonita Montero <Bonita.Montero@gmail.com>
Newsgroups: comp.lang.c++
Subject: UTF16 <-> UTF32
Date: Sat, 31 Aug 2024 22:01:43 +0200
Organization: A noiseless patient Spider
Lines: 98
Message-ID: <vavsr7$14n11$1@raubtier-asyl.eternal-september.org>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8; format=flowed
Content-Transfer-Encoding: 7bit
Injection-Date: Sat, 31 Aug 2024 22:01:43 +0200 (CEST)
Injection-Info: raubtier-asyl.eternal-september.org; posting-host="f3ac3daf96e4190349ad23da2cc878b7";
	logging-data="1203233"; mail-complaints-to="abuse@eternal-september.org";	posting-account="U2FsdGVkX1/hX0msLSj+AphfevzKJq9EU2QBSuEKRZ4="
User-Agent: Mozilla Thunderbird
Cancel-Lock: sha1:EgpdMNRGlNldQxnwN0RMJGm4FH8=
Content-Language: de-DE

Today I needed conversion functions from UTF-32 to UTF-16 and in the
opposite direction. I wanted to allow reuse of the result string, so I
decided to pass the result string as a reference parameter. That way
the result string keeps its capacity across calls.
I think there's no way to implement that code faster.


// Converts the UTF-16 text in `str` to UTF-32 and stores it in `u32Str`.
//
// The result is passed by reference so that a caller converting many strings
// can reuse one buffer and keep its capacity.
//
// Returns true on success. Returns false, leaving `u32Str` unmodified, when
// `str` contains an unpaired surrogate: a low surrogate that does not follow a
// high surrogate, or a high surrogate that is not followed by a low surrogate
// (including a high surrogate at the very end of the input).
bool u16ToU32( std::u16string_view str, std::u32string &u32Str )
{
	// Decodes `str` and hands every code point to `fn`. `errTag` is
	// std::true_type for the validating pass and std::false_type for the
	// second pass, which runs over input already known to be well-formed and
	// therefore compiles without any checks.
	auto iterate = [&]( auto errTag, auto fn ) -> bool
	{
		constexpr bool Err = decltype(errTag)::value;
		constexpr char16_t
			SURR_HDR_MSK = 0xF800,	// top five bits: identical for every surrogate
			SURR_HDR = 0xD800,
			SURR_MASK = 0xFC00,	// top six bits: tell high from low surrogate
			HIGH_SURR = 0xD800,
			LOW_SURR = 0xDC00;
		constexpr char32_t
			PAYLOAD_MASK = 0x3FF,	// ten payload bits carried by each surrogate
			SUPPLEMENTARY_BASE = 0x10000;
		for( auto it = str.begin(), end = str.end(); it != end; )
		{
			const char16_t lead = *it;
			if( (lead & SURR_HDR_MSK) != SURR_HDR ) [[likely]]
			{
				// Not a surrogate: the code unit is the code point.
				fn( static_cast<char32_t>( lead ) );
				++it;
				continue;
			}
			if constexpr( Err )
			{
				if( (lead & SURR_MASK) != HIGH_SURR ) [[unlikely]]
					return false;	// pair starts with a low surrogate
				if( it + 1 == end ) [[unlikely]]
					return false;	// input ends in the middle of a pair
				if( (it[1] & SURR_MASK) != LOW_SURR ) [[unlikely]]
					return false;	// high surrogate without its low partner
			}
			const char16_t trail = it[1];
			const char32_t high = (static_cast<char32_t>( lead ) & PAYLOAD_MASK) << 10;
			const char32_t low = static_cast<char32_t>( trail ) & PAYLOAD_MASK;
			fn( static_cast<char32_t>( SUPPLEMENTARY_BASE + (high | low) ) );
			it += 2;
		}
		return true;
	};
	// Pass 1: validate and count, so the output is sized exactly once.
	std::size_t units = 0;
	if( !iterate( std::true_type(), [&]( char32_t ) { ++units; } ) ) [[unlikely]]
		return false;
	// Pass 2: decode straight into the destination buffer. It writes exactly
	// `units` elements because it walks the same input as pass 1.
	auto writeAll = [&]( char32_t *out )
	{
		iterate( std::false_type(), [&]( char32_t c ) { *out++ = c; } );
	};
#if defined( __cpp_lib_string_resize_and_overwrite )
	// Skips the zero-fill that resize() would perform before the overwrite.
	u32Str.resize_and_overwrite( units, [&]( char32_t *p, std::size_t n )
		{
			writeAll( p );
			return n;
		} );
#else
	// Library without resize_and_overwrite: same result via resize().
	u32Str.resize( units );
	writeAll( u32Str.data() );
#endif
	return true;
}

pair<bool, u32string> u16ToU32( u16string_view str )
{
	u32string u32Str;
	if( !u16ToU32( str, u32Str ) ) [[unlikely]]
		return { false, {} };
	return { true, move( u32Str ) };
}

// Converts the UTF-32 text in `str` to UTF-16 and stores it in `u16Str`.
//
// The result is passed by reference so that a caller converting many strings
// can reuse one buffer and keep its capacity.
//
// Returns true on success. Returns false, leaving `u16Str` unmodified, when
// `str` contains a value that is not a Unicode scalar value: anything above
// 0x10FFFF, or any surrogate code point in 0xD800..0xDFFF. High surrogates
// (0xD800..0xDBFF) are rejected just like low ones, because copying one
// through would put an unpaired surrogate into the UTF-16 output.
bool u32ToU16( std::u32string_view str, std::u16string &u16Str )
{
	// Encodes `str` and hands every UTF-16 code unit to `fn`. `errTag` is
	// std::true_type for the validating pass and std::false_type for the
	// second pass, which runs over input already known to be valid and
	// therefore compiles without any checks.
	auto iterate = [&]( auto errTag, auto fn ) -> bool
	{
		constexpr bool Err = decltype(errTag)::value;
		constexpr char32_t
			UNICODE_MAX = 0x10FFFF,
			BMP_MAX = 0xFFFF,	// largest value that fits one code unit
			SUPPLEMENTARY_BASE = 0x10000,
			HIGH_SURR = 0xD800,	// first surrogate code point
			LOW_SURR = 0xDC00,
			END_SURR = 0xDFFF,	// last surrogate code point
			PAYLOAD_MASK = 0x3FF;	// ten payload bits carried by each surrogate
		for( const char32_t c : str )
		{
			if constexpr( Err )
			{
				if( c > UNICODE_MAX || (c >= HIGH_SURR && c <= END_SURR) ) [[unlikely]]
					return false;
			}
			if( c <= BMP_MAX ) [[likely]]
				fn( static_cast<char16_t>( c ) );
			else
			{
				// Supplementary plane: split the 20-bit offset into a pair.
				const char32_t offset = c - SUPPLEMENTARY_BASE;
				fn( static_cast<char16_t>( HIGH_SURR | (offset >> 10) ) );
				fn( static_cast<char16_t>( LOW_SURR | (offset & PAYLOAD_MASK) ) );
			}
		}
		return true;
	};
	// Pass 1: validate and count, so the output is sized exactly once.
	std::size_t units = 0;
	if( !iterate( std::true_type(), [&]( char16_t ) { ++units; } ) ) [[unlikely]]
		return false;
	// Pass 2: encode straight into the destination buffer. It writes exactly
	// `units` elements because it walks the same input as pass 1.
	auto writeAll = [&]( char16_t *out )
	{
		iterate( std::false_type(), [&]( char16_t c ) { *out++ = c; } );
	};
#if defined( __cpp_lib_string_resize_and_overwrite )
	// Skips the zero-fill that resize() would perform before the overwrite.
	u16Str.resize_and_overwrite( units, [&]( char16_t *p, std::size_t n )
		{
			writeAll( p );
			return n;
		} );
#else
	// Library without resize_and_overwrite: same result via resize().
	u16Str.resize( units );
	writeAll( u16Str.data() );
#endif
	return true;
}

pair<bool, u16string> u32ToU16( u32string_view str )
{
	u16string u16Str;
	if( !u32ToU16( str, u16Str ) ) [[unlikely]]
		return { false, {} };
	return { true, move( u16Str ) };
}