Path: ...!eternal-september.org!feeder3.eternal-september.org!news.eternal-september.org!eternal-september.org!raubtier-asyl.eternal-september.org!.POSTED!not-for-mail From: Bonita Montero Newsgroups: comp.lang.c++ Subject: Re: notifying from inside or outside Date: Fri, 9 May 2025 21:23:17 +0200 Organization: A noiseless patient Spider Lines: 135 Message-ID: References: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit Injection-Date: Fri, 09 May 2025 21:23:16 +0200 (CEST) Injection-Info: raubtier-asyl.eternal-september.org; posting-host="4ca603212741b472c47a42c9eb719f99"; logging-data="3152454"; mail-complaints-to="abuse@eternal-september.org"; posting-account="U2FsdGVkX1+jAPO0sydUEXUyYZM188wJdfhOOCQuIaI=" User-Agent: Mozilla Thunderbird Cancel-Lock: sha1:JzAHRLh05LzNKmXVV8+JcQdquS0= Content-Language: de-DE In-Reply-To: Bytes: 5093 Am 09.05.2025 um 20:32 schrieb Chris M. Thomasson: > On 5/9/2025 6:13 AM, Bonita Montero wrote: >> The Windows-times were summed-up times where each iteration included >> the former iterations. Now it's corrected: >> >> 10000 rounds >> inside: >>          one: >>                  2.04687 seconds >>          all: >>                  4 seconds >> outside: >>          one: >>                  1.03125 seconds >>          all: >>                  1.14062 seconds > [...] > > I need to find some time to properly study your code, but does this > indicate that windows condvars do "better" when signaled/broadcasted > from the outside? On Linux, notifying from inside is faster, no matter whether you broadcast or notify n times. On Windows, strangely, notifying from outside is faster in general. 
That's the code: #if defined(_WIN32) #include #endif #include #include #include #include #include #include #include #include #if defined(__unix__) #include #endif using namespace std; struct params { params( unsigned argc, char **argv ); bool outside, add, all; }; int main( int argc, char **argv ) { constexpr size_t N = 10'000; cout << N << " rounds" << endl; int hc = thread::hardware_concurrency(), nClients = hc - 1; int64_t tLast = 0; for( unsigned outside = 0; outside <= 1; ++outside ) { cout << (outside ? "outside:" : "inside:") << endl; for( unsigned all = 0; all <= 1; ++all ) { cout << (all ? "\tall:" : "\tone:") << endl; mutex mtx; int signalled = 0; condition_variable cv; atomic_int ai( 0 ); binary_semaphore bs( false ); vector threads; atomic_int64_t nVoluntary( 0 ); atomic_bool stop( false ); for( int c = nClients; c; --c ) threads.emplace_back( [&] { for( size_t r = N; r; --r ) { unique_lock lock( mtx ); cv.wait( lock, [&] { return (bool)signalled; } ); --signalled; lock.unlock(); if( ai.fetch_sub( 1, memory_order_relaxed ) == 1 ) bs.release( 1 ); } #if defined(__unix__) rusage ru; getrusage( RUSAGE_THREAD, &ru ); nVoluntary.fetch_add( ru.ru_nvcsw, memory_order_relaxed ); #endif } ); for( size_t r = N; r; --r ) { auto notify = [&] { if( all ) cv.notify_all(); else for( int c = nClients; c; cv.notify_one(), --c ); }; unique_lock lock( mtx ); signalled = nClients; if( !outside ) notify(); ai.store( nClients, memory_order_relaxed ); lock.unlock(); if( outside ) notify(); bs.acquire(); } stop.store( true, memory_order_relaxed ); threads.resize( 0 ); #if defined(_WIN32) FILETIME ftDummy, ftKernel, ftUser; GetProcessTimes( GetCurrentProcess(), &ftDummy, &ftDummy, &ftKernel, &ftUser ); auto ftToU64 = []( FILETIME const &ft ) { return (uint64_t)ft.dwHighDateTime << 32 | ft.dwLowDateTime; }; int64_t t = ftToU64( ftKernel ) + ftToU64( ftUser ); cout << "\t\t" << (double)(t - tLast) / 1.0e7 << " seconds" << endl; tLast = t; #elif defined(__unix__) rusage ru; 
getrusage( RUSAGE_SELF, &ru ); auto tvToU64 = []( timeval const &tv ) { return (uint64_t)tv.tv_sec * 1'000'000u + tv.tv_usec; }; int64_t t = tvToU64( ru.ru_utime ) + tvToU64( ru.ru_stime ); cout << "\t\t" << (double)nVoluntary.load( memory_order_relaxed ) / nClients << " context switches per thread" << endl; cout << "\t\t" << (double)(t - tLast) / 1.0e6 << " seconds" << endl; tLast = t; #endif } } }