Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb_misc_ex.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16 
17 
18 
19 */
20 
21 // Source file for miscellaneous entities that are infrequently referenced by
22 // an executing program, and implementation of which requires dynamic linking.
23 
24 #include "tbb_misc.h"
25 
26 #if !defined(__TBB_HardwareConcurrency)
27 
28 #include "dynamic_link.h"
29 #include <stdio.h>
30 #include <limits.h>
31 
32 #if _WIN32||_WIN64
34 #if __TBB_WIN8UI_SUPPORT
35 #include <thread>
36 #endif
37 #else
38 #include <unistd.h>
39 #if __linux__
40 #include <sys/sysinfo.h>
41 #include <string.h>
42 #include <sched.h>
43 #include <errno.h>
44 #elif __sun
45 #include <sys/sysinfo.h>
46 #elif __FreeBSD__
47 #include <errno.h>
48 #include <string.h>
49 #include <sys/param.h> // Required by <sys/cpuset.h>
50 #include <sys/cpuset.h>
51 #endif
52 #endif
53 
54 namespace tbb {
55 namespace internal {
56 
57 #if __TBB_USE_OS_AFFINITY_SYSCALL
58 
59 #if __linux__
// Handlers for interoperation with libiomp.
// Pointer to the libiomp5 entry point that restores the thread's original
// affinity mask; filled in by dynamic_link(), NULL if libiomp5 is absent.
static int (*libiomp_try_restoring_original_mask)();
// Table for mapping to libiomp entry points: links the exported symbol
// kmp_set_thread_affinity_mask_initial to the handler pointer above.
static const dynamic_link_descriptor iompLinkTable[] = {
    DLD_NOWEAK( kmp_set_thread_affinity_mask_initial, libiomp_try_restoring_original_mask )
};
66 #endif
67 
68 static void set_thread_affinity_mask( size_t maskSize, const basic_mask_t* threadMask ) {
69 #if __linux__
70  if( sched_setaffinity( 0, maskSize, threadMask ) )
71 #else /* FreeBSD */
72  if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
73 #endif
74  runtime_warning( "setaffinity syscall failed" );
75 }
76 
77 static void get_thread_affinity_mask( size_t maskSize, basic_mask_t* threadMask ) {
78 #if __linux__
79  if( sched_getaffinity( 0, maskSize, threadMask ) )
80 #else /* FreeBSD */
81  if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
82 #endif
83  runtime_warning( "getaffinity syscall failed" );
84 }
85 
// Process affinity mask captured by initialize_hardware_concurrency_info();
// an array of num_masks elements owned by this translation unit and released
// in destroy_process_mask().
static basic_mask_t* process_mask;
// Number of basic_mask_t elements in process_mask (0 until initialization).
static int num_masks;
88 
89 void destroy_process_mask() {
90  if( process_mask ) {
91  delete [] process_mask;
92  }
93 }
94 
95 #define curMaskSize sizeof(basic_mask_t) * num_masks
96 affinity_helper::~affinity_helper() {
97  if( threadMask ) {
98  if( is_changed ) {
99  set_thread_affinity_mask( curMaskSize, threadMask );
100  }
101  delete [] threadMask;
102  }
103 }
// Captures the calling thread's current affinity mask so the destructor can
// restore it later. With restore_process_mask == true, the thread mask is
// compared against the stored process-wide mask and, if they differ, the
// process mask is applied immediately; otherwise the caller is assumed to
// change the mask itself. No-op if a mask was already captured or the mask
// size (num_masks) is not yet known.
void affinity_helper::protect_affinity_mask( bool restore_process_mask ) {
    if( threadMask == NULL && num_masks ) { // TODO: assert num_masks validity?
        threadMask = new basic_mask_t [num_masks];
        memset( threadMask, 0, curMaskSize );
        get_thread_affinity_mask( curMaskSize, threadMask );
        if( restore_process_mask ) {
            __TBB_ASSERT( process_mask, "A process mask is requested but not yet stored" );
            // Nonzero memcmp result means the thread mask deviates from the
            // process mask and must be restored by the destructor.
            is_changed = memcmp( process_mask, threadMask, curMaskSize );
            if( is_changed )
                set_thread_affinity_mask( curMaskSize, process_mask );
        } else {
            // Assume that the mask will be changed by the caller.
            is_changed = 1;
        }
    }
}
121  if( threadMask ) {
122  delete [] threadMask;
123  threadMask = NULL;
124  }
125  is_changed = 0;
126 }
127 #undef curMaskSize
128 
// One-time initialization state consumed by atomic_do_once().
static atomic<do_once_state> hardware_concurrency_info;

// Cached result of initialize_hardware_concurrency_info(); at least 1 once set.
static int theNumProcs;
132 
// Determines how many processors the current process may run on by querying
// its affinity mask, and caches the result in theNumProcs. The mask buffer
// is grown geometrically until the getaffinity syscall accepts its size
// (required on systems whose CPU count exceeds one basic_mask_t).
static void initialize_hardware_concurrency_info () {
    int err;
    int availableProcs = 0;
    int numMasks = 1;
#if __linux__
#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
    int maxProcs = INT_MAX; // To check the entire mask.
    int pid = 0; // Get the mask of the calling thread.
#else
    int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
    int pid = getpid();
#endif
#else /* FreeBSD >= 7.1 */
    int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
#endif
    basic_mask_t* processMask;
    const size_t BasicMaskSize = sizeof(basic_mask_t);
    // Retry with a doubled buffer while the kernel reports it too small
    // (EINVAL on Linux, ERANGE on FreeBSD), up to a fixed sanity cap on
    // the number of representable CPUs.
    for (;;) {
        const int curMaskSize = BasicMaskSize * numMasks;
        processMask = new basic_mask_t[numMasks];
        memset( processMask, 0, curMaskSize );
#if __linux__
        err = sched_getaffinity( pid, curMaskSize, processMask );
        if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 )
            break;
#else /* FreeBSD >= 7.1 */
        // CPU_LEVEL_WHICH - anonymous (current) mask, CPU_LEVEL_CPUSET - assigned mask
#if __TBB_MAIN_THREAD_AFFINITY_BROKEN
        err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, curMaskSize, processMask );
#else
        err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask );
#endif
        if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 )
            break;
#endif /* FreeBSD >= 7.1 */
        delete[] processMask;
        numMasks <<= 1;
    }
    if ( !err ) {
        // We have found the mask size and captured the process affinity mask into processMask.
        num_masks = numMasks; // do here because it's needed for affinity_helper to work
#if __linux__
        // For better coexistence with libiomp which might have changed the mask already,
        // check for its presence and ask it to restore the mask.
        dynamic_link_handle libhandle;
        if ( dynamic_link( "libiomp5.so", iompLinkTable, 1, &libhandle, DYNAMIC_LINK_GLOBAL ) ) {
            // We have found the symbol provided by libiomp5 for restoring original thread affinity.
            affinity_helper affhelp;
            affhelp.protect_affinity_mask( /*restore_process_mask=*/false );
            if ( libiomp_try_restoring_original_mask()==0 ) {
                // Now we have the right mask to capture, restored by libiomp.
                const int curMaskSize = BasicMaskSize * numMasks;
                memset( processMask, 0, curMaskSize );
                get_thread_affinity_mask( curMaskSize, processMask );
            } else
                affhelp.dismiss();  // thread mask has not changed
            dynamic_unlink( libhandle );
            // Destructor of affinity_helper restores the thread mask (unless dismissed).
        }
#endif
        // Count the bits set in the captured mask, capped at maxProcs.
        for ( int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) {
            for ( size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) {
                if ( CPU_ISSET( i, processMask + m ) )
                    ++availableProcs;
            }
        }
        process_mask = processMask;
    }
    else {
        // Failed to get the process affinity mask; assume the whole machine can be used.
        availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs;
        delete[] processMask;
    }
    theNumProcs = availableProcs > 0 ? availableProcs : 1; // Fail safety strap
    __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL );
}
209 
211  atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
212  return theNumProcs;
213 }
214 
215 /* End of __TBB_USE_OS_AFFINITY_SYSCALL implementation */
216 #elif __ANDROID__
217 
218 // Work-around for Android that reads the correct number of available CPUs since system calls are unreliable.
219 // Format of "present" file is: ([<int>-<int>|<int>],)+
221  FILE *fp = fopen("/sys/devices/system/cpu/present", "r");
222  if (fp == NULL) return 1;
223  int num_args, lower, upper, num_cpus=0;
224  while ((num_args = fscanf(fp, "%u-%u", &lower, &upper)) != EOF) {
225  switch(num_args) {
226  case 2: num_cpus += upper - lower + 1; break;
227  case 1: num_cpus += 1; break;
228  }
229  fscanf(fp, ",");
230  }
231  return (num_cpus > 0) ? num_cpus : 1;
232 }
233 
234 #elif defined(_SC_NPROCESSORS_ONLN)
235 
237  int n = sysconf(_SC_NPROCESSORS_ONLN);
238  return (n > 0) ? n : 1;
239 }
240 
241 #elif _WIN32||_WIN64
242 
// One-time initialization state consumed by atomic_do_once().
static atomic<do_once_state> hardware_concurrency_info;

// Sentinel group index: asks GetActiveProcessorCount for the total across
// all processor groups.
static const WORD TBB_ALL_PROCESSOR_GROUPS = 0xffff;

// Statically allocate an array for processor group information.
// Windows 7 supports maximum 4 groups, but let's look ahead a little.
static const WORD MaxProcessorGroups = 64;
250 
// Per-group processor accounting used to distribute threads across Windows
// processor groups.
struct ProcessorGroupInfo {
    DWORD_PTR mask;             // Affinity mask covering this group's processors
    int numProcs;               // Number of processors in this group
    int numProcsRunningTotal;   // Processors in this group plus all preceding groups

    // Total number of processor groups in the system (1 until detection runs).
    static int NumGroups;

    // Index of the group with the slot reserved for the first master thread;
    // set from the group of the thread that ran initialization.
    static int HoleIndex;
};

int ProcessorGroupInfo::NumGroups = 1;
int ProcessorGroupInfo::HoleIndex = 0;

ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups];
273 
// Local mirror of the Win32 GROUP_AFFINITY structure, passed to the
// dynamically linked Get/SetThreadGroupAffinity entry points.
// NOTE(review): field layout must match the Windows SDK definition — confirm
// against <winnt.h> when targeting a new toolchain.
struct TBB_GROUP_AFFINITY {
    DWORD_PTR Mask;
    WORD   Group;
    WORD   Reserved[3];
};
279 
// Processor-groups API entry points, resolved at runtime from Kernel32.dll
// via dynamic_link(); left NULL when the OS does not provide them (the
// initialization code checks for NULL before use).
static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = NULL;
static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = NULL;
static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread,
                        const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff );
static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* );

// Maps Kernel32 export names to the function pointers above.
static const dynamic_link_descriptor ProcessorGroupsApiLinkTable[] = {
      DLD(GetActiveProcessorCount, TBB_GetActiveProcessorCount)
    , DLD(GetActiveProcessorGroupCount, TBB_GetActiveProcessorGroupCount)
    , DLD(SetThreadGroupAffinity, TBB_SetThreadGroupAffinity)
    , DLD(GetThreadGroupAffinity, TBB_GetThreadGroupAffinity)
};
292 
// Discovers the concurrency available to the process: counts the bits of
// the process affinity mask and, when the processor-groups API is present
// and the process is unrestricted, records per-group sizes and running
// subtotals in theProcessorGroups.
static void initialize_hardware_concurrency_info () {
#if __TBB_WIN8UI_SUPPORT
    // For these applications processor groups info is unavailable
    // Setting up a number of processors for one processor group
    theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency();
#else /* __TBB_WIN8UI_SUPPORT */
    dynamic_link( "Kernel32.dll", ProcessorGroupsApiLinkTable,
                  sizeof(ProcessorGroupsApiLinkTable)/sizeof(dynamic_link_descriptor) );
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    DWORD_PTR pam, sam, m = 1;
    GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam );
    int nproc = 0;
    // Count the set bits in the process affinity mask.
    for ( size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) {
        if ( pam & m )
            ++nproc;
    }
    __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, NULL );
    // By default setting up a number of processors for one processor group
    theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc;
    // Setting up processor groups in case the process does not restrict affinity mask and more than one processor group is present
    if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) {
        // The process does not have restricting affinity mask and multiple processor groups are possible
        ProcessorGroupInfo::NumGroups = (int)TBB_GetActiveProcessorGroupCount();
        __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL );
        // Fail safety bootstrap. Release versions will limit available concurrency
        // level, while debug ones would assert.
        if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups )
            ProcessorGroupInfo::NumGroups = MaxProcessorGroups;
        if ( ProcessorGroupInfo::NumGroups > 1 ) {
            TBB_GROUP_AFFINITY ga;
            // Remember the initializing thread's group; FindProcessorGroupIndex
            // treats a slot there as reserved for the first master thread.
            if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) )
                ProcessorGroupInfo::HoleIndex = ga.Group;
            int nprocs = 0;
            // Record each group's size and the running subtotal of processors.
            for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) {
                ProcessorGroupInfo &pgi = theProcessorGroups[i];
                pgi.numProcs = (int)TBB_GetActiveProcessorCount(i);
                __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, NULL );
                // Full group: use an all-ones mask to avoid shifting by the type width.
                pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ? ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1;
                pgi.numProcsRunningTotal = nprocs += pgi.numProcs;
            }
            __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL );
        }
    }
#endif /* __TBB_WIN8UI_SUPPORT */

    PrintExtraVersionInfo("Processor groups", "%d", ProcessorGroupInfo::NumGroups);
    if (ProcessorGroupInfo::NumGroups>1)
        for (int i=0; i<ProcessorGroupInfo::NumGroups; ++i)
            PrintExtraVersionInfo( "----- Group", "%d: size %d", i, theProcessorGroups[i].numProcs);
}
344 
// Returns the number of processor groups detected during initialization.
// Valid only after AvailableHwConcurrency() has completed (asserted below).
int NumberOfProcessorGroups() {
    __TBB_ASSERT( hardware_concurrency_info == initialization_complete, "NumberOfProcessorGroups is used before AvailableHwConcurrency" );
    return ProcessorGroupInfo::NumGroups;
}
349 
350 // Offset for the slot reserved for the first master thread
351 #define HoleAdjusted(procIdx, grpIdx) (procIdx + (holeIdx <= grpIdx))
352 
// Maps a zero-based thread index to the processor group it should occupy.
// Groups are filled in order of their running subtotals, skipping the slot
// in HoleIndex's group that is reserved for the first master thread
// (see the HoleAdjusted macro above).
int FindProcessorGroupIndex ( int procIdx ) {
    // In case of oversubscription spread extra workers in a round robin manner
    int holeIdx;
    const int numProcs = theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
    if ( procIdx >= numProcs - 1 ) {
        // Oversubscribed: wrap the index around and ignore the reserved hole.
        holeIdx = INT_MAX;
        procIdx = (procIdx - numProcs + 1) % numProcs;
    }
    else
        holeIdx = ProcessorGroupInfo::HoleIndex;
    __TBB_ASSERT( hardware_concurrency_info == initialization_complete, "FindProcessorGroupIndex is used before AvailableHwConcurrency" );
    // Approximate the likely group index assuming all groups are of the same size
    int i = procIdx / theProcessorGroups[0].numProcs;
    // Make sure the approximation is a valid group index
    if (i >= ProcessorGroupInfo::NumGroups) i = ProcessorGroupInfo::NumGroups-1;
    // Now adjust the approximation up or down
    if ( theProcessorGroups[i].numProcsRunningTotal > HoleAdjusted(procIdx, i) ) {
        // Walk down while the previous group's subtotal still covers procIdx.
        while ( theProcessorGroups[i].numProcsRunningTotal - theProcessorGroups[i].numProcs > HoleAdjusted(procIdx, i) ) {
            __TBB_ASSERT( i > 0, NULL );
            --i;
        }
    }
    else {
        // Walk up until this group's subtotal exceeds procIdx.
        do {
            ++i;
        } while ( theProcessorGroups[i].numProcsRunningTotal <= HoleAdjusted(procIdx, i) );
    }
    __TBB_ASSERT( i < ProcessorGroupInfo::NumGroups, NULL );
    return i;
}
383 
384 void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ) {
385  __TBB_ASSERT( hardware_concurrency_info == initialization_complete, "MoveThreadIntoProcessorGroup is used before AvailableHwConcurrency" );
386  if ( !TBB_SetThreadGroupAffinity )
387  return;
388  TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} };
389  TBB_SetThreadGroupAffinity( hThread, &ga, NULL );
390 }
391 
393  atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
394  return theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
395 }
396 
397 /* End of _WIN32||_WIN64 implementation */
398 #else
399  #error AvailableHwConcurrency is not implemented for this OS
400 #endif
401 
402 } // namespace internal
403 } // namespace tbb
404 
405 #endif /* !__TBB_HardwareConcurrency */
void __TBB_EXPORTED_FUNC runtime_warning(const char *format,...)
Report a runtime warning.
#define DLD(s, h)
The helper to construct dynamic_link_descriptor structure.
Definition: dynamic_link.h:60
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:169
void PrintExtraVersionInfo(const char *category, const char *format,...)
Prints arbitrary extra TBB version information on stderr.
Definition: tbb_misc.cpp:202
Association between a handler name and location of pointer to it.
Definition: dynamic_link.h:64
OPEN_INTERNAL_NAMESPACE bool dynamic_link(const char *, const dynamic_link_descriptor *, size_t, dynamic_link_handle *handle, int)
The graph class.
void destroy_process_mask()
Definition: tbb_misc.h:263
void * dynamic_link_handle
Definition: dynamic_link.h:78
int AvailableHwConcurrency()
Returns maximal parallelism level supported by the current OS configuration.
void atomic_do_once(const F &initializer, atomic< do_once_state > &state)
One-time initialization function.
Definition: tbb_misc.h:210
const int DYNAMIC_LINK_GLOBAL
Definition: dynamic_link.h:81
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d int
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int mask
void dynamic_unlink(dynamic_link_handle)
#define DLD_NOWEAK(s, h)
Definition: dynamic_link.h:61

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.