Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbbbind.cpp
/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbb/tbb_stddef.h" // For correct linking with TBB on Windows

#include "tbb/task_arena.h"
#include "tbb/task_scheduler_observer.h"
#include "tbb/tbb_allocator.h"

#if _MSC_VER && !__INTEL_COMPILER
#pragma warning( push )
#pragma warning( disable : 4100 )
#endif
#include <hwloc.h>
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning( pop )
#endif

#include <vector>

// Most hwloc calls return a negative exit code on error.
// This macro tracks error codes that are returned from the hwloc interfaces.
#define assertion_hwloc_wrapper(command, ...) \
    __TBB_ASSERT_EX( (command(__VA_ARGS__)) >= 0, "Error occurred during call to hwloc API.");

namespace tbb {
namespace internal {

//------------------------------------------------------------------------
// Information about the machine's hardware that TBB happens to be running on
//------------------------------------------------------------------------
class platform_topology {
    friend class numa_affinity_handler;

    static hwloc_topology_t topology;
    static hwloc_cpuset_t process_cpu_affinity_mask;
    static hwloc_nodeset_t process_node_affinity_mask;
    static std::vector<hwloc_cpuset_t> affinity_masks_list;

    static std::vector<int> default_concurrency_list;
    static std::vector<int> numa_indexes_list;
    static int numa_nodes_count;

    enum init_stages { uninitialized, started, topology_allocated, topology_loaded, topology_parsed };
    static init_stages initialization_state;

    // Binding threads to NUMA nodes located in other Windows processor groups
    // is allowed only if the machine topology contains several Windows processor groups
    // and the process affinity mask was not limited manually (an affinity mask cannot
    // violate processor group boundaries).
    static bool intergroup_binding_allowed(size_t groups_num) { return groups_num > 1; }

public:
    typedef hwloc_cpuset_t affinity_mask;
    typedef hwloc_const_cpuset_t const_affinity_mask;

    static bool is_topology_parsed() { return initialization_state == topology_parsed; }

    static void initialize( size_t groups_num ) {
        if ( initialization_state != uninitialized )
            return;
        initialization_state = started;

        // Parse topology
        if ( hwloc_topology_init( &topology ) == 0 ) {
            initialization_state = topology_allocated;
            if ( hwloc_topology_load( topology ) == 0 ) {
                initialization_state = topology_loaded;
            }
        }

        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            if ( initialization_state == topology_allocated ) {
                hwloc_topology_destroy(topology);
            }
            numa_nodes_count = 1;
            numa_indexes_list.push_back(-1);
            default_concurrency_list.push_back(-1);
            return;
        }

        // Getting the process affinity mask
        if ( intergroup_binding_allowed(groups_num) ) {
            process_cpu_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
            process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
        } else {
            process_cpu_affinity_mask = hwloc_bitmap_alloc();
            process_node_affinity_mask = hwloc_bitmap_alloc();

            assertion_hwloc_wrapper(hwloc_get_cpubind, topology, process_cpu_affinity_mask, 0);
            hwloc_cpuset_to_nodeset(topology, process_cpu_affinity_mask, process_node_affinity_mask);
        }

        // If the system contains no NUMA nodes, HWLOC 1.11 returns an infinitely filled bitmap.
        // hwloc_bitmap_weight() returns a negative value for such bitmaps, so we use this check
        // to change the way the topology is initialized.
        if (hwloc_bitmap_weight(process_node_affinity_mask) < 0) {
            numa_nodes_count = 1;
            numa_indexes_list.push_back(0);
            default_concurrency_list.push_back(hwloc_bitmap_weight(process_cpu_affinity_mask));

            affinity_masks_list.push_back(hwloc_bitmap_dup(process_cpu_affinity_mask));
            initialization_state = topology_parsed;
            return;
        }

        // Get the number of available NUMA nodes
        numa_nodes_count = hwloc_bitmap_weight(process_node_affinity_mask);
        __TBB_ASSERT(numa_nodes_count > 0, "Any system must contain one or more NUMA nodes");

        // Get the NUMA logical indexes list
        unsigned counter = 0;
        int i = 0;
        int max_numa_index = -1;
        numa_indexes_list.resize(numa_nodes_count);
        hwloc_obj_t node_buffer;
        hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
            node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
            numa_indexes_list[counter] = static_cast<int>(node_buffer->logical_index);

            if ( numa_indexes_list[counter] > max_numa_index ) {
                max_numa_index = numa_indexes_list[counter];
            }

            counter++;
        } hwloc_bitmap_foreach_end();
        __TBB_ASSERT(max_numa_index >= 0, "Maximal NUMA index must not be negative");

        // Fill concurrency and affinity masks lists
        default_concurrency_list.resize(max_numa_index + 1);
        affinity_masks_list.resize(max_numa_index + 1);

        int index = 0;
        hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
            node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
            index = static_cast<int>(node_buffer->logical_index);

            hwloc_cpuset_t& current_mask = affinity_masks_list[index];
            current_mask = hwloc_bitmap_dup(node_buffer->cpuset);

            hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
            __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask), "hwloc detected unavailable NUMA node");
            default_concurrency_list[index] = hwloc_bitmap_weight(current_mask);
        } hwloc_bitmap_foreach_end();
        initialization_state = topology_parsed;
    }

    static void destroy() {
        if ( is_topology_parsed() ) {
            for (int i = 0; i < numa_nodes_count; i++) {
                hwloc_bitmap_free(affinity_masks_list[numa_indexes_list[i]]);
            }
            hwloc_bitmap_free(process_node_affinity_mask);
            hwloc_bitmap_free(process_cpu_affinity_mask);
        }

        if ( initialization_state >= topology_allocated ) {
            hwloc_topology_destroy(topology);
        }

        initialization_state = uninitialized;
    }

    static void fill( int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        nodes_count = numa_nodes_count;
        indexes_list = &numa_indexes_list.front();
        concurrency_list = &default_concurrency_list.front();
    }

    static affinity_mask allocate_process_affinity_mask() {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        return hwloc_bitmap_dup(process_cpu_affinity_mask);
    }

    static void free_affinity_mask( affinity_mask mask_to_free ) {
        hwloc_bitmap_free(mask_to_free); // If bitmap is NULL, no operation is performed.
    }

    static void store_current_affinity_mask( affinity_mask current_mask ) {
        assertion_hwloc_wrapper(hwloc_get_cpubind, topology, current_mask, HWLOC_CPUBIND_THREAD);

        hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
        __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask),
            "Current affinity mask must intersects with process affinity mask");
    }

    static void set_new_affinity_mask( const_affinity_mask new_mask ) {
        assertion_hwloc_wrapper(hwloc_set_cpubind, topology, new_mask, HWLOC_CPUBIND_THREAD);
    }

    static const_affinity_mask get_node_affinity_mask( int node_index ) {
        __TBB_ASSERT((int)affinity_masks_list.size() > node_index,
            "Trying to get affinity mask for uninitialized NUMA node");
        return affinity_masks_list[node_index];
    }
};

hwloc_topology_t platform_topology::topology = NULL;
hwloc_cpuset_t platform_topology::process_cpu_affinity_mask = NULL;
hwloc_nodeset_t platform_topology::process_node_affinity_mask = NULL;
std::vector<hwloc_cpuset_t> platform_topology::affinity_masks_list;

std::vector<int> platform_topology::default_concurrency_list;
std::vector<int> platform_topology::numa_indexes_list;
int platform_topology::numa_nodes_count = 0;

platform_topology::init_stages platform_topology::initialization_state = platform_topology::uninitialized;

class numa_affinity_handler {
    // The following vector saves the thread affinity mask on scheduler entry
    // so that it can be restored for this thread on scheduler exit.
    typedef std::vector<platform_topology::affinity_mask> affinity_masks_container;
    affinity_masks_container affinity_backup;

public:
    numa_affinity_handler( size_t size ) : affinity_backup(size) {
        for (affinity_masks_container::iterator it = affinity_backup.begin();
             it != affinity_backup.end(); it++) {
            *it = platform_topology::allocate_process_affinity_mask();
        }
    }

    ~numa_affinity_handler() {
        for (affinity_masks_container::iterator it = affinity_backup.begin();
             it != affinity_backup.end(); it++) {
            platform_topology::free_affinity_mask(*it);
        }
    }

    void bind_thread_to_node( unsigned slot_num, unsigned numa_node_id ) {
        __TBB_ASSERT(slot_num < affinity_backup.size(),
            "The slot number is greater than the number of slots in the arena");
        __TBB_ASSERT(platform_topology::is_topology_parsed(),
            "Trying to get access to uninitialized platform_topology");
        platform_topology::store_current_affinity_mask(affinity_backup[slot_num]);

        platform_topology::set_new_affinity_mask(
            platform_topology::get_node_affinity_mask(numa_node_id));
    }

    void restore_previous_affinity_mask( unsigned slot_num ) {
        __TBB_ASSERT(platform_topology::is_topology_parsed(),
            "Trying to get access to uninitialized platform_topology");
        platform_topology::set_new_affinity_mask(affinity_backup[slot_num]);
    };

};

class numa_binding_observer : public task_scheduler_observer {
    int my_numa_node_id;
    numa_affinity_handler numa_handler;
public:
    numa_binding_observer( task_arena* ta, int numa_id, int num_slots )
        : task_scheduler_observer(*ta)
        , my_numa_node_id(numa_id)
        , numa_handler(num_slots)
    {}

    void on_scheduler_entry( bool ) __TBB_override {
        numa_handler.bind_thread_to_node(this_task_arena::current_thread_index(), my_numa_node_id);
    }

    void on_scheduler_exit( bool ) __TBB_override {
        numa_handler.restore_previous_affinity_mask(this_task_arena::current_thread_index());
    }
};

extern "C" { // exported to TBB interfaces

void initialize_numa_topology( size_t groups_num,
                               int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
    platform_topology::initialize(groups_num);
    platform_topology::fill(nodes_count, indexes_list, concurrency_list);
}

task_scheduler_observer* subscribe_arena( task_arena* ta, int numa_id, int num_slots ) {
    task_scheduler_observer* binding_observer = new numa_binding_observer(ta, numa_id, num_slots);
    __TBB_ASSERT(binding_observer, "Failure during NUMA binding observer allocation and construction");
    binding_observer->observe(true);
    return binding_observer;
}

void unsubscribe_arena( task_scheduler_observer* binding_observer ) {
    __TBB_ASSERT(binding_observer, "Trying to deallocate NULL pointer");
    binding_observer->observe(false);
    delete binding_observer;
}

} // extern "C"

} // namespace internal
} // namespace tbb

#undef assertion_hwloc_wrapper
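
The extern "C" functions above are the only entry points the rest of TBB uses from this binding library. The sketch below illustrates one plausible calling sequence; it is not taken from TBB's own sources. It assumes the three entry points are redeclared exactly as defined here, and the helper name example_bind_arena_to_numa_node, the choice of the first reported node, and the slot count passed to subscribe_arena are illustrative placeholders.

// Hypothetical usage sketch (not part of tbbbind.cpp): query the NUMA topology,
// pin the worker threads of one task_arena to a single node, then detach.
#include <cstddef>
#include "tbb/task_arena.h"
#include "tbb/task_scheduler_observer.h"

namespace tbb { namespace internal {
// Redeclarations of the entry points defined above, so this sketch is self-contained.
extern "C" void initialize_numa_topology( size_t groups_num,
    int& nodes_count, int*& indexes_list, int*& concurrency_list );
extern "C" task_scheduler_observer* subscribe_arena( task_arena* ta, int numa_id, int num_slots );
extern "C" void unsubscribe_arena( task_scheduler_observer* binding_observer );
}}

void example_bind_arena_to_numa_node() {   // hypothetical helper, for illustration only
    int  nodes_count      = 0;
    int* indexes_list     = NULL;
    int* concurrency_list = NULL;

    // groups_num > 1 would permit binding across Windows processor groups
    // (see intergroup_binding_allowed above); 1 keeps binding inside one group.
    tbb::internal::initialize_numa_topology(1, nodes_count, indexes_list, concurrency_list);
    if (nodes_count < 1 || indexes_list[0] < 0)
        return; // index -1 is the stub value used when topology parsing failed

    int node_id = indexes_list[0];            // logical index of the first NUMA node
    int slots   = concurrency_list[node_id];  // default concurrency on that node

    tbb::task_arena arena(slots);
    arena.initialize();

    // While the observer is attached, every thread entering the arena is bound to
    // the node's cpuset on entry and restored to its previous mask on exit.
    tbb::task_scheduler_observer* observer =
        tbb::internal::subscribe_arena(&arena, node_id, slots);

    // ... arena.execute(...) would run NUMA-local work here ...

    tbb::internal::unsubscribe_arena(observer);
}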
