Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
flow_graph_opencl_node.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #ifndef __TBB_flow_graph_opencl_node_H
18 #define __TBB_flow_graph_opencl_node_H
19 
20 #define __TBB_flow_graph_opencl_node_H_include_area
22 
23 #include "tbb/tbb_config.h"
24 #if __TBB_PREVIEW_OPENCL_NODE
25 
26 #include "flow_graph.h"
27 
28 #include <vector>
29 #include <string>
30 #include <algorithm>
31 #include <iostream>
32 #include <fstream>
33 #include <map>
34 #include <mutex>
35 
36 #ifdef __APPLE__
37 #include <OpenCL/opencl.h>
38 #else
39 #include <CL/cl.h>
40 #endif
41 
42 namespace tbb {
43 namespace flow {
44 
45 namespace interface11 {
46 
47 template <typename DeviceFilter>
49 
50 namespace opencl_info {
52 }
53 
54 template <typename Factory>
56 
57 inline void enforce_cl_retcode(cl_int err, std::string msg) {
58  if (err != CL_SUCCESS) {
59  std::cerr << msg << "; error code: " << err << std::endl;
60  throw msg;
61  }
62 }
63 
64 template <typename T>
65 T event_info(cl_event e, cl_event_info i) {
66  T res;
67  enforce_cl_retcode(clGetEventInfo(e, i, sizeof(res), &res, NULL), "Failed to get OpenCL event information");
68  return res;
69 }
70 
71 template <typename T>
72 T device_info(cl_device_id d, cl_device_info i) {
73  T res;
74  enforce_cl_retcode(clGetDeviceInfo(d, i, sizeof(res), &res, NULL), "Failed to get OpenCL device information");
75  return res;
76 }
77 
78 template <>
79 inline std::string device_info<std::string>(cl_device_id d, cl_device_info i) {
80  size_t required;
81  enforce_cl_retcode(clGetDeviceInfo(d, i, 0, NULL, &required), "Failed to get OpenCL device information");
82 
83  char *buff = (char*)alloca(required);
84  enforce_cl_retcode(clGetDeviceInfo(d, i, required, buff, NULL), "Failed to get OpenCL device information");
85 
86  return buff;
87 }
88 
89 template <typename T>
90 T platform_info(cl_platform_id p, cl_platform_info i) {
91  T res;
92  enforce_cl_retcode(clGetPlatformInfo(p, i, sizeof(res), &res, NULL), "Failed to get OpenCL platform information");
93  return res;
94 }
95 
96 template <>
97 inline std::string platform_info<std::string>(cl_platform_id p, cl_platform_info i) {
98  size_t required;
99  enforce_cl_retcode(clGetPlatformInfo(p, i, 0, NULL, &required), "Failed to get OpenCL platform information");
100 
101  char *buff = (char*)alloca(required);
102  enforce_cl_retcode(clGetPlatformInfo(p, i, required, buff, NULL), "Failed to get OpenCL platform information");
103 
104  return buff;
105 }
106 
107 
109 public:
110  typedef size_t device_id_type;
111  enum : device_id_type {
114  };
115 
117 
118  opencl_device( cl_device_id d_id ) : my_device_id( unknown ), my_cl_device_id( d_id ), my_cl_command_queue( NULL ) {}
119 
121 
122  std::string platform_profile() const {
123  return platform_info<std::string>( platform_id(), CL_PLATFORM_PROFILE );
124  }
125  std::string platform_version() const {
126  return platform_info<std::string>( platform_id(), CL_PLATFORM_VERSION );
127  }
128  std::string platform_name() const {
129  return platform_info<std::string>( platform_id(), CL_PLATFORM_NAME );
130  }
131  std::string platform_vendor() const {
132  return platform_info<std::string>( platform_id(), CL_PLATFORM_VENDOR );
133  }
134  std::string platform_extensions() const {
135  return platform_info<std::string>( platform_id(), CL_PLATFORM_EXTENSIONS );
136  }
137 
138  template <typename T>
139  void info( cl_device_info i, T &t ) const {
140  t = device_info<T>( my_cl_device_id, i );
141  }
142  std::string version() const {
143  // The version string format: OpenCL<space><major_version.minor_version><space><vendor-specific information>
144  return device_info<std::string>( my_cl_device_id, CL_DEVICE_VERSION );
145  }
146  int major_version() const {
147  int major;
148  std::sscanf( version().c_str(), "OpenCL %d", &major );
149  return major;
150  }
151  int minor_version() const {
152  int major, minor;
153  std::sscanf( version().c_str(), "OpenCL %d.%d", &major, &minor );
154  return minor;
155  }
157 #if CL_VERSION_2_0
158  if ( major_version() >= 2 )
159  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
160  else
161 #endif /* CL_VERSION_2_0 */
162  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
163  }
165 #if CL_VERSION_2_0
166  if ( major_version() >= 2 )
167  return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
168  else
169 #endif /* CL_VERSION_2_0 */
170  return false;
171  }
172  std::array<size_t, 3> max_work_item_sizes() const {
173  return device_info<std::array<size_t, 3>>( my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES );
174  }
175  size_t max_work_group_size() const {
176  return device_info<size_t>( my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE );
177  }
178  bool built_in_kernel_available( const std::string& k ) const {
179  const std::string semi = ";";
180  // Added semicolumns to force an exact match (to avoid a partial match, e.g. "add" is partly matched with "madd").
181  return (semi + built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos;
182  }
183  std::string built_in_kernels() const {
184  return device_info<std::string>( my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS );
185  }
186  std::string name() const {
187  return device_info<std::string>( my_cl_device_id, CL_DEVICE_NAME );
188  }
189  cl_bool available() const {
190  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_AVAILABLE );
191  }
192  cl_bool compiler_available() const {
193  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE );
194  }
195  cl_bool linker_available() const {
196  return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE );
197  }
198  bool extension_available( const std::string &ext ) const {
199  const std::string space = " ";
200  // Added space to force an exact match (to avoid a partial match, e.g. "ext" is partly matched with "ext2").
201  return (space + extensions() + space).find( space + ext + space ) != std::string::npos;
202  }
203  std::string extensions() const {
204  return device_info<std::string>( my_cl_device_id, CL_DEVICE_EXTENSIONS );
205  }
206 
207  cl_device_type type() const {
208  return device_info<cl_device_type>( my_cl_device_id, CL_DEVICE_TYPE );
209  }
210 
211  std::string vendor() const {
212  return device_info<std::string>( my_cl_device_id, CL_DEVICE_VENDOR );
213  }
214 
215  cl_uint address_bits() const {
216  return device_info<cl_uint>( my_cl_device_id, CL_DEVICE_ADDRESS_BITS );
217  }
218 
219  cl_device_id device_id() const {
220  return my_cl_device_id;
221  }
222 
223  cl_command_queue command_queue() const {
224  return my_cl_command_queue;
225  }
226 
227  void set_command_queue( cl_command_queue cmd_queue ) {
228  my_cl_command_queue = cmd_queue;
229  }
230 
231  cl_platform_id platform_id() const {
232  return device_info<cl_platform_id>( my_cl_device_id, CL_DEVICE_PLATFORM );
233  }
234 
235 private:
236 
238  cl_device_id my_cl_device_id;
239  cl_command_queue my_cl_command_queue;
240 
241  friend bool operator==(opencl_device d1, opencl_device d2) { return d1.my_cl_device_id == d2.my_cl_device_id; }
242 
243  template <typename DeviceFilter>
244  friend class opencl_factory;
245  template <typename Factory>
246  friend class opencl_memory;
247  template <typename Factory>
248  friend class opencl_program;
249 
250 #if TBB_USE_ASSERT
251  template <typename T, typename Factory>
252  friend class opencl_buffer;
253 #endif
254 };
255 
257  typedef std::vector<opencl_device> container_type;
258 public:
259  typedef container_type::iterator iterator;
260  typedef container_type::const_iterator const_iterator;
261  typedef container_type::size_type size_type;
262 
264  opencl_device_list( std::initializer_list<opencl_device> il ) : my_container( il ) {}
265 
266  void add( opencl_device d ) { my_container.push_back( d ); }
267  size_type size() const { return my_container.size(); }
268  bool empty() const { return my_container.empty(); }
269  iterator begin() { return my_container.begin(); }
270  iterator end() { return my_container.end(); }
271  const_iterator begin() const { return my_container.begin(); }
272  const_iterator end() const { return my_container.end(); }
273  const_iterator cbegin() const { return my_container.cbegin(); }
274  const_iterator cend() const { return my_container.cend(); }
275 
276 private:
278 };
279 
280 namespace internal {
281 
282 // Retrieve all OpenCL devices from machine
284  opencl_device_list opencl_devices;
285 
286  cl_uint num_platforms;
287  enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms), "clGetPlatformIDs failed");
288 
289  std::vector<cl_platform_id> platforms(num_platforms);
290  enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL), "clGetPlatformIDs failed");
291 
292  cl_uint num_devices;
293  std::vector<cl_platform_id>::iterator platforms_it = platforms.begin();
294  cl_uint num_all_devices = 0;
295  while (platforms_it != platforms.end()) {
296  cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
297  if (err == CL_DEVICE_NOT_FOUND) {
298  platforms_it = platforms.erase(platforms_it);
299  }
300  else {
301  enforce_cl_retcode(err, "clGetDeviceIDs failed");
302  num_all_devices += num_devices;
303  ++platforms_it;
304  }
305  }
306 
307  std::vector<cl_device_id> devices(num_all_devices);
308  std::vector<cl_device_id>::iterator devices_it = devices.begin();
309  for (auto p = platforms.begin(); p != platforms.end(); ++p) {
310  enforce_cl_retcode(clGetDeviceIDs((*p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices), "clGetDeviceIDs failed");
311  devices_it += num_devices;
312  }
313 
314  for (auto d = devices.begin(); d != devices.end(); ++d) {
315  opencl_devices.add(opencl_device((*d)));
316  }
317 
318  return opencl_devices;
319 }
320 
321 } // namespace internal
322 
323 // TODO: consider this namespace as public API
324 namespace opencl_info {
325 
327  // Static storage for all available OpenCL devices on machine
328  static const opencl_device_list my_devices = internal::find_available_devices();
329  return my_devices;
330  }
331 
332 } // namespace opencl_info
333 
334 
336 public:
337  virtual void call() = 0;
338  virtual ~callback_base() {}
339 };
340 
341 template <typename Callback, typename T>
342 class callback : public callback_base {
343  Callback my_callback;
345 public:
346  callback( Callback c, const T& t ) : my_callback( c ), my_data( t ) {}
347 
349  my_callback( my_data );
350  }
351 };
352 
353 template <typename T, typename Factory = opencl_info::default_opencl_factory>
354 class opencl_async_msg : public async_msg<T> {
355 public:
356  typedef T value_type;
357 
358  opencl_async_msg() : my_callback_flag_ptr( std::make_shared< tbb::atomic<bool>>() ) {
359  my_callback_flag_ptr->store<tbb::relaxed>(false);
360  }
361 
362  explicit opencl_async_msg( const T& data ) : my_data(data), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
363  my_callback_flag_ptr->store<tbb::relaxed>(false);
364  }
365 
366  opencl_async_msg( const T& data, cl_event event ) : my_data(data), my_event(event), my_is_event(true), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
367  my_callback_flag_ptr->store<tbb::relaxed>(false);
368  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
369  }
370 
371  T& data( bool wait = true ) {
372  if ( my_is_event && wait ) {
373  enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
374  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
375  my_is_event = false;
376  }
377  return my_data;
378  }
379 
380  const T& data( bool wait = true ) const {
381  if ( my_is_event && wait ) {
382  enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
383  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
384  my_is_event = false;
385  }
386  return my_data;
387  }
388 
389  opencl_async_msg( const opencl_async_msg &dmsg ) : async_msg<T>(dmsg),
390  my_data(dmsg.my_data), my_event(dmsg.my_event), my_is_event( dmsg.my_is_event ),
392  {
393  if ( my_is_event )
394  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
395  }
396 
397  opencl_async_msg( opencl_async_msg &&dmsg ) : async_msg<T>(std::move(dmsg)),
398  my_data(std::move(dmsg.my_data)), my_event(dmsg.my_event), my_is_event(dmsg.my_is_event),
400  {
401  dmsg.my_is_event = false;
402  }
403 
406 
407  // Release original event
408  if ( my_is_event )
409  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to retain an event" );
410 
411  my_data = dmsg.my_data;
412  my_event = dmsg.my_event;
413  my_is_event = dmsg.my_is_event;
414 
415  // Retain copied event
416  if ( my_is_event )
417  enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
418 
420  return *this;
421  }
422 
424  if ( my_is_event )
425  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
426  }
427 
428  cl_event const * get_event() const { return my_is_event ? &my_event : NULL; }
429  void set_event( cl_event e ) const {
430  if ( my_is_event ) {
431  cl_command_queue cq = event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE );
432  if ( cq != event_info<cl_command_queue>( e, CL_EVENT_COMMAND_QUEUE ) )
433  enforce_cl_retcode( clFlush( cq ), "Failed to flush an OpenCL command queue" );
434  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
435  }
436  my_is_event = true;
437  my_event = e;
438  clRetainEvent( my_event );
439  }
440 
441  void clear_event() const {
442  if ( my_is_event ) {
443  enforce_cl_retcode( clFlush( event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE ) ), "Failed to flush an OpenCL command queue" );
444  enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
445  }
446  my_is_event = false;
447  }
448 
449  template <typename Callback>
450  void register_callback( Callback c ) const {
451  __TBB_ASSERT( my_is_event, "The OpenCL event is not set" );
452  enforce_cl_retcode( clSetEventCallback( my_event, CL_COMPLETE, register_callback_func, new callback<Callback, T>( c, my_data ) ), "Failed to set an OpenCL callback" );
453  }
454 
455  operator T&() { return data(); }
456  operator const T&() const { return data(); }
457 
458 protected:
459  // Overridden in this derived class to inform that
460  // async calculation chain is over
461  void finalize() const __TBB_override {
463  if (! my_callback_flag_ptr->fetch_and_store(true)) {
464  opencl_async_msg a(*this);
465  if (my_is_event) {
466  register_callback([a](const T& t) mutable {
467  a.set(t);
468  });
469  }
470  else {
471  a.set(my_data);
472  }
473  }
474  clear_event();
475  }
476 
477 private:
478  static void CL_CALLBACK register_callback_func( cl_event, cl_int event_command_exec_status, void *data ) {
479  tbb::internal::suppress_unused_warning( event_command_exec_status );
480  __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL );
481  __TBB_ASSERT( data, NULL );
482  callback_base *c = static_cast<callback_base*>(data);
483  c->call();
484  delete c;
485  }
486 
488  mutable cl_event my_event;
489  mutable bool my_is_event = false;
490 
491  std::shared_ptr< tbb::atomic<bool> > my_callback_flag_ptr;
492 };
493 
494 template <typename K, typename T, typename Factory>
497  const T &t = dmsg.data( false );
498  __TBB_STATIC_ASSERT( true, "" );
499  return key_from_message<K, T>( t );
500 }
501 
502 template <typename Factory>
504 public:
506  opencl_memory( Factory &f ) : my_host_ptr( NULL ), my_factory( &f ), my_sending_event_present( false ) {
507  my_curr_device_id = my_factory->devices().begin()->my_device_id;
508  }
509 
511  if ( my_sending_event_present ) enforce_cl_retcode( clReleaseEvent( my_sending_event ), "Failed to release an event for the OpenCL buffer" );
512  enforce_cl_retcode( clReleaseMemObject( my_cl_mem ), "Failed to release an memory object" );
513  }
514 
515  cl_mem get_cl_mem() const {
516  return my_cl_mem;
517  }
518 
519  void* get_host_ptr() {
520  if ( !my_host_ptr ) {
522  d.data();
523  __TBB_ASSERT( d.data() == my_host_ptr, NULL );
524  }
525  return my_host_ptr;
526  }
527 
528  Factory *factory() const { return my_factory; }
529 
532  if (e) {
534  } else {
536  }
537 
538  // Concurrent receives are prohibited so we do not worry about synchronization.
540  map_memory(*my_factory->devices().begin(), d);
542  my_host_ptr = d.data(false);
543  }
544  // Release the sending event
546  enforce_cl_retcode(clReleaseEvent(my_sending_event), "Failed to release an event");
547  my_sending_event_present = false;
548  }
549  return d;
550  }
551 
553  opencl_device::device_id_type device_id = device.my_device_id;
554  if (!my_factory->is_same_context(my_curr_device_id.load<tbb::acquire>(), device_id)) {
555  {
557  if (!my_factory->is_same_context(my_curr_device_id.load<tbb::relaxed>(), device_id)) {
558  __TBB_ASSERT(my_host_ptr, "The buffer has not been mapped");
560  my_factory->enqueue_unmap_buffer(device, *this, d);
561  my_sending_event = *d.get_event();
563  enforce_cl_retcode(clRetainEvent(my_sending_event), "Failed to retain an event");
564  my_host_ptr = NULL;
565  my_curr_device_id.store<tbb::release>(device_id);
566  }
567  }
569  }
570 
571  // !e means that buffer has come from the host
573 
574  __TBB_ASSERT(!my_host_ptr, "The buffer has not been unmapped");
576  }
577 
579 protected:
580  cl_mem my_cl_mem;
581  tbb::atomic<opencl_device::device_id_type> my_curr_device_id;
582  void* my_host_ptr;
583  Factory *my_factory;
584 
588 };
589 
590 template <typename Factory>
591 class opencl_buffer_impl : public opencl_memory<Factory> {
592  size_t my_size;
593 public:
594  opencl_buffer_impl( size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
595  cl_int err;
596  this->my_cl_mem = clCreateBuffer( this->my_factory->context(), CL_MEM_ALLOC_HOST_PTR, size, NULL, &err );
597  enforce_cl_retcode( err, "Failed to create an OpenCL buffer" );
598  }
599 
600  // The constructor for subbuffers.
601  opencl_buffer_impl( cl_mem m, size_t index, size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
602  cl_int err;
603  cl_buffer_region region = { index, size };
604  this->my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, &region, &err );
605  enforce_cl_retcode( err, "Failed to create an OpenCL subbuffer" );
606  }
607 
608  size_t size() const {
609  return my_size;
610  }
611 
613  this->my_factory->enqueue_map_buffer( device, *this, dmsg );
614  }
615 
616 #if TBB_USE_ASSERT
617  template <typename, typename>
618  friend class opencl_buffer;
619 #endif
620 };
621 
626 };
627 
628 template <typename T, typename Factory = opencl_info::default_opencl_factory>
630 
631 template <typename T, typename Factory = opencl_info::default_opencl_factory>
633 public:
634  typedef cl_mem native_object_type;
636  typedef Factory opencl_factory_type;
637 
638  template<access_type a> using iterator = T*;
639 
640  template <access_type a>
641  iterator<a> access() const {
642  T* ptr = (T*)my_impl->get_host_ptr();
643  __TBB_ASSERT( ptr, NULL );
644  return iterator<a>( ptr );
645  }
646 
647  T* data() const { return &access<read_write>()[0]; }
648 
649  template <access_type a = read_write>
650  iterator<a> begin() const { return access<a>(); }
651 
652  template <access_type a = read_write>
653  iterator<a> end() const { return access<a>()+my_impl->size()/sizeof(T); }
654 
655  size_t size() const { return my_impl->size()/sizeof(T); }
656 
657  T& operator[] ( ptrdiff_t k ) { return begin()[k]; }
658 
660  opencl_buffer( size_t size );
661  opencl_buffer( Factory &f, size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), f ) ) {}
662 
663  cl_mem native_object() const {
664  return my_impl->get_cl_mem();
665  }
666 
667  const opencl_buffer& memory_object() const {
668  return *this;
669  }
670 
671  void send( opencl_device device, opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
672  __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
673  opencl_async_msg<void*, Factory> d = my_impl->send( device, dependency.get_event() );
674  const cl_event *e = d.get_event();
675  if ( e ) dependency.set_event( *e );
676  else dependency.clear_event();
677  }
678  void receive( const opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
679  __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
680  opencl_async_msg<void*, Factory> d = my_impl->receive( dependency.get_event() );
681  const cl_event *e = d.get_event();
682  if ( e ) dependency.set_event( *e );
683  else dependency.clear_event();
684  }
685 
686  opencl_subbuffer<T, Factory> subbuffer( size_t index, size_t size ) const;
687 private:
688  // The constructor for subbuffers.
689  opencl_buffer( Factory &f, cl_mem m, size_t index, size_t size ) : my_impl( std::make_shared<impl_type>( m, index*sizeof(T), size*sizeof(T), f ) ) {}
690 
692 
693  std::shared_ptr<impl_type> my_impl;
694 
695  friend bool operator==(const opencl_buffer<T, Factory> &lhs, const opencl_buffer<T, Factory> &rhs) {
696  return lhs.my_impl == rhs.my_impl;
697  }
698 
699  template <typename>
700  friend class opencl_factory;
701  template <typename, typename>
702  friend class opencl_subbuffer;
703 };
704 
705 template <typename T, typename Factory>
706 class opencl_subbuffer : public opencl_buffer<T, Factory> {
708 public:
710  opencl_subbuffer( const opencl_buffer<T, Factory> &owner, size_t index, size_t size ) :
711  opencl_buffer<T, Factory>( *owner.my_impl->factory(), owner.native_object(), index, size ), my_owner( owner ) {}
712 };
713 
714 template <typename T, typename Factory>
716  return opencl_subbuffer<T, Factory>( *this, index, size );
717 }
718 
719 
720 #define is_typedef(type) \
721  template <typename T> \
722  struct is_##type { \
723  template <typename C> \
724  static std::true_type check( typename C::type* ); \
725  template <typename C> \
726  static std::false_type check( ... ); \
727  \
728  static const bool value = decltype(check<T>(0))::value; \
729  }
730 
731 is_typedef( native_object_type );
732 is_typedef( memory_object_type );
733 
734 template <typename T>
735 typename std::enable_if<is_native_object_type<T>::value, typename T::native_object_type>::type get_native_object( const T &t ) {
736  return t.native_object();
737 }
738 
739 template <typename T>
741  return t;
742 }
743 
744 // send_if_memory_object checks if the T type has memory_object_type and call the send method for the object.
745 template <typename T, typename Factory>
747  const T &t = dmsg.data( false );
748  typedef typename T::memory_object_type mem_obj_t;
749  mem_obj_t mem_obj = t.memory_object();
751  if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
752  mem_obj.send( device, d );
753  if ( d.get_event() ) dmsg.set_event( *d.get_event() );
754 }
755 
756 template <typename T>
758  typedef typename T::memory_object_type mem_obj_t;
759  mem_obj_t mem_obj = t.memory_object();
761  mem_obj.send( device, dmsg );
762 }
763 
764 template <typename T>
766 
767 // receive_if_memory_object checks if the T type has memory_object_type and call the receive method for the object.
768 template <typename T, typename Factory>
770  const T &t = dmsg.data( false );
771  typedef typename T::memory_object_type mem_obj_t;
772  mem_obj_t mem_obj = t.memory_object();
774  if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
775  mem_obj.receive( d );
776  if ( d.get_event() ) dmsg.set_event( *d.get_event() );
777 }
778 
779 template <typename T>
781 
783 public:
784  typedef size_t range_index_type;
785  typedef std::array<range_index_type, 3> nd_range_type;
786 
787  template <typename G = std::initializer_list<int>, typename L = std::initializer_list<int>,
788  typename = typename std::enable_if<!std::is_same<typename std::decay<G>::type, opencl_range>::value>::type>
789  opencl_range(G&& global_work = std::initializer_list<int>({ 0 }), L&& local_work = std::initializer_list<int>({ 0, 0, 0 })) {
790  auto g_it = global_work.begin();
791  auto l_it = local_work.begin();
792  my_global_work_size = { size_t(-1), size_t(-1), size_t(-1) };
793  // my_local_work_size is still uninitialized
794  for (int s = 0; s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++s) {
795  __TBB_ASSERT(l_it != local_work.end(), "global_work & local_work must have same size");
796  my_global_work_size[s] = *g_it;
797  my_local_work_size[s] = *l_it;
798  }
799  }
800 
801  const nd_range_type& global_range() const { return my_global_work_size; }
802  const nd_range_type& local_range() const { return my_local_work_size; }
803 
804 private:
807 };
808 
809 template <typename DeviceFilter>
810 class opencl_factory {
811 public:
814 
816  public:
817  kernel( const kernel& k ) : my_factory( k.my_factory ) {
818  // Clone my_cl_kernel via opencl_program
819  size_t ret_size = 0;
820 
821  std::vector<char> kernel_name;
822  for ( size_t curr_size = 32;; curr_size <<= 1 ) {
823  kernel_name.resize( curr_size <<= 1 );
824  enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ), "Failed to get kernel info" );
825  if ( ret_size < curr_size ) break;
826  }
827 
828  cl_program program;
829  enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, &ret_size ), "Failed to get kernel info" );
830  __TBB_ASSERT( ret_size == sizeof(program), NULL );
831 
832  my_cl_kernel = opencl_program< factory_type >( my_factory, program ).get_cl_kernel( kernel_name.data() );
833  }
834 
836  enforce_cl_retcode( clReleaseKernel( my_cl_kernel ), "Failed to release a kernel" );
837  }
838 
839  private:
841 
842  kernel( const cl_kernel& k, factory_type& f ) : my_cl_kernel( k ), my_factory( f ) {}
843 
844  // Data
845  cl_kernel my_cl_kernel;
847 
848  template <typename DeviceFilter_>
849  friend class opencl_factory;
850 
851  template <typename Factory>
852  friend class opencl_program;
853  };
854 
855  typedef kernel kernel_type;
856 
857  // 'range_type' enables kernel_executor with range support
858  // it affects expectations for enqueue_kernel(.....) interface method
860 
863  if ( my_devices.size() ) {
864  for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
865  enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
866  }
867  enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
868  }
869  }
870 
871  bool init( const opencl_device_list &device_list ) {
873  if ( !my_devices.size() ) {
874  my_devices = device_list;
875  return true;
876  }
877  return false;
878  }
879 
880 
881 private:
882  template <typename Factory>
884  cl_event const* e1 = dmsg.get_event();
885  cl_event e2;
886  cl_int err;
887  void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
888  e1 == NULL ? 0 : 1, e1, &e2, &err );
889  enforce_cl_retcode( err, "Failed to map a buffer" );
890  dmsg.data( false ) = ptr;
891  dmsg.set_event( e2 );
892  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
893  }
894 
895 
896  template <typename Factory>
898  cl_event const* e1 = dmsg.get_event();
899  cl_event e2;
901  clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
902  "Failed to unmap a buffer" );
903  dmsg.set_event( e2 );
904  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
905  }
906 
907  // --------- Kernel argument & event list helpers --------- //
908  template <size_t NUM_ARGS, typename T>
909  void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>&, int&, int& place, const T& t ) {
910  auto p = get_native_object(t);
911  enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
912  }
913 
914  template <size_t NUM_ARGS, typename T, typename F>
915  void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const opencl_async_msg<T, F>& msg ) {
916  __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
917 
918  const cl_event * const e = msg.get_event();
919  if (e != NULL) {
920  events[num_events++] = *e;
921  }
922 
923  process_one_arg( kernel, events, num_events, place, msg.data(false) );
924  }
925 
926  template <size_t NUM_ARGS, typename T, typename ...Rest>
927  void process_arg_list( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const T& t, const Rest&... args ) {
928  process_one_arg( kernel, events, num_events, place, t );
929  process_arg_list( kernel, events, num_events, place, args... );
930  }
931 
932  template <size_t NUM_ARGS>
933  void process_arg_list( const kernel_type&, std::array<cl_event, NUM_ARGS>&, int&, int& ) {}
934  // ------------------------------------------- //
935  template <typename T>
936  void update_one_arg( cl_event, T& ) {}
937 
938  template <typename T, typename F>
939  void update_one_arg( cl_event e, opencl_async_msg<T, F>& msg ) {
940  msg.set_event( e );
941  }
942 
943  template <typename T, typename ...Rest>
944  void update_arg_list( cl_event e, T& t, Rest&... args ) {
945  update_one_arg( e, t );
946  update_arg_list( e, args... );
947  }
948 
949  void update_arg_list( cl_event ) {}
950  // ------------------------------------------- //
951 public:
952  template <typename ...Args>
953  void send_kernel( opencl_device device, const kernel_type& kernel, const range_type& work_size, Args&... args ) {
954  std::array<cl_event, sizeof...(Args)> events;
955  int num_events = 0;
956  int place = 0;
957  process_arg_list( kernel, events, num_events, place, args... );
958 
959  const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
960 
961  update_arg_list(e, args...);
962 
963  // Release our own reference to cl_event
964  enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
965  }
966 
967  // ------------------------------------------- //
968  template <typename T, typename ...Rest>
969  void send_data(opencl_device device, T& t, Rest&... args) {
970  send_if_memory_object( device, t );
971  send_data( device, args... );
972  }
973 
975  // ------------------------------------------- //
976 
977 private:
978  cl_event send_kernel_impl( opencl_device device, const cl_kernel& kernel,
979  const range_type& work_size, cl_uint num_events, cl_event* event_list ) {
980  const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
981  const typename range_type::nd_range_type& g_size = work_size.global_range();
982  const typename range_type::nd_range_type& l_size = work_size.local_range();
983  cl_uint s;
984  for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
985  cl_event event;
987  clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
988  g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
989  "Failed to enqueue a kernel" );
990  return event;
991  }
992 
993  // ------------------------------------------- //
994  template <typename T>
995  bool get_event_from_one_arg( cl_event&, const T& ) {
996  return false;
997  }
998 
999  template <typename T, typename F>
1000  bool get_event_from_one_arg( cl_event& e, const opencl_async_msg<T, F>& msg) {
1001  cl_event const *e_ptr = msg.get_event();
1002 
1003  if ( e_ptr != NULL ) {
1004  e = *e_ptr;
1005  return true;
1006  }
1007 
1008  return false;
1009  }
1010 
1011  template <typename T, typename ...Rest>
1012  bool get_event_from_args( cl_event& e, const T& t, const Rest&... args ) {
1013  if ( get_event_from_one_arg( e, t ) ) {
1014  return true;
1015  }
1016 
1017  return get_event_from_args( e, args... );
1018  }
1019 
1020  bool get_event_from_args( cl_event& ) {
1021  return false;
1022  }
1023  // ------------------------------------------- //
1024 
1026  virtual ~finalize_fn() {}
1027  virtual void operator() () {}
1028  };
1029 
1030  template<typename Fn>
1031  struct finalize_fn_leaf : public finalize_fn {
1032  Fn my_fn;
1035  };
1036 
1037  static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data) {
1038  tbb::internal::suppress_unused_warning(event_command_exec_status);
1039  __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1040 
1041  finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1042  __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1043  (*fn_ptr)();
1044 
1045  // Function pointer was created by 'new' & this callback must be called once only
1046  delete fn_ptr;
1047  }
1048 public:
1049  template <typename FinalizeFn, typename ...Args>
1050  void finalize( opencl_device device, FinalizeFn fn, Args&... args ) {
1051  cl_event e;
1052 
1053  if ( get_event_from_args( e, args... ) ) {
1054  enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1055  new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1056  }
1057 
1058  enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1059  }
1060 
1062  std::call_once( my_once_flag, &opencl_factory::init_once, this );
1063  return my_devices;
1064  }
1065 
1066 private:
1069  // Currently, factory supports only one context so if the both devices are not host it means the are in the same context.
1070  if ( d1 != opencl_device::host && d2 != opencl_device::host )
1071  return true;
1072  return d1 == d2;
1073  }
1074 private:
1075  opencl_factory( const opencl_factory& );
1077 
1078  cl_context context() {
1079  std::call_once( my_once_flag, &opencl_factory::init_once, this );
1080  return my_cl_context;
1081  }
1082 
1083  void init_once() {
1084  {
1086  if (!my_devices.size())
1087  my_devices = DeviceFilter()( opencl_info::available_devices() );
1088  }
1089 
1090  enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1091  cl_platform_id platform_id = my_devices.begin()->platform_id();
1092  for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1093  enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1094 
1095  std::vector<cl_device_id> cl_device_ids;
1096  for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1097  cl_device_ids.push_back((*d).my_cl_device_id);
1098  }
1099 
1100  cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1101  cl_int err;
1102  cl_context ctx = clCreateContext(context_properties,
1103  (cl_uint)cl_device_ids.size(),
1104  cl_device_ids.data(),
1105  NULL, NULL, &err);
1106  enforce_cl_retcode(err, "Failed to create context");
1107  my_cl_context = ctx;
1108 
1109  size_t device_counter = 0;
1110  for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1111  (*d).my_device_id = device_counter++;
1112  cl_int err2;
1113  cl_command_queue cq;
1114 #if CL_VERSION_2_0
1115  if ((*d).major_version() >= 2) {
1116  if ((*d).out_of_order_exec_mode_on_host_present()) {
1117  cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1118  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1119  } else {
1120  cl_queue_properties props[] = { 0 };
1121  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1122  }
1123  } else
1124 #endif
1125  {
1126  cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1127  // Suppress "declared deprecated" warning for the next line.
1128 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1129 #pragma GCC diagnostic push
1130 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1131 #endif
1132 #if _MSC_VER || __INTEL_COMPILER
1133 #pragma warning( push )
1134 #if __INTEL_COMPILER
1135 #pragma warning (disable: 1478)
1136 #else
1137 #pragma warning (disable: 4996)
1138 #endif
1139 #endif
1140  cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1141 #if _MSC_VER || __INTEL_COMPILER
1142 #pragma warning( pop )
1143 #endif
1144 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1145 #pragma GCC diagnostic pop
1146 #endif
1147  }
1148  enforce_cl_retcode(err2, "Failed to create command queue");
1149  (*d).my_cl_command_queue = cq;
1150  }
1151  }
1152 
1153  std::once_flag my_once_flag;
1155  cl_context my_cl_context;
1156 
1158 
1159  template <typename Factory>
1160  friend class opencl_program;
1161  template <typename Factory>
1162  friend class opencl_buffer_impl;
1163  template <typename Factory>
1164  friend class opencl_memory;
1165 }; // class opencl_factory
1166 
1167 // TODO: consider this namespace as public API
1168 namespace opencl_info {
1169 
1170 // Default types
1171 
1172 template <typename Factory>
1175  __TBB_ASSERT(!f.devices().empty(), "No available devices");
1176  return *(f.devices().begin());
1177  }
1178 };
1179 
1182  opencl_device_list dl;
1183  cl_platform_id platform_id = devices.begin()->platform_id();
1184  for (opencl_device_list::const_iterator it = devices.cbegin(); it != devices.cend(); ++it) {
1185  if (it->platform_id() == platform_id) {
1186  dl.add(*it);
1187  }
1188  }
1189  return dl;
1190  }
1191 };
1192 
1193 class default_opencl_factory : public opencl_factory < default_device_filter >, tbb::internal::no_copy {
1194 public:
1196 
1198 
1199 private:
1200  default_opencl_factory() = default;
1201 };
1202 
1205  return default_factory;
1206 }
1207 
1208 } // namespace opencl_info
1209 
1210 template <typename T, typename Factory>
1211 opencl_buffer<T, Factory>::opencl_buffer( size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), opencl_info::default_factory() ) ) {}
1212 
1213 
1215  SOURCE,
1216  PRECOMPILED,
1217  SPIR
1218 };
1219 
1220 template <typename Factory = opencl_info::default_opencl_factory>
1222 public:
1223  typedef typename Factory::kernel_type kernel_type;
1224 
1225  opencl_program( Factory& factory, opencl_program_type type, const std::string& program_name ) : my_factory( factory ), my_type(type) , my_arg_str( program_name) {}
1226  opencl_program( Factory& factory, const char* program_name ) : opencl_program( factory, std::string( program_name ) ) {}
1227  opencl_program( Factory& factory, const std::string& program_name ) : opencl_program( factory, opencl_program_type::SOURCE, program_name ) {}
1228 
1229  opencl_program( opencl_program_type type, const std::string& program_name ) : opencl_program( opencl_info::default_factory(), type, program_name ) {}
1230  opencl_program( const char* program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1231  opencl_program( const std::string& program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1233 
1235  // Set my_do_once_flag to the called state.
1236  std::call_once( my_do_once_flag, [](){} );
1237  }
1238 
1239  kernel_type get_kernel( const std::string& k ) const {
1240  return kernel_type( get_cl_kernel(k), my_factory );
1241  }
1242 
1243 private:
1244  opencl_program( Factory& factory, cl_program program ) : my_factory( factory ), my_cl_program( program ) {
1245  // Set my_do_once_flag to the called state.
1246  std::call_once( my_do_once_flag, [](){} );
1247  }
1248 
1249  cl_kernel get_cl_kernel( const std::string& k ) const {
1250  std::call_once( my_do_once_flag, [this, &k](){ this->init( k ); } );
1251  cl_int err;
1252  cl_kernel kernel = clCreateKernel( my_cl_program, k.c_str(), &err );
1253  enforce_cl_retcode( err, std::string( "Failed to create kernel: " ) + k );
1254  return kernel;
1255  }
1256 
1257  class file_reader {
1258  public:
1259  file_reader( const std::string& filepath ) {
1260  std::ifstream file_descriptor( filepath, std::ifstream::binary );
1261  if ( !file_descriptor.is_open() ) {
1262  std::string str = std::string( "Could not open file: " ) + filepath;
1263  std::cerr << str << std::endl;
1264  throw str;
1265  }
1266  file_descriptor.seekg( 0, file_descriptor.end );
1267  size_t length = size_t( file_descriptor.tellg() );
1268  file_descriptor.seekg( 0, file_descriptor.beg );
1269  my_content.resize( length );
1270  char* begin = &*my_content.begin();
1271  file_descriptor.read( begin, length );
1272  file_descriptor.close();
1273  }
1274  const char* content() { return &*my_content.cbegin(); }
1275  size_t length() { return my_content.length(); }
1276  private:
1277  std::string my_content;
1278  };
1279 
1281  public:
1282  typedef void (CL_CALLBACK *cl_callback_type)(cl_program, void*);
1283  opencl_program_builder( Factory& f, const std::string& name, cl_program program,
1284  cl_uint num_devices, cl_device_id* device_list,
1285  const char* options, cl_callback_type callback,
1286  void* user_data ) {
1287  cl_int err = clBuildProgram( program, num_devices, device_list, options,
1288  callback, user_data );
1289  if( err == CL_SUCCESS )
1290  return;
1291  std::string str = std::string( "Failed to build program: " ) + name;
1292  if ( err == CL_BUILD_PROGRAM_FAILURE ) {
1293  const opencl_device_list &devices = f.devices();
1294  for ( auto d = devices.begin(); d != devices.end(); ++d ) {
1295  std::cerr << "Build log for device: " << (*d).name() << std::endl;
1296  size_t log_size;
1297  cl_int query_err = clGetProgramBuildInfo(
1298  program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
1299  &log_size );
1300  enforce_cl_retcode( query_err, "Failed to get build log size" );
1301  if( log_size ) {
1302  std::vector<char> output;
1303  output.resize( log_size );
1304  query_err = clGetProgramBuildInfo(
1305  program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG,
1306  output.size(), output.data(), NULL );
1307  enforce_cl_retcode( query_err, "Failed to get build output" );
1308  std::cerr << output.data() << std::endl;
1309  } else {
1310  std::cerr << "No build log available" << std::endl;
1311  }
1312  }
1313  }
1314  enforce_cl_retcode( err, str );
1315  }
1316  };
1317 
1319  public:
1320  template<typename Filter>
1321  opencl_device_filter( cl_uint& num_devices, cl_device_id* device_list,
1322  Filter filter, const char* message ) {
1323  for ( cl_uint i = 0; i < num_devices; ++i )
1324  if ( filter(device_list[i]) ) {
1325  device_list[i--] = device_list[--num_devices];
1326  }
1327  if ( !num_devices )
1328  enforce_cl_retcode( CL_DEVICE_NOT_AVAILABLE, message );
1329  }
1330  };
1331 
1332  void init( const std::string& ) const {
1333  cl_uint num_devices;
1334  enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_NUM_DEVICES, sizeof( num_devices ), &num_devices, NULL ),
1335  "Failed to get OpenCL context info" );
1336  if ( !num_devices )
1337  enforce_cl_retcode( CL_DEVICE_NOT_FOUND, "No supported devices found" );
1338  cl_device_id *device_list = (cl_device_id *)alloca( num_devices*sizeof( cl_device_id ) );
1339  enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_DEVICES, num_devices*sizeof( cl_device_id ), device_list, NULL ),
1340  "Failed to get OpenCL context info" );
1341  const char *options = NULL;
1342  switch ( my_type ) {
1344  file_reader fr( my_arg_str );
1345  const char *s[] = { fr.content() };
1346  const size_t l[] = { fr.length() };
1347  cl_int err;
1348  my_cl_program = clCreateProgramWithSource( my_factory.context(), 1, s, l, &err );
1349  enforce_cl_retcode( err, std::string( "Failed to create program: " ) + my_arg_str );
1351  num_devices, device_list,
1352  []( const opencl_device& d ) -> bool {
1353  return !d.compiler_available() || !d.linker_available();
1354  }, "No one device supports building program from sources" );
1356  my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1357  options, /*callback*/ NULL, /*user data*/NULL );
1358  break;
1359  }
1361  options = "-x spir";
1363  file_reader fr( my_arg_str );
1364  std::vector<const unsigned char*> s(
1365  num_devices, reinterpret_cast<const unsigned char*>(fr.content()) );
1366  std::vector<size_t> l( num_devices, fr.length() );
1367  std::vector<cl_int> bin_statuses( num_devices, -1 );
1368  cl_int err;
1369  my_cl_program = clCreateProgramWithBinary( my_factory.context(), num_devices,
1370  device_list, l.data(), s.data(),
1371  bin_statuses.data(), &err );
1372  if( err != CL_SUCCESS ) {
1373  std::string statuses_str;
1374  for (auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) {
1375  statuses_str += std::to_string((*st));
1376  }
1377 
1378  enforce_cl_retcode( err, std::string( "Failed to create program, error " + std::to_string( err ) + " : " ) + my_arg_str +
1379  std::string( ", binary_statuses = " ) + statuses_str );
1380  }
1382  my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1383  options, /*callback*/ NULL, /*user data*/NULL );
1384  break;
1385  }
1386  default:
1387  __TBB_ASSERT( false, "Unsupported program type" );
1388  }
1389  }
1390 
1391  Factory& my_factory;
1393  std::string my_arg_str;
1394  mutable cl_program my_cl_program;
1395  mutable std::once_flag my_do_once_flag;
1396 
1397  template <typename DeviceFilter>
1398  friend class opencl_factory;
1399 
1400  template <typename DeviceFilter>
1401  friend class opencl_factory<DeviceFilter>::kernel;
1402 };
1403 
1404 template<typename... Args>
1406 
1407 template<typename JP, typename Factory, typename... Ports>
1408 class opencl_node< tuple<Ports...>, JP, Factory > : public streaming_node< tuple<Ports...>, JP, Factory > {
1409  typedef streaming_node < tuple<Ports...>, JP, Factory > base_type;
1410 public:
1411  typedef typename base_type::kernel_type kernel_type;
1412 
1413  opencl_node( graph &g, const kernel_type& kernel )
1414  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1415  {
1416  tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1417  }
1418 
1419  opencl_node( graph &g, const kernel_type& kernel, Factory &f )
1420  : base_type( g, kernel, opencl_info::default_device_selector <Factory >(), f )
1421  {
1422  tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1423  }
1424 
1425  template <typename DeviceSelector>
1426  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d, Factory &f)
1427  : base_type( g, kernel, d, f)
1428  {
1429  tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1430  }
1431 };
1432 
1433 template<typename JP, typename... Ports>
1434 class opencl_node< tuple<Ports...>, JP > : public opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > {
1436 public:
1437  typedef typename base_type::kernel_type kernel_type;
1438 
1439  opencl_node( graph &g, const kernel_type& kernel )
1440  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1441  {}
1442 
1443  template <typename DeviceSelector>
1444  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1445  : base_type( g, kernel, d, opencl_info::default_factory() )
1446  {}
1447 };
1448 
1449 template<typename... Ports>
1450 class opencl_node< tuple<Ports...> > : public opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > {
1452 public:
1453  typedef typename base_type::kernel_type kernel_type;
1454 
1455  opencl_node( graph &g, const kernel_type& kernel )
1456  : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1457  {}
1458 
1459  template <typename DeviceSelector>
1460  opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1461  : base_type( g, kernel, d, opencl_info::default_factory() )
1462  {}
1463 };
1464 
1465 } // namespace interfaceX
1466 
1467 using interface11::opencl_node;
1471 using interface11::opencl_buffer;
1472 using interface11::opencl_subbuffer;
1473 using interface11::opencl_device;
1474 using interface11::opencl_device_list;
1475 using interface11::opencl_program;
1477 using interface11::opencl_async_msg;
1478 using interface11::opencl_factory;
1479 using interface11::opencl_range;
1480 
1481 } // namespace flow
1482 } // namespace tbb
1483 #endif /* __TBB_PREVIEW_OPENCL_NODE */
1484 
1486 #undef __TBB_flow_graph_opencl_node_H_include_area
1487 
1488 #endif // __TBB_flow_graph_opencl_node_H
const opencl_buffer & memory_object() const
const nd_range_type & local_range() const
bool get_event_from_one_arg(cl_event &, const T &)
kernel(const cl_kernel &k, factory_type &f)
static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data)
opencl_buffer(Factory &f, cl_mem m, size_t index, size_t size)
cl_kernel get_cl_kernel(const std::string &k) const
opencl_buffer_impl(cl_mem m, size_t index, size_t size, Factory &f)
static void CL_CALLBACK register_callback_func(cl_event, cl_int event_command_exec_status, void *data)
friend bool operator==(const opencl_buffer< T, Factory > &lhs, const opencl_buffer< T, Factory > &rhs)
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
std::enable_if< is_native_object_type< T >::value, typename T::native_object_type >::type get_native_object(const T &t)
#define CODEPTR()
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
T device_info(cl_device_id d, cl_device_info i)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
void send_kernel(opencl_device device, const kernel_type &kernel, const range_type &work_size, Args &... args)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
Release.
Definition: atomic.h:59
void update_one_arg(cl_event e, opencl_async_msg< T, F > &msg)
const opencl_device_list & available_devices()
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp begin
opencl_async_msg< void *, Factory > send(opencl_device device, const cl_event *e)
bool is_same_context(opencl_device::device_id_type d1, opencl_device::device_id_type d2)
bool get_event_from_one_arg(cl_event &e, const opencl_async_msg< T, F > &msg)
bool init(const opencl_device_list &device_list)
bool built_in_kernel_available(const std::string &k) const
opencl_program(Factory &factory, opencl_program_type type, const std::string &program_name)
void finalize(opencl_device device, FinalizeFn fn, Args &... args)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
Base class for types that should not be assigned.
Definition: tbb_stddef.h:322
The graph class.
opencl_program(Factory &factory, const char *program_name)
std::array< range_index_type, 3 > nd_range_type
virtual void map_memory(opencl_device, opencl_async_msg< void *, Factory > &)=0
T event_info(cl_event e, cl_event_info i)
void enqueue_map_buffer(opencl_device device, opencl_buffer_impl< Factory > &buffer, opencl_async_msg< void *, Factory > &dmsg)
opencl_async_msg(const T &data, cl_event event)
kernel_type get_kernel(const std::string &k) const
K key_from_message(const opencl_async_msg< T, Factory > &dmsg)
std::shared_ptr< tbb::atomic< bool > > my_callback_flag_ptr
opencl_program(Factory &factory, cl_program program)
#define __TBB_STATIC_ASSERT(condition, msg)
Definition: tbb_stddef.h:553
opencl_async_msg< void *, Factory > receive(const cl_event *e)
friend bool operator==(opencl_device d1, opencl_device d2)
opencl_subbuffer< T, Factory > subbuffer(size_t index, size_t size) const
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
void receive(const opencl_async_msg< opencl_buffer, Factory > &dependency) const
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const opencl_async_msg< T, F > &msg)
void const char const char int ITT_FORMAT __itt_group_sync x void const char * name
std::enable_if< is_memory_object_type< T >::value >::type receive_if_memory_object(const opencl_async_msg< T, Factory > &dmsg)
void move(tbb_thread &t1, tbb_thread &t2)
Definition: tbb_thread.h:319
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
No ordering.
Definition: atomic.h:61
opencl_async_msg & operator=(const opencl_async_msg &dmsg)
is_typedef(native_object_type)
void const char const char int ITT_FORMAT __itt_group_sync s
opencl_program(Factory &factory, const std::string &program_name)
opencl_node< tuple< Ports... >, queueing, opencl_info::default_opencl_factory > base_type
std::array< size_t, 3 > max_work_item_sizes() const
opencl_device(cl_device_id cl_d_id, device_id_type device_id)
std::enable_if< is_memory_object_type< T >::value >::type send_if_memory_object(opencl_device device, opencl_async_msg< T, Factory > &dmsg)
opencl_range(G &&global_work=std::initializer_list< int >({ 0 }), L &&local_work=std::initializer_list< int >({ 0, 0, 0 }))
Represents acquisition of a mutex.
Definition: spin_mutex.h:53
T platform_info(cl_platform_id p, cl_platform_info i)
opencl_device_list operator()(const opencl_device_list &devices)
bool extension_available(const std::string &ext) const
opencl_device_list(std::initializer_list< opencl_device > il)
Base class for types that should not be copied or assigned.
Definition: tbb_stddef.h:330
void enqueue_unmap_buffer(opencl_device device, opencl_memory< Factory > &memory, opencl_async_msg< void *, Factory > &dmsg)
opencl_subbuffer(const opencl_buffer< T, Factory > &owner, size_t index, size_t size)
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)
opencl_buffer_impl< Factory > impl_type
opencl_program(opencl_program_type type, const std::string &program_name)
void set_command_queue(cl_command_queue cmd_queue)
#define __TBB_override
Definition: tbb_stddef.h:240
Acquire.
Definition: atomic.h:57
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d, Factory &f)
K key_from_message(const T &t)
Definition: flow_graph.h:713
void info(cl_device_info i, T &t) const
A lock that occupies a single byte.
Definition: spin_mutex.h:39
void send(opencl_device device, opencl_async_msg< opencl_buffer, Factory > &dependency) const
void suppress_unused_warning(const T1 &)
Utility template function to prevent "unused" warnings by various compilers.
Definition: tbb_stddef.h:398
opencl_async_msg(const opencl_async_msg &dmsg)
opencl_node< tuple< Ports... >, JP, opencl_info::default_opencl_factory > base_type
opencl_factory & operator=(const opencl_factory &)
const nd_range_type & global_range() const
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
cl_event send_kernel_impl(opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
default_opencl_factory & default_factory()
static void fgt_multiinput_multioutput_node(void *, string_index, void *, void *)
opencl_program(const std::string &program_name)
void update_arg_list(cl_event e, T &t, Rest &... args)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s __itt_frame ITT_FORMAT p const char const char ITT_FORMAT s __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
void map_memory(opencl_device device, opencl_async_msg< void *, Factory > &dmsg) __TBB_override
void enforce_cl_retcode(cl_int err, std::string msg)
opencl_device_filter(cl_uint &num_devices, cl_device_id *device_list, Filter filter, const char *message)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
void const char const char int ITT_FORMAT __itt_group_sync p
void process_arg_list(const kernel_type &, std::array< cl_event, NUM_ARGS > &, int &, int &)
tbb::atomic< opencl_device::device_id_type > my_curr_device_id
void send_data(opencl_device device, T &t, Rest &... args)
opencl_program_builder(Factory &f, const std::string &name, cl_program program, cl_uint num_devices, cl_device_id *device_list, const char *options, cl_callback_type callback, void *user_data)
A stage in a pipeline.
Definition: pipeline.h:64
The graph related classes and functions.

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.