32 #include "_ccl_abstract_wrapper.h"
65 static void ccl_kernel_release_fields(
CCLKernel* krnl) {
68 g_return_if_fail(krnl != NULL);
71 if (krnl->args != NULL)
72 g_hash_table_destroy(krnl->args);
122 g_return_val_if_fail((err) == NULL || *(err) == NULL, NULL);
125 g_return_val_if_fail(prg != NULL, NULL);
128 g_return_val_if_fail(kernel_name != NULL, NULL);
137 cl_kernel kernel = NULL;
141 kernel_name, &ocl_status);
143 CL_SUCCESS != ocl_status, ocl_status, error_handler,
144 "%s: unable to create kernel (OpenCL error %d: %s).",
145 CCL_STRD, ocl_status,
ccl_err(ocl_status));
151 g_assert(err == NULL || *err == NULL);
157 g_assert(err == NULL || *err != NULL);
180 (ccl_wrapper_release_fields) ccl_kernel_release_fields,
181 (ccl_wrapper_release_cl_object) clReleaseKernel, NULL);
208 g_return_if_fail(krnl != NULL);
211 if (krnl->args == NULL) {
212 krnl->args = g_hash_table_new_full(g_direct_hash,
213 g_direct_equal, NULL, (GDestroyNotify) ccl_arg_destroy);
217 g_hash_table_replace(krnl->args, GUINT_TO_POINTER(arg_index),
253 g_return_if_fail(krnl != NULL);
258 void** args_array = NULL;
265 va_start(args_va, krnl);
268 aux_arg = va_arg(args_va,
void*);
272 if (aux_arg != NULL) {
276 while (aux_arg != NULL) {
278 aux_arg = va_arg(args_va,
void*);
284 args_array = g_slice_alloc((num_args + 1) *
sizeof(
void*));
285 va_start(args_va, krnl);
287 for (guint i = 0; i < num_args; ++i) {
288 aux_arg = va_arg(args_va,
void*);
289 args_array[i] = aux_arg;
292 args_array[num_args] = NULL;
303 g_slice_free1((num_args + 1) *
sizeof(
void*), args_array);
337 g_return_if_fail(krnl != NULL);
339 g_return_if_fail(args != NULL);
342 for (guint i = 0; args[i] != NULL; ++i) {
394 cl_uint work_dim,
const size_t* global_work_offset,
395 const size_t* global_work_size,
const size_t* local_work_size,
399 g_return_val_if_fail(krnl != NULL, NULL);
401 g_return_val_if_fail(cq != NULL, NULL);
403 g_return_val_if_fail(err == NULL || *err == NULL, NULL);
415 gpointer arg_index_ptr, arg_ptr;
418 if (krnl->args != NULL) {
419 g_hash_table_iter_init(&iter, krnl->args);
420 while (g_hash_table_iter_next(&iter, &arg_index_ptr, &arg_ptr)) {
421 cl_uint arg_index = GPOINTER_TO_UINT(arg_index_ptr);
424 ccl_arg_size(arg), ccl_arg_value(arg));
426 CL_SUCCESS != ocl_status, ocl_status, error_handler,
427 "%s: unable to set kernel arg %d (OpenCL error %d: %s).",
428 CCL_STRD, arg_index, ocl_status,
ccl_err(ocl_status));
429 g_hash_table_iter_remove(&iter);
436 global_work_size, local_work_size,
437 ccl_event_wait_list_get_num_events(evt_wait_lst),
438 ccl_event_wait_list_get_clevents(evt_wait_lst), &event);
440 CL_SUCCESS != ocl_status, ocl_status, error_handler,
441 "%s: unable to enqueue kernel (OpenCL error %d: %s).",
442 CCL_STRD, ocl_status,
ccl_err(ocl_status));
447 evt = ccl_queue_produce_event(cq, event);
453 g_assert(err == NULL || *err == NULL);
459 g_assert(err == NULL || *err != NULL);
517 cl_uint work_dim,
const size_t* global_work_offset,
518 const size_t* global_work_size,
const size_t* local_work_size,
522 g_return_val_if_fail(krnl != NULL, NULL);
524 g_return_val_if_fail(cq != NULL, NULL);
526 g_return_val_if_fail(err == NULL || *err == NULL, NULL);
533 void** args_array = NULL;
540 va_start(args_va, err);
543 aux_arg = va_arg(args_va,
void*);
547 if (aux_arg != NULL) {
551 while (aux_arg != NULL) {
553 aux_arg = va_arg(args_va,
void*);
559 args_array = g_slice_alloc((num_args + 1) *
sizeof(
void*));
560 va_start(args_va, err);
562 for (guint i = 0; i < num_args; ++i) {
563 aux_arg = va_arg(args_va,
void*);
564 args_array[i] = aux_arg;
567 args_array[num_args] = NULL;
573 global_work_offset, global_work_size, local_work_size,
574 evt_wait_lst, args_array, err);
580 g_slice_free1((num_args + 1) *
sizeof(
void*), args_array);
636 CCLQueue* cq, cl_uint work_dim,
const size_t* global_work_offset,
637 const size_t* global_work_size,
const size_t* local_work_size,
641 g_return_val_if_fail(krnl != NULL, NULL);
643 g_return_val_if_fail(cq != NULL, NULL);
645 g_return_val_if_fail(err == NULL || *err == NULL, NULL);
647 CCLErr* err_internal = NULL;
656 global_work_size, local_work_size, evt_wait_lst, &err_internal);
660 g_assert(err == NULL || *err == NULL);
666 g_assert(err == NULL || *err != NULL);
707 void (CL_CALLBACK * user_func)(
void*),
void* args,
size_t cb_args,
708 cl_uint num_mos,
CCLMemObj*
const* mo_list,
713 g_return_val_if_fail(cq != NULL, NULL);
715 g_return_val_if_fail(user_func != NULL, NULL);
718 g_return_val_if_fail(((num_mos == 0) && (mo_list == NULL))
719 || ((num_mos > 0) && (mo_list != NULL)), NULL);
721 g_return_val_if_fail(err == NULL || *err == NULL, NULL);
726 cl_event
event = NULL;
730 cl_mem* mem_list = NULL;
734 mem_list = g_slice_alloc(
sizeof(cl_mem) * num_mos);
735 for (cl_uint i = 0; i < num_mos; ++i) {
736 mem_list[i] = mo_list[i] != NULL
744 args, cb_args, num_mos, (
const cl_mem*) mem_list, args_mem_loc,
745 ccl_event_wait_list_get_num_events(evt_wait_lst),
746 ccl_event_wait_list_get_clevents(evt_wait_lst), &event);
748 CL_SUCCESS != ocl_status, ocl_status, error_handler,
749 "%s: unable to enqueue native kernel (OpenCL error %d: %s).",
750 CCL_STRD, ocl_status,
ccl_err(ocl_status));
755 evt = ccl_queue_produce_event(cq, event);
761 g_assert(err == NULL || *err == NULL);
767 g_assert(err == NULL || *err != NULL);
773 g_slice_free1(
sizeof(cl_mem) * num_mos, mem_list);
802 g_return_val_if_fail(krnl != NULL, 0);
804 g_return_val_if_fail(err == NULL || *err == NULL, 0);
808 CCLErr* err_internal = NULL;
813 krnl, CL_KERNEL_CONTEXT, cl_context, &err_internal);
827 g_assert(err == NULL || *err == NULL);
833 g_assert(err == NULL || *err != NULL);
854 #define ccl_if_err_not_info_unavailable_propagate_goto( \
855 err, err_internal, error_handler) \
856 if (((err_internal) != NULL) && ((err_internal)->domain == CCL_ERROR) && \
857 ((err_internal)->code == CCL_ERROR_INFO_UNAVAILABLE_OCL)) { \
858 g_warning("In %s: %s", CCL_STRD, (err_internal)->message); \
859 g_clear_error(&(err_internal)); \
861 ccl_if_err_propagate_goto(err, err_internal, error_handler); \
904 cl_uint dims,
const size_t* real_worksize,
size_t* gws,
size_t* lws,
908 g_return_val_if_fail(dev != NULL, CL_FALSE);
910 g_return_val_if_fail(dims > 0, CL_FALSE);
912 g_return_val_if_fail(real_worksize != NULL, CL_FALSE);
914 g_return_val_if_fail(lws != NULL, CL_FALSE);
916 g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);
919 size_t wg_size_mult = 0;
920 size_t wg_size_max = 0;
921 size_t wg_size = 1, wg_size_aux;
922 size_t* max_wi_sizes;
928 CCLErr* err_internal = NULL;
932 dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint, &err_internal);
936 "%s: device only supports a maximum of %d dimension(s), "
937 "but %d were requested.",
938 CCL_STRD, dev_dims, dims);
942 dev, CL_DEVICE_MAX_WORK_ITEM_SIZES,
size_t*, &err_internal);
948 for (cl_uint i = 0; i < dims; ++i) {
950 max_wi_sizes[i] = MIN(max_wi_sizes[i], lws[i]);
959 CL_KERNEL_WORK_GROUP_SIZE,
size_t, &err_internal);
960 ccl_if_err_not_info_unavailable_propagate_goto(
961 err, err_internal, error_handler);
963 #ifdef CL_VERSION_1_1
972 if (ocl_ver >= 110) {
976 krnl, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
977 size_t, &err_internal);
978 ccl_if_err_not_info_unavailable_propagate_goto(
979 err, err_internal, error_handler);
984 wg_size_mult = wg_size_max;
990 wg_size_mult = wg_size_max;
999 if ((wg_size_max == 0) && (wg_size_mult == 0)) {
1001 dev, CL_DEVICE_MAX_WORK_GROUP_SIZE,
size_t, &err_internal);
1003 wg_size_mult = wg_size_max;
1007 for (cl_uint i = 0; i < dims; ++i) {
1011 lws[i] = MIN(wg_size_mult, max_wi_sizes[i]);
1017 real_ws *= real_worksize[i];
1024 for (cl_uint i = 0; i < dims; ++i) {
1025 while (lws[i] > real_worksize[i]) {
1033 while (wg_size > wg_size_max) {
1034 wg_size_aux = wg_size;
1035 for (
int i = dims - 1; i >= 0; --i) {
1041 if (wg_size <= wg_size_max)
break;
1047 "%s: Unable to determine a work size within the device limit (%d).",
1048 CCL_STRD, (
int) wg_size_max);
1055 for (cl_uint i = 0; i < dims; ++i) {
1056 gws[i] = ((real_worksize[i] / lws[i])
1057 + (((real_worksize[i] % lws[i]) > 0) ? 1 : 0))
1066 cl_bool lws_are_divisors = CL_TRUE;
1067 for (cl_uint i = 0; i < dims; ++i) {
1069 if (real_worksize[i] % lws[i] != 0) {
1072 lws_are_divisors = CL_FALSE;
1077 if (!lws_are_divisors) {
1080 for (cl_uint i = 0; i < dims; ++i) {
1084 if ((real_worksize[i] % lws[i] != 0)
1085 || (lws[i] * wg_size > wg_size_max))
1090 cl_uint best_lws_i = 1;
1091 for (cl_uint j = 2; j <= real_worksize[i] / 2; ++j) {
1095 if ((wg_size * j > wg_size_max)
1096 || (j > max_wi_sizes[i]))
break;
1100 if (real_worksize[i] % j == 0)
1104 lws[i] = best_lws_i;
1113 g_assert(err == NULL || *err == NULL);
1114 ret_status = CL_TRUE;
1120 g_assert(err == NULL || *err != NULL);
1121 ret_status = CL_FALSE;
1130 #ifdef CL_VERSION_1_2
1151 cl_int ccl_kernel_get_arg_info_adapter(cl_kernel kernel,
void* ptr_arg_indx,
1152 cl_kernel_arg_info param_name,
size_t param_value_size,
void *param_value,
1153 size_t* param_value_size_ret) {
1155 return clGetKernelArgInfo(kernel, GPOINTER_TO_UINT(ptr_arg_indx),
1156 param_name, param_value_size, param_value, param_value_size_ret);
1179 cl_kernel_arg_info param_name,
CCLErr** err) {
1182 g_return_val_if_fail(krnl != NULL, NULL);
1191 CCLErr* err_internal = NULL;
1196 #ifndef CL_VERSION_1_2
1208 "%s: Obtaining kernel argument information requires cf4ocl to be "
1209 "deployed with support for OpenCL version 1.2 or newer.",
1221 "%s: information about kernel arguments requires OpenCL" \
1222 " version 1.2 or newer.", CCL_STRD);
1225 fake_wrapper.cl_object = GUINT_TO_POINTER(idx);
1229 (
CCLWrapper*) krnl, &fake_wrapper, param_name, 0,
1236 g_assert(err == NULL || *err == NULL);
1242 g_assert(err == NULL || *err != NULL);
void ccl_kernel_destroy(CCLKernel *krnl)
Decrements the reference count of the kernel wrapper object.
CCLEvent * ccl_kernel_set_args_and_enqueue_ndrange_v(CCLKernel *krnl, CCLQueue *cq, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, CCLEventWaitList *evt_wait_lst, void **args, CCLErr **err)
Set kernel arguments and enqueue it for execution on a device.
#define CCL_OCL_ERROR
Resolves to error category identifying string, in this case an error in the OpenCL library.
#define ccl_kernel_get_workgroup_info_scalar(krnl, dev, param_name, param_type, err)
Macro which returns a scalar kernel workgroup information value.
Definition of a wrapper class and its methods for OpenCL kernel objects.
#define ccl_if_err_create_goto(err, quark, error_condition, error_code, label, msg,...)
If error is detected (error_code != no_error_code), create an error object (CCLErr) and go to the specified label.
const CCLArg * ccl_arg_skip
Use this constant to skip kernel arguments in the ccl_kernel_set_args(), ccl_kernel_set_args_v(), ccl_kernel_set_args_and_enqueue_ndrange() and ccl_kernel_set_args_and_enqueue_ndrange_v() functions.
GPtrArray * CCLEventWaitList
A list of event objects on which enqueued commands can wait.
Useful definitions used internally by cf4ocl.
#define ccl_memobj_unwrap(mo)
Get the OpenCL cl_mem object.
CCLWrapperInfo * ccl_kernel_get_arg_info(CCLKernel *krnl, cl_uint idx, cl_kernel_arg_info param_name, CCLErr **err)
Get a CCLWrapperInfo kernel argument information object.
The context wrapper class.
#define ccl_if_err_propagate_goto(err_dest, err_src, label)
Same as ccl_if_err_goto(), but rethrows error in a source CCLErr object to a new destination CCLErr object.
cl_uint ccl_kernel_get_opencl_version(CCLKernel *krnl, CCLErr **err)
Get the OpenCL version of the platform associated with this kernel.
Base class for memory object wrappers, i.e., CCLBuffer and CCLImage.
Command queue wrapper class.
const char * ccl_err(int code)
Convert OpenCL error code to a readable string.
void ccl_event_wait_list_clear(CCLEventWaitList *evt_wait_lst)
Clears an event wait list.
CCLContext * ccl_context_new_wrap(cl_context context)
Get the context wrapper for the given OpenCL context.
cl_uint ccl_context_get_opencl_version(CCLContext *ctx, CCLErr **err)
Get the OpenCL version of the platform associated with this context.
void ccl_kernel_set_args(CCLKernel *krnl,...)
Set all kernel arguments.
cl_bool ccl_kernel_suggest_worksizes(CCLKernel *krnl, CCLDevice *dev, cl_uint dims, const size_t *real_worksize, size_t *gws, size_t *lws, CCLErr **err)
Suggest appropriate local (and optionally global) work sizes for the given real work size.
#define CCL_ERROR
Resolves to error category identifying string, in this case an error in cf4ocl.
CCLWrapperInfo * ccl_wrapper_get_info(CCLWrapper *wrapper1, CCLWrapper *wrapper2, cl_uint param_name, size_t min_size, CCLInfo info_type, cl_bool use_cache, CCLErr **err)
Get information about any wrapped OpenCL object.
Definition of a wrapper class and its methods for OpenCL program objects.
Base class for all OpenCL wrappers.
#define ccl_kernel_get_info_scalar(krnl, param_name, param_type, err)
Macro which returns a scalar kernel information value.
#define ccl_context_unref(ctx)
Alias to ccl_context_destroy().
#define CCL_UNUSED(x)
Macro to avoid warnings about unused variables.
#define ccl_kernel_unwrap(krnl)
Get the OpenCL kernel object.
#define ccl_device_get_info_array(dev, param_name, param_type, err)
Macro which returns an array device information value.
CCLEvent * ccl_kernel_enqueue_native(CCLQueue *cq, void(*user_func)(void *), void *args, size_t cb_args, cl_uint num_mos, CCLMemObj *const *mo_list, const void **args_mem_loc, CCLEventWaitList *evt_wait_lst, CCLErr **err)
Enqueues a command to execute a native C/C++ function not compiled using the OpenCL compiler.
#define ccl_device_get_info_scalar(dev, param_name, param_type, err)
Macro which returns a scalar device information value.
CCLKernel * ccl_kernel_new_wrap(cl_kernel kernel)
Get the kernel wrapper for the given OpenCL kernel.
CCLEvent * ccl_kernel_enqueue_ndrange(CCLKernel *krnl, CCLQueue *cq, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, CCLEventWaitList *evt_wait_lst, CCLErr **err)
Enqueues a kernel for execution on a device.
Class which represents information about a wrapped OpenCL object.
void ccl_kernel_set_arg(CCLKernel *krnl, cl_uint arg_index, void *arg)
Set one kernel argument.
#define ccl_program_unwrap(prg)
Get the OpenCL program object.
GError CCLErr
Error handling class.
Request information about kernel arguments.
void ccl_kernel_set_args_v(CCLKernel *krnl, void **args)
Set all kernel arguments.
#define ccl_queue_unwrap(cq)
Get the OpenCL command queue object.
The operation is not supported by the version of the selected OpenCL platform.
CCLKernel * ccl_kernel_new(CCLProgram *prg, const char *kernel_name, CCLErr **err)
Create a new kernel wrapper object.
CCLEvent * ccl_kernel_set_args_and_enqueue_ndrange(CCLKernel *krnl, CCLQueue *cq, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, CCLEventWaitList *evt_wait_lst, CCLErr **err,...)
Set kernel arguments and enqueue it for execution on a device.