/**
 * Three-way comparison of two integer values that honors a sort order.
 *
 * When `ord` equals CCL_PROF_SORT_ASC the macro yields 1 if `x > y`,
 * -1 if `x < y` and 0 if both are equal. For any other value of `ord`
 * the sign of the result is inverted (1 if `x < y`, -1 if `x > y`).
 *
 * `x` and `y` can each be evaluated up to twice, so do not pass
 * expressions with side effects.
 */
41 #define CCL_PROF_CMP_INT(x, y, ord) (((ord) == CCL_PROF_SORT_ASC) \
42 ? (((x) > (y)) ? 1 : (((x) < (y)) ? -1 : 0)) \
43 : (((x) < (y)) ? 1 : (((x) > (y)) ? -1 : 0)))
/**
 * String comparison that honors a sort order.
 *
 * When `ord` equals CCL_PROF_SORT_ASC the macro yields
 * `g_strcmp0(s1, s2)`; for any other value of `ord` the arguments are
 * swapped, i.e. it yields `g_strcmp0(s2, s1)`, which reverses the
 * ordering.
 */
54 #define CCL_PROF_CMP_STR(s1, s2, ord) (((ord) == CCL_PROF_SORT_ASC) \
55 ? g_strcmp0(s1, s2) : g_strcmp0(s2, s1))
/**
 * Expand to a brace-enclosed initializer with two integer values decoded
 * from the `int` that `userdata` points to. The comparison callbacks in
 * this file use it to initialize a `CCLProfSort` variable.
 *
 * - First value: the low nibble of the integer (`0x0F` mask).
 * - Second value: the high nibble of the integer (`0xF0` mask, not
 *   shifted down).
 *
 * The argument is parenthesized before the cast so that any pointer
 * expression (e.g. `(char*) p + offset`) is converted as a whole;
 * without the parentheses the cast would bind only to the first operand.
 * `userdata` is evaluated twice, so it must not have side effects.
 */
#define ccl_prof_get_sort(userdata) \
    {0x0F & *((int*) (userdata)), 0xF0 & *((int*) (userdata))}
74 typedef struct ccl_prof_sort_data {
105 GHashTable* event_names;
111 GHashTable* event_name_ids;
178 cl_ulong total_events_time;
185 cl_ulong total_events_eff_time;
214 .zero_start = CL_TRUE
232 static CCLProfInst* ccl_prof_inst_new(
const char* event_name,
233 const char* queue_name, cl_uint
id, cl_ulong instant,
258 static void ccl_prof_inst_destroy(
CCLProfInst* instant) {
260 g_return_if_fail(instant != NULL);
278 static gint ccl_prof_inst_comp(
279 gconstpointer a, gconstpointer b, gpointer userdata) {
284 CCLProfSort sort = ccl_prof_get_sort(userdata);
294 result = CCL_PROF_CMP_INT(ev_inst1->
id, ev_inst2->
id,
296 if (result != 0)
return result;
298 return sort.order ? 1 : -1;
300 return sort.order ? -1 : 1;
303 g_return_val_if_reached(0);
316 static CCLProfAgg* ccl_prof_agg_new(
const char* event_name) {
330 static void ccl_prof_agg_destroy(
CCLProfAgg* agg) {
331 g_return_if_fail(agg != NULL);
348 static gint ccl_prof_agg_comp(
349 gconstpointer a, gconstpointer b, gpointer userdata) {
354 CCLProfSort sort = ccl_prof_get_sort(userdata);
371 g_return_val_if_reached(0);
397 static CCLProfInfo* ccl_prof_info_new(
const char* event_name,
398 cl_command_type command_type,
const char* queue_name,
399 cl_ulong t_queued, cl_ulong t_submit, cl_ulong t_start,
423 static void ccl_prof_info_destroy(
CCLProfInfo* info) {
424 g_return_if_fail(info != NULL);
441 static gint ccl_prof_info_comp(
442 gconstpointer a, gconstpointer b, gpointer userdata) {
447 CCLProfSort sort = ccl_prof_get_sort(userdata);
478 return CCL_PROF_CMP_INT(ev1->
t_end, ev2->
t_end, sort.order);
482 g_return_val_if_reached(0);
498 static CCLProfOverlap* ccl_prof_overlap_new(
const char* event1_name,
499 const char* event2_name, cl_ulong duration) {
521 g_return_if_fail(ovlp != NULL);
538 static gint ccl_prof_overlap_comp(
539 gconstpointer a, gconstpointer b, gpointer userdata) {
546 CCLProfSort sort = ccl_prof_get_sort(userdata);
567 g_return_val_if_reached(0);
584 static void ccl_prof_add_event(
CCLProf* prof,
const char* cq_name,
588 g_return_if_fail(err == NULL || *err == NULL);
590 g_return_if_fail(prof != NULL);
592 g_return_if_fail(cq_name != NULL);
594 g_return_if_fail(evt != NULL);
597 cl_uint* event_name_id;
601 cl_ulong instant_queued, instant_submit, instant_start, instant_end;
603 cl_command_type command_type;
608 CCLErr* err_internal = NULL;
611 const char* event_name;
618 evt, CL_PROFILING_COMMAND_QUEUED, cl_ulong, &err_internal);
623 evt, CL_PROFILING_COMMAND_SUBMIT, cl_ulong, &err_internal);
628 evt, CL_PROFILING_COMMAND_START, cl_ulong, &err_internal);
633 evt, CL_PROFILING_COMMAND_END, cl_ulong, &err_internal);
638 evt, CL_EVENT_COMMAND_TYPE, cl_command_type, &err_internal);
643 event_id = ++prof->num_events;
647 if (!g_hash_table_contains(prof->event_names, event_name)) {
650 GUINT_TO_POINTER(g_hash_table_size(prof->event_names));
653 (gpointer) event_name,
654 (gpointer) event_name_id);
658 if (instant_end > instant_start) {
661 evinst_start = ccl_prof_inst_new(event_name, cq_name, event_id,
663 prof->instants = g_list_prepend(
664 prof->instants, (gpointer) evinst_start);
667 evinst_end = ccl_prof_inst_new(event_name, cq_name, event_id,
669 prof->instants = g_list_prepend(
670 prof->instants, (gpointer) evinst_end);
673 if (instant_start < prof->t_start)
674 prof->t_start = instant_start;
678 g_info(
"Event '%s' did not use device time. As such its "\
679 "start and end instants will not be added to the list of "\
680 "event instants.", event_name);
685 prof->infos = g_list_prepend(prof->infos,
686 (gpointer) ccl_prof_info_new(event_name, command_type, cq_name,
687 instant_queued, instant_submit, instant_start, instant_end));
690 g_assert(err == NULL || *err == NULL);
695 g_assert(err == NULL || *err != NULL);
714 static void ccl_prof_process_queues(
CCLProf* prof,
CCLErr** err) {
717 g_return_if_fail(err == NULL || *err == NULL);
719 g_return_if_fail(prof != NULL);
727 cl_command_queue_properties qprop;
729 CCLErr* err_internal = NULL;
732 g_hash_table_iter_init(&iter, prof->queues);
733 while (g_hash_table_iter_next(&iter, &cq_name, &cq)) {
737 cl_command_queue_properties, &err_internal);
742 "%s: the '%s' queue does not have profiling enabled.",
743 G_STRLOC, (
char*) cq_name);
747 ccl_queue_iter_event_init((
CCLQueue*) cq);
748 while ((evt = ccl_queue_iter_event_next((
CCLQueue*) cq))) {
752 prof, (
const char*) cq_name, evt, &err_internal);
753 if ((err_internal != NULL) &&
755 (err_internal->code == CL_PROFILING_INFO_NOT_AVAILABLE))
764 g_info(
"The '%s' event does not have profiling info",
766 g_clear_error(&err_internal);
778 g_assert(err == NULL || *err == NULL);
783 g_assert(err == NULL || *err != NULL);
800 static void ccl_prof_calc_agg(
CCLProf* prof) {
803 g_return_if_fail(prof != NULL);
808 GHashTable* agg_table;
816 GList* curr_evinst_container = NULL;
823 agg_table = g_hash_table_new(g_str_hash, g_str_equal);
826 g_hash_table_iter_init(&iter, prof->event_names);
827 while (g_hash_table_iter_next(&iter, &event_name, NULL)) {
828 evagg = ccl_prof_agg_new(event_name);
831 agg_table, event_name, (gpointer) evagg);
836 prof->instants = g_list_sort_with_data(
837 prof->instants, ccl_prof_inst_comp,
838 (gpointer) &sort_type);
841 curr_evinst_container = prof->instants;
842 while (curr_evinst_container) {
846 cl_ulong start_inst, end_inst;
849 curr_evinst = (
CCLProfInst*) curr_evinst_container->data;
850 start_inst = curr_evinst->
instant;
853 curr_evinst_container = curr_evinst_container->next;
854 curr_evinst = (
CCLProfInst*) curr_evinst_container->data;
855 end_inst = curr_evinst->
instant;
861 prof->total_events_time += end_inst - start_inst;
864 curr_evinst_container = curr_evinst_container->next;
868 g_hash_table_iter_init(&iter, agg_table);
869 while (g_hash_table_iter_next(&iter, &event_name, &value_agg)) {
874 ((double) prof->total_events_time);
878 prof->aggs = g_hash_table_get_values(agg_table);
881 g_hash_table_destroy(agg_table);
893 static void ccl_prof_calc_overlaps(
CCLProf* prof) {
896 g_return_if_fail(prof != NULL);
899 cl_ulong total_overlap = 0;
901 cl_ulong* overlap_matrix = NULL;
903 cl_uint num_event_names;
905 GHashTable* overlaps = NULL;
907 GHashTable* occurring_events = NULL;
911 GList* curr_evinst_container;
914 num_event_names = g_hash_table_size(prof->event_names);
917 overlap_matrix = g_slice_alloc0(
918 sizeof(cl_ulong) * num_event_names * num_event_names);
921 overlaps = g_hash_table_new_full(g_direct_hash, g_direct_equal,
922 NULL, (GDestroyNotify) g_hash_table_destroy);
925 occurring_events = g_hash_table_new(g_int_hash, g_int_equal);
929 prof->instants = g_list_sort_with_data(prof->instants,
930 ccl_prof_inst_comp, (gpointer) &sort_type);
933 curr_evinst_container = prof->instants;
934 while (curr_evinst_container) {
941 GHashTable* inner_table = NULL;
946 gpointer key_eid, ueid_curr_ev, ueid_occu_ev;
948 cl_uint eid_key1, eid_key2;
950 cl_ulong eff_overlap;
953 curr_evinst = (
CCLProfInst*) curr_evinst_container->data;
961 g_hash_table_iter_init(&iter, occurring_events);
962 while (g_hash_table_iter_next (&iter, &key_eid, NULL)) {
965 eid_key1 = curr_evinst->
id <= *((cl_uint*) key_eid)
967 : *((cl_uint*) key_eid);
969 eid_key2 = curr_evinst->
id > *((cl_uint*) key_eid)
971 : *((cl_uint*) key_eid);
974 if (!g_hash_table_lookup_extended(overlaps,
975 GUINT_TO_POINTER(eid_key1), NULL,
976 (gpointer) &inner_table)) {
981 inner_table = g_hash_table_new(
982 g_direct_hash, g_direct_equal);
984 overlaps, GUINT_TO_POINTER(eid_key1), inner_table);
990 GUINT_TO_POINTER(eid_key2),
998 g_hash_table_lookup(prof->event_names,
1006 g_hash_table_remove(occurring_events, &(curr_evinst->
id));
1009 g_hash_table_iter_init(&iter, occurring_events);
1010 while (g_hash_table_iter_next(&iter, &key_eid, &ueid_occu_ev)) {
1012 eid_key1 = curr_evinst->
id <= *((cl_uint*) key_eid)
1014 : *((cl_uint*) key_eid);
1016 eid_key2 = curr_evinst->
id > *((cl_uint*) key_eid)
1018 : *((cl_uint*) key_eid);
1020 inner_table = g_hash_table_lookup(
1021 overlaps, GUINT_TO_POINTER(eid_key1));
1025 *((cl_ulong*) g_hash_table_lookup(
1026 inner_table, GUINT_TO_POINTER(eid_key2)));
1028 ueid_curr_ev = g_hash_table_lookup(
1031 GPOINTER_TO_UINT(ueid_curr_ev)
1032 <= GPOINTER_TO_UINT(ueid_occu_ev)
1033 ? GPOINTER_TO_UINT(ueid_curr_ev)
1034 : GPOINTER_TO_UINT(ueid_occu_ev);
1036 GPOINTER_TO_UINT(ueid_curr_ev)
1037 > GPOINTER_TO_UINT(ueid_occu_ev)
1038 ? GPOINTER_TO_UINT(ueid_curr_ev)
1039 : GPOINTER_TO_UINT(ueid_occu_ev);
1040 overlap_matrix[ueid_min * num_event_names + ueid_max] +=
1042 total_overlap += eff_overlap;
1047 curr_evinst_container = curr_evinst_container->next;
1051 for (cl_uint i = 0; i < num_event_names; i++) {
1052 for (cl_uint j = 0; j < num_event_names; j++) {
1053 if (overlap_matrix[i * num_event_names + j] > 0) {
1056 (
const char*) g_hash_table_lookup(
1057 prof->event_name_ids, GUINT_TO_POINTER(i)),
1058 (
const char*) g_hash_table_lookup(
1059 prof->event_name_ids, GUINT_TO_POINTER(j)),
1060 overlap_matrix[i * num_event_names + j]);
1062 prof->overlaps = g_list_prepend(
1063 prof->overlaps, (gpointer) ovlp);
1069 prof->total_events_eff_time = prof->total_events_time - total_overlap;
1072 g_slice_free1(
sizeof(cl_ulong) * num_event_names * num_event_names,
1076 g_hash_table_destroy(overlaps);
1079 g_hash_table_destroy(occurring_events);
1102 prof->t_start = CL_ULONG_MAX;
1120 g_return_if_fail(prof != NULL);
1123 if (prof->event_names != NULL)
1124 g_hash_table_destroy(prof->event_names);
1127 if (prof->event_name_ids != NULL)
1128 g_hash_table_destroy(prof->event_name_ids);
1131 if (prof->queues != NULL)
1132 g_hash_table_destroy(prof->queues);
1135 if (prof->instants != NULL)
1137 prof->instants, (GDestroyNotify) ccl_prof_inst_destroy);
1140 if (prof->infos != NULL)
1142 prof->infos, (GDestroyNotify) ccl_prof_info_destroy);
1145 if (prof->aggs != NULL)
1147 prof->aggs, (GDestroyNotify) ccl_prof_agg_destroy);
1150 if (prof->overlaps != NULL)
1152 prof->overlaps, (GDestroyNotify) ccl_prof_overlap_destroy);
1155 if (prof->summary != NULL)
1156 g_free(prof->summary);
1159 if (prof->timer != NULL)
1160 g_timer_destroy(prof->timer);
1180 g_return_if_fail(prof != NULL);
1183 prof->timer = g_timer_new();
1198 g_return_if_fail(prof != NULL);
1201 g_timer_stop(prof->timer);
1218 g_return_val_if_fail(prof != NULL, 0.0);
1221 return g_timer_elapsed(prof->timer, NULL);
1238 g_return_if_fail(prof != NULL);
1240 g_return_if_fail(cq != NULL);
1242 g_return_if_fail(prof->calc == FALSE);
1245 if (prof->queues == NULL) {
1246 prof->queues = g_hash_table_new_full(
1247 g_str_hash, g_direct_equal, NULL,
1252 if (g_hash_table_contains(prof->queues, cq_name))
1253 g_warning(
"Profile object already contains a queue named '%s'." \
1254 "The existing queue will be replaced.", cq_name);
1257 g_hash_table_replace(prof->queues, (gpointer) cq_name, cq);
1283 g_return_val_if_fail(prof != NULL, CL_FALSE);
1285 g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);
1287 g_return_val_if_fail(prof->calc == FALSE, CL_FALSE);
1289 g_return_val_if_fail(prof->queues != NULL, CL_FALSE);
1292 CCLErr* err_internal = NULL;
1298 GHashTableIter iter;
1301 gpointer p_evt_name, p_id;
1304 prof->event_names = g_hash_table_new(g_str_hash, g_str_equal);
1307 ccl_prof_process_queues(prof, &err_internal);
1311 prof->event_name_ids = g_hash_table_new(g_direct_hash, g_direct_equal);
1313 g_hash_table_iter_init(&iter, prof->event_names);
1314 while (g_hash_table_iter_next(&iter, &p_evt_name, &p_id)) {
1315 g_hash_table_insert(prof->event_name_ids, p_id, p_evt_name);
1319 ccl_prof_calc_agg(prof);
1322 ccl_prof_calc_overlaps(prof);
1325 g_assert(err == NULL || *err == NULL);
1332 g_assert(err == NULL || *err != NULL);
1353 CCLProf* prof,
const char* event_name) {
1356 g_return_val_if_fail(prof != NULL, NULL);
1358 g_return_val_if_fail(event_name != NULL, NULL);
1360 g_return_val_if_fail(prof->calc == TRUE, NULL);
1364 GList* agg_container = prof->aggs;
1365 while (agg_container != NULL) {
1366 const char* curr_event_name =
1367 ((
CCLProfAgg*) agg_container->data)->event_name;
1368 if (g_strcmp0(event_name, curr_event_name) == 0) {
1372 agg_container = agg_container->next;
1393 g_return_if_fail(prof != NULL);
1395 g_return_if_fail(prof->calc == TRUE);
1398 prof->aggs = g_list_sort_with_data(
1399 prof->aggs, ccl_prof_agg_comp, &sort);
1402 prof->agg_iter = prof->aggs;
1418 g_return_val_if_fail(prof != NULL, NULL);
1420 g_return_val_if_fail(prof->calc == TRUE, NULL);
1426 if (prof->agg_iter != NULL) {
1429 prof->agg_iter = prof->agg_iter->next;
1452 g_return_if_fail(prof != NULL);
1454 g_return_if_fail(prof->calc == TRUE);
1457 prof->infos = g_list_sort_with_data(
1458 prof->infos, ccl_prof_info_comp, &sort);
1461 prof->info_iter = prof->infos;
1476 g_return_val_if_fail(prof != NULL, NULL);
1478 g_return_val_if_fail(prof->calc == TRUE, NULL);
1484 if (prof->info_iter != NULL) {
1487 prof->info_iter = prof->info_iter->next;
1510 g_return_if_fail(prof != NULL);
1512 g_return_if_fail(prof->calc == TRUE);
1515 prof->instants = g_list_sort_with_data(
1516 prof->instants, ccl_prof_inst_comp, &sort);
1519 prof->inst_iter = prof->instants;
1535 g_return_val_if_fail(prof != NULL, NULL);
1537 g_return_val_if_fail(prof->calc == TRUE, NULL);
1543 if (prof->inst_iter != NULL) {
1546 prof->inst_iter = prof->inst_iter->next;
1570 g_return_if_fail(prof != NULL);
1572 g_return_if_fail(prof->calc == TRUE);
1575 prof->overlaps = g_list_sort_with_data(
1576 prof->overlaps, ccl_prof_overlap_comp, &sort);
1579 prof->overlap_iter = prof->overlaps;
1594 g_return_val_if_fail(prof != NULL, NULL);
1596 g_return_val_if_fail(prof->calc == TRUE, NULL);
1602 if (prof->overlap_iter != NULL) {
1605 prof->overlap_iter = prof->overlap_iter->next;
1627 g_return_val_if_fail(prof != NULL, 0);
1629 g_return_val_if_fail(prof->calc == TRUE, 0);
1632 return prof->total_events_time;
1653 g_return_val_if_fail(prof != NULL, 0);
1655 g_return_val_if_fail(prof->calc == TRUE, 0);
1658 return prof->total_events_eff_time;
1678 g_return_if_fail(prof != NULL);
1680 g_return_if_fail(prof->calc == TRUE);
1683 const char* summary;
1691 g_printf(
"%s", summary);
1712 CCLProf* prof,
int agg_sort,
int ovlp_sort) {
1715 g_return_val_if_fail(prof != NULL, NULL);
1717 g_return_val_if_fail(prof->calc == TRUE, NULL);
1724 GString* str_obj = g_string_new(
"\n");
1727 g_string_append_printf(str_obj,
1728 " Aggregate times by event :\n");
1729 g_string_append_printf(str_obj,
1730 " ------------------------------------------------------------------\n");
1731 g_string_append_printf(str_obj,
1732 " | Event name | Rel. time (%%) | Abs. time (s) |\n");
1733 g_string_append_printf(str_obj,
1734 " ------------------------------------------------------------------\n");
1737 g_string_append_printf(str_obj,
1738 " | %-30.30s | %13.4f | %13.4e |\n",
1743 g_string_append_printf(str_obj,
1744 " ------------------------------------------------------------------\n");
1747 if (prof->total_events_time > 0) {
1748 g_string_append_printf(str_obj,
1749 " | Total | %13.4e |\n",
1750 prof->total_events_time * 1e-9);
1751 g_string_append_printf(str_obj,
1752 " ---------------------------------\n");
1757 if (g_list_length(prof->overlaps) > 0) {
1759 g_string_append_printf(str_obj,
1760 " Event overlaps :\n");
1761 g_string_append_printf(str_obj,
1762 " ------------------------------------------------------------------\n");
1763 g_string_append_printf(str_obj,
1764 " | Event 1 | Event2 | Overlap (s) |\n");
1765 g_string_append_printf(str_obj,
1766 " ------------------------------------------------------------------\n");
1770 g_string_append_printf(str_obj,
" | %-22.22s | %-22.22s | %12.4e |\n",
1773 g_string_append_printf(str_obj,
1774 " ------------------------------------------------------------------\n");
1776 g_string_append_printf(str_obj,
1777 " | Total | %12.4e |\n",
1778 (prof->total_events_time - prof->total_events_eff_time) * 1e-9);
1779 g_string_append_printf(str_obj,
1780 " -----------------------------------------\n");
1781 g_string_append_printf(str_obj,
1782 " Tot. of all events (eff.) : %es\n",
1783 prof->total_events_eff_time * 1e-9);
1785 g_string_append_printf(str_obj,
1786 " Event overlaps : None\n");
1791 double t_ellapsed = g_timer_elapsed(prof->timer, NULL);
1792 g_string_append_printf(str_obj,
1793 " Total ellapsed time : %es\n", t_ellapsed);
1794 g_string_append_printf(str_obj,
1795 " Time spent in device : %.2f%%\n",
1796 prof->total_events_eff_time * 1e-9 * 100 / t_ellapsed);
1797 g_string_append_printf(str_obj,
1798 " Time spent in host : %.2f%%\n",
1799 100 - prof->total_events_eff_time * 1e-9 * 100 / t_ellapsed);
1801 g_string_append_printf(str_obj,
"\n");
1804 if (prof->summary != NULL)
1805 g_free(prof->summary);
1808 prof->summary = g_string_free(str_obj, FALSE);
1811 return (
const char*) prof->summary;
1848 g_return_val_if_fail(prof != NULL, CL_FALSE);
1850 g_return_val_if_fail(stream != NULL, CL_FALSE);
1852 g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);
1854 g_return_val_if_fail(prof->calc == TRUE, CL_FALSE);
1863 cl_ulong t_start = 0;
1872 t_start = prof->t_start;
1878 write_status = fprintf(stream,
"%s%s%s%s%lu%s%lu%s%s%s%s%s",
1883 (
unsigned long) (curr_ev->
t_start - t_start),
1885 (
unsigned long) (curr_ev->
t_end - t_start),
1894 "Error while exporting profiling information" \
1895 "(writing to stream).");
1900 g_assert(err == NULL || *err == NULL);
1901 ret_status = CL_TRUE;
1906 g_assert(err == NULL || *err != NULL);
1907 ret_status = CL_FALSE;
1935 g_return_val_if_fail(prof != NULL, CL_FALSE);
1937 g_return_val_if_fail(filename != NULL, CL_FALSE);
1939 g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);
1941 g_return_val_if_fail(prof->calc == TRUE, CL_FALSE);
1947 CCLErr* err_internal = NULL;
1950 FILE* fp = fopen(filename,
"w");
1953 "Unable to open file '%s' for exporting.", filename);
1960 g_assert(err == NULL || *err == NULL);
1966 g_assert(err == NULL || *err != NULL);
1988 export_options = export_opts;
2000 return export_options;
const char * ccl_event_get_final_name(CCLEvent *evt)
Get the final event name for profiling purposes.
Sort event profiling info instances by end time.
cl_bool ccl_prof_export_info_file(CCLProf *prof, const char *filename, CCLErr **err)
Helper function which exports profiling info to a given file, automatically opening and closing the f...
const char * queue_name
Name of command queue associated with event.
cl_ulong t_submit
Device time counter in nanoseconds when the command identified by event that has been enqueued is sub...
Sort event profiling info instances by queued time.
void ccl_prof_iter_overlap_init(CCLProf *prof, int sort)
Initialize an iterator for overlap instances.
double ccl_prof_time_elapsed(CCLProf *prof)
If profiling has started but not stopped, returns the time since the profiling started.
#define CCL_OCL_ERROR
Resolves to error category identifying string, in this case an error in the OpenCL library...
cl_ulong duration
Overlap duration in nanoseconds.
CCLProfAggSort
Sort criteria for aggregate event info instances.
cl_ulong ccl_prof_get_duration(CCLProf *prof)
Get duration of all events in nanoseconds.
Representation of an overlap of events.
Sort overlaps by event name.
#define ccl_queue_get_info_scalar(cq, param_name, param_type, err)
Macro which returns a scalar command queue information value.
#define ccl_if_err_create_goto(err, quark, error_condition, error_code, label, msg,...)
If error is detected (error_code != no_error_code), create an error object (CCLErr) and go to the spe...
const char * event_name
Name of event which the instant refers to.
cl_bool ccl_prof_calc(CCLProf *prof, CCLErr **err)
Determine aggregate statistics for the given profile object.
Useful definitions used internally by cf4ocl.
#define ccl_event_get_profiling_info_scalar(evt, param_name, param_type, err)
Macro which returns a scalar event profiling information value.
void ccl_prof_iter_agg_init(CCLProf *prof, int sort)
Initialize an iterator for profiled aggregate event instances.
cl_bool zero_start
Start at instant 0 (TRUE, default), or start at oldest instant returned by OpenCL (FALSE)...
Sort ascending (default).
#define ccl_if_err_propagate_goto(err_dest, err_src, label)
Same as ccl_if_err_goto(), but rethrows error in a source CCLErr object to a new destination CCLErr o...
Sort overlaps by overlap duration.
Command queue wrapper class.
CCLProf * ccl_prof_new()
Create a new profile object.
Sort event profiling info instances by event name.
#define ccl_queue_ref(cq)
Increase the reference count of the command queue object.
const char * evname_delim
Event name delimiter, defaults to empty string.
cl_ulong t_start
Device time in nanoseconds when the command identified by event starts execution on the device...
cl_ulong ccl_prof_get_eff_duration(CCLProf *prof)
Get effective duration of all events in nanoseconds, i.e. the total duration of all events minus the total time during which events overlap.
Sort event profiling info instances by submit time.
void ccl_prof_stop(CCLProf *prof)
Stops the global profiler timer.
Definition of classes and methods for profiling OpenCL events.
Sort event instants by event id.
void ccl_queue_gc(CCLQueue *cq)
Release all events associated with the command queue.
const CCLProfInfo * ccl_prof_iter_info_next(CCLProf *prof)
Return the next event profiling info instance.
void ccl_prof_start(CCLProf *prof)
Starts the global profiler timer.
const char * ccl_prof_get_summary(CCLProf *prof, int agg_sort, int ovlp_sort)
Get a summary with the profiling info.
const CCLProfAgg * ccl_prof_get_agg(CCLProf *prof, const char *event_name)
Return aggregate statistics for events with the given name.
const CCLProfOverlap * ccl_prof_iter_overlap_next(CCLProf *prof)
Return the next overlap instance.
#define CCL_ERROR
Resolves to error category identifying string, in this case an error in cf4ocl.
Sort aggregate event data instances by name.
cl_command_type command_type
Type of command which produced the event.
cl_uint id
Event instant ID.
#define ccl_event_get_info_scalar(evt, param_name, param_type, err)
Macro which returns a scalar event information value.
Sort event profiling info instances by start time.
CCLProfOverlapSort
Sort criteria for overlaps (CCLProfOverlap).
Sort aggregate event data instances by time.
void ccl_prof_destroy(CCLProf *prof)
Destroy a profile object.
cl_bool ccl_prof_export_info(CCLProf *prof, FILE *stream, CCLErr **err)
Export event profiling information to a given stream.
void ccl_prof_iter_inst_init(CCLProf *prof, int sort)
Initialize an iterator for event instant instances.
const char * queue_delim
Queue name delimiter, defaults to empty string.
CCLProfExportOptions ccl_prof_get_export_opts()
Get current export options.
void ccl_prof_set_export_opts(CCLProfExportOptions export_opts)
Set export options using a CCLProfExportOptions struct.
cl_ulong t_queued
Device time in nanoseconds when the command identified by event is enqueued in a command-queue by the...
Object information is unavailable.
const CCLProfInst * ccl_prof_iter_inst_next(CCLProf *prof)
Return the next event instant instance.
void ccl_prof_add_queue(CCLProf *prof, const char *cq_name, CCLQueue *cq)
Add a command queue wrapper for profiling.
const char * event2_name
Name of second overlapping event.
cl_ulong instant
Event instant in nanoseconds from current device time counter.
Sort event profiling info instances by queue name.
void ccl_prof_iter_info_init(CCLProf *prof, int sort)
Initialize an iterator for event profiling info instances.
GError CCLErr
Error handling class.
Error writing to a stream.
const char * event1_name
Name of first overlapping event.
const char * separator
Field separator, defaults to tab (\t).
CCLProfInfoSort
Sort criteria for event profiling info instances.
void ccl_prof_print_summary(CCLProf *prof)
Print a summary of the profiling info.
void ccl_queue_destroy(CCLQueue *cq)
Decrements the reference count of the command queue wrapper object.
CCLProfSortOrder
Sort order for the profile module iterators.
double relative_time
Relative time of events with name equal to CCLProfAgg::event_name.
Profile class, contains profiling information of OpenCL queues and events.
CCLProfInstSort
Sort criteria for event instants (CCLProfInst).
cl_ulong t_end
Device time in nanoseconds when the command identified by event has finished execution on the device...
CCLProfInstType type
Type of event instant (CCL_PROF_INST_TYPE_START or CCL_PROF_INST_TYPE_END).
const char * newline
Newline character, defaults to Unix newline (\n).
cl_ulong absolute_time
Total (absolute) time of events with name equal to CCLProfAgg::event_name.
CCLProfInstType
Type of event instant (CCLProfInst).
const CCLProfAgg * ccl_prof_iter_agg_next(CCLProf *prof)
Return the next aggregate statistic instance.
Sort event instants by instant.