Loading... ## Android AARCH64 平台的 OpenCL 配置 ### 开发环境 IDE: Android Studio 3.4.1 Android: 7.1 minSdkVersion: 25 targetSdkVersion: 26 JNI CMake: 3.4.1 ABI: arm64-v8a OpenCL: 1.2 ### 配置 OpenCL 使用项目中的 so 库 这里以编译 `openclTest.cpp` 为 `libopenclTest.so` 并导入 OpenCL 的动态库为例 > 注1: 下列 so 库需要从开发板上 pull 到项目中,其中 `libGLES_mali.so` 用于驱动 OpenCL,其他库为依赖库 > 注2: 不同平台的驱动库位于不同位置,可以下载 OpenCL-Z 查看 > 注3: `set_target_properties` 中第一项参数设置了生成库的名称,例如设置为 `openclTest`,则生成 `libopenclTest.so` 库,这里使用了 `lib_*` 作为前缀,则生成 `liblib_*.so` 库,实际开发中要避免这种情况 查看依赖库 ```bash objdump -x libGLES_mali.so | grep NEEDED ``` 目录结构 ```bash opencltest ├─ app │ ├─ build │ ├─ libs │ └─ src │ ├─ androidTest │ ├─ main │ │ ├─ java │ │ │ └─ com │ │ │ └─ example │ │ │ └─ opencltest │ │ │ MainActivity.java │ │ ├─ jni # C/C++ 源码目录 │ │ │ └─ openclTest.cpp │ │ ├─ jniLibs # JNI 需要调用的运行库 │ │ │ └─ arm64-v8a # 对应 ABI 版本建立文件夹 │ │ │ ├─ libbinder.so │ │ │ ├─ libc++.so │ │ │ ├─ libc.so │ │ │ ├─ libcrypto.so │ │ │ ├─ libcutils.so │ │ │ ├─ libdl.so │ │ │ ├─ libGLES_mali.so │ │ │ ├─ libhardware.so │ │ │ ├─ liblog.so │ │ │ ├─ libm.so │ │ │ ├─ libui.so │ │ │ ├─ libutils.so │ │ │ └─ libz.so │ │ └─ res │ └─ test └─ gradle ``` `CMakeLists.txt` 增加配置 ```cmake add_library(openclTest SHARED src/main/jni/openclTest.cpp ) add_library(lib_opencl SHARED IMPORTED) set_target_properties(lib_opencl PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libGLES_mali.so ) add_library(lib_z SHARED IMPORTED) set_target_properties(lib_z PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libz.so ) add_library(lib_log SHARED IMPORTED) set_target_properties(lib_log PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/liblog.so ) add_library(lib_utils SHARED IMPORTED) set_target_properties(lib_utils PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libutils.so ) add_library(lib_ui SHARED IMPORTED) set_target_properties(lib_ui PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libui.so ) add_library(lib_cutils SHARED IMPORTED) set_target_properties(lib_cutils PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libcutils.so ) add_library(lib_binder SHARED IMPORTED) set_target_properties(lib_binder PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libbinder.so ) add_library(lib_crypto SHARED IMPORTED) set_target_properties(lib_crypto PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libcrypto.so ) add_library(lib_dl SHARED IMPORTED) set_target_properties(lib_dl PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libdl.so ) add_library(lib_hardware SHARED IMPORTED) set_target_properties(lib_hardware PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libhardware.so ) add_library(lib_c++ SHARED IMPORTED) set_target_properties(lib_c++ PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libc++.so ) add_library(lib_c SHARED IMPORTED) set_target_properties(lib_c PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libc.so ) add_library(lib_m SHARED IMPORTED) set_target_properties(lib_m PROPERTIES IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libm.so ) target_link_libraries(openclTest ${log-lib} lib_opencl ) ``` ### 配置 OpenCL 使用 dlopen 打开开发板上的运行库 这种配置方法需要从 [Github - KhronosGroup/OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers) 仓库下载 OpenCL 的头文件,并且需要自行实现 OpenCL 的函数 > 注: 需要在源码中定义 `CL_TARGET_OPENCL_VERSION 120`,否则会报找不到版本号的错误 目录结构 ```bash opencltest ├─ app │ ├─ build │ ├─ libs │ └─ src │ ├─ androidTest │ ├─ main │ │ ├─ java │ │ │ └─ com │ │ │ └─ example │ │ │ └─ opencltest │ │ │ MainActivity.java │ │ ├─ jni # C/C++ 源码目录 │ │ │ ├─ dlopencl.cpp # dlopen 打开运行库,实现 OpenCL 中的函数 │ │ │ ├─ openclTest.cpp │ │ │ └─ include # JNI 调用的头文件 │ │ │ ├─ dlopencl.h # 定义 OpenCL 中的函数 │ │ │ └─ CL # OpenCL 需要使用的头文件 │ │ │ ├─ cl.h │ │ │ ├─ cl_ext.h │ │ │ ├─ cl_gl.h │ │ │ ├─ cl_gl_ext.h │ │ │ ├─ cl_platform.h │ │ │ ├─ cl_version.h │ │ │ └─ opencl.h │ │ └─ res │ └─ test └─ gradle ``` `CMakeLists.txt` 增加配置 ```cmake include_directories(${PROJECT_SOURCE_DIR}/src/main/jni/include) add_library(openclTest SHARED src/main/jni/openclTest.cpp ) add_library(lib_dlopencl SHARED src/main/jni/dlopencl.cpp ) target_link_libraries(openclTest ${log-lib} lib_dlopencl ) ``` **需要导入的 `dlopencl.h` 和 `dlopencl.cpp` 写在文末** ## OpenCL 使用 以下均以使用 dlopen 导入运行库的方式为例 1. 定义 OpenCL 版本 ```cpp #define CL_TARGET_OPENCL_VERSION 120 ``` 2. 包含头文件 ```cpp #include <CL/cl.h> #include "dlopencl.h" ``` 3. 使用 Logcat 打印日志 由于 C 语言工作在 JNI 层,无法获取控制台,导致了 `printf()` 函数失效,这里使用 `__android_log_print` 方法打印日志到 Logcat ```cpp #include <android/log.h> #define DEBUG #ifdef DEBUG #define LOG "LOG-TAG" #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG, __VA_ARGS__) #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG, __VA_ARGS__) #define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG, __VA_ARGS__) #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG, __VA_ARGS__) #define LOGF(...) __android_log_print(ANDROID_LOG_FATAL, LOG, __VA_ARGS__) #else #define LOG #define LOGD(...) #define LOGI(...) #define LOGW(...) #define LOGE(...) #define LOGF(...) #endif ``` 使用方法同 `printf()`,例: ```cpp LOGI("Device ID: %d", device_id); ``` 4. 创建用于初始化 OpenCL 的 JNI 接口 建议在同一个文件中编写多个操作 OpenCL 的函数,这里的初始化实际上是将操作 OpenCL 的变量建立为全局变量,通过 JNI 一次调用后其他函数再使用这些被初始化过的变量 定义变量 ```cpp cl_uint num_device; cl_uint num_platform; cl_platform_id *platform; cl_device_id *devices; cl_int err; cl_context context; ``` ```cpp extern "C" JNIEXPORT jint JNICALL Java_com_example_opencltest_MainActivity_initOpencl(JNIEnv *env, jobject instance) { initFns(); LOGI("getPlatformNum"); // 获取可用平台数量 err = clGetPlatformIDs(0, 0, &num_platform); platform = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num_platform); LOGI("getPlatformIDs"); // 获取平台 ID err = clGetPlatformIDs(num_platform, platform, nullptr); if(err < 0) { LOGE("clGetPlatformIDs failed"); return -1; } LOGI("getDeviceNum"); // 获取可用设备数量 err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, 0, nullptr, &num_device); devices = (cl_device_id*)malloc(sizeof(cl_device_id)*num_device); LOGI("getDeviceIDs"); // 获取设备 ID err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, num_device, devices, nullptr); if (err < 0) { LOGE("clGetDeviceIDs failed"); return -1; } return 0; } ``` ## 使用 OpenCL 执行 Kernel 函数 通常编写 OpenCL 中的 Kernel 函数要求单独写在 `*.cl` 文件中,调用时使用 `fopen()` 打开,将里面的代码作为字符串读入之后再进行 runtime 编译,比较麻烦,这里直接将整个 Kernel 函数写成字符串 本例中因为测试原因没有释放掉创建的资源 > 可以使用允许正则替换的编辑器例如 VSCode,先写好 Kernel 函数,然后使用正则表达式 `(.*)` 选择所有行并将每一行作为一个参数(`$1`),替换为 `"$1\\n"`,例如替换前: `int a = 0;`,替换后: `"int a = 0;\n"` > 注1: 经测试发现 OpenCL 在 PC 端允许每次调用任意数量的矢量数据,而在开发板上只允许一次调用 1.2.3.4.8.16 个 PC 端允许的矢量调用: ```cpp Integralgraph.s0 Integralgraph.s01 Integralgraph.s012 Integralgraph.s0123 Integralgraph.s01234 Integralgraph.s0123456 Integralgraph.s01234567 Integralgraph.s012345678 Integralgraph.s0123456789 Integralgraph.s0123456789a Integralgraph.s0123456789ab Integralgraph.s0123456789abc Integralgraph.s0123456789abcd Integralgraph.s0123456789abcde Integralgraph.s0123456789abcdef ``` 开发板允许的矢量调用: ```cpp Integralgraph.s0 Integralgraph.s01 Integralgraph.s012 Integralgraph.s0123 Integralgraph.s01234567 Integralgraph.s0123456789abcdef ``` 所以在编写运行于 AARCH64 架构的 OpenCL Kernel 函数时需要将被调用的矢量数组分组相加 例如在 PC 端为 ```cpp TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s0123456789abcde); ``` 在开发板中需要改为 ```cpp TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde); ``` > 注2: 测试中发现 PC 端的 OpenCL 遇到数组越界问题会直接跳过不予处理,但在 AARCH64 中会导致执行失败 > 注3: AARCH64 平台上 OpenCL 能申请到的可调用内存远小于 PC 端,如果出现输入或输出的数据超出 OpenCL 申请到的内存可能会导致数据输出不完整,读取数据时错误代码返回 -14 等问题 先在 Activity 中调用 initOpencl(),初始化 OpenCL 及其平台和设备 ```cpp // 定义积分图宽度 #define CLL_IMAGE_W (400) // 定义积分图高度 #define CLL_IMAGE_H (80) // 构建 Kernel 函数字符串 const char *clkernel[] = { "__kernel void kernel_Integralgraph_45int(__global int16 * grayImage,\n" " __global int16 * Integralgraph,\n" " __global unsigned * const p_height) {\n" " int x = get_global_id(0);\n" " int height = *p_height;\n" " int width = get_global_size(0);\n" " __local int index, index1, index2, index3, index4, index5;\n" " __local bool flagx0, flagxw, flagy1, flagy2;\n" " __local int16 TableInteg1, TableInteg2, TableInteg3;\n" " flagx0 = min(0, -x);\n" " flagxw = min(0, x - width + 1);\n" " for (int j = 0; j < height; j++) {\n" " flagy1 = min(0, -j);\n" " flagy2 = min(0, 1 - j);\n" " index = j * width + x;\n" " if(j==0) {\n" " Integralgraph[index] = (int16)grayImage[index];\n" " } else if(j==1) {\n" " index1 = (j - 1)*width + x - 1;\n" " index3 = (j - 1)*width + x + 1;\n" " index4 = (j - 1)*width + x;\n" " TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);\n" " TableInteg3 = (int16)(Integralgraph[index4].s12345678, Integralgraph[index4].s9abc, Integralgraph[index4].sdef, Integralgraph[index3].s0*flagxw);\n" " Integralgraph[index] = (int16)((int16)TableInteg1*flagy1 + (int16)TableInteg3*flagy1 + (int16)grayImage[index4] * flagy1 + (int16)grayImage[index]);\n" " } else {\n" " index1 = (j - 1)*width + x - 1;\n" " index2 = (j - 2)*width + x;\n" " index3 = (j - 1)*width + x + 1;\n" " index4 = (j - 1)*width + x;\n" " TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);\n" " TableInteg2 = (int16)(Integralgraph[index2].s0*flagx0, Integralgraph[index2].s12345678, Integralgraph[index2].s9ab, Integralgraph[index2].scde, Integralgraph[index2].sf*flagxw);\n" " TableInteg3 = (int16)(Integralgraph[index4].s12345678, Integralgraph[index4].s9abc, Integralgraph[index4].sdef, Integralgraph[index3].s0*flagxw);\n" " Integralgraph[index] = (int16)((int16)TableInteg1*flagy1 + (int16)TableInteg3*flagy1 - (int16)TableInteg2 * flagy2 + (int16)grayImage[index4] * flagy1 + (int16)grayImage[index]);\n" " }\n" " }\n" "}\n" }; // 创建 Context context = clCreateContext(nullptr, 1, devices, nullptr, nullptr, &err); if(err < 0) { LOGE("Create context failed, error code: [%d]", err); return -1; } int inputData[CLL_IMAGE_W * CLL_IMAGE_H]; int outputData[CLL_IMAGE_W * CLL_IMAGE_H]; int width = CLL_IMAGE_W; int height = CLL_IMAGE_H; // 初始化输入输出数据 for (int i = 0; i < height; i++) { for (int j = 0; j < width; j++) { inputData[i * width + j] = 1; outputData[i * width + j] = 0; } } clock_t time_start; clock_t time_finish; double total_time; char *program_log; cl_command_queue queue; cl_kernel kernel; size_t log_size; cl_program program; // 创建命令队列 queue = clCreateCommandQueue(context, devices[0], 0, &err); if (err < 0) { LOGE("Create command queue failed, error code: [%d]", err); } // 创建程序 program = clCreateProgramWithSource(context, sizeof(clkernel) / sizeof(clkernel[0]), clkernel, nullptr, nullptr); // 构建/编译程序 err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr); if (err < 0) { clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size); program_log = (char*)malloc(sizeof(log_size)); // 查询构建/编译过程中的 log clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, program_log, nullptr); LOGE("program_build_info: \n[%s]\n", program_log); free(program_log); } // 创建 Kernel kernel = clCreateKernel(program, "kernel_Integralgraph_45int", &err); if (err < 0) { LOGE("Create kernel failed, error code: [%d]", err); } // 创建用于输入参数、输出数据的内存空间 cl_mem meminput_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * height * width, inputData, &err); cl_mem memoutput_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * height * width, nullptr, &err); cl_mem memHeight_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &height, &err); // 向 Kernel 传递参数 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &meminput_buffer); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &memoutput_buffer); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &memHeight_buffer); size_t global_work_offset[2] = {0, 0}; size_t localThreads[2] = {1, 1}; size_t globalThreads[2] = {(size_t)(width / 16), 1}; // 获得程序开始执行的时间戳 time_start = clock(); // 排布工作组与工作项并执行 err = clEnqueueNDRangeKernel(queue, kernel, 2, global_work_offset, globalThreads, localThreads, 0, nullptr, nullptr); if (err < 0) { LOGE("Run Kernel failed, error code: [%d]", err); } // 获得程序执行完成的时间戳 time_finish = clock(); // 计算运行时间 total_time = (double)(time_finish - time_start) / CLOCKS_PER_SEC; LOGI("Total time: [%f]s", total_time); // 从输出的内存空间中读取数据 err = clEnqueueReadBuffer(queue, memoutput_buffer, CL_TRUE, 0, sizeof(int) * height * width, outputData, 0, nullptr, nullptr); if (err < 0) { LOGE("Read buffer failed, error code: [%d]", err); } // 输出数据,积分图中的每一行拼接为一条 LOG char outputDataTemp[2048]; for (int i = 0; i < height ; i++) { // 清空字符串,拼接下一行 memset(outputDataTemp, 0x00, 2048); for (int j = 0; j < width ; j++) { // 将一行数据拼接在一个字符串中 sprintf(outputDataTemp, "%s %d", outputDataTemp, outputData[i * width + j]); } // LOG打印 LOGI("line [%d]\n%s", i, outputDataTemp); } ``` ## 附加 ### dlopencl.h ```cpp #ifndef __AOPENCL_CL_H #define __AOPENCL_CL_H #ifdef __APPLE__ #include <OpenCL/cl_platform.h> #else #include <CL/cl_platform.h> #endif #ifdef __cplusplus extern "C" { #endif #define IAH() //#define IAH() printf("File:%s, Line:%d\n",__FILE__, __LINE__); void initFns(); /* Platform API */ #define clGetPlatformIDs aclGetPlatformIDs cl_int (*aclGetPlatformIDs)(cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */); #define clGetPlatformInfo aclGetPlatformInfo cl_int (*aclGetPlatformInfo)(cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); /* Device APIs */ #define clGetDeviceIDs aclGetDeviceIDs cl_int (*aclGetDeviceIDs)(cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, cl_device_id * /* devices */, cl_uint * /* num_devices */); #define clGetDeviceInfo aclGetDeviceInfo cl_int (*aclGetDeviceInfo)(cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); /* Context APIs */ #define clCreateContext aclCreateContext cl_context (*aclCreateContext)(const cl_context_properties * /* properties */, cl_uint /* num_devices */, const cl_device_id * /* devices */, void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */); #define clCreateContextFromType aclCreateContextFromType cl_context (*aclCreateContextFromType)(const cl_context_properties * /* properties */, cl_device_type /* device_type */, void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */); #define clRetainContext aclRetainContext cl_int (*aclRetainContext)(cl_context /* context */); #define clReleaseContext aclReleaseContext cl_int (*aclReleaseContext)(cl_context /* context */); #define clGetContextInfo aclGetContextInfo cl_int (*aclGetContextInfo)(cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); /* Command Queue APIs */ #define clCreateCommandQueue aclCreateCommandQueue cl_command_queue (*aclCreateCommandQueue)(cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */, cl_int * /* errcode_ret */); #define clRetainCommandQueue aclRetainCommandQueue cl_int (*aclRetainCommandQueue)(cl_command_queue /* command_queue */); #define clReleaseCommandQueue aclReleaseCommandQueue cl_int (*aclReleaseCommandQueue)(cl_command_queue /* command_queue */); #define clGetCommandQueueInfo aclGetCommandQueueInfo cl_int (*aclGetCommandQueueInfo)(cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); /* Memory Object APIs */ #define clCreateBuffer aclCreateBuffer cl_mem (*aclCreateBuffer)(cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */, cl_int * /* errcode_ret */); #define clCreateSubBuffer aclCreateSubBuffer cl_mem (*aclCreateSubBuffer)(cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, const void * /* buffer_create_info */, cl_int * /* errcode_ret */); #define clRetainMemObject aclRetainMemObject cl_int (*aclRetainMemObject)(cl_mem /* memobj */); #define clReleaseMemObject aclReleaseMemObject cl_int (*aclReleaseMemObject)(cl_mem /* memobj */); #define clGetSupportedImageFormats aclGetSupportedImageFormats cl_int (*aclGetSupportedImageFormats)(cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, cl_uint /* num_entries */, cl_image_format * /* image_formats */, cl_uint * /* num_image_formats */); #define clGetMemObjectInfo aclGetMemObjectInfo cl_int (*aclGetMemObjectInfo)(cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); #define clGetImageInfo aclGetImageInfo cl_int (*aclGetImageInfo)(cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); #define clSetMemObjectDestructorCallback aclSetMemObjectDestructorCallback cl_int (*aclSetMemObjectDestructorCallback)( cl_mem /* memobj */, void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ ); /* Sampler APIs */ #define clCreateSampler aclCreateSampler cl_sampler (*aclCreateSampler)(cl_context /* context */, cl_bool /* normalized_coords */, cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */, cl_int * /* errcode_ret */); #define clRetainSampler aclRetainSampler cl_int (*aclRetainSampler)(cl_sampler /* sampler */); #define clReleaseSampler aclReleaseSampler cl_int (*aclReleaseSampler)(cl_sampler /* sampler */); #define clGetSamplerInfo aclGetSamplerInfo cl_int (*aclGetSamplerInfo)(cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); /* Program Object APIs */ #define clCreateProgramWithSource aclCreateProgramWithSource cl_program (*aclCreateProgramWithSource)(cl_context /* context */, cl_uint /* count */, const char ** /* strings */, const size_t * /* lengths */, cl_int * /* errcode_ret */); #define clCreateProgramWithBinary aclCreateProgramWithBinary cl_program (*aclCreateProgramWithBinary)(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */, cl_int * /* errcode_ret */); #define clRetainProgram aclRetainProgram cl_int (*aclRetainProgram)(cl_program /* program */); #define clReleaseProgram aclReleaseProgram cl_int (*aclReleaseProgram)(cl_program /* program */); #define clBuildProgram aclBuildProgram cl_int (*aclBuildProgram)(cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */); #define clGetProgramInfo aclGetProgramInfo cl_int (*aclGetProgramInfo)(cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); #define clGetProgramBuildInfo aclGetProgramBuildInfo cl_int (*aclGetProgramBuildInfo)(cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); /* Kernel Object APIs */ #define clCreateKernel aclCreateKernel cl_kernel (*aclCreateKernel)(cl_program /* program */, const char * /* kernel_name */, cl_int * /* errcode_ret */); #define clCreateKernelsInProgram aclCreateKernelsInProgram cl_int (*aclCreateKernelsInProgram)(cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */, cl_uint * /* num_kernels_ret */); #define clRetainKernel aclRetainKernel cl_int (*aclRetainKernel)(cl_kernel /* kernel */); #define clReleaseKernel aclReleaseKernel cl_int (*aclReleaseKernel)(cl_kernel /* kernel */); #define clSetKernelArg aclSetKernelArg cl_int (*aclSetKernelArg)(cl_kernel /* kernel */, cl_uint /* arg_index */, size_t /* arg_size */, const void * /* arg_value */); #define clGetKernelInfo aclGetKernelInfo cl_int (*aclGetKernelInfo)(cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); #define clGetKernelWorkGroupInfo aclGetKernelWorkGroupInfo cl_int (*aclGetKernelWorkGroupInfo)(cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); /* Event Object APIs */ #define clWaitForEvents aclWaitForEvents cl_int (*aclWaitForEvents)(cl_uint /* num_events */, const cl_event * /* event_list */); #define clGetEventInfo aclGetEventInfo cl_int (*aclGetEventInfo)(cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); #define clCreateUserEvent aclCreateUserEvent cl_event (*aclCreateUserEvent)(cl_context /* context */, cl_int * /* errcode_ret */); #define clRetainEvent aclRetainEvent cl_int (*aclRetainEvent)(cl_event /* event */); #define clReleaseEvent aclReleaseEvent cl_int (*aclReleaseEvent)(cl_event /* event */); #define clSetUserEventStatus aclSetUserEventStatus cl_int (*aclSetUserEventStatus)(cl_event /* event */, cl_int /* execution_status */); #define clSetEventCallback aclSetEventCallback cl_int (*aclSetEventCallback)( cl_event /* event */, cl_int /* command_exec_callback_type */, void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void * /* user_data */); /* Profiling APIs */ #define clGetEventProfilingInfo aclGetEventProfilingInfo cl_int (*aclGetEventProfilingInfo)(cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */); /* Flush and Finish APIs */ #define clFlush aclFlush cl_int (*aclFlush)(cl_command_queue /* command_queue */); #define clFinish aclFinish cl_int (*aclFinish)(cl_command_queue /* command_queue */); /* Enqueued Commands APIs */ #define clEnqueueReadBuffer aclEnqueueReadBuffer cl_int (*aclEnqueueReadBuffer)(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, size_t /* offset */, size_t /* size */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueReadBufferRect aclEnqueueReadBufferRect cl_int (*aclEnqueueReadBufferRect)(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, size_t /* host_slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueWriteBuffer aclEnqueueWriteBuffer cl_int (*aclEnqueueWriteBuffer)(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, size_t /* offset */, size_t /* size */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueWriteBufferRect aclEnqueueWriteBufferRect cl_int (*aclEnqueueWriteBufferRect)(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, size_t /* host_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueCopyBuffer aclEnqueueCopyBuffer cl_int (*aclEnqueueCopyBuffer)(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, size_t /* src_offset */, size_t /* dst_offset */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueCopyBufferRect aclEnqueueCopyBufferRect cl_int (*aclEnqueueCopyBufferRect)(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, const size_t * /* src_origin */, const size_t * /* dst_origin */, const size_t * /* region */, size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */, size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueReadImage aclEnqueueReadImage cl_int (*aclEnqueueReadImage)(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* row_pitch */, size_t /* slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueWriteImage aclEnqueueWriteImage cl_int (*aclEnqueueWriteImage)(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* input_row_pitch */, size_t /* input_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueCopyImage aclEnqueueCopyImage cl_int (*aclEnqueueCopyImage)(cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, const size_t * /* src_origin[3] */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueCopyImageToBuffer aclEnqueueCopyImageToBuffer cl_int (*aclEnqueueCopyImageToBuffer)(cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */, const size_t * /* src_origin[3] */, const size_t * /* region[3] */, size_t /* dst_offset */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueCopyBufferToImage aclEnqueueCopyBufferToImage cl_int (*aclEnqueueCopyBufferToImage)(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, size_t /* src_offset */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); void * (*aclEnqueueMapBuffer)(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, size_t /* offset */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */); void * (*aclEnqueueMapImage)(cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t * /* image_row_pitch */, size_t * /* image_slice_pitch */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, cl_int * /* errcode_ret */); #define clEnqueueUnmapMemObject aclEnqueueUnmapMemObject cl_int (*aclEnqueueUnmapMemObject)(cl_command_queue /* command_queue */, cl_mem /* memobj */, void * /* mapped_ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueNDRangeKernel aclEnqueueNDRangeKernel cl_int (*aclEnqueueNDRangeKernel)(cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, const size_t * /* global_work_offset */, const size_t * /* global_work_size */, const size_t * /* local_work_size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueTask aclEnqueueTask cl_int (*aclEnqueueTask)(cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #define clEnqueueNativeKernel aclEnqueueNativeKernel cl_int (*aclEnqueueNativeKernel)(cl_command_queue /* command_queue */, void (CL_CALLBACK * /*user_func*/)(void *), void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */, const cl_mem * /* mem_list */, const void ** /* args_mem_loc */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */); #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS //#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1! /* * WARNING: * This API introduces mutable state into the OpenCL implementation. It has been REMOVED * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. * * Software developers previously relying on this API are instructed to set the command queue * properties when creating the queue, instead. */ #define clSetCommandQueueProperty aclSetCommandQueueProperty cl_int (*aclSetCommandQueueProperty)(cl_command_queue /* command_queue */, cl_command_queue_properties /* properties */, cl_bool /* enable */, cl_command_queue_properties * /* old_properties */); #endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS #define clCreateImage2D aclCreateImage2D cl_mem (*aclCreateImage2D)(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_row_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */); #define clCreateImage3D aclCreateImage3D cl_mem (*aclCreateImage3D)(cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void * /* host_ptr */, cl_int * /* errcode_ret */); #define clEnqueueMarker aclEnqueueMarker cl_int (*aclEnqueueMarker)(cl_command_queue /* command_queue */, cl_event * /* event */); #define clEnqueueWaitForEvents aclEnqueueWaitForEvents cl_int (*aclEnqueueWaitForEvents)(cl_command_queue /* command_queue */, cl_uint /* num_events */, const cl_event * /* event_list */); #define clEnqueueBarrier aclEnqueueBarrier cl_int (*aclEnqueueBarrier)(cl_command_queue /* command_queue */); #define clUnloadCompiler aclUnloadCompiler cl_int (*aclUnloadCompiler)(void); void * (*aclGetExtensionFunctionAddress)(const char * /* func_name */); #endif #ifdef __cplusplus } #endif #endif /* __AOPENCL_CL_H */ ``` ### dlopencl.cpp 根据实际情况自行修改 so_path 的路径 ```cpp #define CL_TARGET_OPENCL_VERSION 120 #include "CL/cl.h" #include <dlfcn.h> #include <cstdio> #include <string.h> #include "dlopencl.h" int loadedCL; void *getCLHandle() { LOGD("get_handle"); void *res = nullptr; char *so_path = (char*)"/system/vendor/lib64/egl/libGLES_mali.so"; res = dlopen(so_path, RTLD_LAZY); if (res == nullptr) { LOGD("Open library failed"); } else { LOGD("Loaded library name: [%s]", so_path); } return res; } void initFns() { loadedCL = 0; void *handle = getCLHandle(); if (handle == nullptr) return; /* Platform API */ IAH(); aclGetPlatformIDs = (cl_int (*)(cl_uint /* num_entries */, cl_platform_id * /* platforms */, cl_uint * /* num_platforms */)) dlsym(handle, "clGetPlatformIDs"); IAH(); aclGetPlatformInfo = (cl_int (*)(cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetPlatformInfo"); /* Device APIs */ IAH(); aclGetDeviceIDs = (cl_int (*)(cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, cl_device_id * /* devices */, cl_uint * /* num_devices */)) dlsym(handle, "clGetDeviceIDs"); IAH(); aclGetDeviceInfo = (cl_int (*)(cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetDeviceInfo"); /* Context APIs */ IAH(); aclCreateContext = (cl_context (*)(const cl_context_properties * /* properties */, cl_uint /* num_devices */, const cl_device_id * /* devices */, void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */)) dlsym(handle, "clCreateContext"); IAH(); aclCreateContextFromType = (cl_context (*)(const cl_context_properties * /* properties */, cl_device_type /* device_type */, void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), void * /* user_data */, cl_int * /* errcode_ret */)) dlsym(handle, "clCreateContextFromType"); IAH(); aclRetainContext = (cl_int (*)(cl_context /* context */)) dlsym(handle, "clRetainContext"); IAH(); aclReleaseContext = (cl_int (*)(cl_context /* context */)) dlsym(handle, "clReleaseContext"); IAH(); aclGetContextInfo = (cl_int (*)(cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetContextInfo"); /* Command Queue APIs */ IAH(); aclCreateCommandQueue = (cl_command_queue (*)(cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */, cl_int * /* errcode_ret */)) dlsym(handle, "clCreateCommandQueue"); IAH(); aclRetainCommandQueue = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clRetainCommandQueue"); IAH(); aclReleaseCommandQueue = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clReleaseCommandQueue"); IAH(); aclGetCommandQueueInfo = (cl_int (*)(cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetCommandQueueInfo"); /* Memory Object APIs */ IAH(); aclCreateBuffer = (cl_mem (*)(cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */, cl_int * /* errcode_ret */)) dlsym(handle, "clCreateBuffer"); IAH(); aclCreateSubBuffer = (cl_mem (*)(cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, const void * /* buffer_create_info */, cl_int * /* errcode_ret */)) dlsym(handle, "clCreateSubBuffer"); IAH(); aclRetainMemObject = (cl_int (*)(cl_mem /* memobj */)) dlsym(handle, "clRetainMemObject"); IAH(); aclReleaseMemObject = (cl_int (*)(cl_mem /* memobj */)) dlsym(handle, "clReleaseMemObject"); IAH(); aclGetSupportedImageFormats = (cl_int (*)(cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, cl_uint /* num_entries */, cl_image_format * /* image_formats */, cl_uint * /* num_image_formats */)) dlsym(handle, "clGetSupportedImageFormats"); IAH(); aclGetMemObjectInfo = (cl_int (*)(cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetMemObjectInfo"); IAH(); aclGetImageInfo = (cl_int (*)(cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetImageInfo"); IAH(); aclSetMemObjectDestructorCallback = (cl_int (*)( cl_mem /* memobj */, void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ )) dlsym(handle, "clSetMemObjectDestructorCallback"); /* Sampler APIs */ IAH(); aclCreateSampler = (cl_sampler (*)(cl_context /* context */, cl_bool /* normalized_coords */, cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */, cl_int * /* errcode_ret */)) dlsym(handle, "clCreateSampler"); IAH(); aclRetainSampler = (cl_int (*)(cl_sampler /* sampler */)) dlsym(handle, "clRetainSampler"); IAH(); aclReleaseSampler = (cl_int (*)(cl_sampler /* sampler */)) dlsym(handle, "clReleaseSampler"); IAH(); aclGetSamplerInfo = (cl_int (*)(cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetSamplerInfo"); /* Program Object APIs */ IAH(); aclCreateProgramWithSource = (cl_program (*)(cl_context /* context */, cl_uint /* count */, const char ** /* strings */, const size_t * /* lengths */, cl_int * /* errcode_ret */)) dlsym(handle, "clCreateProgramWithSource"); IAH(); aclCreateProgramWithBinary = (cl_program (*)(cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */, cl_int * /* errcode_ret */)) dlsym(handle, "clCreateProgramWithBinary"); IAH(); aclRetainProgram = (cl_int (*)(cl_program /* program */)) dlsym(handle, "clRetainProgram"); IAH(); aclReleaseProgram = (cl_int (*)(cl_program /* program */)) dlsym(handle, "clReleaseProgram"); IAH(); aclBuildProgram = (cl_int (*)(cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, const char * /* options */, void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), void * /* user_data */)) dlsym(handle, "clBuildProgram"); IAH(); aclGetProgramInfo = (cl_int (*)(cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetProgramInfo"); IAH(); aclGetProgramBuildInfo = (cl_int (*)(cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetProgramBuildInfo"); /* Kernel Object APIs */ IAH(); aclCreateKernel = (cl_kernel (*)(cl_program /* program */, const char * /* kernel_name */, cl_int * /* errcode_ret */)) dlsym(handle, "clCreateKernel"); IAH(); aclCreateKernelsInProgram = (cl_int (*)(cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */, cl_uint * /* num_kernels_ret */)) dlsym(handle, "clCreateKernelsInProgram"); IAH(); aclRetainKernel = (cl_int (*)(cl_kernel /* kernel */)) dlsym(handle, "clRetainKernel"); IAH(); aclReleaseKernel = (cl_int (*)(cl_kernel /* kernel */)) dlsym(handle, "clReleaseKernel"); IAH(); aclSetKernelArg = (cl_int (*)(cl_kernel /* kernel */, cl_uint /* arg_index */, size_t /* arg_size */, const void * /* arg_value */)) dlsym(handle, "clSetKernelArg"); IAH(); aclGetKernelInfo = (cl_int (*)(cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetKernelInfo"); IAH(); aclGetKernelWorkGroupInfo = (cl_int (*)(cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetKernelWorkGroupInfo"); /* Event Object APIs */ IAH(); aclWaitForEvents = (cl_int (*)(cl_uint /* num_events */, const cl_event * /* event_list */)) dlsym(handle, "clWaitForEvents"); IAH(); aclGetEventInfo = (cl_int (*)(cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetEventInfo"); IAH(); aclCreateUserEvent = (cl_event (*)(cl_context /* context */, cl_int * /* errcode_ret */)) dlsym(handle, "clCreateUserEvent"); IAH(); aclRetainEvent = (cl_int (*)(cl_event /* event */)) dlsym(handle, "clRetainEvent"); IAH(); aclReleaseEvent = (cl_int (*)(cl_event /* event */)) dlsym(handle, "clReleaseEvent"); IAH(); aclSetUserEventStatus = (cl_int (*)(cl_event /* event */, cl_int /* execution_status */)) dlsym(handle, "clSetUserEventStatus"); IAH(); aclSetEventCallback = (cl_int (*)( cl_event /* event */, cl_int /* command_exec_callback_type */, void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void * /* user_data */)) dlsym(handle, "clSetEventCallback"); /* Profiling APIs */ IAH(); aclGetEventProfilingInfo = (cl_int (*)(cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */)) dlsym(handle, "clGetEventProfilingInfo"); /* Flush and Finish APIs */ IAH(); aclFlush = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clFlush"); IAH(); aclFinish = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clFinish"); /* Enqueued Commands APIs */ IAH(); aclEnqueueReadBuffer = (cl_int (*)(cl_command_queue /* command_queue */, ``` 最后修改:2022 年 01 月 03 日 © 允许规范转载