Skip to content

Commit

Permalink
Fix ze_peak explicit scaling benchmark
Browse files Browse the repository at this point in the history
The explicit scaling code for ze_peak violates the L0 spec and has no
overlap between sub-devices. This PR corrects these issues.

Signed-off-by: Wenbin Lu <wenbin.lu@intel.com>
  • Loading branch information
lyu committed Oct 16, 2024
1 parent 32527cc commit d719a18
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 30 deletions.
5 changes: 2 additions & 3 deletions perf_tests/ze_peak/src/global_bw.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (C) 2019 Intel Corporation
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -37,7 +37,7 @@ void ZePeak::ze_peak_global_bw(L0Context &context) {
numItems = numItems - (numItems % context.sub_device_count);
if (verbose)
std::cout << "splitting the total work items ::" << numItems
<< "across subdevices ::" << context.sub_device_count
<< " across subdevices ::" << context.sub_device_count
<< std::endl;
numItems = set_workgroups(context, numItems / context.sub_device_count,
&workgroup_info);
Expand Down Expand Up @@ -121,7 +121,6 @@ void ZePeak::ze_peak_global_bw(L0Context &context) {
throw std::runtime_error("zeCommandListAppendMemoryCopy failed: " +
std::to_string(result));
}
i++;
}
} else {
result = zeCommandListAppendMemoryCopy(
Expand Down
67 changes: 40 additions & 27 deletions perf_tests/ze_peak/src/ze_peak.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (C) 2019 Intel Corporation
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -40,8 +40,7 @@ std::vector<uint8_t> L0Context::load_binary_file(const std::string &file_path) {
binary_file.resize(length);
stream.read(reinterpret_cast<char *>(binary_file.data()), length);
if (verbose)
std::cout << "Binary file loaded"
<< "\n";
std::cout << "Binary file loaded\n";
stream.close();

return binary_file;
Expand Down Expand Up @@ -144,6 +143,7 @@ void L0Context::print_ze_device_properties(
<< "\n"
<< " * UUID : " << id << "\n"
<< " * coreClockRate : " << std::dec << props.coreClockRate << "\n"
<< " * maxMemAllocSize : " << props.maxMemAllocSize << " bytes\n"
<< std::endl;
}

Expand Down Expand Up @@ -846,8 +846,11 @@ long double ZePeak::run_kernel(L0Context context, ze_kernel_handle_t &function,

if (type == TimingMeasurement::BANDWIDTH) {
if (context.sub_device_count) {
SUCCESS_OR_TERMINATE(
zeCommandListReset(context.cmd_list[current_sub_device_id]));
if (current_sub_device_id == 0) {
for (uint32_t i = 0; i < context.sub_device_count; i++) {
SUCCESS_OR_TERMINATE(zeCommandListReset(context.cmd_list[i]));
}
}
} else {
SUCCESS_OR_TERMINATE(zeCommandListReset(context.command_list));
}
Expand All @@ -864,6 +867,12 @@ long double ZePeak::run_kernel(L0Context context, ze_kernel_handle_t &function,
throw std::runtime_error("zeCommandListAppendLaunchKernel failed: " +
std::to_string(result));
}
result = zeCommandListAppendBarrier(
context.cmd_list[current_sub_device_id], nullptr, 0, nullptr);
if (result) {
throw std::runtime_error("zeCommandListAppendBarrier failed: " +
std::to_string(result));
}
} else {
result = zeCommandListAppendLaunchKernel(
context.command_list, function,
Expand Down Expand Up @@ -894,35 +903,39 @@ long double ZePeak::run_kernel(L0Context context, ze_kernel_handle_t &function,

for (uint32_t i = 0; i < warmup_iterations; i++) {
run_command_queue(context);
synchronize_command_queue(context);
}
if (verbose)
std::cout << "Warmup finished\n";

if (context.sub_device_count) {
if (context.sub_device_count == current_sub_device_id + 1) {
current_sub_device_id = 0;
while (current_sub_device_id < context.sub_device_count) {
synchronize_command_queue(context);
current_sub_device_id++;
}
current_sub_device_id = context.sub_device_count - 1;
}
} else {
synchronize_command_queue(context);
if (context.sub_device_count) {
SUCCESS_OR_TERMINATE(
zeCommandListReset(context.cmd_list[current_sub_device_id]));
for (uint32_t i = 0; i < iters; i++) {
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(
context.cmd_list[current_sub_device_id], function,
&workgroup_info.thread_group_dimensions, nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(
context.cmd_list[current_sub_device_id], nullptr, 0, nullptr));
}
SUCCESS_OR_TERMINATE(
zeCommandListClose(context.cmd_list[current_sub_device_id]));
}

timer.start();
for (uint32_t i = 0; i < iters; i++) {
if (context.sub_device_count) {
run_command_queue(context);

if (context.sub_device_count) {
if (context.sub_device_count == current_sub_device_id + 1) {
current_sub_device_id = 0;
while (current_sub_device_id < context.sub_device_count) {
synchronize_command_queue(context);
current_sub_device_id++;
}
current_sub_device_id = context.sub_device_count - 1;
if (context.sub_device_count == current_sub_device_id + 1) {
current_sub_device_id = 0;
while (current_sub_device_id < context.sub_device_count) {
synchronize_command_queue(context);
current_sub_device_id++;
}
} else {
current_sub_device_id = context.sub_device_count - 1;
}
} else {
for (uint32_t i = 0; i < iters; i++) {
run_command_queue(context);
synchronize_command_queue(context);
}
}
Expand Down

0 comments on commit d719a18

Please sign in to comment.