Skip to content

Commit 27c5271

Browse files
Add performance hints when kernel's argument requires aux translation
Resolves: NEO-2931
Change-Id: I3756265d0d8a774805b0b35088b7477b09b5a7bf
Signed-off-by: Jobczyk, Lukasz <lukasz.jobczyk@intel.com>
1 parent e0e19c2 commit 27c5271

File tree

4 files changed

+39
-2
lines changed

4 files changed

+39
-2
lines changed

runtime/context/driver_diagnostics.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ const char *DriverDiagnostics::hintFormat[] = {
5353
"Performance hint: Local workgroup sizes { %u, %u, %u } selected for this workload ( kernel name: %s ) may not be optimal, consider using following local workgroup size: { %u, %u, %u }.", //BAD_LOCAL_WORKGROUP_SIZE
5454
"Performance hint: Kernel %s register pressure is too high, spill fills will be generated, additional surface needs to be allocated of size %u, consider simplifying your kernel.", //REGISTER_PRESSURE_TOO_HIGH
5555
"Performance hint: Kernel %s private memory usage is too high and exhausts register space, additional surface needs to be allocated of size %u, consider reducing amount of private memory used, avoid using private memory arrays.", //PRIVATE_MEMORY_USAGE_TOO_HIGH
56-
"Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance." //KERNEL_REQUIRES_COHERENCY
56+
"Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY
57+
"Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"" //KERNEL_ARGUMENT_AUX_TRANSLATION
5758
};
5859
} // namespace NEO

runtime/context/driver_diagnostics.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ enum PerformanceHints {
4646
BAD_LOCAL_WORKGROUP_SIZE,
4747
REGISTER_PRESSURE_TOO_HIGH,
4848
PRIVATE_MEMORY_USAGE_TOO_HIGH,
49-
KERNEL_REQUIRES_COHERENCY
49+
KERNEL_REQUIRES_COHERENCY,
50+
KERNEL_ARGUMENT_AUX_TRANSLATION
5051
};
5152

5253
class DriverDiagnostics {

runtime/kernel/kernel.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2149,6 +2149,12 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF
21492149
auto buffer = castToObject<Buffer>(getKernelArg(i));
21502150
if (buffer && buffer->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
21512151
memObjsForAuxTranslation.insert(buffer);
2152+
2153+
auto &context = this->program->getContext();
2154+
if (context.isProvidingPerformanceHints()) {
2155+
context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION,
2156+
kernelInfo.name.c_str(), i, kernelInfo.kernelArgInfo.at(i).name.c_str());
2157+
}
21522158
}
21532159
}
21542160
}

unit_tests/context/driver_diagnostics_tests.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,35 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenConte
424424
context->release();
425425
}
426426

427+
TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithBuffersForAuxTranslationThenContextProvidesProperHint) { // Checks that fillWithBuffersForAuxTranslation reports the KERNEL_ARGUMENT_AUX_TRANSLATION hint for a compressed buffer argument.
428+
DebugManagerStateRestore dbgRestore; // presumably restores the debug flags when the test ends - confirm against its definition
429+
DebugManager.flags.PrintDriverDiagnostics.set(1); // turn on the PrintDriverDiagnostics debug flag for this test
430+
431+
auto pDevice = castToObject<Device>(devices[0]);
432+
MockKernelWithInternals mockKernel(*pDevice, context);
433+
MockBuffer buffer;
434+
cl_mem clMem = &buffer; // handle form of the mock buffer, passed to setArgBuffer below
435+
436+
buffer.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); // BUFFER_COMPRESSED is the allocation type fillWithBuffersForAuxTranslation checks before collecting a buffer and emitting the hint
437+
mockKernel.kernelInfo.kernelArgInfo.resize(1); // the scenario uses exactly one kernel argument (index 0)
438+
mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1);
439+
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
440+
mockKernel.mockKernel->initialize();
441+
mockKernel.mockKernel->auxTranslationRequired = true; // NOTE(review): presumably required for the aux translation path to consider this kernel - confirm
442+
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); // bind the compressed buffer as argument 0
443+
444+
testing::internal::CaptureStdout(); // start capturing stdout before the call under test
445+
MemObjsForAuxTranslation memObjects;
446+
mockKernel.mockKernel->fillWithBuffersForAuxTranslation(memObjects); // call under test
447+
448+
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION], // build the expected message from the same format entry the hint uses
449+
mockKernel.mockKernel->getKernelInfo().name.c_str(), 0, mockKernel.mockKernel->getKernelInfo().kernelArgInfo.at(0).name.c_str()); // arguments: kernel name, argument index 0, argument name
450+
451+
std::string output = testing::internal::GetCapturedStdout();
452+
EXPECT_NE(0u, output.size()); // something was written to stdout while capture was active
453+
EXPECT_TRUE(containsHint(expectedHint, userData)); // presumably verifies the hint text recorded in the fixture's userData - confirm against containsHint
454+
}
455+
427456
TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) {
428457

429458
auto pDevice = castToObject<Device>(devices[0]);

0 commit comments

Comments
 (0)