mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
### Description
Limit SoC core detection via 2 level cache core logic to Intel and Hybrid processors.

### Motivation and Context
The following code was added to add support for a new class of CPU cores present in Intel’s next generation Intel Core Ultra mobile processors. This code is essential to avoid placing threads on low performing SoC cores that don’t have L3 cache. SoC cores are meant to specialize in system bringup and help improve responsiveness and power usage; in other words, they are not meant to run compute heavy AI workloads. In order to avoid broad exposure of this logic, it is currently designed to be restricted to Intel platforms that have hybrid enabled.

---------

Co-authored-by: Sheil Kumar <sheilk@microsoft.com>
107 lines
3.2 KiB
C++
107 lines
3.2 KiB
C++
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
#include "lib/Api/pch/pch.h"
|
|
|
|
#include "HardwareCoreEnumerator.h"
|
|
|
|
namespace WINMLP {
|
|
|
|
// Owns the raw byte buffer filled in by GetLogicalProcessorInformationEx together
// with the number of valid bytes in it.
struct LogicalProcessorInformation {
  // Storage holding a sequence of variable-sized SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX records.
  std::unique_ptr<char[]> Buffer;
  // Number of bytes of Buffer that contain record data. Zero-initialized so a
  // default-constructed instance describes an empty buffer rather than an indeterminate one.
  size_t Length = 0;
};
|
|
|
|
// Tallies produced while walking the processor topology.
struct CoreCounter {
  // Number of RelationProcessorCore records seen.
  uint32_t PhysicalCores{0};
  // Number of processors that sit behind a level 2 cache but no level 3 cache.
  uint32_t Num2CacheCores{0};
};
|
|
|
|
// Retrieves the processor topology records for `relationship` from Windows.
//
// Uses the usual two-call pattern: the first call (null buffer) reports the required
// size, the second call fills a buffer of that size.
//
// Returns the filled buffer and its length in bytes. If either call does not behave
// as expected, returns an empty result (null Buffer, Length == 0) so that callers
// iterating over the records simply see no records. Previously failures were only
// guarded by assert, which is compiled out in release builds and left callers
// parsing a zero-filled buffer.
static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
  DWORD length = 0;

  // Size query: expected to fail and report the required byte count in `length`.
  const auto sizeQueryResult = GetLogicalProcessorInformationEx(relationship, nullptr, &length);
  if (sizeQueryResult != FALSE || length == 0) {
    return {nullptr, 0};
  }

  auto processorInformationBytes = std::make_unique<char[]>(length);

  const auto fillResult = GetLogicalProcessorInformationEx(
    relationship, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(processorInformationBytes.get()), &length
  );
  if (fillResult == FALSE) {
    // The buffer content is not valid record data; do not hand it to callers.
    return {nullptr, 0};
  }

  return {std::move(processorInformationBytes), length};
}
|
|
|
|
// Returns the number of bits set to 1 in `input`.
uint32_t CountSetBits(DWORD input) {
  uint32_t setBits = 0;
  while (input) {
    // Clearing the lowest set bit each pass means the loop runs once per set bit.
    input &= input - 1;
    ++setBits;
  }
  return setBits;
}
|
|
|
|
// Walks the RelationAll topology records reported by Windows and tallies:
//   - PhysicalCores:  the number of RelationProcessorCore records.
//   - Num2CacheCores: the number of logical processors whose affinity bit appears in
//                     some level 2 cache mask but in no level 3 cache mask.
static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
  auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);

  CoreCounter cores;
  // Affinity masks are pointer-sized (64 bits on 64-bit builds). Accumulate them at
  // full width; storing them in a 32-bit DWORD silently drops processors 32..63.
  // NOTE(review): masks are OR-ed together regardless of GroupMask.Group, so systems
  // with more than one processor group would have their groups conflated - confirm
  // whether multi-group machines need to be supported here.
  uint64_t level2Mask = 0;
  uint64_t level3Mask = 0;
  size_t read = 0;
  const size_t recordHeaderSize = FIELD_OFFSET(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Processor);

  while ((read + recordHeaderSize) < logicalProcessorInformation.Length) {
    const auto* currentProcessorInfo =
      reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(logicalProcessorInformation.Buffer.get() + read);
    const size_t recordSize = currentProcessorInfo->Size;

    // A record smaller than its own header (in particular Size == 0) would never
    // advance `read` and spin forever; a record running past the buffer is truncated.
    if (recordSize < recordHeaderSize || (read + recordSize) > logicalProcessorInformation.Length) {
      break;
    }

    switch (currentProcessorInfo->Relationship) {
      case RelationProcessorCore:
        cores.PhysicalCores++;
        break;
      case RelationCache:
        if (currentProcessorInfo->Cache.Level == 2) {
          level2Mask |= currentProcessorInfo->Cache.GroupMask.Mask;
        } else if (currentProcessorInfo->Cache.Level == 3) {
          level3Mask |= currentProcessorInfo->Cache.GroupMask.Mask;
        }
        break;
      default:
        break;
    }

    read += recordSize;
  }

  // Count processors covered by an L2 cache but not by any L3 cache. The count is done
  // here at 64-bit width because CountSetBits only accepts a 32-bit DWORD.
  uint32_t level2OnlyCount = 0;
  for (uint64_t remaining = level2Mask & ~level3Mask; remaining != 0; remaining &= remaining - 1) {
    ++level2OnlyCount;
  }
  cores.Num2CacheCores = level2OnlyCount;
  return cores;
}
|
|
|
|
uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
|
|
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
|
|
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
|
|
auto cores = GetNumberOPhysicalAndEngineeringCores();
|
|
|
|
const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
|
|
int regs_leaf0[4];
|
|
int regs_leaf7[4];
|
|
__cpuid(regs_leaf0, 0);
|
|
__cpuid(regs_leaf7, 0x7);
|
|
|
|
auto isIntel = (kVendorID_Intel[0] == regs_leaf0[1]) && (kVendorID_Intel[1] == regs_leaf0[2]) &&
|
|
(kVendorID_Intel[2] == regs_leaf0[3]);
|
|
|
|
auto isHybrid = (regs_leaf7[3] & (1 << 15));
|
|
|
|
if (isIntel && isHybrid) {
|
|
// We want to use the number of physical cores, but exclude soc cores
|
|
// On Intel Hybrid processors, numSocCores == cores.Num2CacheCores
|
|
return cores.PhysicalCores - cores.Num2CacheCores;
|
|
}
|
|
|
|
return cores.PhysicalCores;
|
|
}
|
|
|
|
} // namespace WINMLP
|