mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
Disable the __cpuid check on arm64 builds, as the intrinsic is not available there. Motivation: the check was breaking the arm64 build. Co-authored-by: Sheil Kumar <sheilk@microsoft.com>
109 lines
3.3 KiB
C++
109 lines
3.3 KiB
C++
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
#include "lib/Api/pch/pch.h"
|
|
|
|
#include "HardwareCoreEnumerator.h"
|
|
|
|
namespace WINMLP {
|
|
|
|
// Result of a logical-processor query: a raw byte buffer plus the number of
// bytes in it.
struct LogicalProcessorInformation {
  // Backing storage for the records written by GetLogicalProcessorInformationEx.
  std::unique_ptr<char[]> Buffer;

  // Size of Buffer in bytes. Zero-initialized so that a default-constructed
  // instance describes an empty result rather than holding an indeterminate value.
  size_t Length = 0;
};
|
|
|
|
// Tallies gathered while walking the logical-processor records.
struct CoreCounter {
  // Number of RelationProcessorCore records encountered.
  uint32_t PhysicalCores{0};

  // Number of logical processors that appear in a level-2 cache mask but in
  // no level-3 cache mask.
  uint32_t Num2CacheCores{0};
};
|
|
|
|
// Queries the OS for the logical-processor records matching `relationship`.
//
// Returns the raw record bytes together with their total size in bytes. When
// the OS reports no required size, or the second call fails, an empty result
// (null Buffer, Length == 0) is returned so that callers never parse a buffer
// the OS did not fill in. The asserts are kept so debug builds still flag
// these unexpected outcomes loudly.
static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
  // Sizing call: passing a null buffer is expected to fail and to store the
  // required buffer size in `length`.
  DWORD length = 0;
  const auto probeResult = GetLogicalProcessorInformationEx(relationship, nullptr, &length);
  assert(probeResult == FALSE);
  (void)probeResult;  // only inspected by the assert above
  if (length == 0) {
    // Nothing to fetch (or the sizing call failed without reporting a size).
    return {nullptr, 0};
  }

  auto processorInformationBytes = std::make_unique<char[]>(length);

  // Fetch call: fills the buffer with variable-sized records and updates `length`.
  const auto fetchResult = GetLogicalProcessorInformationEx(
    relationship,
    reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(processorInformationBytes.get()),
    &length
  );
  assert(fetchResult != FALSE);
  if (fetchResult == FALSE) {
    // Asserts vanish in release builds; do not hand back an unfilled buffer.
    return {nullptr, 0};
  }

  return {std::move(processorInformationBytes), length};
}
|
|
|
|
// Returns how many bits of `input` are set to 1.
uint32_t CountSetBits(DWORD input) {
  uint32_t setBits = 0;
  // `input &= input - 1` clears the lowest set bit, so the loop body runs
  // exactly once per set bit.
  while (input != 0) {
    input &= input - 1;
    ++setBits;
  }
  return setBits;
}
|
|
|
|
// Walks every logical-processor record reported for RelationAll and tallies:
//   - PhysicalCores:  the number of RelationProcessorCore records;
//   - Num2CacheCores: the number of logical processors that appear in a
//                     level-2 cache mask but in no level-3 cache mask.
//
// NOTE(review): cache masks are merged without looking at GroupMask.Group, so
// processors from different processor groups would alias each other in the
// merged masks - confirm whether multi-group machines matter here.
static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
  const auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);

  CoreCounter cores;
  // 64 bits wide so that no bit of the affinity mask is dropped on 64-bit
  // builds (a DWORD keeps only the low 32 bits).
  uint64_t level2Mask = 0;
  uint64_t level3Mask = 0;
  size_t read = 0;

  // Records are variable-sized; each one carries its own Size, which is used
  // to step to the next record. The loop requires the fixed record header to
  // lie inside the buffer before a record is touched.
  while ((read + FIELD_OFFSET(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Processor)) <
         logicalProcessorInformation.Length) {
    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX currentProcessorInfo =
      reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(logicalProcessorInformation.Buffer.get() + read);

    // Stop on a record that claims zero size (it would never advance `read`
    // and the loop would spin forever) or that overruns the buffer.
    if (currentProcessorInfo->Size == 0 ||
        (read + currentProcessorInfo->Size) > logicalProcessorInformation.Length) {
      break;
    }

    switch (currentProcessorInfo->Relationship) {
      case RelationProcessorCore:
        cores.PhysicalCores++;
        break;
      case RelationCache:
        if (currentProcessorInfo->Cache.Level == 2) {
          level2Mask |= currentProcessorInfo->Cache.GroupMask.Mask;
        } else if (currentProcessorInfo->Cache.Level == 3) {
          level3Mask |= currentProcessorInfo->Cache.GroupMask.Mask;
        }
        break;
      default:
        // Other relationships do not contribute to either tally.
        break;
    }

    read += currentProcessorInfo->Size;
  }

  // Logical processors covered by some L2 cache but by no L3 cache. The mask
  // is counted in two 32-bit halves because CountSetBits takes a DWORD.
  const uint64_t level2OnlyMask = level2Mask & ~level3Mask;
  cores.Num2CacheCores = CountSetBits(static_cast<DWORD>(level2OnlyMask & 0xFFFFFFFFu)) +
                         CountSetBits(static_cast<DWORD>(level2OnlyMask >> 32));
  return cores;
}
|
|
|
|
uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
|
|
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
|
|
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
|
|
auto cores = GetNumberOPhysicalAndEngineeringCores();
|
|
|
|
#if !defined(_M_ARM64) && !defined(__aarch64__)
|
|
const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
|
|
int regs_leaf0[4];
|
|
int regs_leaf7[4];
|
|
__cpuid(regs_leaf0, 0);
|
|
__cpuid(regs_leaf7, 0x7);
|
|
|
|
auto isIntel = (kVendorID_Intel[0] == regs_leaf0[1]) && (kVendorID_Intel[1] == regs_leaf0[2]) &&
|
|
(kVendorID_Intel[2] == regs_leaf0[3]);
|
|
|
|
auto isHybrid = (regs_leaf7[3] & (1 << 15));
|
|
|
|
if (isIntel && isHybrid) {
|
|
// We want to use the number of physical cores, but exclude soc cores
|
|
// On Intel Hybrid processors, numSocCores == cores.Num2CacheCores
|
|
return cores.PhysicalCores - cores.Num2CacheCores;
|
|
}
|
|
#endif
|
|
|
|
return cores.PhysicalCores;
|
|
}
|
|
|
|
} // namespace WINMLP
|