From 7607b00b745c006e4da3e1ae22c3fdee83a455ff Mon Sep 17 00:00:00 2001 From: "Vinals Gangolells, Guillem" Date: Tue, 16 Jun 2026 13:54:48 -0700 Subject: [PATCH] Added new D3D12 PSO compilation metrics This commit adds 3 new metrics related to D3D12 PSO compilation: - D3D12 PSO Compile Count: D3D12 PSO compilation events started every second. - D3D12 PSO Compile Time: D3D12 PSO compile time in milliseconds per second (across all CPUs). - D3D12 PSO Compile Busy: Percent of time with D3D12 PSO compilation taking place. This commit also adds a new supported provider (Microsoft-Windows-Direct3D12). Note that: D3D12 PSO compilation events are not tied to a specific present. These new events are tied to the process itself and are a "new" type of metric (per-process, ETW based but not tied to a present). This commit includes very small changes to support the new metric type. --- .../Interprocess/source/DataStores.cpp | 16 +-- .../Interprocess/source/DataStores.h | 22 +++- .../Interprocess/source/Interprocess.cpp | 76 ++++++------ .../Interprocess/source/Interprocess.h | 14 +-- .../Interprocess/source/OwnedDataSegment.h | 6 +- .../Interprocess/source/ShmNamer.cpp | 3 +- .../Interprocess/source/ShmNamer.h | 4 +- .../Interprocess/source/metadata/EnumUnit.h | 2 +- .../Interprocess/source/metadata/MetricList.h | 3 + .../PresentMonAPI2/PresentMonAPI.h | 3 + .../PresentMonAPI2Tests/EtlLoggerTests.cpp | 4 +- .../PresentMonAPI2Tests/FirstFrameWait.h | 8 +- .../InterimBroadcasterTests.cpp | 56 ++++----- .../PresentMonAPI2Tests/IpcComponentTests.cpp | 4 +- .../PresentMonAPI2Tests/MultiClientTests.cpp | 16 +-- .../PresentMonAPI2Tests/TestCommands.h | 6 +- .../PresentMonMiddleware/DynamicQuery.cpp | 32 ++++- .../FrameMetricsSource.cpp | 8 +- .../PresentMonMiddleware/FrameMetricsSource.h | 4 +- .../PresentMonMiddleware/MetricBinding.cpp | 102 +++++++++++++++- .../PresentMonMiddleware/MetricBinding.h | 3 +- .../PresentMonMiddleware/Middleware.cpp | 2 +- 
.../PresentMonMiddleware/ProcessDataRate.h | 114 ++++++++++++++++++ .../ActionExecutionContext.cpp | 2 +- .../PresentMonService/FrameBroadcaster.h | 37 ++++-- .../MockPresentMonSession.cpp | 13 +- .../PresentMonService/PresentMon.cpp | 3 +- .../PresentMonService/PresentMon.h | 5 +- .../PresentMonService/PresentMonSession.cpp | 10 +- .../RealtimePresentMonSession.cpp | 24 +++- .../RealtimePresentMonSession.h | 3 +- .../UnitTests/ProcessDataRateTests.cpp | 106 ++++++++++++++++ IntelPresentMon/UnitTests/UnitTests.vcxproj | 1 + .../UnitTests/UnitTests.vcxproj.filters | 1 + IntelPresentMon/metrics.csv | 3 + .../ETW/Microsoft_Windows_Direct3D12.h | 41 +++++++ PresentData/PresentData.vcxproj | 1 + PresentData/PresentData.vcxproj.filters | 3 + PresentData/PresentMonTraceConsumer.cpp | 62 +++++++++- PresentData/PresentMonTraceConsumer.hpp | 37 +++++- PresentData/PresentMonTraceSession.cpp | 17 ++- Tools/collect_etw_info.cmd | 5 + 42 files changed, 729 insertions(+), 153 deletions(-) create mode 100644 IntelPresentMon/PresentMonMiddleware/ProcessDataRate.h create mode 100644 IntelPresentMon/UnitTests/ProcessDataRateTests.cpp create mode 100644 PresentData/ETW/Microsoft_Windows_Direct3D12.h diff --git a/IntelPresentMon/Interprocess/source/DataStores.cpp b/IntelPresentMon/Interprocess/source/DataStores.cpp index bb0bf8eaf..6e1ddd003 100644 --- a/IntelPresentMon/Interprocess/source/DataStores.cpp +++ b/IntelPresentMon/Interprocess/source/DataStores.cpp @@ -1,4 +1,4 @@ -#include "DataStores.h" +#include "DataStores.h" #include "MetricCapabilities.h" #include "IntrospectionTransfer.h" #include "IntrospectionDataTypeMapping.h" @@ -129,9 +129,11 @@ namespace pmon::ipc } } - size_t FrameDataStore::CalculateSegmentBytes(const DataStoreSizingInfo& sizing) + size_t ProcessDataStore::CalculateSegmentBytes(const DataStoreSizingInfo& sizing) { - const size_t payloadBytes = sizing.ringSamples * sizeof(FrameData); + const size_t framePayloadBytes = sizing.ringSamples * sizeof(FrameData); + 
const size_t processDataPayloadBytes = sizing.ringSamples * sizeof(ProcessDataSample); + const size_t payloadBytes = framePayloadBytes + processDataPayloadBytes; size_t scaledBytes = ScaleBytes_(payloadBytes, kFrameScaleMul_, kFrameScaleDiv_); if (scaledBytes < payloadBytes + kFixedLeewayBytes_) { @@ -140,13 +142,13 @@ namespace pmon::ipc const size_t leewayBytes = scaledBytes - payloadBytes; const size_t totalBytes = util::PadToAlignment(scaledBytes, kSegmentAlignmentBytes_); pmlog_verb(util::log::V::ipc_sto)(std::format( - "ipc frame sizing | ring_samples:{} payload_bytes:{} scaled_bytes:{} fixed_leeway_bytes:{} leeway_bytes:{} alignment:{} total_bytes:{}", - sizing.ringSamples, payloadBytes, scaledBytes, kFixedLeewayBytes_, + "ipc process sizing | ring_samples:{} frame_payload_bytes:{} process_data_payload_bytes:{} payload_bytes:{} scaled_bytes:{} fixed_leeway_bytes:{} leeway_bytes:{} alignment:{} total_bytes:{}", + sizing.ringSamples, framePayloadBytes, processDataPayloadBytes, payloadBytes, scaledBytes, kFixedLeewayBytes_, leewayBytes, kSegmentAlignmentBytes_, totalBytes)); return totalBytes; } - StaticMetricValue FrameDataStore::FindStaticMetric(PM_METRIC metric) const + StaticMetricValue ProcessDataStore::FindStaticMetric(PM_METRIC metric) const { switch (metric) { case PM_METRIC_APPLICATION: @@ -157,7 +159,7 @@ namespace pmon::ipc return bookkeeping.startQpc; default: throw util::Except(PM_STATUS_QUERY_MALFORMED, - "Static metric not handled by frame data store"); + "Static metric not handled by process data store"); } } diff --git a/IntelPresentMon/Interprocess/source/DataStores.h b/IntelPresentMon/Interprocess/source/DataStores.h index 11d15cc99..0c6601870 100644 --- a/IntelPresentMon/Interprocess/source/DataStores.h +++ b/IntelPresentMon/Interprocess/source/DataStores.h @@ -18,6 +18,15 @@ namespace pmon::ipc using FrameData = util::metrics::FrameData; using FrameHistoryRing = HistoryRing; + // Per-process ETW event samples (not tied to a frame). 
PSO compile is the first consumer; + // additional metrics add fields here and bindings that read processData. + struct ProcessDataSample + { + double psoCompileDurationMs = 0.; + uint64_t eventCompleteQpc = 0; + }; + using ProcessDataHistoryRing = HistoryRing; + class MetricCapabilities; namespace intro { @@ -32,7 +41,7 @@ namespace pmon::ipc // Frame + telemetry: ring sample capacity and optional override size. size_t ringSamples = 0; std::optional overrideBytes; - // Frame-only: backpressure behavior for frame rings. + // Process (target) store: backpressure behavior for frame and process data rings. bool backpressured = false; }; @@ -45,15 +54,16 @@ namespace pmon::ipc int64_t, const char*>; - struct FrameDataStore + struct ProcessDataStore { - FrameDataStore(ShmSegmentManager& segMan, size_t cap, bool backpressured) + ProcessDataStore(ShmSegmentManager& segMan, size_t cap, bool backpressured) : frameData{ cap, segMan.get_allocator(), backpressured }, + processData{ cap, segMan.get_allocator(), backpressured }, statics{ .applicationName{ segMan.get_allocator() } } {} - FrameDataStore(ShmSegmentManager& segMan, const DataStoreSizingInfo& sizing) - : FrameDataStore(segMan, sizing.ringSamples, sizing.backpressured) + ProcessDataStore(ShmSegmentManager& segMan, const DataStoreSizingInfo& sizing) + : ProcessDataStore(segMan, sizing.ringSamples, sizing.backpressured) {} // values that never change over the life of a target, available for use with metric queries // often lazy initialized upon receipt of the first present/frame @@ -72,6 +82,7 @@ namespace pmon::ipc bool isPlayback = false; } bookkeeping{}; FrameHistoryRing frameData; + ProcessDataHistoryRing processData; StaticMetricValue FindStaticMetric(PM_METRIC metric) const; @@ -133,4 +144,3 @@ namespace pmon::ipc const DataStoreSizingInfo& sizing, PM_DEVICE_TYPE deviceType); } - diff --git a/IntelPresentMon/Interprocess/source/Interprocess.cpp b/IntelPresentMon/Interprocess/source/Interprocess.cpp index 
6a754e256..e8f27d681 100644 --- a/IntelPresentMon/Interprocess/source/Interprocess.cpp +++ b/IntelPresentMon/Interprocess/source/Interprocess.cpp @@ -1,4 +1,4 @@ -#include "../../CommonUtilities/win/WinAPI.h" +#include "../../CommonUtilities/win/WinAPI.h" #include "Interprocess.h" #include "IntrospectionTransfer.h" #include "IntrospectionPopulators.h" @@ -161,46 +161,46 @@ namespace pmon::ipc return namer_; } // data store access - std::shared_ptr> - CreateOrGetFrameDataSegment(uint32_t pid, bool backpressured) override + std::shared_ptr> + CreateOrGetProcessDataSegment(uint32_t pid, bool backpressured) override { // resolve out existing or new weak ptr, try and lock - auto& pWeak = frameShmWeaks_[pid]; + auto& pWeak = processShmWeaks_[pid]; auto pFrameData = pWeak.lock(); if (!pFrameData) { // if weak ptr was new (or expired), lock will not work and we need to construct // make a frame data store as shared ptr - const auto segmentName = namer_.MakeFrameName(pid); + const auto segmentName = namer_.MakeProcessName(pid); const DataStoreSizingInfo sizing{ .ringSamples = frameRingSamples_, .backpressured = backpressured, }; - pFrameData = std::shared_ptr>( - new OwnedDataSegment( + pFrameData = std::shared_ptr>( + new OwnedDataSegment( segmentName, sizing, static_cast(Permissions_{ Permissions_::kReadOnly })), - [pid, segmentName](OwnedDataSegment* pSegment) { - pmlog_dbg("Frame data segment destroyed") + [pid, segmentName](OwnedDataSegment* pSegment) { + pmlog_dbg("Process data segment destroyed") .pmwatch(pid) .pmwatch(segmentName); delete pSegment; }); // store a weak reference pWeak = pFrameData; - pmlog_dbg("Frame data segment created") + pmlog_dbg("Process data segment created") .pmwatch(pid) .pmwatch(segmentName) .pmwatch(backpressured); } // remove stale elements to keep map lean - for (auto it = frameShmWeaks_.begin(); it != frameShmWeaks_.end(); ) { + for (auto it = processShmWeaks_.begin(); it != processShmWeaks_.end(); ) { if (it->second.expired()) { - 
const auto segmentName = namer_.MakeFrameName(it->first); - pmlog_dbg("Frame data segment released") + const auto segmentName = namer_.MakeProcessName(it->first); + pmlog_dbg("Process data segment released") .pmwatch(it->first) .pmwatch(segmentName); - it = frameShmWeaks_.erase(it); + it = processShmWeaks_.erase(it); } else { ++it; @@ -208,25 +208,25 @@ namespace pmon::ipc } return pFrameData; } - std::shared_ptr> - GetFrameDataSegment(uint32_t pid) override + std::shared_ptr> + GetProcessDataSegment(uint32_t pid) override { - if (auto i = frameShmWeaks_.find(pid); i != frameShmWeaks_.end()) { + if (auto i = processShmWeaks_.find(pid); i != processShmWeaks_.end()) { if (auto pSegment = i->second.lock()) { return pSegment; } // if weak ptr has expired, garbage collect from the map - const auto segmentName = namer_.MakeFrameName(pid); - pmlog_dbg("Frame data segment released") + const auto segmentName = namer_.MakeProcessName(pid); + pmlog_dbg("Process data segment released") .pmwatch(pid) .pmwatch(segmentName); - frameShmWeaks_.erase(i); + processShmWeaks_.erase(i); } return {}; } - std::vector GetFramePids() const override + std::vector GetProcessDataPids() const override { - return frameShmWeaks_ | vi::filter([](auto&& p) {return !p.second.expired(); }) | + return processShmWeaks_ | vi::filter([](auto&& p) {return !p.second.expired(); }) | vi::keys | rn::to(); } GpuDataStore& GetGpuDataStore(uint32_t deviceId) override @@ -305,7 +305,7 @@ namespace pmon::ipc bool introCpuComplete_ = false; std::optional> systemShm_; - std::unordered_map>> frameShmWeaks_; + std::unordered_map>> processShmWeaks_; std::unordered_map> gpuShms_; }; @@ -352,39 +352,39 @@ namespace pmon::ipc // responsibility to track this resource return root.ApiClone(blockAllocator); } - void OpenFrameDataStore(uint32_t pid) override + void OpenProcessDataStore(uint32_t pid) override { // If already open, nothing to do - if (frameShms_.find(pid) != frameShms_.end()) { + if (processShms_.find(pid) != 
processShms_.end()) { return; } - const auto segName = namer_.MakeFrameName(pid); - frameShms_.emplace( + const auto segName = namer_.MakeProcessName(pid); + processShms_.emplace( std::piecewise_construct, std::forward_as_tuple(pid), std::forward_as_tuple(segName) ); - pmlog_dbg("Frame data segment opened") + pmlog_dbg("Process data segment opened") .pmwatch(pid) .pmwatch(segName); } - void CloseFrameDataStore(uint32_t pid) override + void CloseProcessDataStore(uint32_t pid) override { - if (auto it = frameShms_.find(pid); it != frameShms_.end()) { - const auto segName = namer_.MakeFrameName(pid); - pmlog_dbg("Frame data segment closed") + if (auto it = processShms_.find(pid); it != processShms_.end()) { + const auto segName = namer_.MakeProcessName(pid); + pmlog_dbg("Process data segment closed") .pmwatch(pid) .pmwatch(segName); - frameShms_.erase(it); + processShms_.erase(it); } } // data store access - const FrameDataStore& GetFrameDataStore(uint32_t pid) const override + const ProcessDataStore& GetProcessDataStore(uint32_t pid) const override { - const auto it = frameShms_.find(pid); - if (it == frameShms_.end()) { - throw std::runtime_error{ "Frame data segment not open for this PID" }; + const auto it = processShms_.find(pid); + if (it == processShms_.end()) { + throw std::runtime_error{ "Process data segment not open for this PID" }; } return it->second.GetStore(); } @@ -432,7 +432,7 @@ namespace pmon::ipc std::optional> systemShm_; std::unordered_map> gpuShms_; - std::unordered_map> frameShms_; + std::unordered_map> processShms_; }; } diff --git a/IntelPresentMon/Interprocess/source/Interprocess.h b/IntelPresentMon/Interprocess/source/Interprocess.h index 54074f96f..915b27850 100644 --- a/IntelPresentMon/Interprocess/source/Interprocess.h +++ b/IntelPresentMon/Interprocess/source/Interprocess.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include #include @@ -27,9 +27,9 @@ namespace pmon::ipc virtual const ShmNamer& GetNamer() const = 0; // data 
store access - virtual std::shared_ptr> CreateOrGetFrameDataSegment(uint32_t pid, bool backpressured) = 0; - virtual std::shared_ptr> GetFrameDataSegment(uint32_t pid) = 0; - virtual std::vector GetFramePids() const = 0; + virtual std::shared_ptr> CreateOrGetProcessDataSegment(uint32_t pid, bool backpressured) = 0; + virtual std::shared_ptr> GetProcessDataSegment(uint32_t pid) = 0; + virtual std::vector GetProcessDataPids() const = 0; virtual GpuDataStore& GetGpuDataStore(uint32_t deviceId) = 0; virtual SystemDataStore& GetSystemDataStore() = 0; }; @@ -43,11 +43,11 @@ namespace pmon::ipc // data store access // not const because of the backpressure case // TODO: consider more separation of backpressure and broadcast cases - virtual const FrameDataStore& GetFrameDataStore(uint32_t pid) const = 0; + virtual const ProcessDataStore& GetProcessDataStore(uint32_t pid) const = 0; virtual const GpuDataStore& GetGpuDataStore(uint32_t deviceId) const = 0; virtual const SystemDataStore& GetSystemDataStore() const = 0; - virtual void OpenFrameDataStore(uint32_t pid) = 0; - virtual void CloseFrameDataStore(uint32_t pid) = 0; + virtual void OpenProcessDataStore(uint32_t pid) = 0; + virtual void CloseProcessDataStore(uint32_t pid) = 0; }; std::unique_ptr MakeServiceComms(std::string prefix, diff --git a/IntelPresentMon/Interprocess/source/OwnedDataSegment.h b/IntelPresentMon/Interprocess/source/OwnedDataSegment.h index bf37f524c..6e4d18ecf 100644 --- a/IntelPresentMon/Interprocess/source/OwnedDataSegment.h +++ b/IntelPresentMon/Interprocess/source/OwnedDataSegment.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "SharedMemoryTypes.h" #include "DataStores.h" #include "../../CommonUtilities/log/Log.h" @@ -21,8 +21,8 @@ namespace pmon::ipc nullptr, perms }, pData_{ MakeStore_(sizing) } { - if constexpr (std::is_same_v) { - pmlog_dbg("Shm segment populated (Frame)") + if constexpr (std::is_same_v) { + pmlog_dbg("Shm segment populated (Process)") .pmwatch(segmentName) 
.pmwatch(GetBytesTotal()) .pmwatch(GetBytesUsed()) diff --git a/IntelPresentMon/Interprocess/source/ShmNamer.cpp b/IntelPresentMon/Interprocess/source/ShmNamer.cpp index 4a1ca2491..26394525b 100644 --- a/IntelPresentMon/Interprocess/source/ShmNamer.cpp +++ b/IntelPresentMon/Interprocess/source/ShmNamer.cpp @@ -1,4 +1,3 @@ -#pragma once #include "ShmNamer.h" #include #include @@ -26,7 +25,7 @@ namespace pmon::ipc { return std::format("{}_{}_gpu_{}", prefix_, salt_, deviceId); } - std::string ShmNamer::MakeFrameName(uint32_t pid) const + std::string ShmNamer::MakeProcessName(uint32_t pid) const { return std::format("{}_{}_tgt_{}", prefix_, salt_, pid); } diff --git a/IntelPresentMon/Interprocess/source/ShmNamer.h b/IntelPresentMon/Interprocess/source/ShmNamer.h index c1a0c7075..6fe75fba0 100644 --- a/IntelPresentMon/Interprocess/source/ShmNamer.h +++ b/IntelPresentMon/Interprocess/source/ShmNamer.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include @@ -14,7 +14,7 @@ namespace pmon::ipc std::string MakeIntrospectionReadyName() const; std::string MakeSystemName() const; std::string MakeGpuName(uint32_t deviceId) const; - std::string MakeFrameName(uint32_t pid) const; + std::string MakeProcessName(uint32_t pid) const; const std::string& GetSalt() const; const std::string& GetPrefix() const; private: diff --git a/IntelPresentMon/Interprocess/source/metadata/EnumUnit.h b/IntelPresentMon/Interprocess/source/metadata/EnumUnit.h index 60ed95a70..692fced3f 100644 --- a/IntelPresentMon/Interprocess/source/metadata/EnumUnit.h +++ b/IntelPresentMon/Interprocess/source/metadata/EnumUnit.h @@ -23,7 +23,7 @@ X_(UNIT, KILOHERTZ, "Kilohertz", "kHz", "Frequency in thousands of cycles per second") \ X_(UNIT, MEGAHERTZ, "Megahertz", "MHz", "Frequency in millions of cycles per second") \ X_(UNIT, GIGAHERTZ, "Gigahertz", "GHz", "Frequency in billions of cycles per second") \ - X_(UNIT, CELSIUS, "Degrees Celsius", (const char*)u8"°C", "Temperature in degrees Celsius") \ + 
X_(UNIT, CELSIUS, "Degrees Celsius", (const char*)u8"°C", "Temperature in degrees Celsius") \ X_(UNIT, RPM, "Revolutions per Minute", "RPM", "Angular speed in revolutions per minute") \ X_(UNIT, BITS_PER_SECOND, "Bits per Second", "bps", "Bandwidth / data throughput in bits per second") \ X_(UNIT, KILOBITS_PER_SECOND, "Kilobits per Second", "kbps", "Bandwidth / data throughput in kilobits per second") \ diff --git a/IntelPresentMon/Interprocess/source/metadata/MetricList.h b/IntelPresentMon/Interprocess/source/metadata/MetricList.h index c39a55980..82477a9fe 100644 --- a/IntelPresentMon/Interprocess/source/metadata/MetricList.h +++ b/IntelPresentMon/Interprocess/source/metadata/MetricList.h @@ -97,4 +97,7 @@ X_(PM_METRIC_BETWEEN_APP_START, PM_METRIC_TYPE_FRAME_EVENT, PM_UNIT_MILLISECONDS, PM_DATA_TYPE_VOID, PM_DATA_TYPE_DOUBLE, 0, PM_DEVICE_TYPE_INDEPENDENT, FULL_STATS) \ X_(PM_METRIC_FLIP_DELAY, PM_METRIC_TYPE_FRAME_EVENT, PM_UNIT_MILLISECONDS, PM_DATA_TYPE_VOID, PM_DATA_TYPE_DOUBLE, 0, PM_DEVICE_TYPE_INDEPENDENT, FULL_STATS) \ X_(PM_METRIC_SESSION_START_QPC, PM_METRIC_TYPE_STATIC, PM_UNIT_QPC, PM_DATA_TYPE_UINT64, PM_DATA_TYPE_UINT64, 0, PM_DEVICE_TYPE_INDEPENDENT, PM_STAT_NONE) \ + X_(PM_METRIC_D3D12_PSO_COMPILE_COUNT, PM_METRIC_TYPE_DYNAMIC, PM_UNIT_HERTZ, PM_DATA_TYPE_DOUBLE, PM_DATA_TYPE_DOUBLE, 0, PM_DEVICE_TYPE_INDEPENDENT, PM_STAT_AVG) \ + X_(PM_METRIC_D3D12_PSO_COMPILE_TIME, PM_METRIC_TYPE_DYNAMIC, PM_UNIT_MILLISECONDS, PM_DATA_TYPE_DOUBLE, PM_DATA_TYPE_DOUBLE, 0, PM_DEVICE_TYPE_INDEPENDENT, PM_STAT_AVG) \ + X_(PM_METRIC_D3D12_PSO_COMPILE_BUSY_PERCENT, PM_METRIC_TYPE_DYNAMIC, PM_UNIT_PERCENT, PM_DATA_TYPE_DOUBLE, PM_DATA_TYPE_DOUBLE, 0, PM_DEVICE_TYPE_INDEPENDENT, PM_STAT_AVG) \ X_(PM_METRIC_COUNT_, PM_METRIC_TYPE_STATIC, PM_UNIT_DIMENSIONLESS, PM_DATA_TYPE_VOID, PM_DATA_TYPE_VOID, PM_ENUM_NULL_ENUM, PM_DEVICE_TYPE_INDEPENDENT, PM_STAT_NONE) diff --git a/IntelPresentMon/PresentMonAPI2/PresentMonAPI.h b/IntelPresentMon/PresentMonAPI2/PresentMonAPI.h index 
74819ee64..5ba3e4e20 100644 --- a/IntelPresentMon/PresentMonAPI2/PresentMonAPI.h +++ b/IntelPresentMon/PresentMonAPI2/PresentMonAPI.h @@ -141,6 +141,9 @@ extern "C" { PM_METRIC_PROCESS_ID, PM_METRIC_SESSION_START_QPC, PM_METRIC_CPU_CORE_TEMPERATURE, + PM_METRIC_D3D12_PSO_COMPILE_COUNT, + PM_METRIC_D3D12_PSO_COMPILE_TIME, + PM_METRIC_D3D12_PSO_COMPILE_BUSY_PERCENT, PM_METRIC_COUNT_, // sentry to mark end of metric list; not an actual query metric }; diff --git a/IntelPresentMon/PresentMonAPI2Tests/EtlLoggerTests.cpp b/IntelPresentMon/PresentMonAPI2Tests/EtlLoggerTests.cpp index e78ca95fe..49714de43 100644 --- a/IntelPresentMon/PresentMonAPI2Tests/EtlLoggerTests.cpp +++ b/IntelPresentMon/PresentMonAPI2Tests/EtlLoggerTests.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022-2023 Intel Corporation +// Copyright (C) 2022-2023 Intel Corporation // SPDX-License-Identifier: MIT #include "../CommonUtilities/win/WinAPI.h" #include "CppUnitTest.h" @@ -53,7 +53,7 @@ namespace EtlLoggerTests // verify initial status const auto status = fixture_.service->QueryStatus(); Assert::AreEqual(0ull, status.trackedPids.size()); - Assert::AreEqual(0ull, status.frameStorePids.size()); + Assert::AreEqual(0ull, status.processStorePids.size()); Assert::AreEqual(16u, status.telemetryPeriodMs); Assert::IsFalse((bool)status.etwFlushPeriodMs); } diff --git a/IntelPresentMon/PresentMonAPI2Tests/FirstFrameWait.h b/IntelPresentMon/PresentMonAPI2Tests/FirstFrameWait.h index e873b134c..64d2a57af 100644 --- a/IntelPresentMon/PresentMonAPI2Tests/FirstFrameWait.h +++ b/IntelPresentMon/PresentMonAPI2Tests/FirstFrameWait.h @@ -55,8 +55,8 @@ namespace pmon::tests { mid::ActionClient client{ ctrlPipe }; auto pComms = ipc::MakeMiddlewareComms(client.GetShmPrefix(), client.GetShmSalt()); - pComms->OpenFrameDataStore(pid); - const auto warmupRange = WaitForFirstFrameRange(pComms->GetFrameDataStore(pid).frameData, label, waitLimit); + pComms->OpenProcessDataStore(pid); + const auto warmupRange = 
WaitForFirstFrameRange(pComms->GetProcessDataStore(pid).frameData, label, waitLimit); return warmupRange.second > 0; } @@ -68,7 +68,7 @@ namespace pmon::tests { mid::ActionClient client{ ctrlPipe }; auto pComms = ipc::MakeMiddlewareComms(client.GetShmPrefix(), client.GetShmSalt()); - pComms->OpenFrameDataStore(pid); - return WaitForFirstFrame(pComms->GetFrameDataStore(pid).frameData, label, waitLimit); + pComms->OpenProcessDataStore(pid); + return WaitForFirstFrame(pComms->GetProcessDataStore(pid).frameData, label, waitLimit); } } diff --git a/IntelPresentMon/PresentMonAPI2Tests/InterimBroadcasterTests.cpp b/IntelPresentMon/PresentMonAPI2Tests/InterimBroadcasterTests.cpp index e0d26d777..c0284ef12 100644 --- a/IntelPresentMon/PresentMonAPI2Tests/InterimBroadcasterTests.cpp +++ b/IntelPresentMon/PresentMonAPI2Tests/InterimBroadcasterTests.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022-2023 Intel Corporation +// Copyright (C) 2022-2023 Intel Corporation // SPDX-License-Identifier: MIT #include "../CommonUtilities/win/WinAPI.h" #include "../CommonUtilities/Env.h" @@ -153,7 +153,7 @@ namespace InterimBroadcasterTests // verify initial status const auto status = fixture_.service->QueryStatus(); Assert::AreEqual(0ull, status.trackedPids.size()); - Assert::AreEqual(0ull, status.frameStorePids.size()); + Assert::AreEqual(0ull, status.processStorePids.size()); Assert::AreEqual(16u, status.telemetryPeriodMs); Assert::IsFalse((bool)status.etwFlushPeriodMs); } @@ -913,10 +913,10 @@ namespace InterimBroadcasterTests client.DispatchSync(svc::acts::StartTracking::Params{ .targetPid = pres.GetId() }); // open the store - pComms->OpenFrameDataStore(pres.GetId()); + pComms->OpenProcessDataStore(pres.GetId()); // verify static data - auto& store = pComms->GetFrameDataStore(pres.GetId()); + auto& store = pComms->GetProcessDataStore(pres.GetId()); Assert::AreEqual(pres.GetId(), store.bookkeeping.processId); const std::string staticAppName = store.statics.applicationName.c_str(); 
Assert::AreEqual("PresentBench.exe"s, staticAppName); @@ -929,9 +929,9 @@ namespace InterimBroadcasterTests auto pres = fixture_.LaunchPresenter(); client.DispatchSync(svc::acts::StartTracking::Params{ .targetPid = pres.GetId() }); - pComms->OpenFrameDataStore(pres.GetId()); + pComms->OpenProcessDataStore(pres.GetId()); - AssertSegmentRejectsWrite_(namer.MakeFrameName(pres.GetId())); + AssertSegmentRejectsWrite_(namer.MakeProcessName(pres.GetId())); } TEST_METHOD(TrackUntrack) { @@ -943,32 +943,32 @@ namespace InterimBroadcasterTests client.DispatchSync(svc::acts::StartTracking::Params{ .targetPid = pres.GetId() }); // verify the store exists - pComms->OpenFrameDataStore(pres.GetId()); + pComms->OpenProcessDataStore(pres.GetId()); // verify the service tracking, as expected { const auto sta = fixture_.service->QueryStatus(); Assert::AreEqual(1ull, sta.trackedPids.size()); Assert::IsTrue(sta.trackedPids.contains(pres.GetId())); - Assert::AreEqual(1ull, sta.frameStorePids.size()); - Assert::IsTrue(sta.frameStorePids.contains(pres.GetId())); + Assert::AreEqual(1ull, sta.processStorePids.size()); + Assert::IsTrue(sta.processStorePids.contains(pres.GetId())); } // stop tracking client.DispatchSync(svc::acts::StopTracking::Params{ .targetPid = pres.GetId() }); // close the segment - pComms->CloseFrameDataStore(pres.GetId()); + pComms->CloseProcessDataStore(pres.GetId()); // verify the service not tracking, as expected { const auto sta = fixture_.service->QueryStatus(); Assert::AreEqual(0ull, sta.trackedPids.size()); - Assert::AreEqual(0ull, sta.frameStorePids.size()); + Assert::AreEqual(0ull, sta.processStorePids.size()); } // verify segment can no longer be opened - Assert::ExpectException([&] {pComms->OpenFrameDataStore(pres.GetId()); }); + Assert::ExpectException([&] {pComms->OpenProcessDataStore(pres.GetId()); }); } // make sure we get frames over time TEST_METHOD(ReadFrames) @@ -984,8 +984,8 @@ namespace InterimBroadcasterTests 
client.DispatchSync(svc::acts::StartTracking::Params{ .targetPid = pres.GetId() }); // open the store - pComms->OpenFrameDataStore(pres.GetId()); - auto& frames = pComms->GetFrameDataStore(pres.GetId()).frameData; + pComms->OpenProcessDataStore(pres.GetId()); + auto& frames = pComms->GetProcessDataStore(pres.GetId()).frameData; pmon::tests::WaitForFirstFrame(frames, "realtime-read"); @@ -1028,8 +1028,8 @@ namespace InterimBroadcasterTests std::this_thread::sleep_for(1ms); client.DispatchSync(svc::acts::StartTracking::Params{ .targetPid = pres.GetId() }); - pComms->OpenFrameDataStore(pres.GetId()); - auto& ring = pComms->GetFrameDataStore(pres.GetId()).frameData; + pComms->OpenProcessDataStore(pres.GetId()); + auto& ring = pComms->GetProcessDataStore(pres.GetId()).frameData; pmon::tests::WaitForFirstFrame(ring, "rt-wrap-no-miss"); @@ -1098,8 +1098,8 @@ namespace InterimBroadcasterTests std::this_thread::sleep_for(1ms); client.DispatchSync(svc::acts::StartTracking::Params{ .targetPid = pres.GetId() }); - pComms->OpenFrameDataStore(pres.GetId()); - auto& ring = pComms->GetFrameDataStore(pres.GetId()).frameData; + pComms->OpenProcessDataStore(pres.GetId()); + auto& ring = pComms->GetProcessDataStore(pres.GetId()).frameData; auto range = ring.GetSerialRange(); for (size_t i = 0; i < 20 && range.first == 0; ++i) { @@ -1140,10 +1140,10 @@ namespace InterimBroadcasterTests client.DispatchSync(svc::acts::StartTracking::Params{ .targetPid = pid, .isPlayback = true }); // open the store - pComms->OpenFrameDataStore(pid); + pComms->OpenProcessDataStore(pid); // verify static data - auto& store = pComms->GetFrameDataStore(pid); + auto& store = pComms->GetProcessDataStore(pid); pmon::tests::WaitForFirstFrame(store.frameData, "paced-playback-static"); Assert::AreEqual(pid, store.bookkeeping.processId); const std::string staticAppName = store.statics.applicationName.c_str(); @@ -1164,8 +1164,8 @@ namespace InterimBroadcasterTests 
client.DispatchSync(svc::acts::StartTracking::Params{ .targetPid = pid, .isPlayback = true }); // open the store - pComms->OpenFrameDataStore(pid); - auto& frames = pComms->GetFrameDataStore(pid).frameData; + pComms->OpenProcessDataStore(pid); + auto& frames = pComms->GetProcessDataStore(pid).frameData; pmon::tests::WaitForFirstFrame(frames, "paced-playback-read"); @@ -1209,10 +1209,10 @@ namespace InterimBroadcasterTests client.DispatchSync(svc::acts::StartTracking::Params{ .targetPid = pid, .isPlayback = true }); // open the store - pComms->OpenFrameDataStore(pid); + pComms->OpenProcessDataStore(pid); // verify static data - auto& store = pComms->GetFrameDataStore(pid); + auto& store = pComms->GetProcessDataStore(pid); pmon::tests::WaitForFirstFrame(store.frameData, "backpressured-playback-static"); Assert::AreEqual(pid, store.bookkeeping.processId); const std::string staticAppName = store.statics.applicationName.c_str(); @@ -1235,8 +1235,8 @@ namespace InterimBroadcasterTests .targetPid = pid, .isPlayback = true, .isBackpressured = true }); // open the store - pComms->OpenFrameDataStore(pid); - auto& ring = pComms->GetFrameDataStore(pid).frameData; + pComms->OpenProcessDataStore(pid); + auto& ring = pComms->GetProcessDataStore(pid).frameData; pmon::tests::WaitForFirstFrame(ring, "backpressured-playback"); @@ -1331,8 +1331,8 @@ namespace InterimBroadcasterTests client.DispatchSync(svc::acts::StartTracking::Params{ .targetPid = pid, .isPlayback = true, .isBackpressured = true }); - pComms->OpenFrameDataStore(pid); - auto& ring = pComms->GetFrameDataStore(pid).frameData; + pComms->OpenProcessDataStore(pid); + auto& ring = pComms->GetProcessDataStore(pid).frameData; pmon::tests::WaitForFirstFrame(ring, "pb-wrap-backpressure"); diff --git a/IntelPresentMon/PresentMonAPI2Tests/IpcComponentTests.cpp b/IntelPresentMon/PresentMonAPI2Tests/IpcComponentTests.cpp index 30feb04eb..cd210e1a3 100644 --- a/IntelPresentMon/PresentMonAPI2Tests/IpcComponentTests.cpp +++ 
b/IntelPresentMon/PresentMonAPI2Tests/IpcComponentTests.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022-2025 Intel Corporation +// Copyright (C) 2022-2025 Intel Corporation // SPDX-License-Identifier: MIT #include "../CommonUtilities/win/WinAPI.h" @@ -531,7 +531,7 @@ namespace IpcComponentTests const auto segName = std::format("pm_ipc_backpressure_test_seg_{}", static_cast(::GetCurrentProcessId())); - ipc::OwnedDataSegment seg{ segName, sizing }; + ipc::OwnedDataSegment seg{ segName, sizing }; auto& ring = seg.GetStore().frameData; ipc::FrameData sample{}; diff --git a/IntelPresentMon/PresentMonAPI2Tests/MultiClientTests.cpp b/IntelPresentMon/PresentMonAPI2Tests/MultiClientTests.cpp index e9ef23516..c69d644fb 100644 --- a/IntelPresentMon/PresentMonAPI2Tests/MultiClientTests.cpp +++ b/IntelPresentMon/PresentMonAPI2Tests/MultiClientTests.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022-2023 Intel Corporation +// Copyright (C) 2022-2023 Intel Corporation // SPDX-License-Identifier: MIT #include "../CommonUtilities/win/WinAPI.h" #include "CppUnitTest.h" @@ -53,7 +53,7 @@ namespace MultiClientTests // verify initial status const auto status = fixture_.service->QueryStatus(); Assert::AreEqual(0ull, status.trackedPids.size()); - Assert::AreEqual(0ull, status.frameStorePids.size()); + Assert::AreEqual(0ull, status.processStorePids.size()); Assert::AreEqual(16u, status.telemetryPeriodMs); Assert::IsFalse((bool)status.etwFlushPeriodMs); } @@ -488,7 +488,7 @@ namespace MultiClientTests { const auto status = fixture_.service->QueryStatus(); Assert::AreEqual(1ull, status.trackedPids.size()); - Assert::AreEqual(1ull, status.frameStorePids.size()); + Assert::AreEqual(1ull, status.processStorePids.size()); } // one client quits client1.Quit(); @@ -496,7 +496,7 @@ namespace MultiClientTests { const auto status = fixture_.service->QueryStatus(); Assert::AreEqual(1ull, status.trackedPids.size()); - Assert::AreEqual(1ull, status.frameStorePids.size()); + Assert::AreEqual(1ull, 
status.processStorePids.size()); } // other client quits client2.Quit(); @@ -504,7 +504,7 @@ namespace MultiClientTests { const auto status = fixture_.service->QueryStatus(); Assert::AreEqual(0ull, status.trackedPids.size()); - Assert::AreEqual(0ull, status.frameStorePids.size()); + Assert::AreEqual(0ull, status.processStorePids.size()); } } // verify process untrack (stream stop) when clients die suddenly @@ -524,7 +524,7 @@ namespace MultiClientTests { const auto status = fixture_.service->QueryStatus(); Assert::AreEqual(1ull, status.trackedPids.size()); - Assert::AreEqual(1ull, status.frameStorePids.size()); + Assert::AreEqual(1ull, status.processStorePids.size()); } // one client dies client1.Murder(); @@ -533,7 +533,7 @@ namespace MultiClientTests { const auto status = fixture_.service->QueryStatus(); Assert::AreEqual(1ull, status.trackedPids.size()); - Assert::AreEqual(1ull, status.frameStorePids.size()); + Assert::AreEqual(1ull, status.processStorePids.size()); } // other client dies client2.Murder(); @@ -542,7 +542,7 @@ namespace MultiClientTests { const auto status = fixture_.service->QueryStatus(); Assert::AreEqual(0ull, status.trackedPids.size()); - Assert::AreEqual(0ull, status.frameStorePids.size()); + Assert::AreEqual(0ull, status.processStorePids.size()); } } // test a large number of clients running diff --git a/IntelPresentMon/PresentMonAPI2Tests/TestCommands.h b/IntelPresentMon/PresentMonAPI2Tests/TestCommands.h index e7fe932c1..43a244be9 100644 --- a/IntelPresentMon/PresentMonAPI2Tests/TestCommands.h +++ b/IntelPresentMon/PresentMonAPI2Tests/TestCommands.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include #include @@ -15,14 +15,14 @@ namespace pmon::test { // new ipc tracking std::set trackedPids; - std::set frameStorePids; + std::set processStorePids; uint32_t telemetryPeriodMs; std::optional etwFlushPeriodMs; template void serialize(Archive& ar) { - ar(trackedPids, frameStorePids, telemetryPeriodMs, etwFlushPeriodMs); + 
ar(trackedPids, processStorePids, telemetryPeriodMs, etwFlushPeriodMs); } }; } diff --git a/IntelPresentMon/PresentMonMiddleware/DynamicQuery.cpp b/IntelPresentMon/PresentMonMiddleware/DynamicQuery.cpp index 791d42857..02a5ea933 100644 --- a/IntelPresentMon/PresentMonMiddleware/DynamicQuery.cpp +++ b/IntelPresentMon/PresentMonMiddleware/DynamicQuery.cpp @@ -1,6 +1,8 @@ -#include "DynamicQuery.h" +#include "DynamicQuery.h" #include "FrameMetricsSource.h" #include "QueryValidation.h" +#include "ProcessDataRate.h" +#include "../CommonUtilities/mc/FrameMetricsMemberMap.h" #include "../PresentMonAPIWrapperCommon/Introspection.h" #include "../Interprocess/source/SystemDeviceId.h" #include "../Interprocess/source/Interprocess.h" @@ -100,9 +102,19 @@ static bool IsFrameTimeOrFpsMetric_(PM_METRIC metric) } } +static bool HasFrameMetricBinding_(PM_METRIC metric) +{ + return util::DispatchEnumValue( + metric, + [&]() -> bool { + return util::metrics::HasFrameMetricMember; + }, + false); +} + static uint64_t GetTargetStartQpc_(ipc::MiddlewareComms& comms, uint32_t processId) { - return uint64_t(processId ? comms.GetFrameDataStore(processId).bookkeeping.startQpc : 0); + return uint64_t(processId ? 
comms.GetProcessDataStore(processId).bookkeeping.startQpc : 0); } static std::string BuildElapsedSinceTargetStartText_(uint64_t targetStartQpc, uint64_t nowTimestamp, double qpcPeriodSeconds) @@ -135,6 +147,7 @@ PM_DYNAMIC_QUERY::PM_DYNAMIC_QUERY(std::span qels, double wind std::unordered_map telemetryBindings; MetricBinding* frameBinding = nullptr; + MetricBinding* processDataBinding = nullptr; size_t blobCursor = 0; for (auto& qel : qels) { @@ -142,13 +155,24 @@ PM_DYNAMIC_QUERY::PM_DYNAMIC_QUERY(std::span qels, double wind const auto metricView = introRoot.FindMetric(qel.metric); const auto metricType = metricView.GetType(); const bool isStaticMetric = metricType == PM_METRIC_TYPE_STATIC; - const bool isFrameMetric = !isStaticMetric && qel.deviceId == ipc::kUniversalDeviceId; + const bool isProcessDataMetric = IsProcessDataMetric(qel.metric); + const bool isFrameMetric = !isStaticMetric && !isProcessDataMetric && + qel.deviceId == ipc::kUniversalDeviceId && HasFrameMetricBinding_(qel.metric); if (isStaticMetric) { auto bindingPtr = MakeStaticMetricBinding(qel, middleware); binding = bindingPtr.get(); ringMetricPtrs_.push_back(std::move(bindingPtr)); } - else if (qel.deviceId == ipc::kUniversalDeviceId) { + else if (isProcessDataMetric) { + binding = processDataBinding; + if (!binding) { + auto bindingPtr = MakeProcessDataMetricBinding(qel, qpcPeriodSeconds_); + binding = bindingPtr.get(); + processDataBinding = binding; + ringMetricPtrs_.push_back(std::move(bindingPtr)); + } + } + else if (isFrameMetric) { binding = frameBinding; if (!binding) { auto bindingPtr = MakeFrameMetricBinding(qel); diff --git a/IntelPresentMon/PresentMonMiddleware/FrameMetricsSource.cpp b/IntelPresentMon/PresentMonMiddleware/FrameMetricsSource.cpp index fe8261873..8509aeaf2 100644 --- a/IntelPresentMon/PresentMonMiddleware/FrameMetricsSource.cpp +++ b/IntelPresentMon/PresentMonMiddleware/FrameMetricsSource.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2025 Intel Corporation +// Copyright (C) 
2025 Intel Corporation #include "FrameMetricsSource.h" #include @@ -182,8 +182,8 @@ namespace pmon::mid progressCallback_{ std::move(progressCallback) } { // open the data store from ipc - comms_.OpenFrameDataStore(processId_); - pStore_ = &comms_.GetFrameDataStore(processId_); + comms_.OpenProcessDataStore(processId_); + pStore_ = &comms_.GetProcessDataStore(processId_); const auto range = pStore_->frameData.GetSerialRange(); nextFrameSerial_ = range.first; } @@ -195,7 +195,7 @@ namespace pmon::mid if (pStore_ == nullptr) { return; } - comms_.CloseFrameDataStore(processId_); + comms_.CloseProcessDataStore(processId_); pStore_ = nullptr; swapChains_.clear(); } diff --git a/IntelPresentMon/PresentMonMiddleware/FrameMetricsSource.h b/IntelPresentMon/PresentMonMiddleware/FrameMetricsSource.h index 7e57dfa24..4e98a75ed 100644 --- a/IntelPresentMon/PresentMonMiddleware/FrameMetricsSource.h +++ b/IntelPresentMon/PresentMonMiddleware/FrameMetricsSource.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include #include @@ -118,7 +118,7 @@ namespace pmon::mid void ProcessNewFrames_(); ipc::MiddlewareComms& comms_; - const ipc::FrameDataStore* pStore_ = nullptr; + const ipc::ProcessDataStore* pStore_ = nullptr; uint32_t processId_ = 0; size_t perSwapChainCapacity_ = 0; size_t nextFrameSerial_ = 0; diff --git a/IntelPresentMon/PresentMonMiddleware/MetricBinding.cpp b/IntelPresentMon/PresentMonMiddleware/MetricBinding.cpp index f8b057c17..615e66b8e 100644 --- a/IntelPresentMon/PresentMonMiddleware/MetricBinding.cpp +++ b/IntelPresentMon/PresentMonMiddleware/MetricBinding.cpp @@ -1,13 +1,16 @@ -#include "../CommonUtilities/win/WinAPI.h" +#include "../CommonUtilities/win/WinAPI.h" #include "MetricBinding.h" #include "FrameMetricsSource.h" #include "Middleware.h" +#include "ProcessDataRate.h" #include "../Interprocess/source/Interprocess.h" #include "../Interprocess/source/IntrospectionHelpers.h" #include "../Interprocess/source/IntrospectionDataTypeMapping.h" #include 
"../Interprocess/source/SystemDeviceId.h" #include "../CommonUtilities/Memory.h" +#include "../CommonUtilities/mc/FrameMetricsMemberMap.h" #include +#include #include namespace pmon::mid @@ -324,6 +327,97 @@ namespace pmon::mid bool needsConversion_ = false; }; + class ProcessDataMetricBinding_ : public MetricBinding /* per-process D3D12 PSO compile metrics: Poll derives them from the process-data ring and writes one double per registered metric */ + { + public: + explicit ProcessDataMetricBinding_(double qpcPeriodSeconds) /* qpcPeriodSeconds is handed to every tick/ms conversion helper called from Poll */ + : + qpcPeriodSeconds_{ qpcPeriodSeconds } + { + } + + void Poll(const DynamicQueryWindow& window, uint8_t* pBlobBase, ipc::MiddlewareComms& comms, + const SwapChainState* pSwapChain, uint32_t processId) const override + { + (void)pSwapChain; /* the only parameter this override never reads */ + /* window and comms are both read below, so they are not void-cast: */ + /* casting a used parameter to void only misleads the reader. */ + + if (processId == 0) { /* no target process: publish 0 for every registered metric */ + for (const auto& avgOffsetEntry : avgOffsets_) { + *reinterpret_cast<double*>(pBlobBase + avgOffsetEntry.second) = 0.; + } + return; + } + + const ipc::ProcessDataHistoryRing& ring = comms.GetProcessDataStore(processId).processData; + uint64_t compileCount = 0; + double compileDurationMsSum = 0.; + std::vector<PsoCompileQpcInterval> busyIntervals; /* filled only when the busy-percent metric was requested */ + const bool needBusyPercent = avgOffsets_.find(PM_METRIC_D3D12_PSO_COMPILE_BUSY_PERCENT) != avgOffsets_.end(); + if (needBusyPercent) { + busyIntervals.reserve(ring.Size()); + } + const auto serialRange = ring.GetSerialRange(); + for (size_t serial = serialRange.first; serial < serialRange.second; ++serial) { + const ipc::ProcessDataSample& sample = ring.At(serial); + const uint64_t endQpc = sample.eventCompleteQpc; + const uint64_t durationQpc = PsoCompileDurationMsToQpc( + sample.psoCompileDurationMs, qpcPeriodSeconds_); /* a sample carries its completion tick plus a duration in ms; the start tick is reconstructed from the two */ + const uint64_t startQpc = endQpc >= durationQpc ? 
endQpc - durationQpc : 0; /* clamp to 0 instead of wrapping when the duration exceeds the completion tick */ + if (startQpc >= window.oldest && startQpc <= window.newest) { /* a compile is counted only when its start lies inside the window (both bounds inclusive) */ + ++compileCount; + } + uint64_t clipStart = 0; + uint64_t clipEnd = 0; + if (PsoCompileClipToWindow(startQpc, endQpc, window.oldest, window.newest, clipStart, clipEnd)) { + compileDurationMsSum += PsoCompileQpcToDurationMs(clipEnd - clipStart, qpcPeriodSeconds_); /* only the part of the compile that overlaps the window is charged */ + if (needBusyPercent) { + busyIntervals.push_back(PsoCompileQpcInterval{ clipStart, clipEnd }); + } + } + } + + const uint64_t windowQpc = window.newest >= window.oldest ? + window.newest - window.oldest : 0; + const uint64_t mergedBusyQpc = needBusyPercent ? MergePsoCompileBusyQpc(std::move(busyIntervals)) : 0; + + for (const auto& [metricId, dataOffset] : avgOffsets_) { + double value = 0.; /* stays 0 for a metric id that none of the branches below recognizes */ + if (metricId == PM_METRIC_D3D12_PSO_COMPILE_COUNT) { + value = PsoCompileCountRate(compileCount, windowQpc, qpcPeriodSeconds_); + } + else if (metricId == PM_METRIC_D3D12_PSO_COMPILE_TIME) { + value = PsoCompileTimeRateMsPerSecond(compileDurationMsSum, windowQpc, qpcPeriodSeconds_); + } + else if (metricId == PM_METRIC_D3D12_PSO_COMPILE_BUSY_PERCENT) { + value = PsoCompileBusyPercent(mergedBusyQpc, windowQpc); + } + *reinterpret_cast<double*>(pBlobBase + dataOffset) = value; + } + } + + void Finalize() override + { + } + + void AddMetricStat(PM_QUERY_ELEMENT& qel, const pmapi::intro::Root& intro) override /* registers one query element: validates the stat, sizes and aligns its blob slot, records the slot offset for Poll; throws PM_STATUS_QUERY_MALFORMED for any stat other than PM_STAT_AVG */ + { + (void)intro; + if (qel.stat != PM_STAT_AVG) { + throw util::Except(PM_STATUS_QUERY_MALFORMED, + "D3D12 PSO compile metrics only support PM_STAT_AVG."); + } + qel.dataSize = sizeof(double); + qel.dataOffset = (uint64_t)util::PadToAlignment((size_t)qel.dataOffset, sizeof(double)); + avgOffsets_[qel.metric] = qel.dataOffset; /* keyed by metric: a second element for the same metric replaces the recorded offset */ + } + + private: + double qpcPeriodSeconds_; + std::unordered_map<PM_METRIC, uint64_t> avgOffsets_; /* metric id -> byte offset of its double inside the result blob */ + }; + template struct TelemetryBindingBridger_ { @@ -365,4 +459,10 @@ namespace pmon::mid { return std::make_unique(middleware, qel); } + + std::unique_ptr<MetricBinding> MakeProcessDataMetricBinding(PM_QUERY_ELEMENT& qel, double 
qpcPeriodSeconds) + { + (void)qel; + return std::make_unique(qpcPeriodSeconds); + } } diff --git a/IntelPresentMon/PresentMonMiddleware/MetricBinding.h b/IntelPresentMon/PresentMonMiddleware/MetricBinding.h index 4e9d1b8ae..c8f99b950 100644 --- a/IntelPresentMon/PresentMonMiddleware/MetricBinding.h +++ b/IntelPresentMon/PresentMonMiddleware/MetricBinding.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include #include @@ -40,4 +40,5 @@ namespace pmon::mid std::unique_ptr MakeFrameMetricBinding(PM_QUERY_ELEMENT& qel); std::unique_ptr MakeTelemetryMetricBinding(PM_QUERY_ELEMENT& qel, const pmapi::intro::Root& intro); std::unique_ptr MakeStaticMetricBinding(PM_QUERY_ELEMENT& qel, Middleware& middleware); + std::unique_ptr MakeProcessDataMetricBinding(PM_QUERY_ELEMENT& qel, double qpcPeriodSeconds); } diff --git a/IntelPresentMon/PresentMonMiddleware/Middleware.cpp b/IntelPresentMon/PresentMonMiddleware/Middleware.cpp index f6e1598c8..8b825ce74 100644 --- a/IntelPresentMon/PresentMonMiddleware/Middleware.cpp +++ b/IntelPresentMon/PresentMonMiddleware/Middleware.cpp @@ -295,7 +295,7 @@ namespace pmon::mid return pComms_->GetSystemDataStore().FindStaticMetric(element.metric); } if (element.deviceId == ipc::kUniversalDeviceId) { - return pComms_->GetFrameDataStore(processId).FindStaticMetric(element.metric); + return pComms_->GetProcessDataStore(processId).FindStaticMetric(element.metric); } return pComms_->GetGpuDataStore(element.deviceId).FindStaticMetric(element.metric); }(); diff --git a/IntelPresentMon/PresentMonMiddleware/ProcessDataRate.h b/IntelPresentMon/PresentMonMiddleware/ProcessDataRate.h new file mode 100644 index 000000000..95a910940 --- /dev/null +++ b/IntelPresentMon/PresentMonMiddleware/ProcessDataRate.h @@ -0,0 +1,114 @@ +// Copyright (C) 2026 Intel Corporation +// SPDX-License-Identifier: MIT +#pragma once +#include +#include +#include +#include +#include "../PresentMonAPI2/PresentMonAPI.h" + +namespace pmon::mid +{ + struct 
PsoCompileQpcInterval + { + uint64_t startQpc = 0; + uint64_t endQpc = 0; + }; + + inline double PsoCompileWindowSeconds(uint64_t windowQpc, double qpcPeriodSeconds) /* window length in seconds; 0 when the window is empty or the period is not positive */ + { + if (windowQpc == 0 || qpcPeriodSeconds <= 0.) { + return 0.; + } + return double(windowQpc) * qpcPeriodSeconds; + } + + inline uint64_t PsoCompileDurationMsToQpc(double durationMs, double qpcPeriodSeconds) /* milliseconds -> QPC ticks, rounded to the nearest tick; 0 for non-positive or NaN input; the quotient is assumed to fit in uint64_t */ + { + if (!(durationMs > 0.) || !(qpcPeriodSeconds > 0.)) { /* negated comparisons also catch NaN, which must not reach the integer cast */ + return 0; + } + return (uint64_t)(durationMs / (qpcPeriodSeconds * 1000.0) + 0.5); /* round, do not truncate: a duration that went ticks -> ms -> ticks can come back a hair below the integer and would lose a tick */ + } + + inline double PsoCompileQpcToDurationMs(uint64_t durationQpc, double qpcPeriodSeconds) /* QPC ticks -> milliseconds; 0 when either input is not positive */ + { + if (durationQpc == 0 || qpcPeriodSeconds <= 0.) { + return 0.; + } + return double(durationQpc) * qpcPeriodSeconds * 1000.0; + } + + inline bool PsoCompileClipToWindow(uint64_t startQpc, uint64_t endQpc, uint64_t windowOldest, uint64_t windowNewest, + uint64_t& clipStart, uint64_t& clipEnd) /* intersects [startQpc, endQpc] with the window; clipStart/clipEnd are meaningful only when true is returned */ + { + if (endQpc <= windowOldest || startQpc >= windowNewest) { /* touching an edge is not an overlap */ + return false; + } + clipStart = std::max(startQpc, windowOldest); + clipEnd = std::min(endQpc, windowNewest); + return clipEnd > clipStart; + } + + inline double PsoCompileCountRate(uint64_t compileCount, uint64_t windowQpc, double qpcPeriodSeconds) /* compiles per second of window; 0 for an empty window */ + { + const double windowSeconds = PsoCompileWindowSeconds(windowQpc, qpcPeriodSeconds); + if (windowSeconds <= 0.) { + return 0.; + } + return double(compileCount) / windowSeconds; + } + + inline double PsoCompileTimeRateMsPerSecond(double compileDurationMsSum, uint64_t windowQpc, double qpcPeriodSeconds) /* compile milliseconds per second of window; 0 for an empty window */ + { + const double windowSeconds = PsoCompileWindowSeconds(windowQpc, qpcPeriodSeconds); + if (windowSeconds <= 0.) 
{ + return 0.; + } + return compileDurationMsSum / windowSeconds; + } + + inline uint64_t MergePsoCompileBusyQpc(std::vector<PsoCompileQpcInterval> intervals) /* total ticks covered by the union of the intervals, overlaps counted once; taken by value because it is sorted in place; every interval must have endQpc >= startQpc or the unsigned subtraction below wraps */ + { + if (intervals.empty()) { + return 0; + } + std::sort(intervals.begin(), intervals.end(), [](const PsoCompileQpcInterval& a, const PsoCompileQpcInterval& b) { + return a.startQpc < b.startQpc; + }); + uint64_t mergedBusyQpc = 0; + uint64_t curStart = intervals.front().startQpc; /* [curStart, curEnd] is the run currently being grown */ + uint64_t curEnd = intervals.front().endQpc; + for (size_t i = 1; i < intervals.size(); ++i) { + const auto& iv = intervals[i]; + if (iv.startQpc <= curEnd) { /* overlaps or touches the current run: extend it */ + if (iv.endQpc > curEnd) { + curEnd = iv.endQpc; + } + } + else { /* gap: bank the finished run and start a new one */ + mergedBusyQpc += curEnd - curStart; + curStart = iv.startQpc; + curEnd = iv.endQpc; + } + } + mergedBusyQpc += curEnd - curStart; /* bank the last run */ + return mergedBusyQpc; + } + + inline double PsoCompileBusyPercent(uint64_t mergedBusyQpc, uint64_t windowQpc) /* busy share of the window in percent; 0 for an empty window; not clamped, so the result exceeds 100 when mergedBusyQpc > windowQpc */ + { + if (windowQpc == 0) { + return 0.; + } + return 100. * double(mergedBusyQpc) / double(windowQpc); + } + + // Metrics backed by ProcessDataStore::processData; extend as new process ETW metrics are added. 
+ inline bool IsProcessDataMetric(PM_METRIC metric) + { + return metric == PM_METRIC_D3D12_PSO_COMPILE_COUNT || + metric == PM_METRIC_D3D12_PSO_COMPILE_TIME || + metric == PM_METRIC_D3D12_PSO_COMPILE_BUSY_PERCENT; + } +} diff --git a/IntelPresentMon/PresentMonService/ActionExecutionContext.cpp b/IntelPresentMon/PresentMonService/ActionExecutionContext.cpp index a04ff45fc..ca8a687e7 100644 --- a/IntelPresentMon/PresentMonService/ActionExecutionContext.cpp +++ b/IntelPresentMon/PresentMonService/ActionExecutionContext.cpp @@ -1,4 +1,4 @@ -#include "ActionExecutionContext.h" +#include "ActionExecutionContext.h" #include #include #include "../Interprocess/source/act/ActionHelper.h" diff --git a/IntelPresentMon/PresentMonService/FrameBroadcaster.h b/IntelPresentMon/PresentMonService/FrameBroadcaster.h index 811c041bc..cf4a49bd6 100644 --- a/IntelPresentMon/PresentMonService/FrameBroadcaster.h +++ b/IntelPresentMon/PresentMonService/FrameBroadcaster.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "../Interprocess/source/Interprocess.h" #include "../../PresentData/PresentMonTraceConsumer.hpp" #include "../CommonUtilities/win/Utilities.h" @@ -17,12 +17,12 @@ namespace pmon::svc class FrameBroadcaster { public: - using Segment = ipc::OwnedDataSegment; + using Segment = ipc::OwnedDataSegment; FrameBroadcaster(ipc::ServiceComms& comms) : comms_{ comms } {} std::shared_ptr RegisterTarget(uint32_t pid, bool isPlayback, bool isBackpressured) { std::lock_guard lk{ mtx_ }; - auto pSegment = comms_.CreateOrGetFrameDataSegment(pid, isBackpressured); + auto pSegment = comms_.CreateOrGetProcessDataSegment(pid, isBackpressured); auto& store = pSegment->GetStore(); auto& book = store.bookkeeping; // init bookkeeping only once and only here @@ -62,12 +62,27 @@ namespace pmon::svc std::shared_ptr pSegment; { std::lock_guard lk{ mtx_ }; - pSegment = comms_.GetFrameDataSegment(present.ProcessId); + pSegment = comms_.GetProcessDataSegment(present.ProcessId); } if (pSegment) { 
pSegment->GetStore().frameData.Push(FrameData::CopyFrameData(present)); } } + void BroadcastProcessDataSample(uint32_t processId, double psoCompileDurationMs, uint64_t eventCompleteQpc, std::optional timeoutMs = {}) + { + (void)timeoutMs; + std::shared_ptr pSegment; + { + std::lock_guard lk{ mtx_ }; + pSegment = comms_.GetProcessDataSegment(processId); + } + if (pSegment) { + ipc::ProcessDataSample sample{}; + sample.psoCompileDurationMs = psoCompileDurationMs; + sample.eventCompleteQpc = eventCompleteQpc; + pSegment->GetStore().processData.Push(sample); + } + } // Update the single consumer cursor for a backpressured playback ring. Playback // backpressure is SPSC: one producer in the service and one owning client reader. void UpdateReadSerial(uint32_t pid, uint64_t effectiveSerial) @@ -75,7 +90,7 @@ namespace pmon::svc std::shared_ptr pSegment; { std::lock_guard lk{ mtx_ }; - pSegment = comms_.GetFrameDataSegment(pid); + pSegment = comms_.GetProcessDataSegment(pid); } if (pSegment) { pSegment->GetStore().frameData.SetNextRead(effectiveSerial); @@ -87,7 +102,7 @@ namespace pmon::svc std::shared_ptr pSegment; { std::lock_guard lk{ mtx_ }; - pSegment = comms_.GetFrameDataSegment(pid); + pSegment = comms_.GetProcessDataSegment(pid); } if (pSegment) { return pSegment->GetStore().frameData.GetSerialRange().second; @@ -97,7 +112,7 @@ namespace pmon::svc void HandleTargetProcessEvent(const ProcessEvent& targetProcessEvent) { std::lock_guard lk{ mtx_ }; - if (auto pSegment = comms_.GetFrameDataSegment(targetProcessEvent.ProcessId)) { + if (auto pSegment = comms_.GetProcessDataSegment(targetProcessEvent.ProcessId)) { auto& store = pSegment->GetStore(); if (!store.bookkeeping.staticInitComplete && store.bookkeeping.isPlayback) { store.bookkeeping.staticInitComplete = true; @@ -107,10 +122,10 @@ namespace pmon::svc } } - std::vector GetPids() const + std::vector GetProcessDataPids() const { std::lock_guard lk{ mtx_ }; - return comms_.GetFramePids(); + return 
comms_.GetProcessDataPids(); } const ipc::ShmNamer& GetNamer() const { @@ -127,9 +142,9 @@ namespace pmon::svc std::lock_guard lk{ mtx_ }; // publish qpc before taking the lock so future stores created in the gap observe it - for (auto pid : comms_.GetFramePids()) { + for (auto pid : comms_.GetProcessDataPids()) { try { - auto pSeg = comms_.GetFrameDataSegment(pid); + auto pSeg = comms_.GetProcessDataSegment(pid); pSeg->GetStore().bookkeeping.startQpc = startQpc; } catch (...) { diff --git a/IntelPresentMon/PresentMonService/MockPresentMonSession.cpp b/IntelPresentMon/PresentMonService/MockPresentMonSession.cpp index 4fffa5f0e..e0c665112 100644 --- a/IntelPresentMon/PresentMonService/MockPresentMonSession.cpp +++ b/IntelPresentMon/PresentMonService/MockPresentMonSession.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022-2023 Intel Corporation +// Copyright (C) 2022-2023 Intel Corporation // SPDX-License-Identifier: MIT #include "MockPresentMonSession.h" #include "CliOptions.h" @@ -321,6 +321,17 @@ void MockPresentMonSession::ProcessEvents( // Copy any analyzed information from ConsumerThread and early-out if there // isn't any. 
DequeueAnalyzedInfo(processEvents, presentEvents); + if (pm_consumer_ && pBroadcaster) { + std::vector psoCompileEvents; + pm_consumer_->DequeuePsoCompileEvents(psoCompileEvents); + for (const auto& compileEvent : psoCompileEvents) { + if (!IsProcessTracked(compileEvent.ProcessId)) { + continue; + } + const double durationMs = trace_session_.TimestampDeltaToMilliSeconds(compileEvent.DurationQpc); + pBroadcaster->BroadcastProcessDataSample(compileEvent.ProcessId, durationMs, compileEvent.CompileCompleteQpc); + } + } if (processEvents->empty() && presentEvents->empty()) { return; } diff --git a/IntelPresentMon/PresentMonService/PresentMon.cpp b/IntelPresentMon/PresentMonService/PresentMon.cpp index 235e1a3f9..f8c4c2ead 100644 --- a/IntelPresentMon/PresentMonService/PresentMon.cpp +++ b/IntelPresentMon/PresentMonService/PresentMon.cpp @@ -1,7 +1,8 @@ -// Copyright (C) 2022-2023 Intel Corporation +// Copyright (C) 2022-2023 Intel Corporation // SPDX-License-Identifier: MIT #include "PresentMon.h" +#include "../PresentMonAPI2/PresentMonAPI.h" #include "CliOptions.h" #include "Logging.h" #include "..\CommonUtilities\str\String.h" diff --git a/IntelPresentMon/PresentMonService/PresentMon.h b/IntelPresentMon/PresentMonService/PresentMon.h index 21c4473c1..8ba12845e 100644 --- a/IntelPresentMon/PresentMonService/PresentMon.h +++ b/IntelPresentMon/PresentMonService/PresentMon.h @@ -1,10 +1,11 @@ -// Copyright (C) 2022-2023 Intel Corporation +// Copyright (C) 2022-2023 Intel Corporation // SPDX-License-Identifier: MIT #pragma once #include "PresentMonSession.h" #include "EtwLogger.h" #include "FrameBroadcaster.h" #include "MetricUse.h" +#include "../PresentMonAPI2/PresentMonAPI.h" #include "../CommonUtilities/win/Event.h" #include #include @@ -36,7 +37,7 @@ class PresentMon void CheckTraceSessions(); // Force stop trace sessions void StopTraceSessions(); - PM_STATUS UpdateTracking(const std::unordered_set& trackedPids); + PM_STATUS UpdateTracking(const std::unordered_set& 
trackedPids); PM_STATUS SetGpuTelemetryPeriod(std::optional telemetryPeriodRequestsMs) { return pSession_->SetGpuTelemetryPeriod(telemetryPeriodRequestsMs); diff --git a/IntelPresentMon/PresentMonService/PresentMonSession.cpp b/IntelPresentMon/PresentMonService/PresentMonSession.cpp index abbfeaa8a..993a7fbc1 100644 --- a/IntelPresentMon/PresentMonService/PresentMonSession.cpp +++ b/IntelPresentMon/PresentMonService/PresentMonSession.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022-2023 Intel Corporation +// Copyright (C) 2022-2023 Intel Corporation // SPDX-License-Identifier: MIT #include "PresentMonSession.h" #include @@ -12,15 +12,15 @@ pmon::test::service::Status PresentMonSession::GetTestingStatus() const trackedPids.emplace(entry.first); } } - std::set frameStorePids; + std::set processStorePids; if (pBroadcaster) { - for (auto pid : pBroadcaster->GetPids()) { - frameStorePids.emplace(pid); + for (auto pid : pBroadcaster->GetProcessDataPids()) { + processStorePids.emplace(pid); } } return pmon::test::service::Status{ .trackedPids = std::move(trackedPids), - .frameStorePids = std::move(frameStorePids), + .processStorePids = std::move(processStorePids), .telemetryPeriodMs = gpu_telemetry_period_ms_, .etwFlushPeriodMs = etw_flush_period_ms_, }; diff --git a/IntelPresentMon/PresentMonService/RealtimePresentMonSession.cpp b/IntelPresentMon/PresentMonService/RealtimePresentMonSession.cpp index 213d6b372..c8a022676 100644 --- a/IntelPresentMon/PresentMonService/RealtimePresentMonSession.cpp +++ b/IntelPresentMon/PresentMonService/RealtimePresentMonSession.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022-2023 Intel Corporation +// Copyright (C) 2022-2023 Intel Corporation // SPDX-License-Identifier: MIT #include "Logging.h" #include "RealtimePresentMonSession.h" @@ -266,6 +266,27 @@ void RealtimePresentMonSession::DequeueAnalyzedInfo( } } +void RealtimePresentMonSession::AddPsoCompileEvents() +{ + if (!session_active_.load(std::memory_order_acquire) || !pm_consumer_ || 
!pBroadcaster) { + return; + } + + std::vector psoCompileEvents; + pm_consumer_->DequeuePsoCompileEvents(psoCompileEvents); + if (psoCompileEvents.empty()) { + return; + } + + for (const auto& compileEvent : psoCompileEvents) { + if (!IsProcessTracked(compileEvent.ProcessId)) { + continue; + } + const double durationMs = trace_session_.TimestampDeltaToMilliSeconds(compileEvent.DurationQpc); + pBroadcaster->BroadcastProcessDataSample(compileEvent.ProcessId, durationMs, compileEvent.CompileCompleteQpc); + } +} + void RealtimePresentMonSession::AddPresents( std::vector> const& presentEvents, size_t* presentEventIndex, bool recording, bool checkStopQpc, @@ -434,6 +455,7 @@ void RealtimePresentMonSession::ProcessEvents( // Copy any analyzed information from ConsumerThread and early-out if there // isn't any. DequeueAnalyzedInfo(processEvents, presentEvents); + AddPsoCompileEvents(); if (processEvents->empty() && presentEvents->empty()) { return; } diff --git a/IntelPresentMon/PresentMonService/RealtimePresentMonSession.h b/IntelPresentMon/PresentMonService/RealtimePresentMonSession.h index f268407a6..8b2503bcc 100644 --- a/IntelPresentMon/PresentMonService/RealtimePresentMonSession.h +++ b/IntelPresentMon/PresentMonService/RealtimePresentMonSession.h @@ -1,4 +1,4 @@ -// Copyright (C) 2022-2023 Intel Corporation +// Copyright (C) 2022-2023 Intel Corporation // SPDX-License-Identifier: MIT #pragma once #include "PresentMonSession.h" @@ -41,6 +41,7 @@ class RealtimePresentMonSession : public PresentMonSession std::vector* processEvents, std::vector>* presentEvents, std::vector>* terminatedProcesses); + void AddPsoCompileEvents(); void StartOutputThread(); void StopOutputThread(); diff --git a/IntelPresentMon/UnitTests/ProcessDataRateTests.cpp b/IntelPresentMon/UnitTests/ProcessDataRateTests.cpp new file mode 100644 index 000000000..679e8ec38 --- /dev/null +++ b/IntelPresentMon/UnitTests/ProcessDataRateTests.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2026 Intel Corporation +// 
SPDX-License-Identifier: MIT +#include "../PresentMonMiddleware/ProcessDataRate.h" +#include "CppUnitTest.h" +#include + +using namespace Microsoft::VisualStudio::CppUnitTestFramework; + +namespace ProcessDataRateTests +{ + TEST_CLASS(ProcessDataRateTests) + { + public: + TEST_METHOD(CountRateUsesWindowDuration) + { + const double qpcPeriod = 1.0 / 10'000'000.0; + const uint64_t windowQpc = 10'000'000; + Assert::AreEqual(2.0, pmon::mid::PsoCompileCountRate(2, windowQpc, qpcPeriod), 0.0001); + } + + TEST_METHOD(TimeRateSumsCompileMillisecondsPerSecond) + { + const double qpcPeriod = 1.0 / 10'000'000.0; + const uint64_t windowQpc = 10'000'000; + Assert::AreEqual(50.0, pmon::mid::PsoCompileTimeRateMsPerSecond(50.0, windowQpc, qpcPeriod), 0.0001); + } + + TEST_METHOD(ZeroWindowReturnsZeroRates) + { + Assert::AreEqual(0.0, pmon::mid::PsoCompileCountRate(5, 0, 1.0), 0.0001); + Assert::AreEqual(0.0, pmon::mid::PsoCompileTimeRateMsPerSecond(25.0, 0, 1.0), 0.0001); + Assert::AreEqual(0.0, pmon::mid::PsoCompileBusyPercent(100, 0), 0.0001); + } + + TEST_METHOD(BusyPercentMergesNonOverlappingIntervals) + { + const uint64_t windowQpc = 1000; + std::vector intervals{ + { 0, 200 }, + { 400, 600 }, + }; + const uint64_t merged = pmon::mid::MergePsoCompileBusyQpc(std::move(intervals)); + Assert::AreEqual(400ull, merged); + Assert::AreEqual(40.0, pmon::mid::PsoCompileBusyPercent(merged, windowQpc), 0.0001); + } + + TEST_METHOD(BusyPercentMergesOverlappingIntervals) + { + const uint64_t windowQpc = 1000; + std::vector intervals{ + { 100, 600 }, + { 400, 900 }, + }; + const uint64_t merged = pmon::mid::MergePsoCompileBusyQpc(std::move(intervals)); + Assert::AreEqual(800ull, merged); + Assert::AreEqual(80.0, pmon::mid::PsoCompileBusyPercent(merged, windowQpc), 0.0001); + } + + TEST_METHOD(BusyPercentIdenticalOverlapCountsOnce) + { + const uint64_t windowQpc = 1000; + std::vector intervals{ + { 250, 750 }, + { 250, 750 }, + }; + const uint64_t merged = 
pmon::mid::MergePsoCompileBusyQpc(std::move(intervals)); + Assert::AreEqual(500ull, merged); + Assert::AreEqual(50.0, pmon::mid::PsoCompileBusyPercent(merged, windowQpc), 0.0001); + } + + TEST_METHOD(BusyPercentUsesClippedIntervalLength) + { + std::vector intervals{ + { 0, 500 }, + }; + const uint64_t merged = pmon::mid::MergePsoCompileBusyQpc(std::move(intervals)); + Assert::AreEqual(500ull, merged); + Assert::AreEqual(50.0, pmon::mid::PsoCompileBusyPercent(merged, 1000), 0.0001); + } + + TEST_METHOD(ClipToWindowIncludesPartialOverlap) + { + uint64_t clipStart = 0; + uint64_t clipEnd = 0; + const bool clipped = pmon::mid::PsoCompileClipToWindow(50, 250, 100, 200, clipStart, clipEnd); + Assert::IsTrue(clipped); + Assert::AreEqual(100ull, clipStart); + Assert::AreEqual(200ull, clipEnd); + } + + TEST_METHOD(ClipToWindowRejectsNonOverlap) + { + uint64_t clipStart = 0; + uint64_t clipEnd = 0; + Assert::IsFalse(pmon::mid::PsoCompileClipToWindow(0, 50, 100, 200, clipStart, clipEnd)); + Assert::IsFalse(pmon::mid::PsoCompileClipToWindow(250, 300, 100, 200, clipStart, clipEnd)); + } + + TEST_METHOD(QpcToDurationMsInvertsDurationMsToQpc) + { + const double qpcPeriod = 1.0 / 10'000'000.0; + const uint64_t qpc = pmon::mid::PsoCompileDurationMsToQpc(50.0, qpcPeriod); + Assert::AreEqual(50.0, pmon::mid::PsoCompileQpcToDurationMs(qpc, qpcPeriod), 0.0001); + } + }; +} diff --git a/IntelPresentMon/UnitTests/UnitTests.vcxproj b/IntelPresentMon/UnitTests/UnitTests.vcxproj index 4e9499ad0..cac19a377 100644 --- a/IntelPresentMon/UnitTests/UnitTests.vcxproj +++ b/IntelPresentMon/UnitTests/UnitTests.vcxproj @@ -104,6 +104,7 @@ + diff --git a/IntelPresentMon/UnitTests/UnitTests.vcxproj.filters b/IntelPresentMon/UnitTests/UnitTests.vcxproj.filters index c01e1008f..fff201a90 100644 --- a/IntelPresentMon/UnitTests/UnitTests.vcxproj.filters +++ b/IntelPresentMon/UnitTests/UnitTests.vcxproj.filters @@ -6,6 +6,7 @@ + diff --git a/IntelPresentMon/metrics.csv b/IntelPresentMon/metrics.csv index 
3ee493e1b..fe9041ec3 100644 --- a/IntelPresentMon/metrics.csv +++ b/IntelPresentMon/metrics.csv @@ -82,6 +82,9 @@ PM_METRIC_CPU_TEMPERATURE,1,CPU Temperature,Average temperature across all physi PM_METRIC_CPU_CORE_TEMPERATURE,1,CPU Core Temperature,Temperature of each physical CPU core. PM_METRIC_CPU_FREQUENCY,1,CPU Frequency,Clock speed of the CPU. PM_METRIC_CPU_CORE_UTILITY,1,CPU Core Utility,Amount of CPU processing utility being used per core. +PM_METRIC_D3D12_PSO_COMPILE_COUNT,1,D3D12 PSO Compile Count,D3D12 PSO compilation events started every second. +PM_METRIC_D3D12_PSO_COMPILE_TIME,1,D3D12 PSO Compile Time,D3D12 PSO compile time in milliseconds per second (across all CPUs). +PM_METRIC_D3D12_PSO_COMPILE_BUSY_PERCENT,1,D3D12 PSO Compile Busy,Percent of time with D3D12 PSO compilation taking place. ,,, PM_METRIC_INSTRUMENTED_LATENCY,,Instrumented Latency,How long it took from the instrumented start of this frame until the frame was displayed on the screen. PM_METRIC_GPU_EFFECTIVE_FREQUENCY,1,GPU Effective Frequency,Effective clock speed of the GPU cores. 
diff --git a/PresentData/ETW/Microsoft_Windows_Direct3D12.h b/PresentData/ETW/Microsoft_Windows_Direct3D12.h new file mode 100644 index 000000000..304138e32 --- /dev/null +++ b/PresentData/ETW/Microsoft_Windows_Direct3D12.h @@ -0,0 +1,41 @@ +// Copyright (C) 2020-2026 Intel Corporation +// SPDX-License-Identifier: MIT +// +// This file originally generated by etw_list +// version: 2.5.1 +// parameters: --no_event_structs --event=CreatePipelineStateObject::Start --event=CreatePipelineStateObject::Stop --provider=Microsoft-Windows-Direct3D12 +#pragma once + +namespace Microsoft_Windows_Direct3D12 { + +struct __declspec(uuid("{5D8087DD-3A9B-4F56-90DF-49196CDC4F11}")) GUID_STRUCT; +static const auto GUID = __uuidof(GUID_STRUCT); + +enum class Keyword : uint64_t { + Microsoft_Windows_Direct3D12_PerfTiming = 0x2000000000000000, +}; + +enum class Level : uint8_t { + win_Informational = 0x4, +}; + +enum class Channel : uint8_t { + Microsoft_Windows_Direct3D12_PerfTiming = 0x12, +}; + +#define EVENT_DESCRIPTOR_DECL(name_, id_, version_, channel_, level_, opcode_, task_, keyword_) struct name_ { \ + static uint16_t const Id = id_; \ + static uint8_t const Version = version_; \ + static uint8_t const Channel = channel_; \ + static uint8_t const Level = level_; \ + static uint8_t const Opcode = opcode_; \ + static uint16_t const Task = task_; \ + static Keyword const Keyword = (Keyword) keyword_; \ +}; + +EVENT_DESCRIPTOR_DECL(CreatePipelineStateObject_Start , 0x009b, 0x00, 0x12, 0x04, 0x01, 0x0042, 0x2000000000000400); +EVENT_DESCRIPTOR_DECL(CreatePipelineStateObject_Stop , 0x009c, 0x00, 0x12, 0x04, 0x02, 0x0042, 0x2000000000000400); + +#undef EVENT_DESCRIPTOR_DECL + +} diff --git a/PresentData/PresentData.vcxproj b/PresentData/PresentData.vcxproj index ef1430651..5b21a3d76 100644 --- a/PresentData/PresentData.vcxproj +++ b/PresentData/PresentData.vcxproj @@ -345,6 +345,7 @@ + diff --git a/PresentData/PresentData.vcxproj.filters b/PresentData/PresentData.vcxproj.filters index 
df266457d..09f5b1923 100644 --- a/PresentData/PresentData.vcxproj.filters +++ b/PresentData/PresentData.vcxproj.filters @@ -11,6 +11,9 @@ ETW + + ETW + ETW diff --git a/PresentData/PresentMonTraceConsumer.cpp b/PresentData/PresentMonTraceConsumer.cpp index ab18d3ee5..b8e42249d 100644 --- a/PresentData/PresentMonTraceConsumer.cpp +++ b/PresentData/PresentMonTraceConsumer.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2017-2026 Intel Corporation +// Copyright (C) 2017-2026 Intel Corporation // Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved // SPDX-License-Identifier: MIT @@ -6,6 +6,7 @@ #include "ETW/Intel_PresentMon.h" #include "ETW/Microsoft_Windows_D3D9.h" +#include "ETW/Microsoft_Windows_Direct3D12.h" #include "ETW/Microsoft_Windows_Dwm_Core.h" #include "ETW/Microsoft_Windows_Dwm_Core_Win7.h" #include "ETW/Microsoft_Windows_DXGI.h" @@ -351,6 +352,51 @@ void PMTraceConsumer::HandleD3D9Event(EVENT_RECORD* pEventRecord) } } +void PMTraceConsumer::HandleD3D12Event(EVENT_RECORD* pEventRecord) +{ + auto const& hdr = pEventRecord->EventHeader; + if (!IsProcessTrackedForFiltering(hdr.ProcessId)) { + return; + } + + PsoCompileActivityKey key{}; + key.processId = hdr.ProcessId; + key.activityId = hdr.ActivityId; + + switch (hdr.EventDescriptor.Id) { + case Microsoft_Windows_Direct3D12::CreatePipelineStateObject_Start::Id: + { + const uint64_t compileStartQpc = (uint64_t)hdr.TimeStamp.QuadPart; + std::lock_guard lock(mPsoCompileEventMutex); + mPendingPsoCompileStartQpc[key] = compileStartQpc; + break; + } + case Microsoft_Windows_Direct3D12::CreatePipelineStateObject_Stop::Id: + { + const uint64_t compileCompleteQpc = (uint64_t)hdr.TimeStamp.QuadPart; + PsoCompileCompletedEvent completed{}; + completed.ProcessId = hdr.ProcessId; + completed.CompileCompleteQpc = compileCompleteQpc; + { + std::lock_guard lock(mPsoCompileEventMutex); + auto it = mPendingPsoCompileStartQpc.find(key); + if (it == mPendingPsoCompileStartQpc.end()) { + break; + } + if 
(compileCompleteQpc >= it->second) { + completed.DurationQpc = compileCompleteQpc - it->second; + } + mPendingPsoCompileStartQpc.erase(it); + mCompletedPsoCompileEvents.push_back(completed); + } + break; + } + default: + assert(!mFilteredEvents); + break; + } +} + void PMTraceConsumer::HandleDXGIEvent(EVENT_RECORD* pEventRecord) { auto const& hdr = pEventRecord->EventHeader; @@ -3582,6 +3628,12 @@ void PMTraceConsumer::DequeueProcessEvents(std::vector& outProcess outProcessEvents.swap(mProcessEvents); } +void PMTraceConsumer::DequeuePsoCompileEvents(std::vector& outPsoCompileEvents) +{ + std::lock_guard lock(mPsoCompileEventMutex); + outPsoCompileEvents.swap(mCompletedPsoCompileEvents); +} + void PMTraceConsumer::DequeuePresentEvents(std::vector>& outPresentEvents) { outPresentEvents.clear(); @@ -3684,6 +3736,12 @@ void PMTraceConsumer::ResetPresentTrackingData(bool shrink) { mProcessEvents.clear(); } + { + std::lock_guard lock(mPsoCompileEventMutex); + mPendingPsoCompileStartQpc.clear(); + mCompletedPsoCompileEvents.clear(); + } + // Clear and potentially shrink present tracking maps if (shrink) { decltype(mPresentByThreadId){}.swap(mPresentByThreadId); @@ -3868,4 +3926,4 @@ void PMTraceConsumer::SetAppTimingDataAsComplete(uint32_t processId, uint32_t ap DebugAssert(ii->second.ProcessId == processId); ii->second.PresentCompleted = true; } -} \ No newline at end of file +} diff --git a/PresentData/PresentMonTraceConsumer.hpp b/PresentData/PresentMonTraceConsumer.hpp index 858cf3c68..4a3d1c042 100644 --- a/PresentData/PresentMonTraceConsumer.hpp +++ b/PresentData/PresentMonTraceConsumer.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2017-2026 Intel Corporation +// Copyright (C) 2017-2026 Intel Corporation // Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved // SPDX-License-Identifier: MIT #pragma once @@ -169,6 +169,12 @@ struct ProcessEvent { bool IsStartEvent = false; // Whether this is a start event (true) or a stop event (false). 
}; +struct PsoCompileCompletedEvent { + uint32_t ProcessId = 0; + uint64_t CompileCompleteQpc = 0; + uint64_t DurationQpc = 0; +}; + struct PresentEvent { uint64_t PresentStartTime; // QPC value of the first event related to the Present (D3D9, DXGI, or DXGK Present_Start) uint32_t ProcessId; // ID of the process that presented @@ -353,6 +359,7 @@ struct PMTraceConsumer // separate swapchains may appear out of order. void DequeueProcessEvents(std::vector& outProcessEvents); + void DequeuePsoCompileEvents(std::vector& outPsoCompileEvents); void DequeuePresentEvents(std::vector>& outPresentEvents); uint32_t GetNumOverflowedPresents() const { @@ -386,6 +393,33 @@ struct PMTraceConsumer // Mutexs to protect consumer/dequeue access from different threads: std::mutex mProcessEventMutex; std::mutex mPresentEventMutex; + + struct PsoCompileActivityKey { + uint32_t processId = 0; + GUID activityId = {}; + bool operator==(const PsoCompileActivityKey& other) const + { + return processId == other.processId && + (bool)InlineIsEqualGUID(activityId, other.activityId); + } + }; + + struct PsoCompileActivityKeyHash { + size_t operator()(const PsoCompileActivityKey& key) const + { + size_t h = std::hash{}(key.processId); + h = pmon::util::hash::HashCombine(h, std::hash{}((uint64_t)key.activityId.Data1)); + h = pmon::util::hash::HashCombine(h, std::hash{}(((uint64_t)key.activityId.Data2 << 32) | key.activityId.Data3)); + uint64_t data4Part = 0; + memcpy(&data4Part, key.activityId.Data4, sizeof(data4Part)); + h = pmon::util::hash::HashCombine(h, std::hash{}(data4Part)); + return h; + } + }; + + std::unordered_map mPendingPsoCompileStartQpc; + std::vector mCompletedPsoCompileEvents; + std::mutex mPsoCompileEventMutex; // condition variable to signal when output ring space becomes available, used for backpressure in offline mode std::condition_variable mCompletedRingCondition; // event used to signal when new events are available for dequeing @@ -588,6 +622,7 @@ struct PMTraceConsumer 
void HandleProcessEvent(EVENT_RECORD* pEventRecord); void HandleDXGIEvent(EVENT_RECORD* pEventRecord); void HandleD3D9Event(EVENT_RECORD* pEventRecord); + void HandleD3D12Event(EVENT_RECORD* pEventRecord); void HandleDXGKEvent(EVENT_RECORD* pEventRecord); void HandleWin32kEvent(EVENT_RECORD* pEventRecord); void HandleDWMEvent(EVENT_RECORD* pEventRecord); diff --git a/PresentData/PresentMonTraceSession.cpp b/PresentData/PresentMonTraceSession.cpp index bba9ec826..8c80dcc62 100644 --- a/PresentData/PresentMonTraceSession.cpp +++ b/PresentData/PresentMonTraceSession.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2017-2024 Intel Corporation +// Copyright (C) 2017-2024 Intel Corporation // Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved // SPDX-License-Identifier: MIT @@ -8,6 +8,7 @@ #include "NvidiaTraceConsumer.hpp" #include "ETW/Microsoft_Windows_D3D9.h" +#include "ETW/Microsoft_Windows_Direct3D12.h" #include "ETW/Microsoft_Windows_Dwm_Core.h" #include "ETW/Microsoft_Windows_Dwm_Core_Win7.h" #include "ETW/Microsoft_Windows_DXGI.h" @@ -276,6 +277,7 @@ void DisableProviders(TRACEHANDLE sessionHandle) ULONG status = 0; status = EnableTraceEx2(sessionHandle, &Intel_PresentMon::GUID, EVENT_CONTROL_CODE_DISABLE_PROVIDER, 0, 0, 0, 0, nullptr); status = EnableTraceEx2(sessionHandle, &Microsoft_Windows_D3D9::GUID, EVENT_CONTROL_CODE_DISABLE_PROVIDER, 0, 0, 0, 0, nullptr); + status = EnableTraceEx2(sessionHandle, &Microsoft_Windows_Direct3D12::GUID, EVENT_CONTROL_CODE_DISABLE_PROVIDER, 0, 0, 0, 0, nullptr); status = EnableTraceEx2(sessionHandle, &Microsoft_Windows_DXGI::GUID, EVENT_CONTROL_CODE_DISABLE_PROVIDER, 0, 0, 0, 0, nullptr); status = EnableTraceEx2(sessionHandle, &Microsoft_Windows_Dwm_Core::GUID, EVENT_CONTROL_CODE_DISABLE_PROVIDER, 0, 0, 0, 0, nullptr); status = EnableTraceEx2(sessionHandle, &Microsoft_Windows_Dwm_Core::Win7::GUID, EVENT_CONTROL_CODE_DISABLE_PROVIDER, 0, 0, 0, 0, nullptr); @@ -363,6 +365,10 @@ void CALLBACK 
EventRecordCallback(EVENT_RECORD* pEventRecord) session->mPMConsumer->HandleD3D9Event(pEventRecord); return; } + if (hdr.ProviderId == Microsoft_Windows_Direct3D12::GUID) { + session->mPMConsumer->HandleD3D12Event(pEventRecord); + return; + } if (hdr.ProviderId == Microsoft_Windows_Kernel_Process::GUID || hdr.ProviderId == NT_Process::GUID) { session->mPMConsumer->HandleProcessEvent(pEventRecord); @@ -933,6 +939,15 @@ ULONG EnableProvidersListing( if (status != ERROR_SUCCESS) return status; + // Microsoft_Windows_Direct3D12 + // + provider.ClearFilter(); + provider.AddEvent(); + provider.AddEvent(); + status = provider.Enable(sessionHandle, Microsoft_Windows_Direct3D12::GUID); + if (status != ERROR_SUCCESS) return status; + + // Intel_PresentMon // if (pmConsumer->mTrackFrameType || pmConsumer->mTrackPMMeasurements || pmConsumer->mTrackAppTiming) { diff --git a/Tools/collect_etw_info.cmd b/Tools/collect_etw_info.cmd index 6e70f552f..75475ec24 100644 --- a/Tools/collect_etw_info.cmd +++ b/Tools/collect_etw_info.cmd @@ -35,6 +35,11 @@ set events=%events% --event=Present::Start set events=%events% --event=Present::Stop call :etw_list "Microsoft-Windows-D3D9" "%out_dir%\Microsoft_Windows_D3D9.h" +set events= +set events=%events% --event=CreatePipelineStateObject::Start +set events=%events% --event=CreatePipelineStateObject::Stop +call :etw_list "Microsoft-Windows-Direct3D12" "%out_dir%\Microsoft_Windows_Direct3D12.h" + set events= set events=%events% --event=MILEVENT_MEDIA_UCE_PROCESSPRESENTHISTORY_GetPresentHistory::Info set events=%events% --event=SCHEDULE_PRESENT::Start