Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.

Commit b36b68f

Browse files
committed
Update llpc from commit 4bd41bcf
Set SLC=0 for ATM LowerGpuRt: fix a type confusion Honor NonUniform decorations on OpAccessChain operands Force WGP when NGG is in passthrough mode Use idxen modifier for (RW)StructuredBuffers Compatible with LLVM upstream change that moved Utility funcs to Utils Include Line Correlation Info for Tools via LLPC [Continuations] Cleanup Complete op lowering [CompilerUtils] Improve handling of freeze in ValueOriginTracker [compilerutils] Add GetGlobalInModule to CrossModuleInliner amdllpc: some usability improvements [Continuations] Add SpecializeDriverShadersPass Split up gl_in array type. [Continuations] Remove stack lowering define Use inbounds gep to index row major matrix llvmraytracing: Remove support for _AmdContinuationsGetFlags Properly encapsulate state that affects GPURT specialization llvmraytracing: only support a waitmask of -1 llvmraytracing: remove remaining traces of EnqueueCall compilerutils: fix a warning [LowerGlobals] Mark globals with `buffer.index` users as readonly Implement structural GEP dialect for in/out llvmraytracing: add LLPC/LLVM scratch/global address spaces llpc/ProcessGpuRtLibrary: use earlyGpurtTransform [Continuations] Make use of `llvm::zip` in `CleanupContinuationsPass::updateCpsFunctionArgs` Adjust the llvm-tblgen memory limit lgc: split lowering of GroupMemcpyOp for mesh/task shaders to MeshTaskShader PatchResourceCollect: stop cleaning undef output value in some cases [Continuations] Freeze poison that is stored to payload Fix wrong pipeline dump message Fix issues with coherent Update SPIR-V header to latest version Simplify load of attribute ring buffer descriptor [Continuations] Enable Traversal specialization test Adjust tests to take into account upstream overload of rsrc and samp arguments [Continuations] Fix signed/unsigned comparison warning LowerRaytracingPipeline: tease apart return handling and any-hit exit handling LowerRaytracingPipeline: unify function end handling lgc: support int4 in cooperative matrix Add 
rounding mode for PackHalf2x16 [Continuations] Fix unused variable warning lgc: Add MsgPackScanner [Continuations] Remove `LegacyCleanupContinuations` pass LowerCooperativeMatrix: fix compile warning due to missing break statement lgc: New RegStackUsage to propagate reg/stack usage [LGC] Add helper lane state for subgroups Add client name to SPIR-V frontend Use more flexible readfirstlane Gate argument definition for `GEP::collectOffset`. [Continuations] Introduce dummy `csp` argument for `lgc.cps.jump` [Continuations] Tolerate non-waiting AwaitTraversal Add AmdExtD3DShaderIntrinsics_LoadDwordAtAddrx3 Add more BuiltIn which has primitive index in mesh shader Rewrite the helper getShaderModuleUsageInfo [Continuations] Replace isLgcRtOp helper with isDialectOp helper from dialects Downgrade scope Device to Workgroup if permitted Optimize performance for PrimSetup Update submodule llvm-dialects Remove inactive built-ins for last vertex processing stage Fix internal hash for color export shader Fix call to lookupIntrinsicID [Continuations] Move Simplifying GEP helpers to CompilerUtils [PatchBufferOp] Generate struct buffer cmpxchg intrinsics [LGC] Refactor default wave size setting [Continuations] Derive `DispatchSystemData` type from `_cont_DispatchRaysIndex3` [Continuations] Replace "lgc.rt" starts_with check Rename some classes and files Optimize PointSize write when the value is 1.0 [Continuations] Handle `_AmdGetShaderRecordIndex` calls [LowerBufferOperations] Check for uniform buffer pointers for s_buffer_load lgc: Mark applicable LgcDialect and Builder ops as NoDivergenceSource
1 parent 872ddfd commit b36b68f

400 files changed

Lines changed: 16539 additions & 5242 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CMakeLists.txt

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,11 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
206206
if (TARGET llvm-dialects-example)
207207
set_property(TARGET llvm-dialects-example PROPERTY FOLDER Misc)
208208
endif()
209-
if (LLVM_OPTIMIZED_TABLEGEN)
209+
if (NOT WIN32 AND LLVM_OPTIMIZED_TABLEGEN)
210+
#if _WIN32
211+
# These targets don't exist on Windows when CMake is first invoked.
212+
# They are created later at build time, when the cross-compilation takes place.
213+
#endif
210214
set_property(TARGET llvm_nm_target PROPERTY FOLDER Misc)
211215
set_property(TARGET llvm_readobj_target PROPERTY FOLDER Misc)
212216
set_property(TARGET llvm-min-tblgen-host PROPERTY FOLDER Misc)
@@ -215,9 +219,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
215219
set_property(TARGET CREATE_LLVM_NATIVE PROPERTY FOLDER Misc)
216220
endif()
217221
#if _WIN32
218-
if (MSVC)
219-
# We can't use LLVM_OPTIMIZED_TABLEGEN on Windows, and the 32-bit llvm-tblgen can easily
220-
# to run out of memory. Tell the linker to allow addresses larger than 2GB.
222+
if(MSVC)
223+
# The 32-bit llvm-tblgen can easily run out of memory. Tell the linker to allow addresses larger than 2GB.
221224
set_property(TARGET llvm-tblgen PROPERTY LINK_FLAGS "/LARGEADDRESSAWARE")
222225
endif()
223226
#endif

cmake/LlvmMainRevision.cmake

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
##
2+
#######################################################################################################################
3+
#
4+
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
5+
#
6+
# Permission is hereby granted, free of charge, to any person obtaining a copy
7+
# of this software and associated documentation files (the "Software"), to
8+
# deal in the Software without restriction, including without limitation the
9+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
# sell copies of the Software, and to permit persons to whom the Software is
11+
# furnished to do so, subject to the following conditions:
12+
#
13+
# The above copyright notice and this permission notice shall be included in all
14+
# copies or substantial portions of the Software.
15+
#
16+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22+
# IN THE SOFTWARE.
23+
#
24+
#######################################################################################################################
25+
26+
# Include this file to set LLVM_MAIN_REVISION, for when it is needed at cmake level rather than C++ level.
27+
28+
if (NOT LLVM_MAIN_REVISION)
29+
# A sneaky way to get the LLVM source directory, assuming we are included from a LLVM external
30+
# project such as LGC or LLPCFE.
31+
get_filename_component(LLVM_SOURCE_DIR "${CPACK_RESOURCE_FILE_LICENSE}" DIRECTORY)
32+
if (NOT LLVM_SOURCE_DIR)
33+
message(FATAL_ERROR "LLVM_SOURCE_DIR not found")
34+
endif()
35+
36+
# Scrape LLVM_MAIN_REVISION out of llvm-config.h.cmake. If not found, set to a high number.
37+
set(LLVM_CONFIG_H_NAME "${LLVM_SOURCE_DIR}/include/llvm/Config/llvm-config.h.cmake")
38+
file(READ "${LLVM_CONFIG_H_NAME}" LLVM_CONFIG_H_CONTENTS)
39+
string(REGEX REPLACE "^.* LLVM_MAIN_REVISION ([0-9]+).*$" "\\1" LLVM_MAIN_REVISION "${LLVM_CONFIG_H_CONTENTS}")
40+
if ("${LLVM_MAIN_REVISION}" STREQUAL "${LLVM_CONFIG_H_CONTENTS}")
41+
set(LLVM_MAIN_REVISION 999999999)
42+
endif()
43+
endif()
44+

compilerutils/include/compilerutils/CompilerUtils.h

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,20 @@ struct CrossModuleInlinerResult {
9292
// One CrossModuleInliner instance must only be used for a single target module, otherwise things can go wrong.
9393
class CrossModuleInliner {
9494
public:
95-
CrossModuleInliner() = default;
95+
// Callback passed to getGlobalInModule, that tries to find an existing GlobalValue in the target module or copies it
96+
// to the target module.
97+
using GetGlobalInModuleTy = std::function<llvm::GlobalValue &(CrossModuleInliner &inliner,
98+
llvm::GlobalValue &sourceGV, llvm::Module &targetGv)>;
99+
100+
CrossModuleInliner(GetGlobalInModuleTy getGlobalInModuleCallback = defaultGetGlobalInModuleFunc);
101+
102+
// Do not allow copy but allow moving
103+
CrossModuleInliner(const CrossModuleInliner &) = delete;
104+
CrossModuleInliner(CrossModuleInliner &&);
105+
CrossModuleInliner &operator=(const CrossModuleInliner &) = delete;
106+
CrossModuleInliner &operator=(CrossModuleInliner &&);
107+
108+
~CrossModuleInliner() noexcept;
96109

97110
// Inline a call to a function even if the called function is in a different module.
98111
// If the result of that function call should be used, a use must exist before calling this function.
@@ -118,19 +131,21 @@ class CrossModuleInliner {
118131
// target module.
119132
llvm::GlobalValue *findCopiedGlobal(llvm::GlobalValue &sourceGv, llvm::Module &targetModule);
120133

134+
// Default implementation that finds global values using getCrossModuleName.
135+
static llvm::GlobalValue &defaultGetGlobalInModuleFunc(CrossModuleInliner &inliner, llvm::GlobalValue &sourceGv,
136+
llvm::Module &targetModule);
137+
121138
static std::string getCrossModuleName(llvm::GlobalValue &gv);
122139

123140
private:
124141
// Checks that we haven't processed a different target module earlier.
125-
void checkTargetModule(llvm::Module &targetModule) {
126-
if (lastUsedTargetModule == nullptr)
127-
lastUsedTargetModule = &targetModule;
128-
else
129-
assert(lastUsedTargetModule == &targetModule);
130-
}
142+
void checkTargetModule(llvm::Module &targetModule);
143+
144+
struct Impl;
145+
class CrossModuleValueMaterializer;
131146

132-
llvm::SmallDenseMap<llvm::GlobalValue *, llvm::GlobalValue *> mappedGlobals;
133-
llvm::Module *lastUsedTargetModule = nullptr; // used to check that we don't use different target modules
147+
// Split into Impl class, so we don't need to include everything in this header.
148+
std::unique_ptr<Impl> impl;
134149
};
135150

136151
// Essentially RAUW for pointers for the case that these use different address
@@ -140,6 +155,13 @@ class CrossModuleInliner {
140155
// The caller has to handle the erasure afterwards.
141156
void replaceAllPointerUses(llvm::IRBuilder<> *builder, llvm::Value *oldPointerValue, llvm::Value *newPointerValue,
142157
llvm::SmallVectorImpl<llvm::Instruction *> &toBeRemoved);
158+
159+
// Create a GEP if idx is non-zero, otherwise return the pointer.
160+
llvm::Value *simplifyingCreateConstGEP1_32(llvm::IRBuilder<> &builder, llvm::Type *ty, llvm::Value *ptr, uint32_t idx);
161+
162+
// Create an inbounds GEP if idx is non-zero, otherwise return the pointer.
163+
llvm::Value *simplifyingCreateConstInBoundsGEP1_32(llvm::IRBuilder<> &builder, llvm::Type *ty, llvm::Value *ptr,
164+
uint32_t idx);
143165
} // namespace CompilerUtils
144166

145167
namespace llvm {
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
***********************************************************************************************************************
3+
*
4+
* Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to
8+
* deal in the Software without restriction, including without limitation the
9+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
* sell copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22+
* IN THE SOFTWARE.
23+
*
24+
**********************************************************************************************************************/
25+
26+
//===- DxilUtils.h - --------------------------------------------------------------------------------------------===//
27+
//
28+
// Shared DXIL-related helpers.
29+
//
30+
//===--------------------------------------------------------------------------------------------------------------===//
31+
32+
#pragma once
33+
34+
#include "llvm/ADT/StringRef.h"
35+
36+
namespace CompilerUtils::dxil {
37+
38+
// Try to demangle function names in the DXIL format:
39+
// ...\01?FuncName@@...
40+
// @param inputName : Name of the callee
41+
// @returns: the original string if the name was not demangleable or the demangled function name.
42+
inline llvm::StringRef tryDemangleFunctionName(llvm::StringRef inputName) {
43+
assert(!inputName.empty());
44+
45+
constexpr static llvm::StringRef manglingPrefix = "\01?";
46+
47+
// Expect both characters to be there, and `\01?` to occur before `@@`
48+
size_t start = inputName.find(manglingPrefix);
49+
if (start == llvm::StringRef::npos)
50+
return inputName;
51+
52+
// The case start >= end is implicitly checked by the second call to `find`.
53+
const size_t end = inputName.find("@@", start);
54+
if (end == llvm::StringRef::npos)
55+
return inputName;
56+
57+
start += manglingPrefix.size();
58+
59+
// Extract unmangled name: Return everything after the first occurrence of `\01?` and before the first occurrence of
60+
// `@@` after `?`.
61+
return inputName.substr(start, end - start);
62+
}
63+
64+
} // namespace CompilerUtils::dxil

compilerutils/include/compilerutils/ValueOriginTracking.h

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,12 @@ namespace ValueTracking {
106106
struct SliceStatus {
107107
// As the actual enum is contained within the struct, its values don't leak into the containing namespace,
108108
// and it's not possible to implicitly cast a SliceStatus to an int, so it's as good as an enum class.
109+
// The UndefOrPoison case always originates from a `poison` or `undef` value.
110+
// We must be careful with freeze instructions operating on such values, see FreezeHandlingMode.
109111
enum StatusEnum : uint32_t { Constant = 0x1, Dynamic = 0x2, UndefOrPoison = 0x4 };
110112
StatusEnum S = {};
111113

114+
// Intentionally allow implicit conversion:
112115
SliceStatus(StatusEnum S) : S{S} {}
113116

114117
static SliceStatus makeEmpty() { return static_cast<StatusEnum>(0); }
@@ -188,6 +191,45 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const ValueInfo &VI);
188191
// constant and then always propagated, allowing to replace the argument by the initial constant.
189192
class ValueOriginTracker {
190193
public:
194+
// Configuration options for ValueOriginTracker.
195+
struct Options {
196+
unsigned BytesPerSlice = 4;
197+
unsigned MaxBytesPerValue = 512;
198+
199+
// Freeze instructions are problematic for value origin tracking.
200+
//
201+
// While `freeze poison` is intended to help optimization by allowing it to pick any value, we cannot just
202+
// treat `freeze poison` as UndefOrPoison, because an optimization relying on that would need to ensure
203+
// other users of the optimized `freeze poison` observe the same value picked by optimization, and value origin
204+
// tracking does not allow querying which `freeze poison` instructions a particular slice originates from.
205+
// Instead, the only safe way to treat `freeze poison` is dynamic.
206+
//
207+
// In some cases, e.g. when not optimizing based on the analysis result, and instead just using it for sanity
208+
// checking in testing, treating `freeze poison` as UndefOrPoison however is the intended result, and if
209+
// value origin tracking implicitly considered all `freeze poison` as dynamic, then client code would need to
210+
// propagate the intended UndefOrPoison semantics manually.
211+
//
212+
// The FreezeHandlingMode enum avoids that by letting the client specify how `freeze poison` and
213+
// `freeze undef` should be handled.
214+
//
215+
// If we want to optimize based on `freeze poison`, one option would be eliminating all freeze instructions by some
216+
// constant (e.g. `zeroinitializer`) before running the analysis, as some LLVM transforms like instcombine do.
217+
// This ensures that not only the analysis sees a common constant value for `freeze poison`, but also ensures other
218+
// uses of `freeze poison` observe the same value.
219+
//
220+
// As a less conservative potential future improvement, we could instead explicitly keep track of FrozenPoison
221+
// slices in value origin tracking, and when merging FrozenPoison with constants, recording which `freeze poison`
222+
// values need to be replaced by which constants to allow that.
223+
enum class FreezeHandlingMode {
224+
// Treat slices in freeze instructions that are UndefOrPoison in the freeze operand as dynamic.
225+
Dynamic = 0,
226+
// Always forward value infos of freeze operands for freeze instructions.
227+
// In particular, `freeze poison` is always reported as UndefOrPoison.
228+
Forward
229+
};
230+
FreezeHandlingMode FreezeMode = FreezeHandlingMode::Dynamic;
231+
};
232+
191233
using ValueInfo = ValueTracking::ValueInfo;
192234
// In some cases, client code has additional information on where values originate from, or
193235
// where they should be assumed to originate from just for the purpose of the analysis.
@@ -225,10 +267,9 @@ class ValueOriginTracker {
225267
// Also, only a single status on assumptions is allowed.
226268
using ValueOriginAssumptions = llvm::DenseMap<llvm::Instruction *, ValueInfo>;
227269

228-
ValueOriginTracker(const llvm::DataLayout &DL, unsigned BytesPerSlice = 4, unsigned MaxBytesPerValue = 512,
270+
ValueOriginTracker(const llvm::DataLayout &DL, Options Opts,
229271
ValueOriginAssumptions OriginAssumptions = ValueOriginAssumptions{})
230-
: DL{DL}, BytesPerSlice{BytesPerSlice}, MaxBytesPerValue{MaxBytesPerValue},
231-
OriginAssumptions(std::move(OriginAssumptions)) {}
272+
: DL{DL}, Opts{Opts}, OriginAssumptions(std::move(OriginAssumptions)) {}
232273

233274
// Computes a value info for the given value.
234275
// If the value has been seen before, returns a cache hit from the ValueInfos map.
@@ -247,8 +288,7 @@ class ValueOriginTracker {
247288
private:
248289
struct ValueInfoBuilder;
249290
const llvm::DataLayout &DL;
250-
unsigned BytesPerSlice = 0;
251-
unsigned MaxBytesPerValue = 0;
291+
Options Opts;
252292
ValueOriginAssumptions OriginAssumptions;
253293
llvm::DenseMap<llvm::Value *, ValueInfo> ValueInfos;
254294

0 commit comments

Comments
 (0)