Skip to content

Commit 075abf0

Browse files
hegdevasant authored and gregkh committed
iommu/amd/pgtbl: Fix possible race while increase page table level
[ Upstream commit 1e56310 ]

The AMD IOMMU host page table implementation supports dynamic page table levels (up to 6 levels), starting with a 3-level configuration that expands based on the IOVA address. The kernel maintains a root pointer and the current page table level to enable proper page table walks in alloc_pte()/fetch_pte() operations.

The IOMMU IOVA allocator initially starts with 32-bit addresses and, once that range is exhausted, switches to 64-bit addresses (the maximum address is determined by the IOMMU and device DMA capability). To support larger IOVAs, the AMD IOMMU driver increases the page table level.

But in the unmap path (iommu_v1_unmap_pages()), fetch_pte() reads pgtable->[root/mode] without a lock. So it is possible that, in an extreme corner case, when increase_address_space() is updating pgtable->[root/mode], fetch_pte() reads the wrong page table level (pgtable->mode). It then compares the value with the level encoded in the page table and returns NULL. This will result in iommu_unmap ops failing, and the upper layer may retry/log WARN_ON.

    CPU 0                                      CPU 1
    ------                                     ------
    map pages                                  unmap pages
    alloc_pte() -> increase_address_space()    iommu_v1_unmap_pages() -> fetch_pte()
      pgtable->root = pte (new root value)
                                                 READ pgtable->[mode/root]
                                                   Reads new root, old mode
      Updates mode (pgtable->mode += 1)

Since page table level updates are infrequent and already synchronized with a spinlock, implement a seqcount to enable lock-free read operations on the read path.

Fixes: 754265b ("iommu/amd: Fix race in increase_address_space()")
Reported-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Cc: stable@vger.kernel.org
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
[ Adapted pgtable->mode and pgtable->root to use domain->iop.mode and domain->iop.root ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 564f231 commit 075abf0

2 files changed

Lines changed: 23 additions & 4 deletions

File tree

drivers/iommu/amd/amd_iommu_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,7 @@ struct amd_irte_ops;
540540
container_of((x), struct amd_io_pgtable, pgtbl_cfg)
541541

542542
struct amd_io_pgtable {
543+
seqcount_t seqcount; /* Protects root/mode update */
543544
struct io_pgtable_cfg pgtbl_cfg;
544545
struct io_pgtable iop;
545546
int mode;

drivers/iommu/amd/io_pgtable.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <linux/slab.h>
1818
#include <linux/types.h>
1919
#include <linux/dma-mapping.h>
20+
#include <linux/seqlock.h>
2021

2122
#include <asm/barrier.h>
2223

@@ -171,8 +172,11 @@ static bool increase_address_space(struct protection_domain *domain,
171172

172173
*pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
173174

175+
write_seqcount_begin(&domain->iop.seqcount);
174176
domain->iop.root = pte;
175177
domain->iop.mode += 1;
178+
write_seqcount_end(&domain->iop.seqcount);
179+
176180
amd_iommu_update_and_flush_device_table(domain);
177181
amd_iommu_domain_flush_complete(domain);
178182

@@ -199,6 +203,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
199203
gfp_t gfp,
200204
bool *updated)
201205
{
206+
unsigned int seqcount;
202207
int level, end_lvl;
203208
u64 *pte, *page;
204209

@@ -214,8 +219,14 @@ static u64 *alloc_pte(struct protection_domain *domain,
214219
}
215220

216221

217-
level = domain->iop.mode - 1;
218-
pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
222+
do {
223+
seqcount = read_seqcount_begin(&domain->iop.seqcount);
224+
225+
level = domain->iop.mode - 1;
226+
pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
227+
} while (read_seqcount_retry(&domain->iop.seqcount, seqcount));
228+
229+
219230
address = PAGE_SIZE_ALIGN(address, page_size);
220231
end_lvl = PAGE_SIZE_LEVEL(page_size);
221232

@@ -292,15 +303,20 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
292303
unsigned long *page_size)
293304
{
294305
int level;
306+
unsigned int seqcount;
295307
u64 *pte;
296308

297309
*page_size = 0;
298310

299311
if (address > PM_LEVEL_SIZE(pgtable->mode))
300312
return NULL;
301313

302-
level = pgtable->mode - 1;
303-
pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
314+
do {
315+
seqcount = read_seqcount_begin(&pgtable->seqcount);
316+
level = pgtable->mode - 1;
317+
pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
318+
} while (read_seqcount_retry(&pgtable->seqcount, seqcount));
319+
304320
*page_size = PTE_LEVEL_PAGE_SIZE(level);
305321

306322
while (level > 0) {
@@ -524,6 +540,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
524540
cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE,
525541
cfg->tlb = &v1_flush_ops;
526542

543+
seqcount_init(&pgtable->seqcount);
544+
527545
pgtable->iop.ops.map_pages = iommu_v1_map_pages;
528546
pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages;
529547
pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;

0 commit comments

Comments
 (0)