From b3578eb7cd04da378f54a4a20fe062df4da82f8d Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Tue, 18 Jun 2024 15:48:59 +0100
Subject: [PATCH] iommu/dma: Add ability to configure NUMA allocation policy
 for remapped allocations

Add iommu_dma_numa_policy= kernel parameter which can be used to modify
the NUMA allocation policy of remapped buffer allocations.

Policy is only used for devices which are not associated with a NUMA node.

Syntax identical to what tmpfs accepts as its mpol argument is accepted.

Some examples:

 iommu_dma_numa_policy=interleave
 iommu_dma_numa_policy=interleave=skip-interleave
 iommu_dma_numa_policy=bind:0-3,5,7,9-15
 iommu_dma_numa_policy=bind=static:1-2

Signed-off-by: Tvrtko Ursulin
---
 drivers/iommu/dma-iommu.c | 95 +++++++++++++++++++++++++++++++++++++++
 include/linux/mempolicy.h | 10 +++++
 mm/mempolicy.c            | 10 +++++
 3 files changed, 115 insertions(+)

--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include <linux/mempolicy.h>
 #include
 #include
 #include
@@ -883,11 +884,90 @@ static void __iommu_dma_free_pages(struc
 	kvfree(pages);
 }
 
+#if IS_ENABLED(CONFIG_NUMA)
+/*
+ * Policy applied to remapped allocations for devices without a NUMA node.
+ * MPOL_LOCAL (the initial mode) leaves the allocation path unchanged.
+ */
+static struct mempolicy iommu_dma_mpol = {
+	.refcnt = ATOMIC_INIT(1), /* never free it */
+	.mode = MPOL_LOCAL,
+};
+
+/* Return the policy configured via iommu_dma_numa_policy=. */
+static struct mempolicy *dma_iommu_numa_policy(void)
+{
+	return &iommu_dma_mpol;
+}
+
+/* Return the mode of the configured policy. */
+static unsigned short dma_iommu_numa_mode(void)
+{
+	return iommu_dma_mpol.mode;
+}
+
+/*
+ * Parse the iommu_dma_numa_policy= kernel parameter.
+ *
+ * Returns 1 when the value was parsed and applied, 0 otherwise.
+ */
+static int __init setup_numapolicy(char *str)
+{
+	struct mempolicy *pol = NULL;
+	char buf[128];
+	int ret;
+
+	if (str)
+		ret = mpol_parse_str(str, &pol);
+	else
+		ret = -EINVAL;
+
+	if (ret) {
+		pr_warn("Unable to parse iommu_dma_numa_policy=\n");
+		return 0;
+	}
+
+	/*
+	 * mpol_parse_str() returns the parsed policy through its second
+	 * argument, so copy that object. NOTE(review): a NULL result on
+	 * success is presumably the "default" mode - confirm; the built-in
+	 * MPOL_LOCAL policy is kept in that case.
+	 */
+	if (pol) {
+		iommu_dma_mpol = *pol;
+		atomic_set(&iommu_dma_mpol.refcnt, 1); /* never free it */
+		mpol_put(pol);
+	}
+
+	mpol_to_str(buf, sizeof(buf), &iommu_dma_mpol);
+	pr_info("DMA IOMMU NUMA default policy overridden to '%s'\n", buf);
+
+	return 1;
+}
+__setup("iommu_dma_numa_policy=", setup_numapolicy);
+#else
+/* !CONFIG_NUMA: there is no policy to apply. */
+static struct mempolicy *dma_iommu_numa_policy(void)
+{
+	return NULL;
+}
+
+/* !CONFIG_NUMA: always report MPOL_LOCAL so the NUMA path stays off. */
+static unsigned short dma_iommu_numa_mode(void)
+{
+	return MPOL_LOCAL;
+}
+#endif
 static struct page **__iommu_dma_alloc_pages(struct device *dev,
 		unsigned int count, unsigned long order_mask, gfp_t gfp)
 {
 	struct page **pages;
 	unsigned int i = 0, nid = dev_to_node(dev);
+	const bool use_numa = nid == NUMA_NO_NODE &&
+			      dma_iommu_numa_mode() != MPOL_LOCAL;
+
+	if (use_numa)
+		order_mask = 1;
 
 	order_mask &= GENMASK(MAX_PAGE_ORDER, 0);
 	if (!order_mask)
@@ -903,6 +983,7 @@ static struct page **__iommu_dma_alloc_p
 	while (count) {
 		struct page *page = NULL;
 		unsigned int order_size;
+		nodemask_t *nodemask;
 
 		/*
 		 * Higher-order allocations are a convenience rather
@@ -917,6 +998,20 @@ static struct page **__iommu_dma_alloc_p
 			order_size = 1U << order;
 			if (order_mask > order_size)
 				alloc_flags |= __GFP_NORETRY;
+			if (use_numa) {
+				/* nid is unsigned, the helper wants an int */
+				int pol_nid = nid;
+
+				/*
+				 * NOTE(review): only the node id is consumed
+				 * below, the returned nodemask is not passed
+				 * to the allocator - confirm this is intended.
+				 */
+				nodemask = numa_policy_nodemask(gfp,
+						dma_iommu_numa_policy(),
+						i, &pol_nid);
+				nid = pol_nid;
+			}
 			page = alloc_pages_node(nid, alloc_flags, order);
 			if (!page)
 				continue;
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -135,6 +135,8 @@ bool vma_policy_mof(struct vm_area_struc
 
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
+nodemask_t *numa_policy_nodemask(gfp_t gfp, struct mempolicy *pol, pgoff_t ilx,
+				 int *nid);
 extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 
@@ -238,6 +240,14 @@ static inline void numa_policy_init(void
 {
 }
 
+static inline nodemask_t *
+numa_policy_nodemask(gfp_t gfp, struct mempolicy *pol, pgoff_t ilx, int *nid)
+{
+	*nid = NUMA_NO_NODE;
+
+	return NULL;
+}
+
 static inline void numa_default_policy(void)
 {
 }
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2090,6 +2090,16 @@ static nodemask_t *policy_nodemask(gfp_t
 	return nodemask;
 }
 
+/*
+ * Non-static wrapper exposing policy_nodemask() to callers outside
+ * mm/mempolicy.c.
+ */
+nodemask_t *numa_policy_nodemask(gfp_t gfp, struct mempolicy *pol, pgoff_t ilx,
+				 int *nid)
+{
+	return policy_nodemask(gfp, pol, ilx, nid);
+}
+
 #ifdef CONFIG_HUGETLBFS
 /*
* huge_node(@vma, @addr, @gfp_flags, @mpol)