
Commit 1cf1372

Alex Shi authored and committed
mm/lru: introduce TestClearPageLRU()
Currently lru_lock still guards both the lru list and the page's lru bit, which is fine. But if we want to use a per-lruvec lock for the page, we need to pin down the page's lruvec/memcg while taking the lock, and just taking the lruvec lock first can be undermined by a concurrent memcg charge/migration of the page. To fix this, clear the lru bit outside of the lock and use that clearing as the pin that blocks page isolation during a memcg change.

The standard steps of page isolation are now:
	1. get_page();			# pin the page so it cannot be freed
	2. TestClearPageLRU();		# block other isolation, e.g. a memcg change
	3. spin_lock on lru_lock;	# serialize lru list access
	4. delete page from lru list;

This patch starts with the first part: TestClearPageLRU, which combines the PageLRU check and ClearPageLRU into one macro-generated function, TestClearPageLRU. This function will be used as the page-isolation precondition that prevents other isolations elsewhere. Since there may now be !PageLRU pages on an lru list, the corresponding BUG() checks must be removed.

There are two rules for the lru bit now:
	1. The lru bit still indicates whether a page is on an lru list; only in a temporary window (while being isolated) may a page sit on an lru list without the bit set. Whenever the lru bit is set, the page must be on an lru list.
	2. The lru bit has to be cleared before the page is deleted from the lru list.

As Andrew Morton mentioned, this change dirties the cacheline for a page which isn't on the LRU. But the cost is acceptable according to Rong Chen's <rong.a.chen@intel.com> report:
https://lore.kernel.org/lkml/20200304090301.GB5972@shao2-debian/

Link: https://lkml.kernel.org/r/1604566549-62481-15-git-send-email-alex.shi@linux.alibaba.com
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Acked-by: Hugh Dickins <hughd@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mika Penttilä <mika.penttila@nextfour.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Yang Shi <yang.shi@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Change-Id: Ib8ebcaceec8676de729b41bb0ba330e07472acaf
Signed-off-by: fadlyas07 <mhmmdfdlyas@gmail.com>
Signed-off-by: HeroBuxx <me@herobuxx.me>
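For illustration, the four steps combine roughly as in the reworked isolate_lru_page() further down. This is a minimal sketch of the protocol, not a verbatim copy of any one caller; in particular, real callers that already hold a page reference (like isolate_lru_page()) take it at a different point:

/* Sketch: isolate @page from its lru list, @pgdat already known. */
static bool isolate_page_sketch(struct page *page, struct pglist_data *pgdat)
{
	struct lruvec *lruvec;

	get_page(page);				/* 1: pin, so the page cannot be freed */
	if (!TestClearPageLRU(page)) {		/* 2: lost the race, someone else isolates */
		put_page(page);
		return false;
	}
	lruvec = mem_cgroup_page_lruvec(page, pgdat);
	spin_lock_irq(&pgdat->lru_lock);	/* 3: serialize lru list access */
	del_page_from_lru_list(page, lruvec);	/* 4: off the list, lru bit already clear */
	spin_unlock_irq(&pgdat->lru_lock);
	return true;
}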
1 parent dda5bc8 commit 1cf1372

3 files changed

Lines changed: 22 additions & 21 deletions


include/linux/page-flags.h

Lines changed: 1 addition & 0 deletions
@@ -279,6 +279,7 @@ PAGEFLAG(Referenced, referenced, PF_HEAD)
 PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
 	__CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
 PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
+	TESTCLEARFLAG(LRU, lru, PF_HEAD)
 PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
 	TESTCLEARFLAG(Active, active, PF_HEAD)
 PAGEFLAG(Workingset, workingset, PF_HEAD)
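The added TESTCLEARFLAG(LRU, lru, PF_HEAD) line makes the page-flags generator emit an atomic test-and-clear helper. Roughly, the generated function looks like the sketch below (the PF_HEAD policy resolves operations to the compound head page; the exact expansion plumbing is elided here):

static __always_inline int TestClearPageLRU(struct page *page)
{
	/* atomic read-modify-write: returns the old value of PG_lru */
	return test_and_clear_bit(PG_lru, &compound_head(page)->flags);
}

Because test_and_clear_bit() is an atomic RMW, at most one caller can ever observe the bit as set, which is what lets the flag double as an isolation pin.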

mm/mlock.c

Lines changed: 1 addition & 2 deletions
@@ -108,13 +108,12 @@ void mlock_vma_page(struct page *page)
  */
 static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
 {
-	if (PageLRU(page)) {
+	if (TestClearPageLRU(page)) {
 		struct lruvec *lruvec;
 
 		lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
 		if (getpage)
 			get_page(page);
-		ClearPageLRU(page);
 		del_page_from_lru_list(page, lruvec);
 		return true;
 	}
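The mlock change shows the pattern: the old PageLRU() check followed by a separate ClearPageLRU() is check-then-act and only safe while lru_lock is held, whereas TestClearPageLRU() is atomic on its own. Schematically (a hypothetical two-CPU interleaving, not code from the patch):

/* Old: without lru_lock held, both CPUs may pass the check */
if (PageLRU(page)) {		/* CPU A: true; CPU B: also true */
	ClearPageLRU(page);	/* both go on to isolate the page */
	/* ... */
}

/* New: the atomic RMW admits exactly one winner */
if (TestClearPageLRU(page)) {	/* only one CPU sees the bit set */
	/* ... */
}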

mm/vmscan.c

Lines changed: 20 additions & 19 deletions
@@ -1629,7 +1629,7 @@ unsigned long reclaim_pages(struct list_head *page_list)
  */
 int __isolate_lru_page(struct page *page, isolate_mode_t mode)
 {
-	int ret = -EINVAL;
+	int ret = -EBUSY;
 
 	/* Only take pages on the LRU. */
 	if (!PageLRU(page))
@@ -1639,8 +1639,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
 	if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
 		return ret;
 
-	ret = -EBUSY;
-
 	/*
 	 * To minimise LRU disruption, the caller can indicate that it only
 	 * wants to isolate pages it will be able to operate on without
@@ -1687,8 +1685,10 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
 		 * sure the page is not being freed elsewhere -- the
 		 * page release code relies on it.
 		 */
-		ClearPageLRU(page);
-		ret = 0;
+		if (TestClearPageLRU(page))
+			ret = 0;
+		else
+			put_page(page);
 	}
 
 	return ret;
@@ -1755,8 +1755,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
 
-		VM_BUG_ON_PAGE(!PageLRU(page), page);
-
 		if (page_zonenum(page) > sc->reclaim_idx) {
 			list_move(&page->lru, &pages_skipped);
 			nr_skipped[page_zonenum(page)]++;
@@ -1847,20 +1845,18 @@ int isolate_lru_page(struct page *page)
 	VM_BUG_ON_PAGE(!page_count(page), page);
 	WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
 
-	if (PageLRU(page)) {
+	if (TestClearPageLRU(page)) {
 		struct zone *zone = page_zone(page);
 		struct lruvec *lruvec;
 
-		spin_lock_irq(zone_lru_lock(zone));
+		get_page(page);
 		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
-		if (PageLRU(page)) {
-			get_page(page);
-			ClearPageLRU(page);
-			del_page_from_lru_list(page, lruvec);
-			ret = 0;
-		}
+		spin_lock_irq(zone_lru_lock(zone));
+		del_page_from_lru_list(page, lruvec);
 		spin_unlock_irq(zone_lru_lock(zone));
+		ret = 0;
 	}
+
 	return ret;
 }
 
@@ -6941,6 +6937,11 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
 		struct pglist_data *pagepgdat = page_pgdat(page);
 
 		pgscanned++;
+
+		/* block memcg migration during page moving between lru */
+		if (!TestClearPageLRU(page))
+			continue;
+
 		if (pagepgdat != pgdat) {
 			if (pgdat)
 				spin_unlock_irq(&pgdat->lru_lock);
@@ -6949,21 +6950,21 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
 		}
 		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 
-		if (!PageLRU(page) || !PageUnevictable(page))
-			continue;
-
-		if (page_evictable(page)) {
+		if (page_evictable(page) && PageUnevictable(page)) {
 			del_page_from_lru_list(page, lruvec);
 			ClearPageUnevictable(page);
 			add_page_to_lru_list(page, lruvec);
 			pgrescued++;
 		}
+		SetPageLRU(page);
 	}
 
 	if (pgdat) {
 		__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
 		__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
 		spin_unlock_irq(&pgdat->lru_lock);
+	} else if (pgscanned) {
+		count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
 	}
 }
 #endif /* CONFIG_SHMEM */
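The check_move_unevictable_pages() hunks apply the same discipline from the other side: clear the lru bit up front to block memcg migration, move the page between lists under lru_lock, then set the bit again (rule 1: the bit may only be set while the page is on a list). The final hunk also keeps PGSCANNED accounting correct when every page fails TestClearPageLRU() and lru_lock is never taken. A condensed sketch of the resulting loop body, with the pgdat-switching detail elided:

if (!TestClearPageLRU(page))	/* pin: blocks memcg migration */
	continue;
/* ... switch to this page's pgdat->lru_lock if it is on another node ... */
if (page_evictable(page) && PageUnevictable(page)) {
	del_page_from_lru_list(page, lruvec);
	ClearPageUnevictable(page);
	add_page_to_lru_list(page, lruvec);	/* back on an evictable list */
	pgrescued++;
}
SetPageLRU(page);	/* unpin: page is on a lru list again */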
