Skip to content

Commit a288020

Browse files
Kiryl Shutsemau authored and akpm00 committed
mm/rmap: fix a mlock race condition in folio_referenced_one()
The mlock_vma_folio() function requires the page table lock to be held in order to safely mlock the folio. However, folio_referenced_one() mlocks a large folios outside of the page_vma_mapped_walk() loop where the page table lock has already been dropped. Rework the mlock logic to use the same code path inside the loop for both large and small folios. Use PVMW_PGTABLE_CROSSED to detect when the folio is mapped across a page table boundary. [akpm@linux-foundation.org: s/CROSSSED/CROSSED/] Link: https://lkml.kernel.org/r/20250923110711.690639-3-kirill@shutemov.name Signed-off-by: Kiryl Shutsemau <kas@kernel.org> Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: David Hildenbrand <david@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 2db5798 commit a288020

1 file changed

Lines changed: 20 additions & 37 deletions

File tree

mm/rmap.c

Lines changed: 20 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -850,34 +850,34 @@ static bool folio_referenced_one(struct folio *folio,
850850
{
851851
struct folio_referenced_arg *pra = arg;
852852
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
853-
int referenced = 0;
854-
unsigned long start = address, ptes = 0;
853+
int ptes = 0, referenced = 0;
855854

856855
while (page_vma_mapped_walk(&pvmw)) {
857856
address = pvmw.address;
858857

859858
if (vma->vm_flags & VM_LOCKED) {
860-
if (!folio_test_large(folio) || !pvmw.pte) {
861-
/* Restore the mlock which got missed */
862-
mlock_vma_folio(folio, vma);
863-
page_vma_mapped_walk_done(&pvmw);
864-
pra->vm_flags |= VM_LOCKED;
865-
return false; /* To break the loop */
866-
}
859+
ptes++;
860+
pra->mapcount--;
861+
862+
/* Only mlock fully mapped pages */
863+
if (pvmw.pte && ptes != pvmw.nr_pages)
864+
continue;
865+
867866
/*
868-
* For large folio fully mapped to VMA, will
869-
* be handled after the pvmw loop.
867+
* All PTEs must be protected by page table lock in
868+
* order to mlock the page.
870869
*
871-
* For large folio cross VMA boundaries, it's
872-
* expected to be picked by page reclaim. But
873-
* should skip reference of pages which are in
874-
* the range of VM_LOCKED vma. As page reclaim
875-
* should just count the reference of pages out
876-
* the range of VM_LOCKED vma.
870+
* If page table boundary has been cross, current ptl
871+
* only protect part of ptes.
877872
*/
878-
ptes++;
879-
pra->mapcount--;
880-
continue;
873+
if (pvmw.flags & PVMW_PGTABLE_CROSSED)
874+
continue;
875+
876+
/* Restore the mlock which got missed */
877+
mlock_vma_folio(folio, vma);
878+
page_vma_mapped_walk_done(&pvmw);
879+
pra->vm_flags |= VM_LOCKED;
880+
return false; /* To break the loop */
881881
}
882882

883883
/*
@@ -913,23 +913,6 @@ static bool folio_referenced_one(struct folio *folio,
913913
pra->mapcount--;
914914
}
915915

916-
if ((vma->vm_flags & VM_LOCKED) &&
917-
folio_test_large(folio) &&
918-
folio_within_vma(folio, vma)) {
919-
unsigned long s_align, e_align;
920-
921-
s_align = ALIGN_DOWN(start, PMD_SIZE);
922-
e_align = ALIGN_DOWN(start + folio_size(folio) - 1, PMD_SIZE);
923-
924-
/* folio doesn't cross page table boundary and fully mapped */
925-
if ((s_align == e_align) && (ptes == folio_nr_pages(folio))) {
926-
/* Restore the mlock which got missed */
927-
mlock_vma_folio(folio, vma);
928-
pra->vm_flags |= VM_LOCKED;
929-
return false; /* To break the loop */
930-
}
931-
}
932-
933916
if (referenced)
934917
folio_clear_idle(folio);
935918
if (folio_test_clear_young(folio))

0 commit comments

Comments
 (0)