pgtable.h source code [linux/include/linux/pgtable.h]

1	/* SPDX-License-Identifier: GPL-2.0 */
2	#ifndef _LINUX_PGTABLE_H
3	#define _LINUX_PGTABLE_H
4
5	#include <linux/pfn.h>
6	#include <asm/pgtable.h>
7
/*
 * Difference between the PMD/PUD shift and PAGE_SHIFT.
 * NOTE(review): presumably the allocation order (log2 of the number of base
 * pages) covered by one PMD/PUD entry - confirm against the arch definitions
 * of the *_SHIFT constants.
 */
8	#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
9	#define PUD_ORDER (PUD_SHIFT - PAGE_SHIFT)
10
11	#ifndef __ASSEMBLY__
12	#ifdef CONFIG_MMU
13
14	#include <linux/mm_types.h>
15	#include <linux/bug.h>
16	#include <linux/errno.h>
17	#include <asm-generic/pgtable_uffd.h>
18	#include <linux/page_table_check.h>
19
/*
 * Build-time consistency check: starting from 5, one is subtracted for each
 * __PAGETABLE_{P4D,PUD,PMD}_FOLDED macro that is defined; the result must
 * equal CONFIG_PGTABLE_LEVELS, otherwise the build stops with #error.
 */
20	#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
21	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
22	#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
23	#endif
24
25	/*
26	* On almost all architectures and configurations, 0 can be used as the
27	* upper ceiling to free_pgtables(): on many architectures it has the same
28	* effect as using TASK_SIZE. However, there is one configuration which
29	* must impose a more careful limit, to avoid freeing kernel pgtables.
30	*/
/* Default of 0UL applies only when no earlier definition exists. */
31	#ifndef USER_PGTABLES_CEILING
32	#define USER_PGTABLES_CEILING 0UL
33	#endif
34
35	/*
36	* This defines the first usable user address. Platforms
37	* can override its value with custom FIRST_USER_ADDRESS
38	* defined in their respective <asm/pgtable.h>.
39	*/
/* Default of 0UL applies only when no earlier definition exists. */
40	#ifndef FIRST_USER_ADDRESS
41	#define FIRST_USER_ADDRESS 0UL
42	#endif
43
44	/*
45	* This defines the generic helper for accessing PMD page
46	* table page. Although platforms can still override this
47	* via their respective <asm/pgtable.h>.
48	*/
/* Generic form simply forwards to pmd_page(). */
49	#ifndef pmd_pgtable
50	#define pmd_pgtable(pmd) pmd_page(pmd)
51	#endif
52
/* pmd_folio(): page_folio() applied to the page returned by pmd_page(pmd). */
53	#define pmd_folio(pmd) page_folio(pmd_page(pmd))
54
55	/*
56	* A page table page can be thought of as an array like this: pXd_t[PTRS_PER_PxD]
57	*
58	* The pXx_index() functions return the index of the entry in the page
59	* table page which would control the given virtual address
60	*
61	* As these functions may be used by the same code for different levels of
62	* the page table folding, they are always available, regardless of
63	* CONFIG_PGTABLE_LEVELS value. For the folded levels they simply return 0
64	* because in such cases PTRS_PER_PxD equals 1.
65	*/
66
67	static inline unsigned long pte_index(unsigned long address)
68	{
69	return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - `1`);
70	}
71
72	#ifndef pmd_index
73	static inline unsigned long pmd_index(unsigned long address)
74	{
75	return (address >> PMD_SHIFT) & (PTRS_PER_PMD - `1`);
76	}
77	#define pmd_index pmd_index
78	#endif
79
80	#ifndef pud_index
81	static inline unsigned long pud_index(unsigned long address)
82	{
83	return (address >> PUD_SHIFT) & (PTRS_PER_PUD - `1`);
84	}
85	#define pud_index pud_index
86	#endif
87
#ifndef pgd_index
/*
 * pgd_index - index of the top-level (PGD) slot that controls address @a.
 *
 * Must be a compile-time constant, so implement it as a macro.
 */
#define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#endif
92
/*
 * Default page-table initialisation hooks.
 *
 * Each hook is defined here only when nothing has defined it earlier. The
 * generic versions do nothing with @addr. The self-referential #define marks
 * the hook as present for later #ifndef checks.
 */
#ifndef kernel_pte_init
#define kernel_pte_init kernel_pte_init
static inline void kernel_pte_init(void *addr) { }
#endif

#ifndef pmd_init
#define pmd_init pmd_init
static inline void pmd_init(void *addr) { }
#endif

#ifndef pud_init
#define pud_init pud_init
static inline void pud_init(void *addr) { }
#endif
113
114	#ifndef pte_offset_kernel
115	static inline pte_t pte_offset_kernel(pmd_t pmd, unsigned long address)
116	{
117	return (pte_t )pmd_page_vaddr(pmd: pmd) + pte_index(address);
118	}
119	#define pte_offset_kernel pte_offset_kernel
120	#endif
121
122	#ifdef CONFIG_HIGHPTE
123	#define __pte_map(pmd, address) \
124	((pte_t )kmap_local_page(pmd_page((pmd))) + pte_index((address)))
125	#define pte_unmap(pte) do { \
126	kunmap_local((pte)); \
127	rcu_read_unlock(); \
128	} while (0)
129	#else
130	static inline pte_t __pte_map(pmd_t pmd, unsigned long address)
131	{
132	return pte_offset_kernel(pmd, address);
133	}
134	static inline void pte_unmap(pte_t *pte)
135	{
136	rcu_read_unlock();
137	}
138	#endif
139
/*
 * Declaration only; the definition is not part of this header section.
 * NOTE(review): by its name this presumably frees a PTE page table in a
 * deferred manner - confirm against the definition.
 */
140	void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
141
142	/ Find an entry in the second-level page table.. /
143	#ifndef pmd_offset
144	static inline pmd_t pmd_offset(pud_t pud, unsigned long address)
145	{
146	return pud_pgtable(pud: *pud) + pmd_index(address);
147	}
148	#define pmd_offset pmd_offset
149	#endif
150
151	#ifndef pud_offset
152	static inline pud_t pud_offset(p4d_t p4d, unsigned long address)
153	{
154	return p4d_pgtable(p4d: *p4d) + pud_index(address);
155	}
156	#define pud_offset pud_offset
157	#endif
158
159	static inline pgd_t pgd_offset_pgd(pgd_t pgd, unsigned long address)
160	{
161	return (pgd + pgd_index(address));
162	};
163
164	/*
165	* a shortcut to get a pgd_t in a given mm
166	*/
/* Generic form: look up @address in the table stored in (mm)->pgd. */
167	#ifndef pgd_offset
168	#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
169	#endif
170
171	/*
172	* a shortcut which implies the use of the kernel's pgd, instead
173	* of a process's
174	*/
/* Expands to pgd_offset() on &init_mm. */
175	#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
176
177	/*
178	* In many cases it is known that a virtual address is mapped at PMD or PTE
179	* level, so instead of traversing all the page table levels, we can get a
180	* pointer to the PMD entry in user or kernel page table or translate a virtual
181	* address to the pointer in the PTE in the kernel page tables with simple
182	* helpers.
183	*/
184	static inline pmd_t pmd_off(struct* mm_struct mm, unsigned* long va)
185	{
186	return pmd_offset(pud_offset(p4d: p4d_offset(pgd_offset(mm, va), address: va), address: va), address: va);
187	}
188
189	static inline pmd_t pmd_off_k(unsigned* long va)
190	{
191	return pmd_offset(pud_offset(p4d: p4d_offset(pgd_offset_k(va), address: va), address: va), address: va);
192	}
193
194	static inline pte_t virt_to_kpte(unsigned* long vaddr)
195	{
196	pmd_t *pmd = pmd_off_k(va: vaddr);
197
198	return pmd_none(pmd: *pmd) ? NULL : pte_offset_kernel(pmd, address: vaddr);
199	}
200
201	#ifndef pmd_young
202	static inline int pmd_young(pmd_t pmd)
203	{
204	return `0`;
205	}
206	#endif
207
208	#ifndef pmd_dirty
209	static inline int pmd_dirty(pmd_t pmd)
210	{
211	return `0`;
212	}
213	#endif
214
215	/*
216	* A facility to provide lazy MMU batching. This allows PTE updates and
217	* page invalidations to be delayed until a call to leave lazy MMU mode
218	* is issued. Some architectures may benefit from doing this, and it is
219	* beneficial for both shadow and direct mode hypervisors, which may batch
220	* the PTE updates which happen during this window. Note that using this
221	* interface requires that read hazards be removed from the code. A read
222	* hazard could result in the direct mode hypervisor case, since the actual
223	* write to the page tables may not yet have taken place, so reads through
224	* a raw PTE pointer after it has been modified are not guaranteed to be
225	* up to date.
226	*
227	* In the general case, no lock is guaranteed to be held between entry and exit
228	* of the lazy mode. So the implementation must assume preemption may be enabled
229	* and cpu migration is possible; it must take steps to be robust against this.
230	* (In practice, for user PTE updates, the appropriate page table lock(s) are
231	* held, but for kernel PTE updates, no lock is held). Nesting is not permitted
232	* and the mode cannot be used in interrupt context.
233	*/
/*
 * Defaults used when __HAVE_ARCH_ENTER_LAZY_MMU_MODE is not defined: all
 * three hooks expand to an empty statement.
 */
234	#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
235	#define arch_enter_lazy_mmu_mode() do {} while (0)
236	#define arch_leave_lazy_mmu_mode() do {} while (0)
237	#define arch_flush_lazy_mmu_mode() do {} while (0)
238	#endif
239
240	#ifndef pte_batch_hint
241	/**
242	* pte_batch_hint - Number of pages that can be added to batch without scanning.
243	* @ptep: Page table pointer for the entry.
244	* @pte: Page table entry.
245	*
246	* Some architectures know that a set of contiguous ptes all map the same
247	* contiguous memory with the same permissions. In this case, it can provide a
248	* hint to aid pte batching without the core code needing to scan every pte.
249	*
250	* An architecture implementation may ignore the PTE accessed state. Further,
251	* the dirty state must apply atomically to all the PTEs described by the hint.
252	*
253	* May be overridden by the architecture, else pte_batch_hint is always 1.
254	*/
255	static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
256	{
257	return `1`;
258	}
259	#endif
260
261	#ifndef pte_advance_pfn
262	static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
263	{
264	return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
265	}
266	#endif
267
268	#define pte_next_pfn(pte) pte_advance_pfn(pte, 1)
269
270	#ifndef set_ptes
271	/**
272	* set_ptes - Map consecutive pages to a contiguous range of addresses.
273	* @mm: Address space to map the pages into.
274	* @addr: Address to map the first page at.
275	* @ptep: Page table pointer for the first entry.
276	* @pte: Page table entry for the first page.
277	* @nr: Number of pages to map.
278	*
279	* When nr==1, initial state of pte may be present or not present, and new state
280	* may be present or not present. When nr>1, initial state of all ptes must be
281	* not present, and new state must be present.
282	*
283	* May be overridden by the architecture, or the architecture can define
284	* set_pte() and PFN_PTE_SHIFT.
285	*
286	* Context: The caller holds the page table lock. The pages all belong
287	* to the same folio. The PTEs are all in the same PMD.
288	*/
289	static inline void set_ptes(struct mm_struct mm, unsigned* long addr,
290	pte_t ptep, pte_t pte, unsigned* int nr)
291	{
292	page_table_check_ptes_set(mm, ptep, pte, nr);
293
294	for (;;) {
295	set_pte(ptep, pte);
296	if (--nr == `0`)
297	break;
298	ptep++;
299	pte = pte_next_pfn(pte);
300	}
301	}
302	#endif
/* Single-entry form: forwards to set_ptes() with a count of 1. */
303	#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
304
305	#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
306	extern int ptep_set_access_flags(struct vm_area_struct *vma,
307	unsigned long address, pte_t *ptep,
308	pte_t entry, int dirty);
309	#endif
310
311	#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
312	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
313	extern int pmdp_set_access_flags(struct vm_area_struct *vma,
314	unsigned long address, pmd_t *pmdp,
315	pmd_t entry, int dirty);
316	extern int pudp_set_access_flags(struct vm_area_struct *vma,
317	unsigned long address, pud_t *pudp,
318	pud_t entry, int dirty);
319	#else
320	static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
321	unsigned long address, pmd_t *pmdp,
322	pmd_t entry, int dirty)
323	{
324	BUILD_BUG();
325	return `0`;
326	}
327	static inline int pudp_set_access_flags(struct vm_area_struct *vma,
328	unsigned long address, pud_t *pudp,
329	pud_t entry, int dirty)
330	{
331	BUILD_BUG();
332	return `0`;
333	}
334	#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
335	#endif
336
337	#ifndef ptep_get
338	static inline pte_t ptep_get(pte_t *ptep)
339	{
340	return READ_ONCE(*ptep);
341	}
342	#endif
343
344	#ifndef pmdp_get
345	static inline pmd_t pmdp_get(pmd_t *pmdp)
346	{
347	return READ_ONCE(*pmdp);
348	}
349	#endif
350
351	#ifndef pudp_get
352	static inline pud_t pudp_get(pud_t *pudp)
353	{
354	return READ_ONCE(*pudp);
355	}
356	#endif
357
358	#ifndef p4dp_get
359	static inline p4d_t p4dp_get(p4d_t *p4dp)
360	{
361	return READ_ONCE(*p4dp);
362	}
363	#endif
364
365	#ifndef pgdp_get
366	static inline pgd_t pgdp_get(pgd_t *pgdp)
367	{
368	return READ_ONCE(*pgdp);
369	}
370	#endif
371
372	#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
373	static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
374	unsigned long address,
375	pte_t *ptep)
376	{
377	pte_t pte = ptep_get(ptep);
378	int r = `1`;
379	if (!pte_young(pte))
380	r = `0`;
381	else
382	set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
383	return r;
384	}
385	#endif
386
387	#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
388	#if defined(CONFIG_TRANSPARENT_HUGEPAGE) \|\| defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
389	static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
390	unsigned long address,
391	pmd_t *pmdp)
392	{
393	pmd_t pmd = *pmdp;
394	int r = `1`;
395	if (!pmd_young(pmd))
396	r = `0`;
397	else
398	set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
399	return r;
400	}
401	#else
402	static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
403	unsigned long address,
404	pmd_t *pmdp)
405	{
406	BUILD_BUG();
407	return `0`;
408	}
409	#endif /* CONFIG_TRANSPARENT_HUGEPAGE \|\| CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
410	#endif
411
412	#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
413	int ptep_clear_flush_young(struct vm_area_struct *vma,
414	unsigned long address, pte_t *ptep);
415	#endif
416
417	#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
418	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
419	extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
420	unsigned long address, pmd_t *pmdp);
421	#else
422	/*
423	* Despite relevant to THP only, this API is called from generic rmap code
424	* under PageTransHuge(), hence needs a dummy implementation for !THP
425	*/
426	static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
427	unsigned long address, pmd_t *pmdp)
428	{
429	BUILD_BUG();
430	return `0`;
431	}
432	#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
433	#endif
434
435	#ifndef arch_has_hw_nonleaf_pmd_young
436	/*
437	* Return whether the accessed bit in non-leaf PMD entries is supported on the
438	* local CPU.
439	*/
440	static inline bool arch_has_hw_nonleaf_pmd_young(void)
441	{
442	return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
443	}
444	#endif
445
446	#ifndef arch_has_hw_pte_young
447	/*
448	* Return whether the accessed bit is supported on the local CPU.
449	*
450	* This stub assumes accessing through an old PTE triggers a page fault.
451	* Architectures that automatically set the access bit should overwrite it.
452	*/
453	static inline bool arch_has_hw_pte_young(void)
454	{
455	return IS_ENABLED(CONFIG_ARCH_HAS_HW_PTE_YOUNG);
456	}
457	#endif
458
459	#ifndef arch_check_zapped_pte
460	static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
461	pte_t pte)
462	{
463	}
464	#endif
465
466	#ifndef arch_check_zapped_pmd
467	static inline void arch_check_zapped_pmd(struct vm_area_struct *vma,
468	pmd_t pmd)
469	{
470	}
471	#endif
472
473	#ifndef arch_check_zapped_pud
474	static inline void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud)
475	{
476	}
477	#endif
478
479	#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
480	static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
481	unsigned long address,
482	pte_t *ptep)
483	{
484	pte_t pte = ptep_get(ptep);
485	pte_clear(mm, address, ptep);
486	page_table_check_pte_clear(mm, pte);
487	return pte;
488	}
489	#endif
490
491	#ifndef clear_young_dirty_ptes
492	/**
493	* clear_young_dirty_ptes - Mark PTEs that map consecutive pages of the
494	* same folio as old/clean.
495	* @mm: Address space the pages are mapped into.
496	* @addr: Address the first page is mapped at.
497	* @ptep: Page table pointer for the first entry.
498	* @nr: Number of entries to mark old/clean.
499	* @flags: Flags to modify the PTE batch semantics.
500	*
501	* May be overridden by the architecture; otherwise, implemented by
502	* get_and_clear/modify/set for each pte in the range.
503	*
504	* Note that PTE bits in the PTE range besides the PFN can differ. For example,
505	* some PTEs might be write-protected.
506	*
507	* Context: The caller holds the page table lock. The PTEs map consecutive
508	* pages that belong to the same folio. The PTEs are all in the same PMD.
509	*/
510	static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
511	unsigned long addr, pte_t *ptep,
512	unsigned int nr, cydp_t flags)
513	{
514	pte_t pte;
515
516	for (;;) {
517	if (flags == CYDP_CLEAR_YOUNG)
518	ptep_test_and_clear_young(vma, addr, ptep);
519	else {
520	pte = ptep_get_and_clear(mm: vma->vm_mm, addr, ptep);
521	if (flags & CYDP_CLEAR_YOUNG)
522	pte = pte_mkold(pte);
523	if (flags & CYDP_CLEAR_DIRTY)
524	pte = pte_mkclean(pte);
525	set_pte_at(vma->vm_mm, addr, ptep, pte);
526	}
527	if (--nr == `0`)
528	break;
529	ptep++;
530	addr += PAGE_SIZE;
531	}
532	}
533	#endif
534
535	static inline void ptep_clear(struct mm_struct mm, unsigned* long addr,
536	pte_t *ptep)
537	{
538	pte_t pte = ptep_get(ptep);
539
540	pte_clear(mm, addr, ptep);
541	/*
542	* No need for ptep_get_and_clear(): page table check doesn't care about
543	* any bits that could have been set by HW concurrently.
544	*/
545	page_table_check_pte_clear(mm, pte);
546	}
547
548	#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH
549	/*
550	* For walking the pagetables without holding any locks. Some architectures
551	* (eg x86-32 PAE) cannot load the entries atomically without using expensive
552	* instructions. We are guaranteed that a PTE will only either go from not
553	* present to present, or present to not present -- it will not switch to a
554	* completely different present page without a TLB flush in between; which we
555	* are blocking by holding interrupts off.
556	*
557	* Setting ptes from not present to present goes:
558	*
559	* ptep->pte_high = h;
560	* smp_wmb();
561	* ptep->pte_low = l;
562	*
563	* And present to not present goes:
564	*
565	* ptep->pte_low = 0;
566	* smp_wmb();
567	* ptep->pte_high = 0;
568	*
569	* We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
570	* We load pte_high after loading pte_low, which ensures we don't see an older
571	* value of pte_high. Then we recheck pte_low, which ensures that we haven't
572	* picked up a changed pte high. We might have gotten rubbish values from
573	* pte_low and pte_high, but we are guaranteed that pte_low will not have the
574	* present bit set unless it is 'l'. Because get_user_pages_fast() only
575	* operates on present ptes we're safe.
576	*/
/*
 * Lockless read of a PTE stored as two halves (pte_low, pte_high).
 * Retries until pte_low reads the same before and after pte_high was loaded.
 */
577	static inline pte_t ptep_get_lockless(pte_t *ptep)
578	{
579	pte_t pte;
580
581	do {
/* Low half is loaded first. */
582	pte.pte_low = ptep->pte_low;
/* Read barrier between the pte_low load and the pte_high load. */
583	smp_rmb();
584	pte.pte_high = ptep->pte_high;
/* Read barrier between the pte_high load and the re-read of pte_low below. */
585	smp_rmb();
/* Retry when pte_low changed while pte_high was being read. */
586	} while (unlikely(pte.pte_low != ptep->pte_low));
587
588	return pte;
589	}
/* Marks this variant as defined so the generic fallback is not compiled. */
590	#define ptep_get_lockless ptep_get_lockless
591
592	#if CONFIG_PGTABLE_LEVELS > 2
/*
 * Lockless read of a PMD stored as two halves (pmd_low, pmd_high).
 * Retries until pmd_low reads the same before and after pmd_high was loaded.
 */
593	static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
594	{
595	pmd_t pmd;
596
597	do {
/* Low half is loaded first. */
598	pmd.pmd_low = pmdp->pmd_low;
/* Read barrier between the pmd_low load and the pmd_high load. */
599	smp_rmb();
600	pmd.pmd_high = pmdp->pmd_high;
/* Read barrier between the pmd_high load and the re-read of pmd_low below. */
601	smp_rmb();
/* Retry when pmd_low changed while pmd_high was being read. */
602	} while (unlikely(pmd.pmd_low != pmdp->pmd_low));
603
604	return pmd;
605	}
/* Marks this variant as defined so the generic fallback is not compiled. */
606	#define pmdp_get_lockless pmdp_get_lockless
/* In this configuration the sync hook forwards to tlb_remove_table_sync_one(). */
607	#define pmdp_get_lockless_sync() tlb_remove_table_sync_one()
608	#endif /* CONFIG_PGTABLE_LEVELS > 2 */
609	#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */
610
611	/*
612	* We require that the PTE can be read atomically.
613	*/
614	#ifndef ptep_get_lockless
615	static inline pte_t ptep_get_lockless(pte_t *ptep)
616	{
617	return ptep_get(ptep);
618	}
619	#endif
620
621	#ifndef pmdp_get_lockless
622	static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
623	{
624	return pmdp_get(pmdp);
625	}
626	static inline void pmdp_get_lockless_sync(void)
627	{
628	}
629	#endif
630
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
/*
 * pmdp_huge_get_and_clear - clear a PMD entry and return its old value.
 *
 * The entry is read with a plain dereference, cleared with pmd_clear(), and
 * the old value is reported to the page table checker. @address is unused in
 * this generic version.
 */
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t old = *pmdp;

	pmd_clear(pmdp);
	page_table_check_pmd_clear(mm, old);
	return old;
}
#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
/*
 * pudp_huge_get_and_clear - clear a PUD entry and return its old value.
 *
 * PUD-level counterpart of pmdp_huge_get_and_clear(); @address is unused in
 * this generic version.
 */
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pud_t *pudp)
{
	pud_t old = *pudp;

	pud_clear(pudp);
	page_table_check_pud_clear(mm, old);
	return old;
}
#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
659
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
/*
 * Generic "full" variant: @full is ignored and the call is forwarded to
 * pmdp_huge_get_and_clear() on the VMA's mm.
 */
static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
						 unsigned long address, pmd_t *pmdp,
						 int full)
{
	return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
}
#endif

#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
/*
 * Generic "full" variant: @full is ignored and the call is forwarded to
 * pudp_huge_get_and_clear() on the VMA's mm.
 */
static inline pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
						 unsigned long address, pud_t *pudp,
						 int full)
{
	return pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
679
680	#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
681	static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
682	unsigned long address, pte_t *ptep,
683	int full)
684	{
685	return ptep_get_and_clear(mm, address, ptep);
686	}
687	#endif
688
689	#ifndef get_and_clear_full_ptes
690	/**
691	* get_and_clear_full_ptes - Clear present PTEs that map consecutive pages of
692	* the same folio, collecting dirty/accessed bits.
693	* @mm: Address space the pages are mapped into.
694	* @addr: Address the first page is mapped at.
695	* @ptep: Page table pointer for the first entry.
696	* @nr: Number of entries to clear.
697	* @full: Whether we are clearing a full mm.
698	*
699	* May be overridden by the architecture; otherwise, implemented as a simple
700	* loop over ptep_get_and_clear_full(), merging dirty/accessed bits into the
701	* returned PTE.
702	*
703	* Note that PTE bits in the PTE range besides the PFN can differ. For example,
704	* some PTEs might be write-protected.
705	*
706	* Context: The caller holds the page table lock. The PTEs map consecutive
707	* pages that belong to the same folio. The PTEs are all in the same PMD.
708	*/
709	static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
710	unsigned long addr, pte_t ptep, unsigned* int nr, int full)
711	{
712	pte_t pte, tmp_pte;
713
714	pte = ptep_get_and_clear_full(mm, addr, ptep, full);
715	while (--nr) {
716	ptep++;
717	addr += PAGE_SIZE;
718	tmp_pte = ptep_get_and_clear_full(mm, addr, ptep, full);
719	if (pte_dirty(pte: tmp_pte))
720	pte = pte_mkdirty(pte);
721	if (pte_young(pte: tmp_pte))
722	pte = pte_mkyoung(pte);
723	}
724	return pte;
725	}
726	#endif
727
728	#ifndef clear_full_ptes
729	/**
730	* clear_full_ptes - Clear present PTEs that map consecutive pages of the same
731	* folio.
732	* @mm: Address space the pages are mapped into.
733	* @addr: Address the first page is mapped at.
734	* @ptep: Page table pointer for the first entry.
735	* @nr: Number of entries to clear.
736	* @full: Whether we are clearing a full mm.
737	*
738	* May be overridden by the architecture; otherwise, implemented as a simple
739	* loop over ptep_get_and_clear_full().
740	*
741	* Note that PTE bits in the PTE range besides the PFN can differ. For example,
742	* some PTEs might be write-protected.
743	*
744	* Context: The caller holds the page table lock. The PTEs map consecutive
745	* pages that belong to the same folio. The PTEs are all in the same PMD.
746	*/
747	static inline void clear_full_ptes(struct mm_struct mm, unsigned* long addr,
748	pte_t ptep, unsigned* int nr, int full)
749	{
750	for (;;) {
751	ptep_get_and_clear_full(mm, addr, ptep, full);
752	if (--nr == `0`)
753	break;
754	ptep++;
755	addr += PAGE_SIZE;
756	}
757	}
758	#endif
759
760	/*
761	* If two threads concurrently fault at the same page, the thread that
762	* won the race updates the PTE and its local TLB/Cache. The other thread
763	* gives up, simply does nothing, and continues; on architectures where
764	* software can update TLB, local TLB can be updated here to avoid next page
765	* fault. This function updates TLB only, do nothing with cache or others.
766	* It is the difference with function update_mmu_cache.
767	*/
768	#ifndef update_mmu_tlb_range
769	static inline void update_mmu_tlb_range(struct vm_area_struct *vma,
770	unsigned long address, pte_t ptep, unsigned* int nr)
771	{
772	}
773	#endif
774
775	static inline void update_mmu_tlb(struct vm_area_struct *vma,
776	unsigned long address, pte_t *ptep)
777	{
778	update_mmu_tlb_range(vma, address, ptep, nr: `1`);
779	}
780
781	/*
782	* Some architectures may be able to avoid expensive synchronization
783	* primitives when modifications are made to PTE's which are already
784	* not present, or in the process of an address space destruction.
785	*/
786	#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
787	static inline void pte_clear_not_present_full(struct mm_struct *mm,
788	unsigned long address,
789	pte_t *ptep,
790	int full)
791	{
792	pte_clear(mm, addr: address, ptep);
793	}
794	#endif
795
796	#ifndef clear_not_present_full_ptes
797	/**
798	* clear_not_present_full_ptes - Clear multiple not present PTEs which are
799	* consecutive in the pgtable.
800	* @mm: Address space the ptes represent.
801	* @addr: Address of the first pte.
802	* @ptep: Page table pointer for the first entry.
803	* @nr: Number of entries to clear.
804	* @full: Whether we are clearing a full mm.
805	*
806	* May be overridden by the architecture; otherwise, implemented as a simple
807	* loop over pte_clear_not_present_full().
808	*
809	* Context: The caller holds the page table lock. The PTEs are all not present.
810	* The PTEs are all in the same PMD.
811	*/
812	static inline void clear_not_present_full_ptes(struct mm_struct *mm,
813	unsigned long addr, pte_t ptep, unsigned* int nr, int full)
814	{
815	for (;;) {
816	pte_clear_not_present_full(mm, address: addr, ptep, full);
817	if (--nr == `0`)
818	break;
819	ptep++;
820	addr += PAGE_SIZE;
821	}
822	}
823	#endif
824
825	#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
826	extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
827	unsigned long address,
828	pte_t *ptep);
829	#endif
830
831	#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
832	extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
833	unsigned long address,
834	pmd_t *pmdp);
835	extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
836	unsigned long address,
837	pud_t *pudp);
838	#endif
839
840	#ifndef pte_mkwrite
841	static inline pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
842	{
843	return pte_mkwrite_novma(pte);
844	}
845	#endif
846
847	#if defined(CONFIG_ARCH_WANT_PMD_MKWRITE) && !defined(pmd_mkwrite)
848	static inline pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
849	{
850	return pmd_mkwrite_novma(pmd);
851	}
852	#endif
853
854	#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
855	struct mm_struct;
856	static inline void ptep_set_wrprotect(struct mm_struct mm, unsigned* long address, pte_t *ptep)
857	{
858	pte_t old_pte = ptep_get(ptep);
859	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
860	}
861	#endif
862
863	#ifndef wrprotect_ptes
864	/**
865	* wrprotect_ptes - Write-protect PTEs that map consecutive pages of the same
866	* folio.
867	* @mm: Address space the pages are mapped into.
868	* @addr: Address the first page is mapped at.
869	* @ptep: Page table pointer for the first entry.
870	* @nr: Number of entries to write-protect.
871	*
872	* May be overridden by the architecture; otherwise, implemented as a simple
873	* loop over ptep_set_wrprotect().
874	*
875	* Note that PTE bits in the PTE range besides the PFN can differ. For example,
876	* some PTEs might be write-protected.
877	*
878	* Context: The caller holds the page table lock. The PTEs map consecutive
879	* pages that belong to the same folio. The PTEs are all in the same PMD.
880	*/
881	static inline void wrprotect_ptes(struct mm_struct mm, unsigned* long addr,
882	pte_t ptep, unsigned* int nr)
883	{
884	for (;;) {
885	ptep_set_wrprotect(mm, addr, ptep);
886	if (--nr == `0`)
887	break;
888	ptep++;
889	addr += PAGE_SIZE;
890	}
891	}
892	#endif
893
894	/*
895	* On some architectures hardware does not set page access bit when accessing
896	* memory page, it is responsibility of software setting this bit. It brings
897	* out extra page fault penalty to track page access bit. For optimization page
898	* access bit can be set during all page fault flow on these arches.
899	* To differentiate it from the macro pte_mkyoung, this macro is used on platforms
900	* where software maintains page access bit.
901	*/
902	#ifndef pte_sw_mkyoung
903	static inline pte_t pte_sw_mkyoung(pte_t pte)
904	{
905	return pte;
906	}
907	#define pte_sw_mkyoung pte_sw_mkyoung
908	#endif
909
910	#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
911	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
912	static inline void pmdp_set_wrprotect(struct mm_struct *mm,
913	unsigned long address, pmd_t *pmdp)
914	{
915	pmd_t old_pmd = *pmdp;
916	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
917	}
918	#else
919	static inline void pmdp_set_wrprotect(struct mm_struct *mm,
920	unsigned long address, pmd_t *pmdp)
921	{
922	BUILD_BUG();
923	}
924	#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
925	#endif
#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * pudp_set_wrprotect - write-protect a PUD entry.
 *
 * Reads the entry with a plain dereference and writes back its
 * pud_wrprotect() form via set_pud_at().
 */
static inline void pudp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pud_t *pudp)
{
	pud_t old_pud = *pudp;

	set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
}
#else
/* Stub for !THP builds; its whole body is BUILD_BUG(). */
static inline void pudp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pud_t *pudp)
{
	BUILD_BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#endif
945
946	#ifndef pmdp_collapse_flush
947	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
948	extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
949	unsigned long address, pmd_t *pmdp);
950	#else
951	static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
952	unsigned long address,
953	pmd_t *pmdp)
954	{
955	BUILD_BUG();
956	return *pmdp;
957	}
958	#define pmdp_collapse_flush pmdp_collapse_flush
959	#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
960	#endif
961
962	#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
963	extern void pgtable_trans_huge_deposit(struct mm_struct mm, pmd_t pmdp,
964	pgtable_t pgtable);
965	#endif
966
967	#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
968	extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct mm, pmd_t pmdp);
969	#endif
970
971	#ifndef arch_needs_pgtable_deposit
972	#define arch_needs_pgtable_deposit() (false)
973	#endif
974
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * This is an implementation of pmdp_establish() that is only suitable for an
 * architecture that doesn't have hardware dirty/accessed bits. In this case we
 * can't race with CPU which sets these bits and non-atomic approach is fine.
 *
 * Returns the PMD value that was in place before @pmd was installed.
 */
static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	pmd_t old_pmd = *pmdp;

	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
	return old_pmd;
}
#endif
989
990	#ifndef __HAVE_ARCH_PMDP_INVALIDATE
991	extern pmd_t pmdp_invalidate(struct vm_area_struct vma, unsigned* long address,
992	pmd_t *pmdp);
993	#endif
994
995	#ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD
996
997	/*
998	* pmdp_invalidate_ad() invalidates the PMD while changing a transparent
999	* hugepage mapping in the page tables. This function is similar to
1000	* pmdp_invalidate(), but should only be used if the access and dirty bits would
1001	* not be cleared by the software in the new PMD value. The function ensures
1002	* that hardware updates of the access and dirty bits are not lost.
1003	*
1004	* Doing so can allow in certain architectures to avoid a TLB flush in most
1005	* cases. Yet, another TLB flush might be necessary later if the PMD update
1006	* itself requires such flush (e.g., if protection was set to be stricter). Yet,
1007	* even when a TLB flush is needed because of the update, the caller may be able
1008	* to batch these TLB flushing operations, so fewer TLB flush operations are
1009	* needed.
1010	*/
1011	extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
1012	unsigned long address, pmd_t *pmdp);
1013	#endif
1014
1015	#ifndef __HAVE_ARCH_PTE_SAME
1016	static inline int pte_same(pte_t pte_a, pte_t pte_b)
1017	{
1018	return pte_val(pte_a) == pte_val(pte_b);
1019	}
1020	#endif
1021
1022	#ifndef __HAVE_ARCH_PTE_UNUSED
1023	/*
1024	* Some architectures provide facilities to virtualization guests
1025	* so that they can flag allocated pages as unused. This allows the
1026	* host to transparently reclaim unused pages. This function returns
1027	* whether the pte's page is unused.
1028	*/
1029	static inline int pte_unused(pte_t pte)
1030	{
1031	return `0`;
1032	}
1033	#endif
1034
/*
 * Generic pXd_access_permitted() fallbacks. Each evaluates to true when the
 * entry is present and, if @write is non-zero, the entry is also writable.
 */
#ifndef pte_access_permitted
#define pte_access_permitted(pte, write) \
	(pte_present(pte) && (!(write) || pte_write(pte)))
#endif

#ifndef pmd_access_permitted
#define pmd_access_permitted(pmd, write) \
	(pmd_present(pmd) && (!(write) || pmd_write(pmd)))
#endif

#ifndef pud_access_permitted
#define pud_access_permitted(pud, write) \
	(pud_present(pud) && (!(write) || pud_write(pud)))
#endif

#ifndef p4d_access_permitted
#define p4d_access_permitted(p4d, write) \
	(p4d_present(p4d) && (!(write) || p4d_write(p4d)))
#endif

#ifndef pgd_access_permitted
#define pgd_access_permitted(pgd, write) \
	(pgd_present(pgd) && (!(write) || pgd_write(pgd)))
#endif
1059
1060	#ifndef __HAVE_ARCH_PMD_SAME
1061	static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
1062	{
1063	return pmd_val(pmd: pmd_a) == pmd_val(pmd: pmd_b);
1064	}
1065	#endif
1066
1067	#ifndef pud_same
1068	static inline int pud_same(pud_t pud_a, pud_t pud_b)
1069	{
1070	return pud_val(pud: pud_a) == pud_val(pud: pud_b);
1071	}
1072	#define pud_same pud_same
1073	#endif
1074
1075	#ifndef __HAVE_ARCH_P4D_SAME
1076	static inline int p4d_same(p4d_t p4d_a, p4d_t p4d_b)
1077	{
1078	return p4d_val(p4d: p4d_a) == p4d_val(p4d: p4d_b);
1079	}
1080	#endif
1081
1082	#ifndef __HAVE_ARCH_PGD_SAME
1083	static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
1084	{
1085	return pgd_val(pgd: pgd_a) == pgd_val(pgd: pgd_b);
1086	}
1087	#endif
1088
1089	#ifndef __HAVE_ARCH_DO_SWAP_PAGE
1090	static inline void arch_do_swap_page_nr(struct mm_struct *mm,
1091	struct vm_area_struct *vma,
1092	unsigned long addr,
1093	pte_t pte, pte_t oldpte,
1094	int nr)
1095	{
1096
1097	}
1098	#else
1099	/*
1100	* Some architectures support metadata associated with a page. When a
1101	* page is being swapped out, this metadata must be saved so it can be
1102	* restored when the page is swapped back in. SPARC M7 and newer
1103	* processors support an ADI (Application Data Integrity) tag for the
1104	* page as metadata for the page. arch_do_swap_page() can restore this
1105	* metadata when a page is swapped back in.
1106	*/
1107	static inline void arch_do_swap_page_nr(struct mm_struct *mm,
1108	struct vm_area_struct *vma,
1109	unsigned long addr,
1110	pte_t pte, pte_t oldpte,
1111	int nr)
1112	{
1113	for (int i = `0`; i < nr; i++) {
1114	arch_do_swap_page(vma->vm_mm, vma, addr + i * PAGE_SIZE,
1115	pte_advance_pfn(pte, i),
1116	pte_advance_pfn(oldpte, i));
1117	}
1118	}
1119	#endif
1120
1121	#ifndef __HAVE_ARCH_UNMAP_ONE
1122	/*
1123	* Some architectures support metadata associated with a page. When a
1124	* page is being swapped out, this metadata must be saved so it can be
1125	* restored when the page is swapped back in. SPARC M7 and newer
1126	* processors support an ADI (Application Data Integrity) tag for the
1127	* page as metadata for the page. arch_unmap_one() can save this
1128	* metadata on a swap-out of a page.
1129	*/
1130	static inline int arch_unmap_one(struct mm_struct *mm,
1131	struct vm_area_struct *vma,
1132	unsigned long addr,
1133	pte_t orig_pte)
1134	{
1135	return `0`;
1136	}
1137	#endif
1138
/*
 * Allow architectures to preserve additional metadata associated with
 * swapped-out pages. The corresponding __HAVE_ARCH_SWAP_* macros and function
 * prototypes must be defined in the arch-specific asm/pgtable.h file.
 */
#ifndef __HAVE_ARCH_PREPARE_TO_SWAP
/* Generic fallback: nothing to prepare, always returns 0. */
static inline int arch_prepare_to_swap(struct folio *folio)
{
	return 0;
}
#endif
1150
1151	#ifndef __HAVE_ARCH_SWAP_INVALIDATE
1152	static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
1153	{
1154	}
1155
1156	static inline void arch_swap_invalidate_area(int type)
1157	{
1158	}
1159	#endif
1160
1161	#ifndef __HAVE_ARCH_SWAP_RESTORE
1162	static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
1163	{
1164	}
1165	#endif
1166
/* Generic move_pte(): the PTE value is returned unchanged. */
#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, old_addr, new_addr)	(pte)
#endif

/* Generic pte_accessible(): evaluates @pte for side effects, yields 1. */
#ifndef pte_accessible
# define pte_accessible(mm, pte)	((void)(pte), 1)
#endif

/* Generic spurious-fault fixup: flush the single page via flush_tlb_page(). */
#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address, ptep) flush_tlb_page(vma, address)
#endif
1178
/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier.  Although no
 * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 *
 * Both sides of the comparison are decremented by one so that a boundary
 * (or end) of 0 compares as the largest possible value.
 */

#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})

#ifndef p4d_addr_end
#define p4d_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pud_addr_end
#define pud_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif
1210
1211	/*
1212	* When walking page tables, we usually want to skip any p?d_none entries;
1213	* and any p?d_bad entries - reporting the error before resetting to none.
1214	* Do the tests inline, but report and clear the bad entry in mm/memory.c.
1215	*/
1216	void pgd_clear_bad(pgd_t *);
1217
1218	#ifndef __PAGETABLE_P4D_FOLDED
1219	void p4d_clear_bad(p4d_t *);
1220	#else
1221	#define p4d_clear_bad(p4d) do { } while (0)
1222	#endif
1223
1224	#ifndef __PAGETABLE_PUD_FOLDED
1225	void pud_clear_bad(pud_t *);
1226	#else
1227	#define pud_clear_bad(p4d) do { } while (0)
1228	#endif
1229
1230	void pmd_clear_bad(pmd_t *);
1231
1232	static inline int pgd_none_or_clear_bad(pgd_t *pgd)
1233	{
1234	if (pgd_none(pgd: *pgd))
1235	return `1`;
1236	if (unlikely(pgd_bad(*pgd))) {
1237	pgd_clear_bad(pgd);
1238	return `1`;
1239	}
1240	return `0`;
1241	}
1242
1243	static inline int p4d_none_or_clear_bad(p4d_t *p4d)
1244	{
1245	if (p4d_none(p4d: *p4d))
1246	return `1`;
1247	if (unlikely(p4d_bad(*p4d))) {
1248	p4d_clear_bad(p4d);
1249	return `1`;
1250	}
1251	return `0`;
1252	}
1253
1254	static inline int pud_none_or_clear_bad(pud_t *pud)
1255	{
1256	if (pud_none(pud: *pud))
1257	return `1`;
1258	if (unlikely(pud_bad(*pud))) {
1259	pud_clear_bad(pud);
1260	return `1`;
1261	}
1262	return `0`;
1263	}
1264
1265	static inline int pmd_none_or_clear_bad(pmd_t *pmd)
1266	{
1267	if (pmd_none(pmd: *pmd))
1268	return `1`;
1269	if (unlikely(pmd_bad(*pmd))) {
1270	pmd_clear_bad(pmd);
1271	return `1`;
1272	}
1273	return `0`;
1274	}
1275
1276	static inline pte_t __ptep_modify_prot_start(struct vm_area_struct *vma,
1277	unsigned long addr,
1278	pte_t *ptep)
1279	{
1280	/*
1281	* Get the current pte state, but zero it out to make it
1282	* non-present, preventing the hardware from asynchronously
1283	* updating it.
1284	*/
1285	return ptep_get_and_clear(mm: vma->vm_mm, addr, ptep);
1286	}
1287
1288	static inline void __ptep_modify_prot_commit(struct vm_area_struct *vma,
1289	unsigned long addr,
1290	pte_t *ptep, pte_t pte)
1291	{
1292	/*
1293	* The pte is non-present, so there's no hardware state to
1294	* preserve.
1295	*/
1296	set_pte_at(vma->vm_mm, addr, ptep, pte);
1297	}
1298
1299	#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
1300	/*
1301	* Start a pte protection read-modify-write transaction, which
1302	* protects against asynchronous hardware modifications to the pte.
1303	* The intention is not to prevent the hardware from making pte
1304	* updates, but to prevent any updates it may make from being lost.
1305	*
1306	* This does not protect against other software modifications of the
1307	* pte; the appropriate pte lock must be held over the transaction.
1308	*
1309	* Note that this interface is intended to be batchable, meaning that
1310	* ptep_modify_prot_commit may not actually update the pte, but merely
1311	* queue the update to be done at some later time. The update must be
1312	* actually committed before the pte lock is released, however.
1313	*/
1314	static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
1315	unsigned long addr,
1316	pte_t *ptep)
1317	{
1318	return __ptep_modify_prot_start(vma, addr, ptep);
1319	}
1320
1321	/*
1322	* Commit an update to a pte, leaving any hardware-controlled bits in
1323	* the PTE unmodified.
1324	*/
1325	static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
1326	unsigned long addr,
1327	pte_t *ptep, pte_t old_pte, pte_t pte)
1328	{
1329	__ptep_modify_prot_commit(vma, addr, ptep, pte);
1330	}
1331	#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
1332	#endif /* CONFIG_MMU */
1333
/*
 * No-op macros that just return the current protection value. Defined here
 * because these macros can be used even if CONFIG_MMU is not defined.
 *
 * pgprot_writecombine, pgprot_writethrough and pgprot_device fall back to
 * pgprot_noncached when the architecture does not define them.
 */

#ifndef pgprot_nx
#define pgprot_nx(prot)	(prot)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

#ifndef pgprot_writethrough
#define pgprot_writethrough pgprot_noncached
#endif

#ifndef pgprot_device
#define pgprot_device pgprot_noncached
#endif

#ifndef pgprot_mhp
#define pgprot_mhp(prot)	(prot)
#endif
1362
#ifdef CONFIG_MMU
#ifndef pgprot_modify
#define pgprot_modify pgprot_modify
/*
 * Carry the caching attribute of @oldprot over to @newprot.
 *
 * For each of noncached, writecombine and device: if applying that attribute
 * to @oldprot leaves it unchanged, apply the same attribute to @newprot.
 * Returns the resulting protection value.
 */
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
		newprot = pgprot_noncached(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
		newprot = pgprot_writecombine(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
		newprot = pgprot_device(newprot);
	return newprot;
}
#endif
#endif /* CONFIG_MMU */
1378
/* Fallbacks: without an arch override both return @prot unchanged. */
#ifndef pgprot_encrypted
#define pgprot_encrypted(prot)	(prot)
#endif

#ifndef pgprot_decrypted
#define pgprot_decrypted(prot)	(prot)
#endif
1386
/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests.  By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entry and exits should always be
 * paired.  This is for sanity of maintaining and reasoning about the
 * kernel code.  In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 *
 * The generic fallback is an empty statement.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif
1401
1402	#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
1403	#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
1404	static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
1405	{
1406	return pmd;
1407	}
1408
1409	static inline int pmd_swp_soft_dirty(pmd_t pmd)
1410	{
1411	return `0`;
1412	}
1413
1414	static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
1415	{
1416	return pmd;
1417	}
1418	#endif
1419	#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
1420	static inline int pte_soft_dirty(pte_t pte)
1421	{
1422	return `0`;
1423	}
1424
1425	static inline int pmd_soft_dirty(pmd_t pmd)
1426	{
1427	return `0`;
1428	}
1429
1430	static inline pte_t pte_mksoft_dirty(pte_t pte)
1431	{
1432	return pte;
1433	}
1434
1435	static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
1436	{
1437	return pmd;
1438	}
1439
1440	static inline pte_t pte_clear_soft_dirty(pte_t pte)
1441	{
1442	return pte;
1443	}
1444
1445	static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
1446	{
1447	return pmd;
1448	}
1449
1450	static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
1451	{
1452	return pte;
1453	}
1454
1455	static inline int pte_swp_soft_dirty(pte_t pte)
1456	{
1457	return `0`;
1458	}
1459
1460	static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
1461	{
1462	return pte;
1463	}
1464
1465	static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
1466	{
1467	return pmd;
1468	}
1469
1470	static inline int pmd_swp_soft_dirty(pmd_t pmd)
1471	{
1472	return `0`;
1473	}
1474
1475	static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
1476	{
1477	return pmd;
1478	}
1479	#endif
1480
1481	#ifndef __HAVE_PFNMAP_TRACKING
1482	/*
1483	* Interfaces that can be used by architecture code to keep track of
1484	* memory type of pfn mappings specified by the remap_pfn_range,
1485	* vmf_insert_pfn.
1486	*/
1487
1488	static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
1489	pgprot_t *prot)
1490	{
1491	return `0`;
1492	}
1493
1494	static inline int pfnmap_track(unsigned long pfn, unsigned long size,
1495	pgprot_t *prot)
1496	{
1497	return `0`;
1498	}
1499
1500	static inline void pfnmap_untrack(unsigned long pfn, unsigned long size)
1501	{
1502	}
1503	#else
1504	/**
1505	* pfnmap_setup_cachemode - setup the cachemode in the pgprot for a pfn range
1506	* @pfn: the start of the pfn range
1507	* @size: the size of the pfn range in bytes
1508	* @prot: the pgprot to modify
1509	*
1510	* Lookup the cachemode for the pfn range starting at @pfn with the size
1511	* @size and store it in @prot, leaving other data in @prot unchanged.
1512	*
1513	* This allows for a hardware implementation to have fine-grained control of
1514	* memory cache behavior at page level granularity. Without a hardware
1515	* implementation, this function does nothing.
1516	*
1517	* Currently there is only one implementation for this - x86 Page Attribute
1518	* Table (PAT). See Documentation/arch/x86/pat.rst for more details.
1519	*
1520	* This function can fail if the pfn range spans pfns that require differing
1521	* cachemodes. If the pfn range was previously verified to have a single
1522	* cachemode, it is sufficient to query only a single pfn. The assumption is
1523	* that this is the case for drivers using the vmf_insert_pfn*() interface.
1524	*
1525	* Returns 0 on success and -EINVAL on error.
1526	*/
1527	int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
1528	pgprot_t *prot);
1529
1530	/**
1531	* pfnmap_track - track a pfn range
1532	* @pfn: the start of the pfn range
1533	* @size: the size of the pfn range in bytes
1534	* @prot: the pgprot to track
1535	*
1536	* Requested the pfn range to be 'tracked' by a hardware implementation and
1537	* setup the cachemode in @prot similar to pfnmap_setup_cachemode().
1538	*
1539	* This allows for fine-grained control of memory cache behaviour at page
1540	* level granularity. Tracking memory this way is persisted across VMA splits
1541	* (VMA merging does not apply for VM_PFNMAP).
1542	*
1543	* Currently, there is only one implementation for this - x86 Page Attribute
1544	* Table (PAT). See Documentation/arch/x86/pat.rst for more details.
1545	*
1546	* Returns 0 on success and -EINVAL on error.
1547	*/
1548	int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot);
1549
1550	/**
1551	* pfnmap_untrack - untrack a pfn range
1552	* @pfn: the start of the pfn range
1553	* @size: the size of the pfn range in bytes
1554	*
1555	* Untrack a pfn range previously tracked through pfnmap_track().
1556	*/
1557	void pfnmap_untrack(unsigned long pfn, unsigned long size);
1558	#endif
1559
1560	/**
1561	* pfnmap_setup_cachemode_pfn - setup the cachemode in the pgprot for a pfn
1562	* @pfn: the pfn
1563	* @prot: the pgprot to modify
1564	*
1565	* Lookup the cachemode for @pfn and store it in @prot, leaving other
1566	* data in @prot unchanged.
1567	*
1568	* See pfnmap_setup_cachemode() for details.
1569	*/
1570	static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot)
1571	{
1572	pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot);
1573	}
1574
/*
 * is_zero_pfn() / my_zero_pfn():
 *  - CONFIG_MMU with __HAVE_COLOR_ZERO_PAGE: a pfn matches when its offset
 *    from zero_pfn is within (zero_page_mask >> PAGE_SHIFT); my_zero_pfn()
 *    is derived from ZERO_PAGE(addr).
 *  - CONFIG_MMU otherwise: a pfn matches only when equal to zero_pfn, and
 *    my_zero_pfn() returns zero_pfn regardless of @addr.
 *  - !CONFIG_MMU: both helpers return 0.
 */
#ifdef CONFIG_MMU
#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	/* Unsigned subtraction: pfns below zero_pfn wrap to a large offset. */
	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))

#else
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	return pfn == zero_pfn;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
	extern unsigned long zero_pfn;
	return zero_pfn;
}
#endif
#else
static inline int is_zero_pfn(unsigned long pfn)
{
	return 0;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
	return 0;
}
#endif /* CONFIG_MMU */
1610
1611	#ifdef CONFIG_MMU
1612
1613	#ifndef CONFIG_TRANSPARENT_HUGEPAGE
1614	static inline int pmd_trans_huge(pmd_t pmd)
1615	{
1616	return `0`;
1617	}
1618	#ifndef pmd_write
1619	static inline int pmd_write(pmd_t pmd)
1620	{
1621	BUG();
1622	return `0`;
1623	}
1624	#endif /* pmd_write */
1625	#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1626
1627	#ifndef pud_write
1628	static inline int pud_write(pud_t pud)
1629	{
1630	BUG();
1631	return `0`;
1632	}
1633	#endif /* pud_write */
1634
1635	#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) \|\| !defined(CONFIG_TRANSPARENT_HUGEPAGE)
1636	static inline int pmd_devmap(pmd_t pmd)
1637	{
1638	return `0`;
1639	}
1640	static inline int pud_devmap(pud_t pud)
1641	{
1642	return `0`;
1643	}
1644	static inline int pgd_devmap(pgd_t pgd)
1645	{
1646	return `0`;
1647	}
1648	#endif
1649
1650	#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) \|\| \
1651	!defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
1652	static inline int pud_trans_huge(pud_t pud)
1653	{
1654	return `0`;
1655	}
1656	#endif
1657
1658	static inline int pud_trans_unstable(pud_t *pud)
1659	{
1660	#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
1661	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
1662	pud_t pudval = READ_ONCE(*pud);
1663
1664	if (pud_none(pud: pudval) \|\| pud_trans_huge(pud: pudval) \|\| pud_devmap(pud: pudval))
1665	return `1`;
1666	if (unlikely(pud_bad(pudval))) {
1667	pud_clear_bad(pud);
1668	return `1`;
1669	}
1670	#endif
1671	return `0`;
1672	}
1673
1674	#ifndef CONFIG_NUMA_BALANCING
1675	/*
1676	* In an inaccessible (PROT_NONE) VMA, pte_protnone() may indicate "yes". It is
1677	* perfectly valid to indicate "no" in that case, which is why our default
1678	* implementation defaults to "always no".
1679	*
1680	* In an accessible VMA, however, pte_protnone() reliably indicates PROT_NONE
1681	* page protection due to NUMA hinting. NUMA hinting faults only apply in
1682	* accessible VMAs.
1683	*
1684	* So, to reliably identify PROT_NONE PTEs that require a NUMA hinting fault,
1685	* looking at the VMA accessibility is sufficient.
1686	*/
1687	static inline int pte_protnone(pte_t pte)
1688	{
1689	return `0`;
1690	}
1691
1692	static inline int pmd_protnone(pmd_t pmd)
1693	{
1694	return `0`;
1695	}
1696	#endif /* CONFIG_NUMA_BALANCING */
1697
1698	#endif /* CONFIG_MMU */
1699
1700	#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
1701
1702	#ifndef __PAGETABLE_P4D_FOLDED
1703	int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
1704	void p4d_clear_huge(p4d_t *p4d);
1705	#else
1706	static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
1707	{
1708	return `0`;
1709	}
1710	static inline void p4d_clear_huge(p4d_t *p4d) { }
1711	#endif /* !__PAGETABLE_P4D_FOLDED */
1712
1713	int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
1714	int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
1715	int pud_clear_huge(pud_t *pud);
1716	int pmd_clear_huge(pmd_t *pmd);
1717	int p4d_free_pud_page(p4d_t p4d, unsigned* long addr);
1718	int pud_free_pmd_page(pud_t pud, unsigned* long addr);
1719	int pmd_free_pte_page(pmd_t pmd, unsigned* long addr);
1720	#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
1721	static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
1722	{
1723	return `0`;
1724	}
1725	static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
1726	{
1727	return `0`;
1728	}
1729	static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
1730	{
1731	return `0`;
1732	}
1733	static inline void p4d_clear_huge(p4d_t *p4d) { }
1734	static inline int pud_clear_huge(pud_t *pud)
1735	{
1736	return `0`;
1737	}
1738	static inline int pmd_clear_huge(pmd_t *pmd)
1739	{
1740	return `0`;
1741	}
1742	static inline int p4d_free_pud_page(p4d_t p4d, unsigned* long addr)
1743	{
1744	return `0`;
1745	}
1746	static inline int pud_free_pmd_page(pud_t pud, unsigned* long addr)
1747	{
1748	return `0`;
1749	}
1750	static inline int pmd_free_pte_page(pmd_t pmd, unsigned* long addr)
1751	{
1752	return `0`;
1753	}
1754	#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
1755
#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * ARCHes with special requirements for evicting THP backing TLB entries can
 * implement this. Otherwise also, it can help optimize normal TLB flush in
 * THP regime. Stock flush_tlb_range() typically has optimization to nuke the
 * entire TLB if flush span is greater than a threshold, which will
 * likely be true for a single huge page. Thus a single THP flush will
 * invalidate the entire TLB which is not desirable.
 * e.g. see arch/arc: flush_pmd_tlb_range
 */
#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#else
/* Without THP these must never be used: any use expands to BUILD_BUG(). */
#define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
#define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
#endif
#endif
1774
1775	struct file;
1776	int phys_mem_access_prot_allowed(struct file file, unsigned* long pfn,
1777	unsigned long size, pgprot_t *vma_prot);
1778
#ifndef CONFIG_X86_ESPFIX64
/* No-op stub when CONFIG_X86_ESPFIX64 is not enabled. */
static inline void init_espfix_bsp(void) { }
#endif
1782
1783	extern void __init pgtable_cache_init(void);
1784
1785	#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
1786	static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
1787	{
1788	return true;
1789	}
1790
1791	static inline bool arch_has_pfn_modify_check(void)
1792	{
1793	return false;
1794	}
1795	#endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
1796
/*
 * Architecture PAGE_KERNEL_* fallbacks
 *
 * Some architectures don't define certain PAGE_KERNEL_* flags. This is either
 * because they really don't support them, or the port needs to be updated to
 * reflect the required functionality. Below are a set of relatively safe
 * fallbacks, as best effort, which we can count on in lieu of the architectures
 * not defining them on their own yet.
 *
 * Both fallbacks alias plain PAGE_KERNEL.
 */

#ifndef PAGE_KERNEL_RO
# define PAGE_KERNEL_RO PAGE_KERNEL
#endif

#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif
1814
/*
 * Page Table Modification bits for pgtbl_mod_mask.
 *
 * These are used by the p?d_alloc_track*() set of functions and in the generic
 * vmalloc/ioremap code to track at which page-table levels entries have been
 * modified. Based on that the code can better decide when vmalloc and ioremap
 * mapping changes need to be synchronized to other page-tables in the system.
 *
 * The __PGTBL_* values are bit numbers; the PGTBL_* values are the
 * corresponding single-bit masks.
 */
#define		__PGTBL_PGD_MODIFIED	0
#define		__PGTBL_P4D_MODIFIED	1
#define		__PGTBL_PUD_MODIFIED	2
#define		__PGTBL_PMD_MODIFIED	3
#define		__PGTBL_PTE_MODIFIED	4

#define		PGTBL_PGD_MODIFIED	BIT(__PGTBL_PGD_MODIFIED)
#define		PGTBL_P4D_MODIFIED	BIT(__PGTBL_P4D_MODIFIED)
#define		PGTBL_PUD_MODIFIED	BIT(__PGTBL_PUD_MODIFIED)
#define		PGTBL_PMD_MODIFIED	BIT(__PGTBL_PMD_MODIFIED)
#define		PGTBL_PTE_MODIFIED	BIT(__PGTBL_PTE_MODIFIED)

/* Page-Table Modification Mask */
typedef unsigned int pgtbl_mod_mask;
1837
1838	#endif /* !__ASSEMBLY__ */
1839
/*
 * Default MAX_POSSIBLE_PHYSMEM_BITS for 32-bit builds that do not define it:
 * 32 when phys_addr_t is not 64-bit; a build error when it is, because the
 * architecture must then supply the value itself.
 */
#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
#ifdef CONFIG_PHYS_ADDR_T_64BIT
/*
 * ZSMALLOC needs to know the highest PFN on 32-bit architectures
 * with physical address space extension, but falls back to
 * BITS_PER_LONG otherwise.
 */
#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition
#else
#define MAX_POSSIBLE_PHYSMEM_BITS 32
#endif
#endif
1852
/*
 * Fallbacks: report THP / PUD-level THP availability purely from the
 * corresponding Kconfig symbol via IS_BUILTIN().
 */
#ifndef has_transparent_hugepage
#define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)
#endif

#ifndef has_transparent_pud_hugepage
#define has_transparent_pud_hugepage() IS_BUILTIN(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
#endif
/*
 * On some architectures it depends on the mm if the p4d/pud or pmd
 * layer of the page table hierarchy is folded or not.
 *
 * The generic fallbacks ignore @mm and report the compile-time
 * __PAGETABLE_*_FOLDED setting.
 */
#ifndef mm_p4d_folded
#define mm_p4d_folded(mm)	__is_defined(__PAGETABLE_P4D_FOLDED)
#endif

#ifndef mm_pud_folded
#define mm_pud_folded(mm)	__is_defined(__PAGETABLE_PUD_FOLDED)
#endif

#ifndef mm_pmd_folded
#define mm_pmd_folded(mm)	__is_defined(__PAGETABLE_PMD_FOLDED)
#endif
1875
/*
 * Generic lockless offset helpers: the pointer argument is ignored and the
 * lookup is done on the address of the caller's local copy of the entry.
 */
#ifndef p4d_offset_lockless
#define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address)
#endif
#ifndef pud_offset_lockless
#define pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address)
#endif
#ifndef pmd_offset_lockless
#define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address)
#endif
1885
/*
 * pXd_leaf() is the API to check whether a pgtable entry is a huge page
 * mapping.  It should work globally across all archs, without any
 * dependency on CONFIG_* options.  For architectures that do not support
 * huge mappings on specific levels, below fallbacks will be used.
 *
 * A leaf pgtable entry should always imply the following:
 *
 * - It is a "present" entry.  IOW, before using this API, please check it
 *   with pXd_present() first. NOTE: it may not always mean the "present
 *   bit" is set.  For example, PROT_NONE entries are always "present".
 *
 * - It should _never_ be a swap entry of any type.  Above "present" check
 *   should have guarded this, but let's be crystal clear on this.
 *
 * - It should contain a huge PFN, which points to a huge page larger than
 *   PAGE_SIZE of the platform.  The PFN format isn't important here.
 *
 * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(),
 *   pXd_devmap(), or hugetlb mappings).
 */
#ifndef pgd_leaf
#define pgd_leaf(x)	false
#endif
#ifndef p4d_leaf
#define p4d_leaf(x)	false
#endif
#ifndef pud_leaf
#define pud_leaf(x)	false
#endif
#ifndef pmd_leaf
#define pmd_leaf(x)	false
#endif
1919
/*
 * Fallback leaf sizes per level: the size covered by one entry of that
 * level. __pte_leaf_size() takes an extra first argument that the generic
 * version ignores, forwarding only the second to pte_leaf_size().
 */
#ifndef pgd_leaf_size
#define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)
#endif
#ifndef p4d_leaf_size
#define p4d_leaf_size(x) P4D_SIZE
#endif
#ifndef pud_leaf_size
#define pud_leaf_size(x) PUD_SIZE
#endif
#ifndef pmd_leaf_size
#define pmd_leaf_size(x) PMD_SIZE
#endif
#ifndef __pte_leaf_size
#ifndef pte_leaf_size
#define pte_leaf_size(x) PAGE_SIZE
#endif
#define __pte_leaf_size(x,y) pte_leaf_size(y)
#endif
1938
/*
 * We always define pmd_pfn for all archs as it's used in lots of generic
 * code.  Now it happens too for pud_pfn (and can happen for larger
 * mappings too in the future; we're not there yet).  Instead of defining
 * it for all archs (like pmd_pfn), provide a fallback.
 *
 * Note that returning 0 here means any arch that didn't define this can
 * get severely wrong when it hits a real pud leaf.  It's arch's
 * responsibility to properly define it when a huge pud is possible.
 */
#ifndef pud_pfn
#define pud_pfn(x) 0
#endif
1952
/*
 * Some architectures have MMUs that are configurable or selectable at boot
 * time. These lead to variable PTRS_PER_x. For statically allocated arrays it
 * helps to have a static maximum value.
 *
 * Each fallback equals the matching PTRS_PER_x.
 */

#ifndef MAX_PTRS_PER_PTE
#define MAX_PTRS_PER_PTE PTRS_PER_PTE
#endif

#ifndef MAX_PTRS_PER_PMD
#define MAX_PTRS_PER_PMD PTRS_PER_PMD
#endif

#ifndef MAX_PTRS_PER_PUD
#define MAX_PTRS_PER_PUD PTRS_PER_PUD
#endif

#ifndef MAX_PTRS_PER_P4D
#define MAX_PTRS_PER_P4D PTRS_PER_P4D
#endif
1974
/*
 * Fallbacks for extracting the protection bits of an entry: without an arch
 * implementation each yields a zero-initialised pgprot_t.
 */
#ifndef pte_pgprot
#define pte_pgprot(x)	((pgprot_t) {0})
#endif

#ifndef pmd_pgprot
#define pmd_pgprot(x)	((pgprot_t) {0})
#endif

#ifndef pud_pgprot
#define pud_pgprot(x)	((pgprot_t) {0})
#endif
1986
/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
 * MAP_PRIVATE (with Enhanced PAN supported):
 *								r: (no) no
 *								w: (no) no
 *								x: (yes) yes
 *
 * DECLARE_VM_GET_PAGE_PROT expands to a definition of vm_get_page_prot()
 * that indexes protection_map[] with the VM_READ/VM_WRITE/VM_EXEC/VM_SHARED
 * bits of @vm_flags, followed by its EXPORT_SYMBOL().
 */
#define DECLARE_VM_GET_PAGE_PROT					\
pgprot_t vm_get_page_prot(unsigned long vm_flags)			\
{									\
		return protection_map[vm_flags &			\
			(VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];	\
}									\
EXPORT_SYMBOL(vm_get_page_prot);
2014
2015	#endif /* _LINUX_PGTABLE_H */
2016

/* source code of linux/include/linux/pgtable.h */