Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 382b3ff

Browse filesBrowse files
committed
ENH: Ensure hugepages are also indicated for calloc allocations
On Linux we madvise hugepages for large malloc allocations but fail to do so for calloc allocations, even though it makes just as much sense there. This commit aligns the two code paths.
1 parent fc7cc1e commit 382b3ff
Copy full SHA for 382b3ff

File tree

2 files changed

+27
-13
lines changed
Filter options

2 files changed

+27
-13
lines changed
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
* NumPy now indicates hugepages also for large ``np.zeros`` allocations
2+
on Linux. This should generally improve performance.

‎numpy/_core/src/multiarray/alloc.c

Copy file name to clipboardExpand all lines: numpy/_core/src/multiarray/alloc.c
+25-13Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,24 @@ _set_madvise_hugepage(PyObject *NPY_UNUSED(self), PyObject *enabled_obj)
8080
}
8181

8282

83+
/*
 * Hint to the kernel that a large allocation may be backed by huge pages.
 *
 * p    : start of the allocation. This function does not check `p` for
 *        NULL.
 * size : size of the allocation in bytes.
 *
 * Does nothing unless compiled with NPY_OS_LINUX defined, `size` is at
 * least 4 MiB (1 << 22), and `npy_thread_unsafe_state.madvise_hugepage`
 * is set.
 */
NPY_FINLINE void
indicate_hugepages(void *p, size_t size) {
#ifdef NPY_OS_LINUX
    /* allow kernel allocating huge pages for large arrays */
    if (NPY_UNLIKELY(size >= ((1u<<22u))) &&
            npy_thread_unsafe_state.madvise_hugepage) {
        /*
         * madvise wants a page-aligned start address, so round `p` up to
         * the next 4096-byte boundary.  The trailing `% 4096u` keeps the
         * offset at 0 for an already aligned pointer instead of skipping a
         * whole page that could have been covered by the hint.
         */
        npy_uintp offset = (4096u - (npy_uintp)p % 4096u) % 4096u;
        /* offset < 4096 <= size, so this subtraction cannot wrap */
        npy_uintp length = size - offset;
        /**
         * Intentionally not checking for errors that may be returned by
         * older kernel versions; optimistically tries enabling huge pages.
         */
        madvise((void*)((npy_uintp)p + offset), length, MADV_HUGEPAGE);
    }
#endif
}
99+
100+
83101
/* as the cache is managed in global variables verify the GIL is held */
84102

85103
/*
@@ -108,19 +126,7 @@ _npy_alloc_cache(npy_uintp nelem, npy_uintp esz, npy_uint msz,
108126
#ifdef _PyPyGC_AddMemoryPressure
109127
_PyPyPyGC_AddMemoryPressure(nelem * esz);
110128
#endif
111-
#ifdef NPY_OS_LINUX
112-
/* allow kernel allocating huge pages for large arrays */
113-
if (NPY_UNLIKELY(nelem * esz >= ((1u<<22u))) &&
114-
npy_thread_unsafe_state.madvise_hugepage) {
115-
npy_uintp offset = 4096u - (npy_uintp)p % (4096u);
116-
npy_uintp length = nelem * esz - offset;
117-
/**
118-
* Intentionally not checking for errors that may be returned by
119-
* older kernel versions; optimistically tries enabling huge pages.
120-
*/
121-
madvise((void*)((npy_uintp)p + offset), length, MADV_HUGEPAGE);
122-
}
123-
#endif
129+
indicate_hugepages(p, nelem * esz);
124130
}
125131
return p;
126132
}
@@ -172,6 +178,9 @@ npy_alloc_cache_zero(size_t nmemb, size_t size)
172178
NPY_BEGIN_THREADS;
173179
p = PyDataMem_NEW_ZEROED(nmemb, size);
174180
NPY_END_THREADS;
181+
if (p) {
182+
indicate_hugepages(p, sz);
183+
}
175184
return p;
176185
}
177186

@@ -319,6 +328,9 @@ default_calloc(void *NPY_UNUSED(ctx), size_t nelem, size_t elsize)
319328
}
320329
NPY_BEGIN_THREADS;
321330
p = calloc(nelem, elsize);
331+
if (p) {
332+
indicate_hugepages(p, sz);
333+
}
322334
NPY_END_THREADS;
323335
return p;
324336
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.