File size: 8,905 Bytes
9c6594c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 |
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
cdef class MemoryPool(_Weakrefable):
    """
    Base class for memory allocation.

    Besides tracking its number of allocated bytes, a memory pool also
    takes care of the required 64-byte alignment for Arrow data.
    """

    def __init__(self):
        raise TypeError("Do not call {}'s constructor directly, "
                        "use pyarrow.*_memory_pool instead."
                        .format(self.__class__.__name__))

    cdef void init(self, CMemoryPool* pool):
        # Attach the C pool pointer that all other methods delegate to.
        self.pool = pool

    def release_unused(self):
        """
        Attempt to return to the OS any memory being held onto by the pool.

        This function should not be called except potentially for
        benchmarking or debugging as it could be expensive and detrimental to
        performance.

        This is best effort and may not have any effect on some memory pools
        or in some situations (e.g. fragmentation).
        """
        # Act on the pool wrapped by this instance rather than on the
        # process-global default pool, like every other method here.
        cdef CMemoryPool* pool = self.pool
        with nogil:
            pool.ReleaseUnused()

    def bytes_allocated(self):
        """
        Return the number of bytes that are currently allocated from this
        memory pool.
        """
        return self.pool.bytes_allocated()

    def total_bytes_allocated(self):
        """
        Return the total number of bytes that have been allocated from this
        memory pool.
        """
        return self.pool.total_bytes_allocated()

    def max_memory(self):
        """
        Return the peak memory allocation in this memory pool.

        This can be an approximate number in multi-threaded applications.

        None is returned if the pool implementation doesn't know how to
        compute this number.
        """
        ret = self.pool.max_memory()
        # A negative value from the C pool is mapped to None.
        return ret if ret >= 0 else None

    def num_allocations(self):
        """
        Return the number of allocations or reallocations that were made
        using this memory pool.
        """
        return self.pool.num_allocations()

    def print_stats(self):
        """
        Print statistics about this memory pool.

        The output format is implementation-specific. Not all memory pools
        implement this method.
        """
        with nogil:
            self.pool.PrintStats()

    @property
    def backend_name(self):
        """
        The name of the backend used by this MemoryPool (e.g. "jemalloc").
        """
        return frombytes(self.pool.backend_name())

    def __repr__(self):
        name = f"pyarrow.{self.__class__.__name__}"
        return (f"<{name} "
                f"backend_name={self.backend_name} "
                f"bytes_allocated={self.bytes_allocated()} "
                f"max_memory={self.max_memory()}>")
cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
    # Resolve an optional Python-level pool to its C pointer; passing None
    # selects whatever c_get_memory_pool() returns.
    if memory_pool is not None:
        return memory_pool.pool
    return c_get_memory_pool()
cdef api object box_memory_pool(CMemoryPool *c_pool):
    # Wrap a raw C pool pointer in a MemoryPool object. __new__ is used
    # because MemoryPool.__init__ always raises TypeError.
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    wrapper.init(c_pool)
    return wrapper
cdef class LoggingMemoryPool(MemoryPool):
    """
    MemoryPool subclass that owns a C++ logging memory pool.

    Instances are created through pyarrow.logging_memory_pool; calling the
    constructor directly raises TypeError.
    """
    cdef:
        unique_ptr[CLoggingMemoryPool] logging_pool

    def __init__(self):
        cls_name = self.__class__.__name__
        raise TypeError("Do not call {}'s constructor directly, "
                        "use pyarrow.logging_memory_pool instead."
                        .format(cls_name))
cdef class ProxyMemoryPool(MemoryPool):
    """
    Memory pool implementation that redirects to another memory pool
    while tracking, for its own direct calls, the number of bytes and the
    maximum memory allocated.

    Instances are created through pyarrow.proxy_memory_pool; calling the
    constructor directly raises TypeError.
    """
    cdef:
        unique_ptr[CProxyMemoryPool] proxy_pool

    def __init__(self):
        cls_name = self.__class__.__name__
        raise TypeError("Do not call {}'s constructor directly, "
                        "use pyarrow.proxy_memory_pool instead."
                        .format(cls_name))
def default_memory_pool():
    """
    Return a MemoryPool wrapping the process-global memory pool.

    Examples
    --------
    >>> default_memory_pool()
    <pyarrow.MemoryPool backend_name=... bytes_allocated=0 max_memory=...>
    """
    # __new__ bypasses MemoryPool.__init__, which always raises.
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    wrapper.init(c_get_memory_pool())
    return wrapper
def proxy_memory_pool(MemoryPool parent not None):
    """
    Create and return a MemoryPool instance that redirects to the
    *parent*, but with separate allocation statistics.

    Parameters
    ----------
    parent : MemoryPool
        The real memory pool that should be used for allocations.
        Must not be None.

    Returns
    -------
    ProxyMemoryPool

    Raises
    ------
    TypeError
        If *parent* is None or not a MemoryPool.
    """
    # ``not None`` rejects None up front: a typed extension-type argument
    # otherwise accepts None, and reading ``parent.pool`` from it is unsafe.
    cdef ProxyMemoryPool out = ProxyMemoryPool.__new__(ProxyMemoryPool)
    # NOTE(review): only parent's raw C pointer is handed to the proxy and
    # nothing here keeps ``parent`` alive -- confirm callers retain it.
    out.proxy_pool.reset(new CProxyMemoryPool(parent.pool))
    out.init(out.proxy_pool.get())
    return out
def logging_memory_pool(MemoryPool parent not None):
    """
    Create and return a MemoryPool instance that redirects to the
    *parent*, but also dumps allocation logs on stderr.

    Parameters
    ----------
    parent : MemoryPool
        The real memory pool that should be used for allocations.
        Must not be None.

    Returns
    -------
    LoggingMemoryPool

    Raises
    ------
    TypeError
        If *parent* is None or not a MemoryPool.
    """
    # ``not None`` rejects None up front: a typed extension-type argument
    # otherwise accepts None, and reading ``parent.pool`` from it is unsafe.
    # __new__ takes no extra argument, matching proxy_memory_pool.
    cdef LoggingMemoryPool out = LoggingMemoryPool.__new__(LoggingMemoryPool)
    # NOTE(review): only parent's raw C pointer is handed to the logging
    # pool and nothing here keeps ``parent`` alive -- confirm callers
    # retain it.
    out.logging_pool.reset(new CLoggingMemoryPool(parent.pool))
    out.init(out.logging_pool.get())
    return out
def system_memory_pool():
    """
    Return a MemoryPool backed by the C malloc heap.
    """
    # __new__ bypasses MemoryPool.__init__, which always raises.
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    wrapper.init(c_system_memory_pool())
    return wrapper
def jemalloc_memory_pool():
    """
    Return a MemoryPool backed by the jemalloc heap.

    NotImplementedError is raised if jemalloc support is not enabled.
    """
    cdef CMemoryPool* raw_pool
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    # check_status receives the status of the lookup; raw_pool is only
    # used after it returns.
    check_status(c_jemalloc_memory_pool(&raw_pool))
    wrapper.init(raw_pool)
    return wrapper
def mimalloc_memory_pool():
    """
    Return a MemoryPool backed by the mimalloc heap.

    NotImplementedError is raised if mimalloc support is not enabled.
    """
    cdef CMemoryPool* raw_pool
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    # check_status receives the status of the lookup; raw_pool is only
    # used after it returns.
    check_status(c_mimalloc_memory_pool(&raw_pool))
    wrapper.init(raw_pool)
    return wrapper
def set_memory_pool(MemoryPool pool not None):
    """
    Set the default memory pool.

    Parameters
    ----------
    pool : MemoryPool
        The memory pool that should be used by default. Must not be None.

    Raises
    ------
    TypeError
        If *pool* is None or not a MemoryPool.
    """
    # ``not None`` rejects None up front: a typed extension-type argument
    # otherwise accepts None, and reading ``pool.pool`` from it is unsafe.
    c_set_default_memory_pool(pool.pool)
# Module-level pools, created once when this module is initialised:
# a wrapper around the default pool, and a logging pool layered on top of it.
# log_memory_allocations() switches the default pool between the two.
cdef MemoryPool _default_memory_pool = default_memory_pool()
cdef LoggingMemoryPool _logging_memory_pool = logging_memory_pool(
    _default_memory_pool)
def log_memory_allocations(enable=True):
    """
    Turn memory allocator logging on or off, for debugging purposes.

    Parameters
    ----------
    enable : bool, default True
        Pass False to disable logging
    """
    # Install the module-level logging pool when enabled, otherwise the
    # module-level default pool.
    set_memory_pool(
        _logging_memory_pool if enable else _default_memory_pool)
def total_allocated_bytes():
    """
    Return the number of bytes currently allocated from the default
    memory pool.

    Other memory pools may not be accounted for.
    """
    return c_get_memory_pool().bytes_allocated()
def jemalloc_set_decay_ms(decay_ms):
    """
    Set arenas.dirty_decay_ms and arenas.muzzy_decay_ms to indicated number of
    milliseconds. A value of 0 (the default) results in dirty / muzzy memory
    pages being released right away to the OS, while a higher value will result
    in a time-based decay. See the jemalloc docs for more information.

    It's best to set this at the start of your application.

    Parameters
    ----------
    decay_ms : int
        Number of milliseconds to set for jemalloc decay conf parameters. Note
        that this change will only affect future memory arenas.
    """
    # The status returned by the C call is handed to check_status.
    check_status(c_jemalloc_set_decay_ms(decay_ms))
def supported_memory_backends():
    """
    Return the names of the available memory pool backends as a list of str.
    """
    cdef vector[c_string] backends = c_supported_memory_backends()
    names = []
    for backend in backends:
        names.append(backend.decode())
    return names
|