Skip to content

Commit bf50068

Browse files
authored
in Lexbor use python's heap instead of C's heaps (#205)
* in lexbor use python's heap instead of C's heap as an extra performance enhancement * Remove bytearray and memoryview typehints from constructor * fix linting
1 parent 3ce9450 commit bf50068

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

selectolax/lexbor.pxd

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from libc.stdint cimport uint8_t, uint32_t, uintptr_t
22

3-
43
cdef extern from "lexbor/core/core.h" nogil:
54
ctypedef uint32_t lxb_codepoint_t
65
ctypedef unsigned char lxb_char_t
@@ -32,6 +31,19 @@ cdef extern from "lexbor/core/core.h" nogil:
3231
lexbor_str_t* lexbor_str_create()
3332
lxb_char_t * lexbor_str_data_noi(lexbor_str_t *str)
3433

34+
cdef extern from "lexbor/core/lexbor.h" nogil:
35+
ctypedef void *(*lexbor_memory_malloc_f)(size_t size) nogil
36+
ctypedef void *(*lexbor_memory_realloc_f)(void *dst, size_t size) nogil
37+
ctypedef void *(*lexbor_memory_calloc_f)(size_t num, size_t size) nogil
38+
ctypedef void (*lexbor_memory_free_f)(void *dst) nogil
39+
lxb_status_t lexbor_memory_setup(
40+
lexbor_memory_malloc_f new_malloc,
41+
lexbor_memory_realloc_f new_realloc,
42+
lexbor_memory_calloc_f new_calloc,
43+
lexbor_memory_free_f new_free
44+
)
45+
46+
3547
cdef extern from "lexbor/html/html.h" nogil:
3648
ctypedef unsigned int lxb_html_document_opt_t
3749

selectolax/lexbor.pyx

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
from cpython.bool cimport bool
2-
2+
from cpython.exc cimport PyErr_SetObject
3+
from cpython.mem cimport (
4+
PyMem_RawCalloc,
5+
PyMem_RawFree,
6+
PyMem_RawMalloc,
7+
PyMem_RawRealloc
8+
)
39
_ENCODING = 'UTF-8'
410

511
include "base.pxi"
@@ -47,6 +53,7 @@ cdef class LexborHTMLParser:
4753
"""
4854
cdef size_t html_len
4955
cdef object bytes_html
56+
5057
self._is_fragment = is_fragment
5158
self._fragment_document = NULL
5259
self._selector = None
@@ -752,3 +759,17 @@ cdef class LexborHTMLParser:
752759
dom_node = <lxb_dom_node_t *> element
753760

754761
return LexborNode.new(dom_node, self)
762+
763+
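# Route lexbor's allocations through Python's raw memory domain
764+
# (PyMem_Raw*) rather than calling the C heap directly, so that Python's
765+
# allocator hooks (e.g. tracemalloc, PYTHONMALLOC) also cover lexbor.
766+
# NOTE(review): PyMem_Raw* memory is not managed by the garbage collector and defaults to the system malloc; any mimalloc gain depends on the build -- confirm.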
767+
if lexbor_memory_setup(
768+
PyMem_RawMalloc,
769+
PyMem_RawRealloc,
770+
PyMem_RawCalloc,
771+
PyMem_RawFree
772+
) != LXB_STATUS_OK:
773+
# This should almost never fail, given the code in both the Windows and POSIX versions,
774+
# but if it ever does, this exception should be raised on import...
775+
raise SelectolaxError("Can't initalize allocators from lexbor_memory_setup(...)")

0 commit comments

Comments
 (0)