/* srpmalloc.c - Small rpmalloc - Public Domain - 2021 Eduardo Bart (https://github.com/edubart)
* rpmalloc.c - Memory allocator - Public Domain - 2016-2020 Mattias Jansson
 * This library is a fork of rpmalloc intended for single-threaded applications
 * and old C compilers.
*
* This library provides a cross-platform malloc implementation in C99.
* The latest source code is always available at
*
* https://github.com/edubart/srpmalloc
*
* This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
*/
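//
// Minimal usage sketch (assuming the rp* API that srpmalloc inherits from rpmalloc -
// rpmalloc_initialize, rpmalloc, rpfree, rpmalloc_finalize; see srpmalloc.h for the
// exact declarations):
//
//   #include "srpmalloc.h"
//
//   int main(void) {
//       rpmalloc_initialize();      // set up page size, size classes and the main heap
//       void* p = rpmalloc(128);    // served from a small size class span
//       rpfree(p);                  // block returns to the owning span's free list
//       rpmalloc_finalize();        // release all heaps and cached spans
//       return 0;
//   }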
#include "srpmalloc.h"
////////////
///
/// Build time configurable limits
///
//////
#ifndef HEAP_ARRAY_SIZE
//! Size of heap hashmap
#define HEAP_ARRAY_SIZE 47
#endif
#ifndef ENABLE_ASSERTS
//! Enable asserts
#define ENABLE_ASSERTS 0
#endif
#ifndef DEFAULT_SPAN_MAP_COUNT
//! Default number of spans to map in a call to map more virtual memory (default values yield 4MiB here)
#define DEFAULT_SPAN_MAP_COUNT 64
#endif
#ifndef GLOBAL_CACHE_MULTIPLIER
//! Multiplier for global cache
#define GLOBAL_CACHE_MULTIPLIER 8
#endif
#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
# define PLATFORM_WINDOWS 1
# define PLATFORM_POSIX 0
#else
# define PLATFORM_WINDOWS 0
# define PLATFORM_POSIX 1
#endif
/// Platform and arch specifics
#if PLATFORM_WINDOWS
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif
# include <windows.h>
#else
# include <unistd.h>
# include <stdio.h>
# include <stdlib.h>
# include <time.h>
# if defined(__APPLE__)
# include <TargetConditionals.h>
# if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
# include <mach/mach_vm.h>
# include <mach/vm_statistics.h>
# endif
# endif
#endif
#include <stdint.h>
#include <string.h>
#include <errno.h>
#if PLATFORM_POSIX
# include <sys/mman.h>
# include <sched.h>
# ifndef MAP_UNINITIALIZED
# define MAP_UNINITIALIZED 0
# endif
#endif
#include <errno.h>
#if ENABLE_ASSERTS
# undef NDEBUG
# if defined(_MSC_VER) && !defined(_DEBUG)
# define _DEBUG
# endif
# include <assert.h>
#define RPMALLOC_TOSTRING_M(x) #x
#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x)
#define rpmalloc_assert(truth, message) \
do { \
if (!(truth)) { \
if (_memory_config.error_callback) { \
_memory_config.error_callback( \
message " (" RPMALLOC_TOSTRING(truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \
} else { \
assert((truth) && message); \
} \
} \
} while (0)
#else
# define rpmalloc_assert(truth, message) do {} while(0)
#endif
#if defined(__GNUC__)
#define EXPECTED(x) __builtin_expect((x), 1)
#define UNEXPECTED(x) __builtin_expect((x), 0)
#else
#define EXPECTED(x) (x)
#define UNEXPECTED(x) (x)
#endif
///
/// Preconfigured limits and sizes
///
//! Granularity of a small allocation block (must be power of two)
#define SMALL_GRANULARITY 16
//! Small granularity shift count
#define SMALL_GRANULARITY_SHIFT 4
//! Number of small block size classes
#define SMALL_CLASS_COUNT 65
//! Maximum size of a small block
#define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
//! Granularity of a medium allocation block
#define MEDIUM_GRANULARITY 512
//! Medium granularity shift count
#define MEDIUM_GRANULARITY_SHIFT 9
//! Number of medium block size classes
#define MEDIUM_CLASS_COUNT 61
//! Total number of small + medium size classes
#define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
//! Number of large block size classes
#define LARGE_CLASS_COUNT 63
//! Maximum size of a medium block
#define MEDIUM_SIZE_LIMIT (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
//! Maximum size of a large block
#define LARGE_SIZE_LIMIT ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two)
#define SPAN_HEADER_SIZE 128
//! Number of spans in thread cache
#define MAX_THREAD_SPAN_CACHE 400
//! Number of spans to transfer between thread and global cache
#define THREAD_SPAN_CACHE_TRANSFER 64
//! Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2)
#define MAX_THREAD_SPAN_LARGE_CACHE 100
//! Number of spans to transfer between thread and global cache for large spans
#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6
#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
#define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
#define SIZE_CLASS_HUGE ((uint32_t)-1)
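// With the defaults above and 64KiB spans, allocation requests are routed roughly as follows:
//   up to SMALL_SIZE_LIMIT  (16 * 64 = 1024 bytes)          -> small size classes, 16 byte granularity
//   up to MEDIUM_SIZE_LIMIT (1024 + 512 * 61 = 32256 bytes) -> medium size classes, 512 byte granularity
//                                                              (the run-time _memory_medium_size_limit may cap this lower)
//   up to LARGE_SIZE_LIMIT  (63 spans minus the span header) -> large allocations, using whole spans
//   anything larger                                          -> huge allocations (SIZE_CLASS_HUGE), mapped directly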
////////////
///
/// Data types
///
//////
//! A memory heap, per thread
typedef struct heap_t heap_t;
//! Span of memory pages
typedef struct span_t span_t;
//! Span list
typedef struct span_list_t span_list_t;
//! Span active data
typedef struct span_active_t span_active_t;
//! Size class definition
typedef struct size_class_t size_class_t;
//! Global cache
typedef struct global_cache_t global_cache_t;
//! Flag indicating span is the first (master) span of a split superspan
#define SPAN_FLAG_MASTER 1U
//! Flag indicating span is a secondary (sub) span of a split superspan
#define SPAN_FLAG_SUBSPAN 2U
//! Flag indicating span has blocks with increased alignment
#define SPAN_FLAG_ALIGNED_BLOCKS 4U
//! Flag indicating an unmapped master span
#define SPAN_FLAG_UNMAPPED_MASTER 8U
// A span can either represent a single span of memory pages, with size declared by the span_map_count configuration
// variable, or a set of spans in a contiguous region, a super span. Any reference to the term "span" usually refers to
// either a single span or a super span. A super span can further be divided into multiple spans (or, again, super spans),
// where the first (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how
// many subspans are still alive and mapped in virtual memory; once all subspans and the master have been unmapped, the
// entire superspan region is released and unmapped (on Windows, for example, the entire superspan range has to be released
// in the same call that releases the virtual memory range, but individual subranges can be decommitted individually to
// reduce physical memory use).
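// Layout of one mapped superspan region (illustration with four spans):
//
//   [ master span | subspan | subspan | subspan ]
//     master:   flags = SPAN_FLAG_MASTER, total_spans = 4, remaining_spans = 4
//     subspans: flags = SPAN_FLAG_SUBSPAN, offset_from_master = 1, 2, 3 (in span units)
//
// Each time a (sub)span is unmapped the master's remaining_spans is decremented by that span's span_count;
// when it reaches zero the entire region is released in a single unmap call.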
struct span_t {
//! Free list
void* free_list;
//! Total block count of size class
uint32_t block_count;
//! Size class
uint32_t size_class;
//! Index of last block initialized in free list
uint32_t free_list_limit;
//! Number of used blocks remaining when in partial state
uint32_t used_count;
//! Deferred free list
void* free_list_deferred;
//! Size of deferred free list, or list of spans when part of a cache list
uint32_t list_size;
//! Size of a block
uint32_t block_size;
//! Flags and counters
uint32_t flags;
//! Number of spans
uint32_t span_count;
//! Total span counter for master spans
uint32_t total_spans;
//! Offset from master span for subspans
uint32_t offset_from_master;
//! Remaining span counter, for master spans
int32_t remaining_spans;
//! Alignment offset
uint32_t align_offset;
//! Owning heap
heap_t* heap;
//! Next span
span_t* next;
//! Previous span
span_t* prev;
};
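//! Per-heap cache of fully freed single-span spans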
struct span_cache_t {
size_t count;
span_t* span[MAX_THREAD_SPAN_CACHE];
};
typedef struct span_cache_t span_cache_t;
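//! Per-heap cache of fully freed large spans (span count greater than one)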
struct span_large_cache_t {
size_t count;
span_t* span[MAX_THREAD_SPAN_LARGE_CACHE];
};
typedef struct span_large_cache_t span_large_cache_t;
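//! Per size class state of a heap (active free list, partial spans and a one-span cache)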
struct heap_size_class_t {
//! Free list of active span
void* free_list;
//! Double linked list of partially used spans with free blocks.
// Previous span pointer in head points to tail span of list.
span_t* partial_span;
//! Early level cache of fully free spans
span_t* cache;
};
typedef struct heap_size_class_t heap_size_class_t;
// Control structure for a heap, either a thread heap or a first class heap if enabled
struct heap_t {
//! Owning thread ID
uintptr_t owner_thread;
//! Free lists for each size class
heap_size_class_t size_class[SIZE_CLASS_COUNT];
//! Arrays of fully freed spans, single span
span_cache_t span_cache;
//! List of deferred free spans (single linked list)
void* span_free_deferred;
//! Number of full spans
size_t full_span_count;
//! Mapped but unused spans
span_t* span_reserve;
//! Master span for mapped but unused spans
span_t* span_reserve_master;
//! Number of mapped but unused spans
uint32_t spans_reserved;
//! Child count
int32_t child_count;
//! Next heap in id list
heap_t* next_heap;
//! Next heap in orphan list
heap_t* next_orphan;
//! Heap ID
int32_t id;
//! Finalization state flag
int finalize;
//! Master heap owning the memory pages
heap_t* master_heap;
//! Arrays of fully freed spans, large spans with > 1 span count
span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1];
};
// Size class for defining a block size bucket
struct size_class_t {
//! Size of blocks in this class
uint32_t block_size;
//! Number of blocks in each chunk
uint16_t block_count;
//! Class index this class is merged with
uint16_t class_idx;
};
struct global_cache_t {
//! Cache lock
int32_t lock;
//! Cache count
uint32_t count;
//! Cached spans
span_t* span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE];
//! Unlimited cache overflow
span_t* overflow;
};
////////////
///
/// Global data
///
//////
//! Default span size (64KiB)
#define _memory_default_span_size (64 * 1024)
#define _memory_default_span_size_shift 16
#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1)))
//! Initialized flag
static int _rpmalloc_initialized;
//! Configuration
static rpmalloc_config_t _memory_config;
//! Memory page size
static size_t _memory_page_size;
//! Shift to divide by page size
static size_t _memory_page_size_shift;
//! Granularity at which memory pages are mapped by OS
static size_t _memory_map_granularity;
//! Hardwired span size
#define _memory_span_size _memory_default_span_size
#define _memory_span_size_shift _memory_default_span_size_shift
#define _memory_span_mask _memory_default_span_mask
//! Number of spans to map in each map call
static size_t _memory_span_map_count;
//! Number of spans to keep reserved in each heap
static size_t _memory_heap_reserve_count;
//! Global size classes
static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
//! Run-time size limit of medium blocks
static size_t _memory_medium_size_limit;
//! Heap ID counter
static int32_t _memory_heap_id;
//! Global reserved spans
static span_t* _memory_global_reserve;
//! Global reserved count
static size_t _memory_global_reserve_count;
//! Global reserved master
static span_t* _memory_global_reserve_master;
//! All heaps
static heap_t* _memory_heaps[HEAP_ARRAY_SIZE];
//! Orphaned heaps
static heap_t* _memory_orphan_heaps;
////////////
///
/// Thread local heap and ID
///
//////
//! Current thread heap
static heap_t* _memory_thread_heap;
//! Set the current thread heap
static void
set_thread_heap(heap_t* heap) {
_memory_thread_heap = heap;
if (heap)
heap->owner_thread = 0;
}
////////////
///
/// Low level memory map/unmap
///
//////
//! Map more virtual memory
// size is number of bytes to map
// offset receives the offset in bytes from start of mapped region
// returns address to start of mapped region to use
static void*
_rpmalloc_mmap(size_t size, size_t* offset) {
rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size");
rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
return _memory_config.memory_map(size, offset);
}
//! Unmap virtual memory
// address is the memory address to unmap, as returned from _rpmalloc_mmap
// size is the number of bytes to unmap, which might be less than the full region for a partial unmap
// offset is the offset in bytes to the actual mapped region, as set by _rpmalloc_mmap
// release is set to 0 for partial unmap, or size of entire range for a full unmap
static void
_rpmalloc_unmap(void* address, size_t size, size_t offset, size_t release) {
rpmalloc_assert(!release || (release >= size), "Invalid unmap size");
rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size");
if (release) {
rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size");
}
_memory_config.memory_unmap(address, size, offset, release);
}
//! Default implementation to map new pages to virtual memory
static void*
_rpmalloc_mmap_os(size_t size, size_t* offset) {
//Either size is a heap (a single page) or a (multiple) span - we only need to align spans, and only if larger than map granularity
size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0;
rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size");
#if PLATFORM_WINDOWS
//Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed"
void* ptr = VirtualAlloc(0, size + padding, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
if (!ptr) {
if (_memory_config.map_fail_callback) {
if (_memory_config.map_fail_callback(size + padding))
return _rpmalloc_mmap_os(size, offset);
} else {
rpmalloc_assert(ptr, "Failed to map virtual memory block");
}
return 0;
}
#else
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
# if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
int fd = (int)VM_MAKE_TAG(240U);
void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
# else
void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
# endif
if ((ptr == MAP_FAILED) || !ptr) {
if (_memory_config.map_fail_callback) {
if (_memory_config.map_fail_callback(size + padding))
return _rpmalloc_mmap_os(size, offset);
} else if (errno != ENOMEM) {
rpmalloc_assert((ptr != MAP_FAILED) && ptr, "Failed to map virtual memory block");
}
return 0;
}
#endif
if (padding) {
size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
rpmalloc_assert(final_padding <= _memory_span_size, "Internal failure in padding");
rpmalloc_assert(final_padding <= padding, "Internal failure in padding");
rpmalloc_assert(!(final_padding % 8), "Internal failure in padding");
ptr = pointer_offset(ptr, final_padding);
*offset = final_padding >> 3;
}
rpmalloc_assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask), "Internal failure in padding");
return ptr;
}
//! Default implementation to unmap pages from virtual memory
static void
_rpmalloc_unmap_os(void* address, size_t size, size_t offset, size_t release) {
rpmalloc_assert(release || (offset == 0), "Invalid unmap size");
rpmalloc_assert(!release || (release >= _memory_page_size), "Invalid unmap size");
rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size");
if (release && offset) {
offset <<= 3;
address = pointer_offset(address, -(int32_t)offset);
if ((release >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) {
//Padding is always one span size
release += _memory_span_size;
}
}
#if PLATFORM_WINDOWS
if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) {
rpmalloc_assert(0, "Failed to unmap virtual memory block");
}
#else
if (release) {
if (munmap(address, release)) {
rpmalloc_assert(0, "Failed to unmap virtual memory block");
}
} else {
#if defined(MADV_FREE_REUSABLE)
int ret;
while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && (errno == EAGAIN))
errno = 0;
if ((ret == -1) && (errno != 0)) {
#elif defined(MADV_DONTNEED)
if (madvise(address, size, MADV_DONTNEED)) {
#elif defined(MADV_PAGEOUT)
if (madvise(address, size, MADV_PAGEOUT)) {
#elif defined(MADV_FREE)
if (madvise(address, size, MADV_FREE)) {
#else
if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
#endif
rpmalloc_assert(0, "Failed to madvise virtual memory block as free");
}
}
#endif
}
static void
_rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count);
//! Use global reserved spans to fulfill a memory map request (reserve size must be checked by caller)
static span_t*
_rpmalloc_global_get_reserved_spans(size_t span_count) {
span_t* span = _memory_global_reserve;
_rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, span, span_count);
_memory_global_reserve_count -= span_count;
if (_memory_global_reserve_count)
_memory_global_reserve = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift);
else
_memory_global_reserve = 0;
return span;
}
//! Store the given spans as global reserve (must only be called from within new heap allocation, not thread safe)
static void
_rpmalloc_global_set_reserved_spans(span_t* master, span_t* reserve, size_t reserve_span_count) {
_memory_global_reserve_master = master;
_memory_global_reserve_count = reserve_span_count;
_memory_global_reserve = reserve;
}
////////////
///
/// Span linked list management
///
//////
//! Add a span to double linked list at the head
static void
_rpmalloc_span_double_link_list_add(span_t** head, span_t* span) {
if (*head)
(*head)->prev = span;
span->next = *head;
*head = span;
}
//! Pop head span from double linked list
static void
_rpmalloc_span_double_link_list_pop_head(span_t** head, span_t* span) {
rpmalloc_assert(*head == span, "Linked list corrupted");
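// span equals *head here; the assignment below also keeps the parameter referenced when asserts are compiled out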
span = *head;
*head = span->next;
}
//! Remove a span from double linked list
static void
_rpmalloc_span_double_link_list_remove(span_t** head, span_t* span) {
rpmalloc_assert(*head, "Linked list corrupted");
if (*head == span) {
*head = span->next;
} else {
span_t* next_span = span->next;
span_t* prev_span = span->prev;
prev_span->next = next_span;
if (EXPECTED(next_span != 0))
next_span->prev = prev_span;
}
}
////////////
///
/// Span control
///
//////
static void
_rpmalloc_heap_cache_insert(heap_t* heap, span_t* span);
static void
_rpmalloc_heap_finalize(heap_t* heap);
static void
_rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count);
//! Declare the span to be a subspan and store distance from master span and span count
static void
_rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count) {
rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER), "Span master pointer and/or flag mismatch");
if (subspan != master) {
subspan->flags = SPAN_FLAG_SUBSPAN;
subspan->offset_from_master = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift);
subspan->align_offset = 0;
}
subspan->span_count = (uint32_t)span_count;
}
//! Use reserved spans to fulfill a memory map request (reserve size must be checked by caller)
static span_t*
_rpmalloc_span_map_from_reserve(heap_t* heap, size_t span_count) {
//Update the heap span reserve
span_t* span = heap->span_reserve;
heap->span_reserve = (span_t*)pointer_offset(span, span_count * _memory_span_size);
heap->spans_reserved -= (uint32_t)span_count;
_rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count);
return span;
}
//! Get the aligned number of spans to map in based on wanted count, configured mapping granularity and the page size
static size_t
_rpmalloc_span_align_count(size_t span_count) {
size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count;
if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size))
request_count += _memory_span_map_count - (request_count % _memory_span_map_count);
return request_count;
}
//! Setup a newly mapped span
static void
_rpmalloc_span_initialize(span_t* span, size_t total_span_count, size_t span_count, size_t align_offset) {
span->total_spans = (uint32_t)total_span_count;
span->span_count = (uint32_t)span_count;
span->align_offset = (uint32_t)align_offset;
span->flags = SPAN_FLAG_MASTER;
span->remaining_spans = (int32_t)total_span_count;
}
static void
_rpmalloc_span_unmap(span_t* span);
//! Map an aligned set of spans, taking configured mapping granularity and the page size into account
static span_t*
_rpmalloc_span_map_aligned_count(heap_t* heap, size_t span_count) {
//If we already have some, but not enough, reserved spans, release those to heap cache and map a new
//full set of spans. Otherwise we would waste memory if page size > span size (huge pages)
size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
size_t align_offset = 0;
span_t* span = (span_t*)_rpmalloc_mmap(aligned_span_count * _memory_span_size, &align_offset);
if (!span)
return 0;
_rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
if (aligned_span_count > span_count) {
span_t* reserved_spans = (span_t*)pointer_offset(span, span_count * _memory_span_size);
size_t reserved_count = aligned_span_count - span_count;
if (heap->spans_reserved) {
_rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
_rpmalloc_heap_cache_insert(heap, heap->span_reserve);
}
if (reserved_count > _memory_heap_reserve_count) {
size_t remain_count = reserved_count - _memory_heap_reserve_count;
reserved_count = _memory_heap_reserve_count;
span_t* remain_span = (span_t*)pointer_offset(reserved_spans, reserved_count * _memory_span_size);
if (_memory_global_reserve) {
_rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, _memory_global_reserve, _memory_global_reserve_count);
_rpmalloc_span_unmap(_memory_global_reserve);
}
_rpmalloc_global_set_reserved_spans(span, remain_span, remain_count);
}
_rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count);
}
return span;
}
//! Map in memory pages for the given number of spans (or use previously reserved pages)
static span_t*
_rpmalloc_span_map(heap_t* heap, size_t span_count) {
if (span_count <= heap->spans_reserved)
return _rpmalloc_span_map_from_reserve(heap, span_count);
span_t* span = 0;
int use_global_reserve = (_memory_page_size > _memory_span_size) || (_memory_span_map_count > _memory_heap_reserve_count);
if (use_global_reserve) {
if (_memory_global_reserve_count >= span_count) {
size_t reserve_count = (!heap->spans_reserved ? _memory_heap_reserve_count : span_count);
if (_memory_global_reserve_count < reserve_count)
reserve_count = _memory_global_reserve_count;
span = _rpmalloc_global_get_reserved_spans(reserve_count);
if (span) {
if (reserve_count > span_count) {
span_t* reserved_span = (span_t*)pointer_offset(span, span_count << _memory_span_size_shift);
_rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master, reserved_span, reserve_count - span_count);
}
// Already marked as subspan in _rpmalloc_global_get_reserved_spans
span->span_count = (uint32_t)span_count;
}
}
}
if (!span)
span = _rpmalloc_span_map_aligned_count(heap, span_count);
return span;
}
//! Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings)
static void
_rpmalloc_span_unmap(span_t* span) {
rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted");
rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted");
int is_master = !!(span->flags & SPAN_FLAG_MASTER);
span_t* master = is_master ? span : ((span_t*)pointer_offset(span, -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size)));
rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted");
rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted");
size_t span_count = span->span_count;
if (!is_master) {
//Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master)
rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted");
if (_memory_span_size >= _memory_page_size)
_rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
} else {
//Special double flag to denote an unmapped master
//It must be kept in memory since span header must be used
span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER;
}
master->remaining_spans -= (int32_t)span_count;
if (master->remaining_spans <= 0) {
//Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span
rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN), "Span flag corrupted");
size_t unmap_count = master->span_count;
if (_memory_span_size < _memory_page_size)
unmap_count = master->total_spans;
_rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size);
}
}
//! Move the span (used for small or medium allocations) to the heap thread cache
static void
_rpmalloc_span_release_to_cache(heap_t* heap, span_t* span) {
rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted");
rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT, "Invalid span size class");
rpmalloc_assert(span->span_count == 1, "Invalid span count");
if (!heap->finalize) {
if (heap->size_class[span->size_class].cache)
_rpmalloc_heap_cache_insert(heap, heap->size_class[span->size_class].cache);
heap->size_class[span->size_class].cache = span;
} else {
_rpmalloc_span_unmap(span);
}
}
//! Initialize a (partial) free list up to next system memory page, while reserving the first block
//! as allocated, returning number of blocks in list
static uint32_t
free_list_partial_init(void** list, void** first_block, void* page_start, void* block_start, uint32_t block_count, uint32_t block_size) {
rpmalloc_assert(block_count, "Internal failure");
*first_block = block_start;
if (block_count > 1) {
void* free_block = pointer_offset(block_start, block_size);
void* block_end = pointer_offset(block_start, (size_t)block_size * block_count);
//If block size is less than half a memory page, bound init to next memory page boundary
if (block_size < (_memory_page_size >> 1)) {
void* page_end = pointer_offset(page_start, _memory_page_size);
if (page_end < block_end)
block_end = page_end;
}
*list = free_block;
block_count = 2;
void* next_block = pointer_offset(free_block, block_size);
while (next_block < block_end) {
*((void**)free_block) = next_block;
free_block = next_block;
++block_count;
next_block = pointer_offset(next_block, block_size);
}
*((void**)free_block) = 0;
} else {
*list = 0;
}
return block_count;
}
//! Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list in heap class free list
static void*
_rpmalloc_span_initialize_new(heap_t* heap, heap_size_class_t* heap_size_class, span_t* span, uint32_t class_idx) {
rpmalloc_assert(span->span_count == 1, "Internal failure");
size_class_t* size_class = _memory_size_class + class_idx;
span->size_class = class_idx;
span->heap = heap;
span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
span->block_size = size_class->block_size;
span->block_count = size_class->block_count;
span->free_list = 0;
span->list_size = 0;
span->free_list_deferred = 0;
//Setup free list. Only initialize one system page worth of free blocks in list
void* block;
span->free_list_limit = free_list_partial_init(&heap_size_class->free_list, &block,
span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size);
//Link span as partial if blocks remain to be initialized as free list, or as full if fully initialized
if (span->free_list_limit < span->block_count) {
_rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span);
span->used_count = span->free_list_limit;
} else {
++heap->full_span_count;
span->used_count = span->block_count;
}
return block;
}
static void
_rpmalloc_span_extract_free_list_deferred(span_t* span) {
// Adopt the deferred free list into the span free list. In the original multi-threaded rpmalloc this is an
// atomic swap with acquire semantics; in this single-threaded fork plain assignments are sufficient.
span->free_list = span->free_list_deferred;
span->used_count -= span->list_size;
span->list_size = 0;
span->free_list_deferred = 0;
}
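//! Check if all blocks of the span have been handed out (free list exhausted and fully initialized)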
static int
_rpmalloc_span_is_fully_utilized(span_t* span) {
rpmalloc_assert(span->free_list_limit <= span->block_count, "Span free list corrupted");
return !span->free_list && (span->free_list_limit >= span->block_count);
}
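//! Finalize a span during heap teardown: reclaim the heap class free list if this is the active span, then
//! unmap the span if all of its blocks are free (returns 1 if the span was unmapped, 0 otherwise)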
static int
_rpmalloc_span_finalize(heap_t* heap, size_t iclass, span_t* span, span_t** list_head) {
void* free_list = heap->size_class[iclass].free_list;
span_t* class_span = (span_t*)((uintptr_t)free_list & _memory_span_mask);
if (span == class_span) {
// Adopt the heap class free list back into the span free list
void* block = span->free_list;
void* last_block = 0;
while (block) {
last_block = block;
block = *((void**)block);
}
uint32_t free_count = 0;
block = free_list;
while (block) {
++free_count;
block = *((void**)block);
}
if (last_block) {
*((void**)last_block) = free_list;
} else {
span->free_list = free_list;
}
heap->size_class[iclass].free_list = 0;
span->used_count -= free_count;
}
//If this assert triggers you have memory leaks
rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected");
if (span->list_size == span->used_count) {
// This function only used for spans in double linked lists
if (list_head)
_rpmalloc_span_double_link_list_remove(list_head, span);
_rpmalloc_span_unmap(span);
return 1;
}
return 0;
}
////////////
///
/// Heap control
///
//////
static void
_rpmalloc_deallocate_huge(span_t*);
//! Store the given spans as reserve in the given heap
static void
_rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count) {
heap->span_reserve_master = master;
heap->span_reserve = reserve;
heap->spans_reserved = (uint32_t)reserve_span_count;
}
//! Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use
static void
_rpmalloc_heap_cache_adopt_deferred(heap_t* heap, span_t** single_span) {
span_t* span = (span_t*)((void*)heap->span_free_deferred);
while (span) {
span_t* next_span = (span_t*)span->free_list;
rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted");
if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
--heap->full_span_count;
if (single_span && !*single_span)
*single_span = span;
else
_rpmalloc_heap_cache_insert(heap, span);
} else {
if (span->size_class == SIZE_CLASS_HUGE) {
_rpmalloc_deallocate_huge(span);
} else {
rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Span size class invalid");
rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted");
--heap->full_span_count;
uint32_t idx = span->span_count - 1;
if (!idx && single_span && !*single_span)
*single_span = span;
else
_rpmalloc_heap_cache_insert(heap, span);
}
}
span = next_span;
}
}
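//! Unmap the memory backing a heap once it is fully finalized (and, for a master heap, once all child heaps are gone)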
static void
_rpmalloc_heap_unmap(heap_t* heap) {
if (!heap->master_heap) {
if ((heap->finalize > 1) && !heap->child_count) {
span_t* span = (span_t*)((uintptr_t)heap & _memory_span_mask);
_rpmalloc_span_unmap(span);
}
} else {
heap->master_heap->child_count -= 1;
if (heap->master_heap->child_count == 0) {
_rpmalloc_heap_unmap(heap->master_heap);
}
}
}
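//! Finalize a heap at global finalization: flush its span caches and, once it owns no more spans,
//! remove it from the global heap map and unmap it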
static void
_rpmalloc_heap_global_finalize(heap_t* heap) {
if (heap->finalize++ > 1) {
--heap->finalize;
return;
}
_rpmalloc_heap_finalize(heap);
for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
span_cache_t* span_cache;
if (!iclass)
span_cache = &heap->span_cache;
else
span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
_rpmalloc_span_unmap(span_cache->span[ispan]);
span_cache->count = 0;
}
if (heap->full_span_count) {
--heap->finalize;
return;
}
for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
if (heap->size_class[iclass].free_list || heap->size_class[iclass].partial_span) {
--heap->finalize;
return;
}
}
//Heap is now completely free, unmap and remove from heap list
size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE;
heap_t* list_heap = _memory_heaps[list_idx];
if (list_heap == heap) {
_memory_heaps[list_idx] = heap->next_heap;
} else {
while (list_heap->next_heap != heap)
list_heap = list_heap->next_heap;
list_heap->next_heap = heap->next_heap;
}
_rpmalloc_heap_unmap(heap);
}
//! Insert a single span into the thread heap cache, unmapping a batch of cached spans if the cache overflows
static void
_rpmalloc_heap_cache_insert(heap_t* heap, span_t* span) {
if (UNEXPECTED(heap->finalize != 0)) {
_rpmalloc_span_unmap(span);
_rpmalloc_heap_global_finalize(heap);
return;
}
size_t span_count = span->span_count;
if (span_count == 1) {
span_cache_t* span_cache = &heap->span_cache;
span_cache->span[span_cache->count++] = span;
if (span_cache->count == MAX_THREAD_SPAN_CACHE) {
const size_t remain_count = MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER;
for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan)
_rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
span_cache->count = remain_count;
}
} else {
size_t cache_idx = span_count - 2;
span_large_cache_t* span_cache = heap->span_large_cache + cache_idx;
span_cache->span[span_cache->count++] = span;
const size_t cache_limit = (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
if (span_cache->count == cache_limit) {
const size_t transfer_limit = 2 + (cache_limit >> 2);
const size_t transfer_count = (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit ? THREAD_SPAN_LARGE_CACHE_TRANSFER : transfer_limit);
const size_t remain_count = cache_limit - transfer_count;
for (size_t ispan = 0; ispan < transfer_count; ++ispan)
_rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
span_cache->count = remain_count;
}
}
}
//! Extract the given number of spans from the different cache levels
static span_t*
_rpmalloc_heap_thread_cache_extract(heap_t* heap, size_t span_count) {
span_t* span = 0;
span_cache_t* span_cache;
if (span_count == 1)
span_cache = &heap->span_cache;
else
span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2));
if (span_cache->count) {
return span_cache->span[--span_cache->count];
}
return span;
}