4
\$\begingroup\$

I'm trying to implment memory allocator guided by this tutorial. I used a mix of Next-fit search and Segregated-list search.
There are multiple slabs of different sizes (a slab is contagious blocks of memory of the same size, plus a header). If a slab ran out of free blocks, it allocates a new slab of the same size and link it to the current slab. free blocks are tracked using a bitmap in the header of each slab.

  • How is my design in terms of memory & speed?

  • Is there any way to determine which slab to free a block from, without knowing the size ? the current approach is to ask all the slabs to free the block, the one who owns that block will free it.

  • What's the best way to deal with big sizes of memory (bigger than ones of slabs)

  • How can I write some unit tests to this? it's hard to figure whether the address returned is valid or not.

    malloc.cpp

      #include "slab_allocator.h"
    
      const size_t PAGE_SIZE = 0x1000;
      static Slab<0x010, PAGE_SIZE> slab_0x10;
      static Slab<0x020, PAGE_SIZE> slab_0x20;
      static Slab<0x040, PAGE_SIZE> slab_0x40;
      static Slab<0x060, PAGE_SIZE> slab_0x60;
      static Slab<0x100, PAGE_SIZE> slab_0x100;
      static Slab<0x200, PAGE_SIZE> slab_0x200;
      static Slab<0x300, PAGE_SIZE> slab_0x300;
    
      void init() {
          slab_0x10.init();
          slab_0x20.init();
          slab_0x40.init();
          slab_0x60.init();
          slab_0x100.init();
          slab_0x200.init();
          slab_0x300.init();
      }
    
      void* custom_malloc(size_t size) {
          if (size < 0x10) {
              return slab_0x10.alloc();
          } else if (size < 0x20) {
              return slab_0x10.alloc();
          } else if (size < 0x40) {
              return slab_0x40.alloc();
          } else if (size < 0x60) {
              return slab_0x60.alloc();
          } else if (size < 0x100) {
              return slab_0x100.alloc();
          } else if (size < 0x200) {
              return slab_0x200.alloc();
          } else if (size < 0x500) {
              return slab_0x300.alloc();
          } else {
              return nullptr;
          }
      }
    
      void custom_free(void* address) {
          slab_0x10.free(address);
          slab_0x20.free(address);
          slab_0x40.free(address);
          slab_0x60.free(address);
          slab_0x100.free(address);
          slab_0x200.free(address);
          slab_0x300.free(address);
      }
    

slab_allocator.h:

    #pragma once
    #include "bitmap.h"
    #include <cstdint>
    #include <Windows.h>

    template<size_t slab_size, size_t memory_size> class Slab;
    template<size_t slab_size, size_t memory_size, size_t max_blocks = memory_size / slab_size> struct SlabHeader {
        Slab<slab_size, memory_size>* prev, * next;
        Bitmap<max_blocks> mem_map;
        size_t free_blocks;
        size_t next_fit_block;
    };

    template<size_t slab_size, size_t memory_size> class Slab {
    private:
        const static size_t MAX_HEADER_SIZE = sizeof(SlabHeader<slab_size, memory_size>);
        const static size_t MAX_BLOCKS = (memory_size - MAX_HEADER_SIZE) / slab_size;
        static_assert(memory_size > MAX_HEADER_SIZE);
        static_assert((slab_size + MAX_HEADER_SIZE) <= memory_size);

        SlabHeader<slab_size, memory_size, MAX_BLOCKS> header;
        char blocks[MAX_BLOCKS][slab_size];

        bool is_address_in_slab(void* address);
        void* alloc_in_current_slab(size_t block_index);
        void* alloc_in_new_slab();
        void free_from_current_slab(size_t block_index);
        void free_from_next_slab(void* address);
        void* request_memory_from_os(size_t size);
        void free_memory_to_os(void* addrss, size_t size);

    public:
        void init(Slab* prev = nullptr);
        void* alloc();
        void free(void* address);
    };

    template<size_t slab_size, size_t memory_size>
    void Slab<slab_size, memory_size>::init(Slab* prev) {
        header.prev = prev;
        header.next = nullptr;
        header.free_blocks = MAX_BLOCKS;
        header.next_fit_block = 0;
        header.mem_map.init();
    }

    template<size_t slab_size, size_t memory_size>
    void* Slab<slab_size, memory_size>::alloc() {
        size_t block_index = -1;
        if (header.free_blocks &&
            ((block_index = header.mem_map.find_unused(header.next_fit_block)) != BITMAP_NO_BITS_LEFT)) {
            return alloc_in_current_slab(block_index);
        } else {
            return alloc_in_new_slab();
        }

    }

    template<size_t slab_size, size_t memory_size>
    void Slab<slab_size, memory_size>::free(void* address) {
        if (is_address_in_slab(address) == false) {
            return free_from_next_slab(address);
        }
        size_t block_index = (uintptr_t(address) - uintptr_t(blocks)) / slab_size;
        assert(header.mem_map.check_used(block_index));
        free_from_current_slab(block_index);
    }

    template<size_t slab_size, size_t memory_size>
    bool Slab<slab_size, memory_size>::is_address_in_slab(void* address) {
        if ((address >= blocks) && (address <= &blocks[MAX_BLOCKS - 1][slab_size - 1])) {
            return true;
        } else {
            return false;
        }
    }

    template<size_t slab_size, size_t memory_size>
    void* Slab<slab_size, memory_size>::alloc_in_new_slab() {
        Slab* new_slab = static_cast<Slab*>(request_memory_from_os(sizeof(Slab)));
        if (!new_slab) {
            return nullptr;
        }
        new_slab->init(this);
        header.next = new_slab;
        return new_slab->alloc();
    }

    template<size_t slab_size, size_t memory_size>
    void* Slab<slab_size, memory_size>::alloc_in_current_slab(size_t block_index) {
        header.mem_map.set_used(block_index);
        header.next_fit_block = (block_index + 1) % MAX_BLOCKS;
        header.free_blocks--;
        return static_cast<void*>(blocks[block_index]);
    }

    template<size_t slab_size, size_t memory_size>
    void Slab<slab_size, memory_size>::free_from_current_slab(size_t block_index) {
        header.mem_map.set_unused(block_index);
        header.next_fit_block = block_index;
        header.free_blocks++;

        if ((header.free_blocks == 0) && (header.prev)) {
            //slab is empty, and it's not the first;
            header.prev->header.next = nullptr;
            free_memory_to_os(this, sizeof(Slab));
            //The slab committed suicide, don't ever use it again!
        }
    }

    template<size_t slab_size, size_t memory_size>
    void Slab<slab_size, memory_size>::free_from_next_slab(void* address) {
        if (header.next) {//if there is another slab in the list check on it too.
            header.next->free(address);
            return;
        } else {
            //address doesn't belong any slab.
            return;
        }
    }

    template<size_t slab_size, size_t memory_size>
    void* Slab<slab_size, memory_size>::request_memory_from_os(size_t size) {
        //system dependent function, returns aligned memory region.
        return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
    }

    template<size_t slab_size, size_t memory_size>
    void Slab<slab_size, memory_size>::free_memory_to_os(void* addrss, size_t size) {
        //system dependent function, returns aligned memory region.
        VirtualFree(addrss, size, MEM_FREE);
    }

Bitmap.h (not really important)

    #pragma once
    #include <cstdint>
    #include <assert.h>
    #include <cstring>

    #define CHECK_BIT(value, bit) ((value >> bit) & 1)
    #define BITMAP_NO_BITS_LEFT   0xFFFFFFFF

    template <size_t SIZE> class Bitmap {
    private:
        uint8_t m_bitmap_data[SIZE];

    public:
        void init();
        void set_used(unsigned position);
        void set_unused(unsigned position);
        unsigned find_unused(unsigned search_start = 0);
        unsigned find_used(unsigned search_start = 0);
        bool check_used(unsigned position);
        bool check_unused(unsigned position);
    };


    template <size_t SIZE> void Bitmap<SIZE>::init() {
        memset(m_bitmap_data, 0, sizeof(m_bitmap_data));
    }


    template <size_t SIZE> void Bitmap<SIZE>::set_used(unsigned position) {
        assert(position < SIZE);
        m_bitmap_data[position / 8] |= (1 << (position % 8));
    }

    template <size_t SIZE> void Bitmap<SIZE>::set_unused(unsigned position) {
        assert(position < SIZE);
        m_bitmap_data[position / 8] &= ~(1 << (position % 8));
    }

    template <size_t SIZE> unsigned Bitmap<SIZE>::find_unused(unsigned search_start) {
        assert(search_start < SIZE);
        size_t bit_index = search_start;
        while (bit_index < SIZE) {
            if (m_bitmap_data[bit_index / 8] == 0xFF) {
                bit_index += 8;
                continue;
            }
            if (!CHECK_BIT(m_bitmap_data[bit_index / 8], bit_index % 8))
                return bit_index;

            bit_index++;
        }
        return BITMAP_NO_BITS_LEFT;
    }

    template <size_t SIZE> unsigned Bitmap<SIZE>::find_used(unsigned search_start) {
        assert(search_start < SIZE);
        size_t bit_index = search_start;
        while (bit_index < SIZE) {
            if (m_bitmap_data[bit_index / 8] == 0) {
                bit_index += 8;
                continue;
            }
            if (CHECK_BIT(m_bitmap_data[bit_index / 8], bit_index % 8))
                return bit_index;

            bit_index++;
        }
        return BITMAP_NO_BITS_LEFT;
    }

    template <size_t SIZE> bool Bitmap<SIZE>::check_used(unsigned position) {
        return CHECK_BIT(m_bitmap_data[position / 8], position % 8);
    }

    template <size_t SIZE> bool Bitmap<SIZE>::check_unused(unsigned position) {
         return !CHECK_BIT(m_bitmap_data[position / 8], position % 8);
    }
\$\endgroup\$

1 Answer 1

3
\$\begingroup\$

Answers to your questions

How is my design in terms of memory & speed?

That's easy: measure it! Create some workload that allocates and frees memory, and time how long it takes. There are also operating system functions that can tell you how much memory your program is using, for example getrusage() on Linux. Have two versions, one using your slab allocator, and another using regular malloc()/free(), new/delete, or whatever way you have to get memory from the operating system, and check the difference in performance.

Is there any way to determine which slab to free a block from, without knowing the size ? the current approach is to ask all the slabs to free the block, the one who owns that block will free it.

One way is to have a little header allocated right before the memory region returned by alloc(), and store some metadata in that header, such as the pointer to the slab allocator object itself. Another option is to ensure slabs are always naturally aligned in memory, so you can quickly get a pointer to the start of the slab, regardless of where in the slab the allocation came from.

But often, the caller of custom_free() will actually know the size of the object it is freeing. So it makes sense to add a size parameter to custom_free(), so it can do the same thing you do in custom_malloc() to find the right slab object to free from.

What's the best way to deal with big sizes of memory (bigger than ones of slabs)

Then you just fall back to a regular malloc() or new.

How can I write some unit tests to this? it's hard to figure whether the address returned is valid or not.

One possibility is to just write to the allocated memory in the unit test, and then compile the unit tests with the AddressSanitizer enabled. Alternatively, run the unit test inside Valgrind to catch writes to invalid addresses.

Use uint64_t for the bitmap array

Unless you are specifically writing this code to run on an 8-bit microcontroller, I suggest you use uint64_t for the bitmap array. The reason is that computers nowadays have 64-bit registers, and operations on 64-bits at a time are as fast or sometimes even faster than on 8-bits at a time.

Use hardware instructions to find the first set bit in a bitmap

Most processors have instructions to find the first set bit in an integer. This is perfect for use in Bitmap::find_used(). In C++20, you will be able to use std::countl_zero() and related functions to access that functionality, but if you cannot use C++20 yet, then you might have platform-specific functions such as ffs() or compiler builtins such as GCC's __builtin_clz() to do the same.

Write proper constructors for your classes

You should not have an init() function in your classes, but a proper constructor that performs the initialization. That avoids the possibility that you accidentily forget to call the initializer, or call it twice.

Write a destructor for class Slab

You should write a destructor that cleans up any remaining memory that is in use by a slab when it is destroyed.

\$\endgroup\$
5
  • \$\begingroup\$ Thanks for the tips. I forgot to mention that this allocator will be part of a hobby operating system I'm working on, so I can't have constructor because I will need new to call it which I'm trying to implement in the first place, and I can't assume it will be called if the Slab is a global object since nothing will run before the kernel. I can't use new & malloc for a size bigger than slab's sizes too. \$\endgroup\$
    – 0xDEADC0DE
    Commented Sep 24, 2020 at 20:18
  • \$\begingroup\$ And about getting the pointer of the slab by the address of the block, is it safe (well defined behaviour) to assume that the class will be like a structure and the first address of the of the aligned region will be the address of the first data member of the class ?. But often, the caller of custom_free() will actually know the size of the object it is freeing by looking at delete operator or free of C library, they don't provide the size. \$\endgroup\$
    – 0xDEADC0DE
    Commented Sep 24, 2020 at 20:18
  • \$\begingroup\$ Unless there's inheritance involved, the pointer to the class will be the same as a pointer to the first member. As for delete and free not providing a size: no, because free() doesn't allow it, but delete does since C++17. But that's a generic allocator. You wouldn't normally use a slab allocator to do generic allocations, but rather to handle many allocations of identically sized objects, so then you would know the size at deallocation time. \$\endgroup\$
    – G. Sliepen
    Commented Sep 24, 2020 at 20:48
  • \$\begingroup\$ If I implement a way to find the correct slab just from the address, will it be appropriate for generic allocations ? \$\endgroup\$
    – 0xDEADC0DE
    Commented Sep 24, 2020 at 21:05
  • 1
    \$\begingroup\$ That is hard to say. It really depends on the memory usage pattern of your application. But if you say "generic allocations", then you should know that the algorithm used by malloc() and new is heavily optimized for generic use, so in that case your slab allocator will probably not show any benefit, even if you have a fast way of determining the slab from just an address. \$\endgroup\$
    – G. Sliepen
    Commented Sep 24, 2020 at 21:11

Not the answer you're looking for? Browse other questions tagged or ask your own question.