4
\$\begingroup\$

I'm looking for help speeding up some code I'm tinkering with.

The rest of the code, as well as my 'benchmark', can be found here.

 
#ifndef LALLOC_H
#define LALLOC_H

#define PAGESIZE (1048576)

#include <stdbool.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <errno.h>

typedef struct bblk bblock;
typedef bblock* bb;
struct bblk {
    size_t ind;
    bb next;
    size_t occ;
    char mem[PAGESIZE - (sizeof(size_t) + sizeof(bb) + sizeof(size_t))];
} __attribute__((packed));

typedef struct smmblk memblock;
typedef memblock* mb;
struct smmblk {
    mb prev;
    mb next;
    void* end;
    bb bblk;
    bool free;
} __attribute__((packed));

size_t bbhdr = (sizeof(size_t) + sizeof(bb) + sizeof(size_t));

bb first;

/**
 * @author Lev Knoblock
 * @notice Allocates a 'big block' of memory using mmap and inserts 1 'small block' into it
 * @dev Consider moving away from 1 page of memory. Maybe larger blocks would be better.
 * @param
 * @return bblk *
 */
bb bbinit() {
    bb out = mmap(NULL, PAGESIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
    if (out == MAP_FAILED) {
        printf("%s", sys_errlist[errno]);
        exit(40);
    }

    /* Big blocks are appended to an append-only linked list.
     * Since initially all memory in the block is free, the
     * occupancy is set to 0 */
    out->next = NULL;
    out->occ = 0;

    mb t = (mb) out->mem;
    /* The internal small block has no predecessors or
     * successors, but spans the full block width */
    t->prev = NULL;
    t->next = NULL;
    t->end = out->mem + (PAGESIZE - (sizeof(size_t) + sizeof(bb) + sizeof(size_t)));
    t->free = true;
    t->bblk = out;
    return out;
}

/**
 * @author Lev Knoblock
 * @notice Allocates a slice of memory by creating an appropriately sized small block in a big block
 * @dev Well its somehow slower than the prototype and I swear I knew what was making that one slow
 * @param 'big block' to allocate from, size of slice
 * @return void* to memory, or NULL if no space was found.
 */
static void* bblkalloc(bb blk, size_t size) {
    mb sb = (mb) blk->mem;
    /* Find the first free small block */
    while (1) {
        if (sb->free) break;
        tryGetNext:;
        if (sb->next == NULL) {
            /* Reached end of big block */
            return NULL;
        }
        sb = sb->next;
    }

    /* Remaining space in small block */
    size_t frsize = sb->end - (((void*)sb) + sizeof(memblock));

    /* If there isn't enough space to fit a new small block
     * find another block that will fit one */
    if (frsize < (size + sizeof(memblock))) {
        goto tryGetNext;
    }

    /* Initialize the new small block by stealing
     * space from the end of the 'current' small block */
    mb nsb = sb->end - (sizeof(memblock) + size);
    nsb->prev = sb;
    nsb->next = sb->next;
    nsb->end = sb->end;
    nsb->free = false;
    nsb->bblk = blk;

    /* Append new small block to list */
    sb->end = nsb;
    if (sb->next != NULL) sb->next->prev = nsb;
    sb->next = nsb;

    sb->bblk = blk;
    blk->occ++;
    /* Return pointer past allocation header */
    return ((void*)nsb) + sizeof(memblock);
}

/**
 * @author Lev Knoblock
 * @notice Allocates a slice of memory from the memory pool
 * @dev Currently has no functionality for reducing number of big blocks.
 * @param size of slice
 * @return void*
 */
void* lalloc(size_t size) {
    void* out;
    bb curr = first;
    unsigned int ind = 0;
    do {
        if (curr == NULL) {
            /* If current big block is null, set it up with its first small block */
            curr = bbinit();
            curr->ind = ind;
            if (ind == 0) first = curr;
        }
        /*
        if (curr->occ) {
            curr = curr->next;
            ind++;
            continue;
        }
         */
        out = bblkalloc(curr, size);
        /* If allocation fails go to the next big block (and allocate it if necessary)
         * otherwise, return the valid pointer */
        if (out != NULL) return out;
        //curr->occ = 1;
        curr = curr->next;
        ind++;
    } while (1);
}

/**
 * @author Lev Knoblock
 * @notice Frees a slice of memory from the memory pool
 * @dev Not really sure how to optimize further.
 * @param void* to slice
 * @return
 */
void lfree(void* a) {
    /* Decrement pointer to get to begining of header */
    mb sb = a - sizeof(memblock);
    sb->free = true;

    if (sb->prev != NULL && sb->prev->free) {
        /* If previous block exists and is free, extend it
         * to wrap over this one and remove pointers to
         * this block header */
        sb->prev->end = sb->end;
        sb->prev->next = sb->next;
        if (sb->next != NULL) sb->next->prev = sb->prev;

        /* Replace block pointer on stack */
        sb = sb->prev;
    }

    if (sb->next != NULL && sb->next->free) {
        /* If next block exists extend current one over
         * it and scrub pointers to it */
        sb->end = sb->next->end;
        sb->next = sb->next->next;
        if (sb->next != NULL) sb->next->prev = sb;
    }

    /* Decrement occupancy */
    sb->bblk->occ--;
}

#endif
\$\endgroup\$
6
  • \$\begingroup\$ (If I should put an explanation for how it is supposed to work, let me know, I wasn't sure) \$\endgroup\$ Commented Jun 26, 2020 at 1:32
  • 5
    \$\begingroup\$ You don't need to apologise for the goto, nor does it bring your question off-topic... but you'll definitely be called out on it during the review. \$\endgroup\$
    – Reinderien
    Commented Jun 26, 2020 at 2:08
  • 2
    \$\begingroup\$ When adding additional information you should edit your question instead of adding a comment. Learn more about comments including when to comment and when not to in the Help Center page about Comments. \$\endgroup\$ Commented Jun 26, 2020 at 17:29
  • 1
    \$\begingroup\$ Please clarify the purpose of the code. What made you write this? What was wrong with the tools already available? \$\endgroup\$
    – Mast
    Commented Jun 27, 2020 at 17:00
  • 1
    \$\begingroup\$ @Mast Mainly, as a learning experience. I wanted to learn about memory allocation, and I thought this would be a fun thing to try instead of copying a design from wikipedia. Most of my code has really no purpose beyond experimenting with something. \$\endgroup\$ Commented Jun 28, 2020 at 0:46

2 Answers 2

5
\$\begingroup\$

Variable linkage

bb first;

looks like it's in a header file. That means every time you include it from a different module, it will be re-declared with its own separate address. That's unlikely to be what you want. Instead, declare it extern, and then define it once in a C file.

Beyond that, though: why declare it in the header at all? It's an implementation detail that you shouldn't expose to your users.

Further, it looks like absolutely everything - including your function bodies - is in the header. Maybe your theory is that inlining everything produces faster code than having a more standard .c/.h layout. If this library is to be included in another project as a .so/.dll there is some non-zero chance that that's the case, but if this library is included in-source with any kind of self-respecting compiler that has whole program optimization, that chance drops to zero. Basically, I would consider this premature optimization and would be surprised if it's worth doing this over having a separated .c that better isolates your design and reduces code re-declaration.

Nested includes

These:

#include <stdbool.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <errno.h>

need to be trimmed down to only what's strictly necessary to declare the symbols in your lalloc.h. errno can definitely be removed; stdbool cannot; and I'm unsure about the others. The trimmed includes would be moved to the .c.

stderr

    printf("%s", sys_errlist[errno]);

should likely be fprintfed to stderr instead. Also, fprintf is not necessary; you can use puts/fputs.

Mystery error codes

    exit(40);

should get a #define.

Yes, goto is actually bad

This:

while (1) {
    tryGetNext:;
    // ...
}

if (frsize < (size + sizeof(memblock))) {
    goto tryGetNext;
}

just demonstrates that your while has not adequately captured what you're actually looping over. An outer loop should include everything everything up to this goto; the existing while should become an inner loop and the goto should go away.

An example is:

size_t frsize;
do {
    while (!sb->free) {
        if (sb->next == NULL) {
            /* Reached end of big block */
            return NULL;
        }
        sb = sb->next;
    }

    /* Remaining space in small block */
    frsize = sb->end - (((void*)sb) + sizeof(memblock));

    /* If there isn't enough space to fit a new small block
     * find another block that will fit one */
} while (frsize >= size + sizeof(memblock));

It's not strictly equivalent because in the original version you skip a free check under certain conditions. I'm not clear on whether this is a problem.

Pointer math

size_t frsize = sb->end - (((void*)sb) + sizeof(memblock));

seems a little awkward. Can you not just:

size_t frsize = (sb->end - sb - 1)*sizeof(memblock);

I'm surprised that the original version - subtracting non-void and void pointers - is even allowed.

Forever-loops

You mix styles:

do { } while (1);

while (1) { }

I don't love either. The clearest to me is usually while (true) { }, which is possible given that you have stdbool.

In the first case it shouldn't actually be a while loop;

unsigned int ind = 0;
do {
    ind++;
} while (1);

I find would be cleaner as

for (unsigned int ind = 0;; ind++)
\$\endgroup\$
9
  • \$\begingroup\$ Thank you very much for the reply! I've replaced the do-while loop with a for loop as per your suggestion, along with trimming the includes and getting rid of printf. As for everything being in the header, for me that's simply a prototyping method. It's easy to separate code once its written, but its faster to just write it in one place and then separate it at the end imho. Also, since the question was looking for any advice for performance optimizations, do you have any comments about that? \$\endgroup\$ Commented Jun 26, 2020 at 2:52
  • 1
    \$\begingroup\$ Unfortunately I do not see any obvious optimizations. Have you profiled? \$\endgroup\$
    – Reinderien
    Commented Jun 26, 2020 at 2:54
  • 1
    \$\begingroup\$ The correct way to spell "forever" in C is for (;;). \$\endgroup\$ Commented Jun 26, 2020 at 9:42
  • 1
    \$\begingroup\$ @CodyGray : it's a matter of perosnal preference, the for has been long considered such because it was use in a very popluar C book, the author themselves said there was no difference and it's a matter of personnal preference. See the answer there : stackoverflow.com/questions/20186809/endless-loop-in-c-c \$\endgroup\$
    – Walfrat
    Commented Jun 26, 2020 at 11:11
  • 1
    \$\begingroup\$ In addition to Reinderen's very excellent input, a doubly-linked list is not personally how I would choose to manage my memory allocations. And the only reason prev is there at all, it looks like, is so that you can patch the linked list back together following a deletion? Feh. Use an array as an allocation table, or something. Or maybe one array per bblk, fixed-size. The table size will limit the amount of fragmentation any one block can handle, but arguably that's a feature rather than a bug. \$\endgroup\$
    – FeRD
    Commented Jun 26, 2020 at 17:09
4
\$\begingroup\$

... looking for help speeding up some code

Functional concern

I see no provisional to insure the allocation meets universal alignment as malloc() does. This is a potential fatal error. Also research max_align_t.

7.22.3 Memory management functions The pointer returned if the allocation succeeds is suitably aligned so that it may be assigned to a pointer to any type of object with a fundamental alignment requirement ...

Even super aligning the size a bit more, like to a multiple of 16 or 32 can result in less fragmentation, effecting faster matches after lfree() for later allocations.


Rest is minor stuff.


Avoid mis-alignment

Certainly a pointer and size_t may have the same size and alignment needs, but what if they do not?

Although a struct * could be narrower on some unicorn platform, the reverse is more likely: the pointer wider and performs better aligned well.

typedef bblock* bb;
struct bblk {
    size_t ind;
    bb next;
    size_t occ;
    char mem[PAGESIZE - (sizeof(size_t) + sizeof(bb) + sizeof(size_t))];
} __attribute__((packed));

In general, put the widest members first and like with like.

typedef bblock* bb;
struct bblk {
    bb next; // First
    size_t ind;
    size_t occ;
    char mem[PAGESIZE - (sizeof(bb) + sizeof(size_t) + sizeof(size_t))];
} __attribute__((packed));

In general this applies to struct smmblk too, but only benefits in rare implementations where struct * narrower than void *.

struct smmblk {
    void* end; // void * certainly widest object point when objects pointer sizes differ.
    mb prev;
    mb next;
    bb bblk;
    bool free;
} __attribute__((packed));

Set aside packed

It is not portable and tends to make data that is memory space efficient at the cost of speed.

free(NULL) is OK yet not lfree(NULL)

Consider adding an internal NULL test to allow users that same latitude as free().


Hiding pointer types

typedef bblock* bb; and later use of bb hides that fact bb is a pointer and makes deciphering the code and ideas for improvements more challenging.

Avoid UB

void * addition is UB (or IDB) and distracts from performance analysis. Consider unsigned char * or char *.

// ((void*)sb) + sizeof(memblock)
((unsigned char*) sb) + sizeof memblock

void* a
// mb sb = a - sizeof(memblock);
// mb sb = (mb) ((unsigned char *)a - sizeof(memblock));
\$\endgroup\$
2
  • \$\begingroup\$ I was planning to implement the alignment for allocation at a later point - agree it is important feature. I've also implemented the check for free(NULL). The hidden pointer types are only until I feel that this code is in a decent enough state to separate into a .c/.h file and be 'done' ish. As for the packed, I was going to get rid of it, but when I did, I profiled again and lost some performance. Ideas? I find that very confusing. \$\endgroup\$ Commented Jun 26, 2020 at 3:19
  • 1
    \$\begingroup\$ @LevKnoblock The performance of allocation is highly dependent on the nature of allocations and free scenarios. A string heavy application vs. a heavy link list versus multi-precision math math can give a inconsistent performance ranking between implementations. "lost some performance" sounds too minor. Look to avoiding worst case scenarios (reduce big O) and strive for general performance. \$\endgroup\$ Commented Jun 26, 2020 at 3:44

Not the answer you're looking for? Browse other questions tagged or ask your own question.