3
\$\begingroup\$

This is a follow-up to: Reading a file into lines with mmap vs getline.

The actual names of the functions are not getline() and mmap_memchr(). The title only contained the short ones for brevity. Each routine starts with readlines_.

The code below has:

  • readlines_getline().
  • readlines_mmap_memchr().
  • readlines_mmap_getline().
  • readlines_fread().

Changes made:

  • Added two more routines, one uses fread() to read in chunks, and the other uses getline() and mmap() together.
  • Reduced copies in readlines_mmap_memchr(). Instead of allocating memory and then copying each line, a pointer to the start of the line is stored and all the newlines are replaced with null bytes.
  • Added posix_fadvise() in readlines_getline().
  • Added missing error checking for getline().
  • Installed a signal handler for SIGSEGV in case of a page fault.
  • Added documentation for all read routines, as @J_H asked.
  • Added cleanup functions for all read routines.

Code:

\\ ' is present in the first line as a kludge because highlight.js does not support digit separators yet. For anyone wishing to fix it, take a look at: c.js.

#define _POSIX_C_SOURCE 2008'09L // '
#define _XOPEN_SOURCE   700

#include <errno.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <fcntl.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>

typedef uint64_t timestamp_t;

typedef struct line {
    char *line;
    size_t size;
} Line;

typedef struct file_buf {
    Line *lines;
    uintmax_t capacity;
    uintmax_t count;
    uintmax_t size;
} FileBuf;

[[gnu::always_inline, gnu::const]] static inline double mcs_to_secs(double mcs)
{
    return mcs / 1'000'000.0;
}

[[nodiscard, gnu::always_inline, gnu::returns_nonnull, gnu::nonnull]] static
inline void *safe_trim(void *p, size_t n)
{
    void *const cp = realloc(p, n);

    return cp ? cp : p;
}

#ifndef BENCHMARKING
void print_lines(const FileBuf fbuf[static 1])
{
    for (size_t i = 0; i < fbuf->count; i++) {
        puts(fbuf->lines[i].line);
    }
}

[[gnu::always_inline, gnu::pure]] static inline size_t get_total_lines(
        const FileBuf fbuf[static 1])
{
    return fbuf->count;
}

[[gnu::always_inline, gnu::pure]] static inline size_t get_total_bytes(
        const FileBuf fbuf[static 1])
{
    return fbuf->size;
}
#endif

void free_lines(FileBuf fbuf[static 1])
{
    for (size_t i = 0; i < fbuf->count; ++i) {
        free(fbuf->lines[i].line);
    }

    free(fbuf->lines);
}

[[nodiscard, gnu::always_inline]] static inline bool resize_fbuf(
        FileBuf fbuf[static 1])
{
    if (fbuf->capacity + BUFSIZ < fbuf->capacity) {
        return false;
    }

    const size_t new_capacity = fbuf->capacity + BUFSIZ;
    void *const tmp = realloc(fbuf->lines, new_capacity * sizeof fbuf->lines[0]);

    if (tmp != nullptr) {
        fbuf->lines = tmp;
        fbuf->capacity = new_capacity;
        return true;
    }

    return false;
}

bool append_line(FileBuf fbuf[static 1], 
                 size_t size,
                 char line[static size])
{
    fbuf->lines[fbuf->count].line = line;
    fbuf->lines[fbuf->count].line[size] = '\0';
    fbuf->lines[fbuf->count].size = size + 1;
    return fbuf->count + 1 < fbuf->count ? false : ++fbuf->count;
}

bool alloc_and_append_line(FileBuf fbuf[static 1], 
                           size_t size,
                           char line[static size])
{
    char *const tmp = malloc(size + 1);

    if (tmp == nullptr) {
        return false;
    }

    fbuf->lines[fbuf->count].line = tmp;
    memcpy(fbuf->lines[fbuf->count].line, line, size);
    fbuf->lines[fbuf->count].line[size] = '\0';
    fbuf->lines[fbuf->count].size = size + 1;
    return fbuf->count + 1 < fbuf->count ? false : ++fbuf->count;
}

void mmap_memchr_cleanup(const FileBuf fbuf[static 1])
{
    munmap(fbuf->lines[0].line, fbuf->size);
    free(fbuf->lines);
}

/* `stream` should be valid, i.e. associated with a file, and `fbuf` should 
 * point to a valid object of type `FileBuf`.
 *
 * On success, returns true. `mmap_memchr_cleanup()` should be called by the 
 * calling code at some point to clean up all resources associated with this 
 * file.
 *
 * On failure, all resources are released (`stream` is not touched) and `false`
 * is returned. */
[[nodiscard, gnu::nonnull]] bool getlines_mmap_memchr(FILE *restrict stream, 
                                                      FileBuf fbuf[restrict static 1])
{
    const int fd = fileno(stream);
    struct stat st;

    if (fstat(fd, &st) == -1) {
        close(fd);
        return false;
    }

    fbuf->size = (size_t) st.st_size;

    char *const map =
        mmap(nullptr, (size_t) st.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    close(fd);

    if (map == MAP_FAILED) {
        return false;
    }

    posix_madvise(map, (size_t) st.st_size, POSIX_MADV_SEQUENTIAL | POSIX_MADV_WILLNEED);

    char *lhs;
    char *rhs;

    for (lhs = map; lhs < &map[st.st_size]; lhs = rhs + 1) {
        rhs = memchr(lhs, '\n', (size_t) (&map[st.st_size] - lhs));

        if (rhs == nullptr) {
            /* We have reached end-of-file or the file is malformed. */
            break;
        }

        if (fbuf->capacity <= fbuf->count) {
            if (!resize_fbuf(fbuf)) {
                goto cleanup_and_fail;
            }
        }

        if (!append_line(fbuf, (size_t) (rhs - lhs), lhs)) {
            goto cleanup_and_fail; 
        }
    }

    /* Trim to maximum used. */
    fbuf->lines = safe_trim(fbuf->lines, fbuf->count * sizeof fbuf->lines[0]);

    return true;

  cleanup_and_fail:
    munmap(map, fbuf->size);
    free(fbuf->lines);
    return false;
}

/* `stream` should be valid, i.e. associated with a file, and `fbuf` should 
 * point to a valid object of type `FileBuf`.
 *
 * On success, returns true. The calling code is required to call `free_lines()`
 * at some point in the code to release the memory associated with the lines.
 *
 * On failure, all resources are released (`stream` is not touched) and `false`
 * is returned. */
[[nodiscard, gnu::nonnull]] bool getlines_getline(FILE *restrict stream, 
                                                  FileBuf fbuf[restrict static 1])
{
    posix_fadvise(fileno(stream), 0, 0, POSIX_FADV_SEQUENTIAL | POSIX_FADV_WILLNEED);

    char *line = nullptr;
    size_t capacity = 0;
    ssize_t nread = 0;

    for (;;) {
        nread = getline(&line, &capacity, stream);

        if (nread == -1) {
            if (feof(stream)) {
                /* Input ended due to end-of-file. */
                break;
            }

            goto cleanup_and_fail;
        }

        fbuf->size += (size_t) nread;

        if (nread > 0 && line[nread - 1] == '\n') {
            line[--nread] = '\0';
        }

        if (fbuf->capacity <= fbuf->count) {
            if (!resize_fbuf(fbuf)) {
                goto cleanup_and_fail;
            }
        }

        /* Let getline() allocate another buffer. This saves us from making
         * a copy as well as an allocation. */
        if (!append_line(fbuf, (size_t) nread, line)) {
            goto cleanup_and_fail;
        }

        line = nullptr;
        capacity = 0;
    }

    free(line);

    /* Trim to maximum used, */
    fbuf->lines = safe_trim(fbuf->lines, fbuf->count * sizeof fbuf->lines[0]);
    return true;

  cleanup_and_fail:
    free(line);
    free_lines(fbuf);
    return false;
}

/* `stream` should be valid, i.e. associated with a file, and `fbuf` should 
 * point to a valid object of type `FileBuf`.
 *
 * On success, returns true. The calling code is required to call `free_lines()`
 * at some point in the code to release the memory associated with the lines.
 *
 * On failure, all resources are released (`stream` is not touched) and `false`
 * is returned. */
[[nodiscard, gnu::nonnull]] bool getlines_mmap_getline(FILE *restrict stream, 
                                                       FileBuf fbuf[restrict static 1])
{
    const int fd = fileno(stream);
    struct stat st;

    if (fstat(fd, &st) == -1) {
        close(fd);
        return false;
    }

    char *const map =
        mmap(nullptr, (size_t) st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);

    if (map == MAP_FAILED) {
        return false;
    }

    posix_madvise(map, (size_t) st.st_size, POSIX_MADV_SEQUENTIAL | POSIX_MADV_WILLNEED);
    
    FILE *const memstream = fmemopen(map, (size_t) st.st_size, "r");

    if (memstream == nullptr) {
        munmap(map, (size_t) st.st_size);
        return false;
    }

    const bool result = getlines_getline(memstream, fbuf);

    if (!result) {
        munmap(map, (size_t) st.st_size);
        fclose(memstream);
        return false;
    }

    /* Ignore errors on read-only file. */
    munmap(map, (size_t) st.st_size);

    fclose(memstream);

    /* Trim to maximum used. */
    fbuf->lines = safe_trim(fbuf->lines, fbuf->count * sizeof fbuf->lines[0]);

    return true;
}

/* Reads the next chunk of data from the stream referenced to by `stream`.
 * `chunk` must be a pointer to an array of size `size`. 
 *
 * Returns the number of bytes read on success, or zero elsewise. The chunk is 
 * not null-terminated.
 *
 * `read_next_chunk()` does not distinguish between end-of-file and error; the
 * routines `feof()` and `ferror()` must be used to determine which occured. */
[[nodiscard, gnu::nonnull]] size_t read_next_chunk(FILE *restrict stream, 
                                                   size_t size,
                                                   char chunk[restrict static size]) 
{
    const size_t rcount = fread(chunk, 1, size, stream);

    if (rcount < size) {
        if (!feof(stream)) {
            /* A read error occured. */
            return 0;
        }

        if (rcount == 0) {
            return 0;
        }
    }
    
    return rcount;
}

/* `stream` should be valid, i.e. associated with a file, and `fbuf` should 
 * point to a valid object of type `FileBuf`.
 *
 * On success, returns true. The calling code is required to call `free_lines()`
 * at some point in the code to release the memory associated with the lines.
 *
 * On failure, all resources are released (`stream` is not touched) and `false`
 * is returned. */
[[nodiscard, gnu::nonnull]] bool getlines_fread(FILE *restrict stream, 
                                                FileBuf fbuf[restrict static 1])
{
#define MIN_CHUNK_SIZE 65536u

#if   SIZE_MAX >= (MIN_CHUNK_SIZE * 4 - 1)
    #define CHUNK_SIZE (MIN_CHUNK_SIZE * 4 - 1)
#elif SIZE_MAX >= (MIN_CHUNK_SIZE * 2 - 1)
    #define CHUNK_SIZE (MIN_CHUNK_SIZE * 2 - 1)
#elif SIZE_MAX >= (MIN_CHUNK_SIZE  - 1)         /* Required by ISO C. */
    #define CHUNK_SIZE (MIN_CHUNK_SIZE - 1)
#else
    #error "The compiler is non-conforming to ISO C."
#endif
    posix_fadvise(fileno(stream), 0, 0, POSIX_FADV_SEQUENTIAL | POSIX_FADV_WILLNEED);

    char chunk[CHUNK_SIZE];
    size_t nread; 
    size_t remain = 0;
    size_t line_size = 0;
    char *lhs;
    char *rhs;

    while (nread = read_next_chunk(stream, CHUNK_SIZE - remain, chunk + remain)) {
        fbuf->size += nread;
        nread += remain;    /* Include remaining bytes from the previous read. */
        remain = 0;         /* Reset remain for the current iteration. */


        for (lhs = chunk; lhs < &chunk[nread]; lhs = rhs + 1) {
            rhs = memchr(lhs, '\n', (size_t) (&chunk[nread] - lhs));

            if (rhs == nullptr) {
                /* We did not find a newline. We shalt consider this an
                 * incomplete line and copy the bytes after the last '\n'
                 * to the beginning of the chunk. */
                remain = (size_t) (&chunk[nread] - lhs);
                memmove(chunk, chunk + (lhs - chunk), remain);
                break;
            }

            line_size = (size_t) (rhs - lhs);

            if (fbuf->capacity <= fbuf->count) {
                if (!resize_fbuf(fbuf)) {
                    goto cleanup_and_fail;
                }
            }

            if (!alloc_and_append_line(fbuf, line_size, lhs)) {
                goto cleanup_and_fail; 
            }
        }
    }
    
    if (remain) {
        if (fbuf->capacity <= fbuf->count) {
            if (!resize_fbuf(fbuf)) {
                goto cleanup_and_fail;
            }
        }

        if (!alloc_and_append_line(fbuf, remain, chunk)) {
            goto cleanup_and_fail; 
        }
    }

    /* Trim to maximum used. */
    fbuf->lines = safe_trim(fbuf->lines, fbuf->count * sizeof fbuf->lines[0]);
    return feof(stream);

  cleanup_and_fail:
    free_lines(fbuf);
    return false;
}

[[gnu::always_inline]] static inline timestamp_t get_posix_clock_time_fallback(void)
{
    struct timeval tv;
    return gettimeofday (&tv, NULL) == 0 ? 
        (timestamp_t) (tv.tv_sec * 1'000'000 + tv.tv_usec) : 0;
}

/* Reference: https://stackoverflow.com/a/37920181/20017547 */
[[gnu::always_inline]] static inline timestamp_t get_posix_clock_time(void)
{
#ifdef _POSIX_MONOTONIC_CLOCK
    struct timespec ts;

    if (clock_gettime (CLOCK_MONOTONIC, &ts) == 0) {
        return (timestamp_t) (ts.tv_sec * 1'000'000 + ts.tv_nsec / 1'000);
    } 

    return get_posix_clock_time_fallback();
#else
    return get_posix_clock_time_fallback();
#endif /* _POSIX_MONOTONIC_CLOCK */
}

[[gnu::always_inline, gnu::const]] static inline timestamp_t get_clock_difftime(
        timestamp_t t0, 
        timestamp_t t1)
{
    return t1 - t0;
}

[[gnu::nonnull, gnu::always_inline]] static inline void usage(
        FILE *restrict stream, 
        const char argv0[restrict static 1])
{
    fprintf(stream, "Usage: %s OPTIONS FILE.\n"
                    "OPTION: --fread, --mmap_getline, --mmap_memchr, --getline.\n",
                    argv0);
}

void sigsegv_handler(int signum)
{
    static constexpr char err[] = "Error: SIGSEGV raised by a page fault.\n";
    
    /* Casting to (void) alone still raises a warning. This is a hack used to
     * quieten it. */
    (void) !write(STDOUT_FILENO, err, sizeof err - 1);
    signal(signum, SIG_DFL);
    raise(signum);
}

int main(int argc, char *argv[])
{
    if (argc != 3) {
        fprintf(stderr, "error: expected 3 arguments.\n");
        usage(stderr, argv[0]);
        return EXIT_FAILURE;
    }

    FileBuf fbuf = {};
    bool (*fn)(FILE *, FileBuf *) = {};

    if (strcmp(argv[1], "--getline") == 0) {
        fn = getlines_getline;
    } else if (strcmp(argv[1], "--mmap_getline") == 0) {
        fn = getlines_mmap_getline;
    } else if (strcmp(argv[1], "--mmap_memchr") == 0) {
        fn = getlines_mmap_memchr;
    } else if (strcmp(argv[1], "--fread") == 0) {
        fn = getlines_fread;
    } else {
        fprintf(stderr, "Unknown option: \"%s\".\n", argv[1]);
        usage(stderr, argv[0]);
        return EXIT_FAILURE;
    }

    sigaction(SIGSEGV, &(struct sigaction) {sigsegv_handler}, nullptr);

    FILE *const stream = fopen(argv[2], "rb");

    if (stream == nullptr) {
        perror(argv[2]);
        return EXIT_FAILURE;
    }
    
    const timestamp_t t0 = get_posix_clock_time();

    if (!fn(stream, &fbuf)) {
        fprintf(stderr, "error: failed to read file: %s.\n",
            strerror(errno));
        fclose(stream);
        return EXIT_FAILURE;
    }

    const timestamp_t msecs = get_clock_difftime(t0, get_posix_clock_time());
    
#ifndef BENCHMARKING 
    print_lines(&fbuf);
    fprintf(stderr, "Read %ju lines, %ju bytes in: %f secs.\n", 
                    get_total_lines(&fbuf),
                    get_total_bytes(&fbuf),
                    mcs_to_secs((double) msecs));
#else
    printf("%fs", mcs_to_secs((double) msecs));
#endif
    fn == getlines_mmap_memchr ? mmap_memchr_cleanup(&fbuf) : free_lines(&fbuf);
    fclose(stream);
    return EXIT_SUCCESS;
}

Benchmarks:

Same data-set.

F_01: L:65748 W:65000 112M
    1.323386s getline
    1.062281s mmap_getline
    1.359288s mmap_memchr
    1.223515s fread

F_02: L:927277 W:917225 1.1G
    10.295918s getline
    10.152524s mmap_getline
    10.703103s mmap_memchr
    9.441809s fread

F_03: L:994391 W:983497 1.7G
    15.532013s getline
    16.348079s mmap_getline
    16.402985s mmap_memchr
    15.530191s fread

F_04: L:144137 W:142530 116M
    1.174413s getline
    1.088987s mmap_getline
    1.229583s mmap_memchr
    1.039069s fread

F_05: L:212197 W:209866 390M
    3.775912s getline
    3.504295s mmap_getline
    3.705634s mmap_memchr
    3.168295s fread

F_06: L:232727 W:230108 357M
    3.563904s getline
    3.199386s mmap_getline
    3.540193s mmap_memchr
    2.906388s fread

F_07: L:89372 W:88356 65M
    0.650826s getline
    0.716422s mmap_getline
    0.652412s mmap_memchr
    0.558456s fread

F_08: L:569562 W:563291 550M
    5.172587s getline
    4.786773s mmap_getline
    5.275632s mmap_memchr
    4.394102s fread

F_09: L:453558 W:448606 675M
    6.748524s getline
    5.898616s mmap_getline
    6.250793s mmap_memchr
    5.098025s fread

F_10: L:138394 W:136879 243M
    2.375930s getline
    2.347826s mmap_getline
    2.392065s mmap_memchr
    2.041770s fread

F_11: L:290150 W:285186 22M
    0.332532s getline
    0.305136s mmap_getline
    0.240075s mmap_memchr
    0.256278s fread

F_12: L:318646 W:315055 527M
    4.764323s getline
    4.322212s mmap_getline
    4.791663s mmap_memchr
    4.178332s fread

F_13: L:576988 W:570725 1.1G
    10.798792s getline
    9.531844s mmap_getline
    8.840263s mmap_memchr
    9.837410s fread

F_14: L:783682 W:774984 855M
    7.521734s getline
    6.869232s mmap_getline
    7.205962s mmap_memchr
    7.217902s fread

F_15: L:814488 W:805405 806M
    7.151254s getline
    6.840751s mmap_getline
    6.978558s mmap_memchr
    6.740878s fread

F_16: L:249697 W:247063 423M
    3.899772s getline
    3.524430s mmap_getline
    3.844033s mmap_memchr
    3.334856s fread

F_17: L:586037 W:579086 236M
    2.499446s getline
    2.265058s mmap_getline
    2.266540s mmap_memchr
    2.048737s fread

F_18: L:407917 W:403336 329M
    3.203412s getline
    2.856143s mmap_getline
    3.083320s mmap_memchr
    2.693788s fread

F_19: L:592859 W:585774 222M
    2.398518s getline
    2.101303s mmap_getline
    2.161576s mmap_memchr
    2.010903s fread

F_20: L:233854 W:231300 292M
    2.832616s getline
    2.497263s mmap_getline
    2.726214s mmap_memchr
    2.588875s fread

Valgrind reports nothing awry. AddressSanitizer and StaticAnalyzer also did not report anything. Code was compiled with optimizations and warnings enabled. The compiler did not emit any diagnostic warning either.

Review Request:

  • Have I introduced any bugs in the code?

  • How do you test a read routine? With another read routine? How do you test the testing read routine? How do you test the first read routine?...

  • Do you see any signed/unsigned integer overflow possibilities?

  • Anything, everything.

\$\endgroup\$
2
  • \$\begingroup\$ Note about mmap(): On Linux, SIGBUS is generated on an attempted access to a portion of the buffer that does not correspond to the file (for example, beyond the end of the file, including the case where another process has truncated the file). POSIX specification of mmap() does not require that the signal is delivered on error but leaves such possibility for implementations (out-of-space condition is provided as an example for such a signal being generated). The man page for OpenBSD states that it generates a SIGSEGV signal instead of SIGBUS on some architectures. \$\endgroup\$
    – Harith
    Commented May 24 at 22:34
  • \$\begingroup\$ [..continued] - FreeBSD and NetBSD guarantee a SIGBUS signal. Oracle Solaris's man page specifies that either of these two signals can be generated. Code using mmap() might like to take this into consideration. \$\endgroup\$
    – Harith
    Commented May 24 at 22:34

2 Answers 2

2
\$\begingroup\$

size_t vs uintmax_t

You use both size_t and uintmax_t in your code. size_t should be used for everything related to sizes, counts and indices of things that are stored in memory. However, files can be even larger than all the addressable memory, so using uintmax_t to keep of the size of a file is correct. However, count and capacity refer to the size of the array of Lines, which is in memory, so there you should use size_t.

Use regular pointers instead of [static 1]

I don't understand why you pass the file buffer using const FileBuf fbuf[static 1], instead of the more idiomatic const FileBuf *fbuf. Using array notation makes it seem like sometimes there are arrays of more than one FileBuf.

A similar thing applies to argv0. We normally use const char* for pointers to strings.

And even in the cases where using array notation is valid, like in char line[static size] in append_line(), it is quite weird to see the code accessing line[size], at that looks like it is out of bounds (even if [static size] means the array's length will be at least size).

Unnecessary use of restrict

Strict aliasing (which is the default rule) means that pointers to different types cannot alias (except for char* which can alias anything). The restrict keyword is there to say that pointers to the same type are not aliasing. However, you never have the case that you pass two pointers to the same type to your functions, and only a few places where there is a pointer to char, so you almost never have to use restrict. I would remove it unless really necessary; it is just noise and makes it look like something special is going on when there is not.

Resizing buffers

When you have a dynamically growing buffer, it's generally preferred to double their size each time they reach their limit, instead of adding the same amount each time. This ensures the amortized complexity of allocating memory stays \$O(1)\$. If you don't do this, the complexity is \$O(N)\$, where \$N\$ is the number of lines, so your program will start to slow down the more lines a file has.

You might not see a performance impact if the only memory allocation you do is in resize_fbuf, as then realloc() probably doesn't have to move the contents of the allocation to another place in memory. However, in a more complex program it can become an issue.

There is also a related issue:

Check for integer overflow

When resizing a buffer to a new size, or any other time you are adding or multiplying things together that depend on external input, you must check that those calculations won't overflow.

It is currently very unlikely there is any filesystem where the size of a file is larger than uintmax_t, so you could decide to skip overflow checks there.

Note that size_t is only 32 bits on 32-bit systems, but these systems can often handle files with 64-bit sizes. So casting st.st_size to size_t could result in reading less than the full file.

Possible munmap() with wrong size?

This one is unlikely to happen, but in getlines_mmap_getline() you cal mmap() with (size_t) st.st_size, but you never store that size. Instead, you rely on getlines_getline(memstream, fbuf) to calculate and fill in fbuf->size. But if there is any bug or corner case you didn't think of, and the resulting fbuf->size is not actually the same as st.st_size, then munmap() will be called with the wrong size. So I would recommend you store the value of st.st_size and use that exact same value to munmap().

Inconsistent use of attributes

You are already using [[nodiscard]] in some places, but it looks like append_line() and alloc_and_append_line() would also benefit from it.

The signal handler is not useful

You install a signal handler for SIGSEGV, but the only thing it does is print an error message (which should go to STDERR_FILENO rather than STDOUT_FILENO) that a segmentation fault occurred. But consider that if your program crashes from a segmentation fault and you didn't have a signal handler for it, a similar error message would be printed by the shell that started it anyway. So this signal handler doesn't add anything useful. What would be useful is if the signal handler could know that the segmentation fault happened during a read from the memory mapped file, and perhaps even from which part. You could set a volatile variable right before trying to read, and clear it afterwards, which the signal handler can then read and perhaps do something useful with. This will have a performance penalty though.

Testing a read routine

You can test a read routine by having a file with known contents, and then compare the output of the routine against the expected output given those contents.

Testing a test

You don't, otherwise you also have to consider testing the test of the test, and so on until infinity. But it is perhaps a good thought excercise, especially if you consider testing the test routines for negatives; this basically is the same as thinking about possible bugs in the read routines. And once you have come up with those, just make sure the regular tests are good enough to catch those bugs.

\$\endgroup\$
14
  • 1
    \$\begingroup\$ I am using restrict when I have a char * and a pointer of some other type. From what I know, char * is an exception to the aliasing rule. Is my understanding incorrect in that a char * is allowed to alias any other pointer type? \$\endgroup\$
    – Harith
    Commented May 24 at 19:07
  • 1
    \$\begingroup\$ "Things are even more weird when you use if for argv0. You are actually writing there that argv0 is an array of exactly one char. So now usage() is technically always reading out of bounds. Just don't do it, and pass it as const char *argv0." ==> static 1 means that the pointer should point to at least 1 item of type char. For instance, if I were to pass nullptr to usage(), the compiler is required to raise diagnostic information. (Or at least last I checked.) \$\endgroup\$
    – Harith
    Commented May 24 at 19:19
  • 1
    \$\begingroup\$ The uintmax_t type is functionally obsolete in C23, which allows there to be larger integral types than it. The reasoning was that, once ABIs fixed the size of uintmax_t, it could not ever change, and this was preventing compilers from adding uint128_t. \$\endgroup\$
    – Davislor
    Commented May 24 at 22:05
  • 1
    \$\begingroup\$ @Davislor About uintmax_t, what standard type do you suggest I use for file sizes in C23 and above? \$\endgroup\$
    – Harith
    Commented May 24 at 22:14
  • 2
    \$\begingroup\$ @Harith I agree with G Sliepen: use size_t for sizes and array indices. That’s guaranteed to be able to index any object (or its bytes). If you ever need to store the bits of an object pointer in an integral type, use (uintptr_t)(void*)p. \$\endgroup\$
    – Davislor
    Commented May 25 at 3:34
3
\$\begingroup\$

Potential overflow

tv.tv_sec * 1'000'000 can readily overflow.

Rather than

(timestamp_t) (tv.tv_sec * 1'000'000 + tv.tv_usec)

consider

(timestamp_t) tv.tv_sec * 1'000'000 + tv.tv_usec
// or
(timestamp_t) 1'000'000 * tv.tv_sec + tv.tv_usec
\$\endgroup\$

Not the answer you're looking for? Browse other questions tagged or ask your own question.