14
\$\begingroup\$

I am looking for ways to improve my password generation code written in C. This is in the same vein as my code in C++ and Python with many improvements.

I am using getopt.h for command line parsing and libsodium.h for cryptographically secure number generation.

I have written it in the GNU style (although not strictly). This code can be viewed on GitHub.

/* pgen -- generate cryptographically secure random strings */

#include <error.h>
#include <getopt.h>
#include <inttypes.h>
#include <sodium.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define PROGRAM_NAME "pgen"

#define _warn(...) do {                 \
        if(!quiet)                      \
            error(0, 0, __VA_ARGS__);   \
    } while(0)

#define STRTOUMAX_BASE 10

#define LOWER "abcdefghijklmnopqrstuvwxyz"
#define LOWER_L 26

#define NUMERIC "0123456789"
#define NUMERIC_L 10

#define SPECIAL "!@#$%^&*()-_=+`~[]{}\\|;:'\",.<>/?"
#define SPECIAL_L 32

#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define UPPER_L 26

/* Disable warnings */
static bool quiet;

static struct option const long_opts[] = {
    {"help", no_argument, NULL, 'h'},
    {"lower", no_argument, NULL, 'L'},
    {"numeric", no_argument, NULL, 'N'},
    {"quiet", no_argument, NULL, 'q'},
    {"special", no_argument, NULL, 'S'},
    {"upper", no_argument, NULL, 'U'},
    {NULL, 0, NULL, 0}
};

_Noreturn void usage(int const status)
{
    if(status != EXIT_SUCCESS) {
        fprintf(stderr, "Usage: %s [OPTION]...\n", PROGRAM_NAME);
        fprintf(stderr, "Try '%s --help' for more information.\n",
                PROGRAM_NAME);
    } else {
        printf("Usage: %s [LEN] [OPTION]...\n", PROGRAM_NAME);
        puts("Generate cryptographically secure strings.");
        printf("Example: %s 16 -LNSU\n", PROGRAM_NAME);
        puts("\nOptions:\n\
  -h, --help       display this help text and exit\n\
  -L, --lower      generate lowercase letters\n\
  -N, --numeric    generate numeric digits\n\
  -q, --quiet      disable warnings\n\
  -S, --special    generate special characters\n\
  -U, --upper      generate uppercase letters");
    }

    exit(status);
}

int main(int const argc, char *const *const argv)
{
    char const *const key = (argc > 1) ? argv[1] : NULL;

    if(key != NULL
       && (strcmp(key, "-h") == 0 || strcmp(key, "--help") == 0)) {
        usage(EXIT_SUCCESS);
    }

    if(key == NULL)
        error(EXIT_FAILURE, 0, "first argument must be length");

    /* First argument is positional */
    optarg++;

    quiet = false;

    bool lower_b = false;
    bool upper_b = false;
    bool numeric_b = false;
    bool special_b = false;

    uintmax_t const length = strtoumax(key, NULL, STRTOUMAX_BASE);

    uint32_t chars_l = 0;

    int c;

    while((c = getopt_long(argc, argv, "hLNqSU", long_opts, NULL)) != -1) {
        switch(c) {
            case 'h':
                usage(EXIT_SUCCESS);
                break;
            case 'L':
                if(!lower_b) {
                    chars_l += LOWER_L;
                    lower_b = true;
                }
                break;
            case 'N':
                if(!numeric_b) {
                    chars_l += NUMERIC_L;
                    numeric_b = true;
                }
                break;
            case 'q':
                quiet = true;
                break;
            case 'S':
                if(!special_b) {
                    chars_l += SPECIAL_L;
                    special_b = true;
                }
                break;
            case 'U':
                if(!upper_b) {
                    chars_l += UPPER_L;
                    upper_b = true;
                }
                break;
            default:
                usage(EXIT_FAILURE);
        }
    }

    if(length == 0) {
        _warn("length is zero");
        exit(EXIT_SUCCESS);
    }

    if(lower_b + numeric_b + special_b + upper_b == 0) {
        _warn("no character sets selected");
        exit(EXIT_SUCCESS);
    }

    if(key[0] == '-' && !quiet) {
        _warn("it appears you entered a negative value, "
              "this value will be evaluated in an unsigned context");
        sleep(2);
    }

    char *const chars = malloc(chars_l + 1);

    if(chars == NULL)
        error(EXIT_FAILURE, 0, "failed to allocate memory for string");

    chars[0] = '\0';

    if(lower_b)
        strncat(chars, LOWER, LOWER_L);

    if(numeric_b)
        strncat(chars, NUMERIC, NUMERIC_L);

    if(special_b)
        strncat(chars, SPECIAL, SPECIAL_L);

    if(upper_b)
        strncat(chars, UPPER, UPPER_L);

    for(size_t i = 0; i < length; ++i)
        putchar(chars[randombytes_uniform(chars_l)]);

    free(chars);

    putchar('\n');

    return EXIT_SUCCESS;
}

Help text:

$ ./pgen.out --help
Usage: pgen [LEN] [OPTION]...
Generate cryptographically secure strings.
Example: pgen 16 -LNSU

Options:
  -h, --help       display this help text and exit
  -L, --lower      generate lowercase letters
  -N, --numeric    generate numeric digits
  -q, --quiet      disable warnings
  -S, --special    generate special characters
  -U, --upper      generate uppercase letters

I am particularly interested in performance and security. Obviously this code has not gone through a security audit, but if I am doing something horribly wrong with the security of the program, let me know.

Likewise, how can I make this code more readable?


Update: If you are interested, I have updated the GitHub code with my changes from the suggestions in this post.

\$\endgroup\$
3
  • 21
    \$\begingroup\$ 170 lines to give help and parse arguments, and 2 lines to do the real work. Yup, you've not merely followed, but taken the lead in the GNU style! \$\endgroup\$ Commented May 10, 2018 at 3:37
  • 1
    \$\begingroup\$ Can you describe the threat model you've assumed? One aspect in particular: is this for personal, offline use, or is it intended for embedded systems where an attacker can observe power consumption, or multi-user systems where the process is visible to others? \$\endgroup\$ Commented May 10, 2018 at 8:42
  • \$\begingroup\$ @TobySpeight It was made for personal, offline use. I am not too concerned about portability in this case. If you can suggest ways to improve security when the attacker can observe power consumption, I am all ears, because coding around that is entirely new to me. \$\endgroup\$
    – esote
    Commented May 10, 2018 at 13:52

5 Answers 5

15
\$\begingroup\$

Overall, this is pretty well written. It's very easy to read and understand. I especially like that you annotated the usage() function with _Noreturn. That really helps readability in my opinion.

Constants

In general, I'd avoid using #define for defining constants. C supports const and it allows you to give types to your constants so the compiler can check them and give appropriate errors or warnings on their use.

Static Inline Functions

Likewise, I would avoid making _warn() a macro. While you've done it fairly safely, it would be better as a static inline function. That way it can be stepped into during debugging, for example.

Avoid Globals

You've made quiet a global variable, though it's not obvious to me why. It could easily be a local variable in main() and it would avoid the problems that global variables have, such as it being hard to figure out where they change.

Break Your Code Into Functions

In my opinion, your main() function is too long and doing too many things. I would break it into 2 functions - parse_arguments() and generate_password(). This would make it even easier to read, and would clarify what those parts of the program are doing. It allows a future reader to avoid going through code unrelated to what they're looking for. For example, if there's an error in generating the passwords, it's probably unrelated to the argument parsing, so as a first pass, someone debugging this could skip that code more easily.

Multiline String Constants

In usage() you have a multiline string constant. It's written in a messy way. I recommend using the string concatenation offered by the language:

puts("\nOptions:\n"
    "   -h, --help       display this help text and exit\n"
    "   -L, --lower      generate lowercase letters\n"
    "   -N, --numeric    generate numeric digits\n"
    "   -q, --quiet      disable warnings\n"
    "   -S, --special    generate special characters\n"
    "   -U, --upper      generate uppercase letters");

Why Check For -h Twice?

I notice in your argument parsing that you have a check for -h or --help at the beginning of main(), then again in the switch statement below that. I would remove the first one and just let the loop handle it. You might want to change the error message on the if (key == NULL) to note that you can use -h to get help if you do that.

Performance

You specifically asked about performance. I would say that it's irrelevant in this case. This code isn't generating millions of passwords at a time. Even if it were, it's going to have to randomly pick symbols out of the array of allowed symbols, meaning it's going to be O(n) where n is the length of the password being generated. So I don't think there's much to do in terms of performance.

Usability

I'm not a security expert, so I'm probably not qualified to comment much on that. But one thing I do notice is that you aren't guaranteeing the user that the password won't have repeating numbers or letters. It's common for password verifiers on websites to disallow any more than 2 consecutive instances of the same character. It might be worth checking to make sure you don't have 3 or more in a row if that's a concern for the types of websites you use.

\$\endgroup\$
13
  • 7
    \$\begingroup\$ Regarding security, that’s a very short-sighted rule. There are words that have three identical consecutive letters in some languages (e.g. German, but even English!), they shouldn’t be forbidden in passwords. \$\endgroup\$ Commented May 10, 2018 at 9:08
  • 1
    \$\begingroup\$ Moreover, regarding the last paragraph, I think that the rule is decreasing the overall security. I mean, if the password is numeric and with 2 digits only, the attacker has to test 100 different values; if you enforce no consecutive instances the different passwords become 90, so you have decreased the number of possible passwords (and consequently the brute-force attack time) by 10% \$\endgroup\$
    – frarugi87
    Commented May 10, 2018 at 15:43
  • 1
    \$\begingroup\$ Regarding the remark about security - my main concern in saying that is about what password verifiers require. You are all correct that it's less secure, but when I have to choose a password for my bank, or whatever, they require me not to use more than 2 consecutive repeating characters, so I don't have a choice. If my password generator keeps outputting those, I'm going to stop using it. That's the point I was making. \$\endgroup\$ Commented May 10, 2018 at 16:04
  • 2
    \$\begingroup\$ Great answer, +1 from me, but you really should edit your last paragraph. Your suggestion is misleading at best. Since you said yourself you don't know very much about security, just leave it at that. Suggest that a new question is asked, perhaps, on Security SE? Surely it is a huge topic by itself... \$\endgroup\$
    – Pedro A
    Commented May 10, 2018 at 23:04
  • 3
    \$\begingroup\$ This is a good review except for the “Security” paragraph which is dangerously wrong. Excluding repeated characters would decrease the security (both by restricting the output space and by adding complexity) for no good reason. Generating a password that complies with stupid restrictions enforced by some applications would be a different exercise, and you'd need many more options and rules (such as ensuring that there's at least one character from each category, a very popular rule that restricts the password space). \$\endgroup\$ Commented May 11, 2018 at 0:31
7
\$\begingroup\$

You should always use braces around if statements, making it easier to add statements later if necessary. Failure to do this caused Apple's goto fail security vulnerability. apparently not (thanks Konrad Rudolph)

You should also add an --all command option that selects all available character sets. Since this is to be used for password generation there should be a warning printed if the resulting password is too small. There is a guide here on password entropy. However, for cryptographic keys that do not need to be memorized, 128 bits is usually seen as a minimum bound instead of "overkill", with 80 bit keys having been phased out by NIST as too small.

Runnning pgen -q currently prints nothing, but it should still print the warning about not having a length. You may also want to exit with a failure exit code when missing a critical argument, instead of exiting with the same exit code after printing a warning.

You have also repeated *const twice in int main(int const argc, char *const *const argv) instead of writing it more conventionally as char **const argv.

When printing the program name you may want to use argv[0] instead of assuming that the binary will always be called pgen.

\$\endgroup\$
2
  • 4
    \$\begingroup\$ “Failure to do this caused Apple's goto fail security vulnerability.” — This is an often-repeated myth but it’s categorically false. The bug was caused by a faulty version control merge, and using braces would not necessarily have prevented it. \$\endgroup\$ Commented May 10, 2018 at 9:06
  • 2
    \$\begingroup\$ *const *const is different than **const. The first is a const pointer to a const pointer to a char, while the second is a const pointer to a non-const pointer to a char. I like to make it clear when I treat things as constant, even when the compiler often optimizes it for me. Tools like cdecl.org are helpful with this. \$\endgroup\$
    – esote
    Commented May 10, 2018 at 14:25
6
\$\begingroup\$

The help message doesn't indicate what's meant by "special" characters. I'd call them something like "punctuation" instead.


I'm not convinced this is what the user wants:

if(lower_b + numeric_b + special_b + upper_b == 0) {
    _warn("no character sets selected");
    exit(EXIT_SUCCESS);
}

At this point, we know a non-zero length is required, but we output nothing and return a success status. That could be misleading, as the calling program might assume the success status means we have given it a strong password. We should either exit(EXIT_FAILURE), or (more usefully) default to including all character types.


There's really no need to allocate memory to hold chars. For this small string (whose life is within the function scope), it's going to be much more efficient to have a local (stack) variable large enough to hold the largest combination of character groups:

char chars[sizeof LOWER + sizeof UPPER + sizeof NUMERIC + sizeof SPECIAL];

(If you're feeling very frugal, subtract the 4 NULs from the size).


If you're going to use a hard-coded program name instead of argv[0], then printf("%s") is overkill:

    puts("Usage: " PROGRAM_NAME " [LEN] [OPTION]...");
    puts("Generate cryptographically secure strings.");
    puts("Example: " PROGRAM_NAME "  16 -LNSU");

Finally, int main(int argc, char *const *argv) isn't a standard signature for main(). In a portable program argv should be an array of mutable pointers to characters.

\$\endgroup\$
4
\$\begingroup\$
#define _warn(...) do {                 \

Don't use names beginning with underscores, so rename _warn to warn. Generally speaking, they're reserved for the standard library and for compiler features. Named beginning with underscore followed by a lowercase letter are allowed for local variables, but best avoided. Even those are forbidden for global variables, type names and macros.

#define LOWER_L 26

Change that to strlen(LOWER) to avoid the risk that LOWER and LOWER_L won't be in synch. Decent compilers will compile strlen of a string literal to an integer constant. Alternatively, change that to sizeof(LOWER)-1 (the size of the string literal includes the trailing null byte). This goes as well for NUMERIC_L, SPECIAL_L and UPPER_L.

    bool lower_b = false;
    bool upper_b = false;
    bool numeric_b = false;
    bool special_b = false;

You're around the threshold where you're repeating the same structure multiple times and should factor it. Normally 4 times would be well above the threshold, but here the structure is a bit of a pain and the repetition is pretty simple. Still, I might go with an array of character category descriptions.

struct character_category {
    const char *characters;
    size_t count;
};
#define MAKE_CHARACTER_CATEGORY(letter, chars) {(letter), chars, sizeof(chars)}
static struct character_categories const categories[] = {
    MAKE_CHARACTER_CATEGORY('L', "abcdefghijklmnopqrstuvwxyz"),
    MAKE_CHARACTER_CATEGORY('N', "0123456789"),
    MAKE_CHARACTER_CATEGORY("S", "!@#$%^&*()-_=+`~[]{}\\|;:'\",.<>/?"),
    MAKE_CHARACTER_CATEGORY('U', "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
    {NULL, 0}
};
        error(EXIT_FAILURE, 0, "first argument must be length");

/* First argument is positional */

That's backward from every other command out there. Generally speaking, options go first. Some programs (mainly GNU ones) allow options after non-options, and some programs have a special syntax where arguments beginning with - are operators and so are intermixed with arguments not beginning with -. Your program does not fall in this last case so it should allow options to come first. Parse options, and mandate that there is exactly one argument left.

        printf("Usage: %s [LEN] [OPTION]...\n", PROGRAM_NAME);

Following my remark above, and given that LEN is not optional, this should be

    printf("Usage: %s [OPTION] LEN...\n", PROGRAM_NAME);
        fprintf(stderr, "Usage: %s [OPTION]...\n", PROGRAM_NAME);

This is missing LEN and should be

        fprintf(stderr, "Usage: %s [OPTION]... LEN\n", PROGRAM_NAME);
uintmax_t const length = strtoumax(key, NULL, STRTOUMAX_BASE);
…
for(size_t i = 0; i < length; ++i)

If the range of size_t is smaller than uintmax_t, this loop won't terminate. This may seem academic if you're used to 64-bit platforms, but on a 16-bit platform where size_t is a 16-bit type and uintmax_t is a 32-bit (not C99-compliant) or 64-bit type, it's an issue. It's fine to limit this program to the size of size_t, but you need to check the size of key and select an appropriate type for i. It would be simpler to use unsigned long and strtoul.

Also, check for invalid values of key (overflow or non-numeric). The strtoxxx functions parse the beginning of the strings, so they accept garbage such as 1jbfirfr. Pass a non-null endptr argument and check that it's set to the end of the string.

Also, the name key is misleading. This has nothing to do with a key.

Also, STRTOUMAX_BASE is not a useful constant. Here you really mean 10, it isn't some value that has a distinct semantics.

char *endptr;
errno = 0;
unsigned long const length = strtoul(argv[optarg], &endptr, 10);
if (endptr == argv[optarg] || *endptr != 0) {
    warn("Invalid length: should be an integer.");
    usage(EXIT_FAILURE);
}
if (length == 0 || (length == ULONG_MAX && errno = ERANGE)) {
    warn("Invalid length: out of range.");
    usage(EXIT_FAILURE);
}
    if(length == 0) {
        _warn("length is zero");
        exit(EXIT_SUCCESS);
    }

That's an error, so exit with EXIT_FAILURE.

   if(lower_b + numeric_b + special_b + upper_b == 0) {
       _warn("no character sets selected");
       exit(EXIT_SUCCESS);
   }

Likewise, if there's an error, exit with EXIT_FAILURE. But it might be a good idea to treat this case specially: if the user doesn't select any character set, treat it as an exception and include all the character sets. Alternatively, you may want to start with all characters allowed and have options to remove some sets. For comparison, pwgen starts defaults to letters and digits of either case, but no punctuation.

    if(key[0] == '-' && !quiet) {
        _warn("it appears you entered a negative value, "
              "this value will be evaluated in an unsigned context");
        sleep(2);
    }

This case isn't even triggered because a leading - is parsed as an option. It should actually be an error. Don't sleep when there's an error! That's pointless and can be very annoying. Print the error message and exit.

Furthermore, “will be evaluated in an unsigned context” is an implementation detail. This isn't something you should show to a user: it is not meaningful to a user. If your program reads a negative integer and wraps it around to a non-negative range, that's a bug in your program. If you were correctly detecting that case, the correct action would be to show an error stating that a negative integer is not meaningful here. See above for error checking on strtoumax/strtoul.

    char *const chars = malloc(chars_l + 1);

Given that this array will never be more than 95 bytes, it would make sense to allocate it with a static size on the stack. Unless you want to avoid a dynamic stack size, you could allocate it on the stack as a variable-length array, with the downside that the program's behavior is undefined in the unlikely case of a stack overflow.

    if(lower_b)
        strncat(chars, LOWER, LOWER_L);
…

Rather than call strncat repeatedly, you could maintain a pointer to the end of the string. This would avoid the cost traversing the string to determine its end each time. It's a micro-optimization though. Unfortunately, the standard library functions don't make it easy to have code that's both nice and efficient.

char *end = chars;
if (lower_b) {
    memcpy(end, LOWER, strlen(LOWER));
    end += strlen(LOWER);
}
        putchar(chars[randombytes_uniform(chars_l)]);

That's correct!

Technically, there's a security risk here, but it isn't applicable to the typical scenarios where such a program would be used. I'm mentioning that more for general interest than because it's a genuine issue for this program. The risk is a timing attack (or more generally side channels, including power consumption and radio emissions, but unless you're writing code for a smartcard those are even less likely to be relevant than timing).

The risk is leaking the value of randombytes_uniform(chars_l) to an observer who can't access the memory of the program and can't see its output. The leak comes from the fact that on many systems (in particular, any system with a memory cache), the timing of an array access a[i] can reveal some information about the value of i (e.g. by leaking which cache line a[i] is retrieved from).

In order to have a realistic chance of exploiting this, the attacker needs to be able to execute code on the same CPU. Furthermore very little data is leaked in any given run of the program. Such attacks are a concern with programs that manipulate keys, where the attacker will arrange to observe many runs with the same key. Here, each byte is only used once, which makes it very difficult for the attacker to get a significant information.

Protecting against this type of threat is difficult. The general idea is to ensure that the code never takes any branch based on a secret value (to avoid timing differences in code execution, and also leaking information through the CPU's branch prediction state) and never makes a memory access at a secret address (to avoid timing differences in memory accesses). Writing constant-time code can require making calculations whether they're needed or not (i.e. instead of if (secret) x = f(); else x = g();, you calculate x1 = f(); x2 = g(); unconditionally) and using bitwise operations to combine results (e.g. x = (x1 & -secret) | (x2 & -secret) instead of x = secret ? x1 : x2, assuming that secret` can only be 0 or 1 and that all the operations involved are executed in constant time). Writing constant-time code also requires fighting against compiler optimizations. As timing attacks become more popular, there's ongoing work to develop libraries and compilers to make it easier to write time-constant code.

\$\endgroup\$
3
\$\begingroup\$

Your use of strncat fits well into the GNU universe, where obscure string functions with a bad API are used all over the place. Everyone else would have simply used strcat or even memcpy.

To really follow the GNU style, you have to indent the braces undecidedly using two spaces:

if (cond)
  {
    action();
  }

There is no Easter egg to bloat the code even more. Even GNU Hello has several of them.

Traditionally the command lines are built mentioning the options first and then the remaining arguments. GNU allows to mix them freely. Your program even requires the options to go behind the arguments. There is no reason for this restriction, so you should follow the style of other programs and use optind, too.

\$\endgroup\$

Not the answer you're looking for? Browse other questions tagged or ask your own question.