6
\$\begingroup\$

I've created an app to manage students' data which also supports file handling. The Student structure looks like this:

typedef struct Student {    //dynamically allocated structure
    char *first_name;
    char *last_name;
    float grade;
}Student;

Considering C is a low-level programming language, I wanted to take full advantage on its dynamic memory allocation features. That is, both the first name and last name of each student will be allocated dynamically. The main data structure I've used to store the students is a vector. By now, the app has the following features:

  • CRUD
  • import/export to CSV
  • import/export to binary file

The file handling part is that interests me the most. I tried to carefully read the C documentation and make my functions less error-prone in terms of memory management and I/O.

The following functions are designed to handle CSV files:

void Read_From_CSV(const char *file_name, Student *p_destination)
{
    if (!strstr(file_name, ".csv"))
    {
        printf("This method only works for CSV files. \n");
        return;
    }

    FILE *p_file = fopen(file_name, "r");
    if (!p_file)
    {
        perror("File not found or corrupted file. \n");
        exit(EXIT_FAILURE);
    }

    unsigned char buffer_lname[30], buffer_fname[30], buffer[256];
    unsigned int count_students = 0, successfully_read = 0;
    while (fscanf(p_file,"%s",buffer)==1)
    {
        successfully_read = sscanf(buffer, "%[^,],%[^,],%f", buffer_lname, buffer_fname, &p_destination[count_students].grade);
        if (successfully_read != 3)
        {
            printf("Error reading from CSV.\n");
            exit(EXIT_FAILURE);
        }

        p_destination[count_students].last_name = (char*)malloc(strlen(buffer_lname) + 1);
        if (!p_destination[count_students].last_name)
        {
            printf("Couldn't allocate memory. \n");
            exit(EXIT_FAILURE);
        }
        strcpy(p_destination[count_students].last_name, buffer_lname);

        p_destination[count_students].first_name = (char*)malloc(strlen(buffer_fname) + 1);
        if (!p_destination[count_students].first_name)
        {
            printf("Couldn't allocate memory. \n");
            exit(EXIT_FAILURE);
        }
        strcpy(p_destination[count_students].first_name, buffer_fname);

        ++count_students;
    }
    fclose(p_file);
}


void Write_To_CSV(const char *file_name, Student *p_source, const unsigned int number_of_students)
{
    if (!strstr(file_name, ".csv"))
    {
        printf("This method only works for CSV files. \n");
        return;
    }

    FILE *p_file = fopen(file_name, "w");
    if (!p_file)
    {
        perror("An error has occured. \n");
        exit(EXIT_FAILURE);
    }

    unsigned int index = 0;
    int successfully_written = 0;
    while (index < number_of_students)
    {

        successfully_written = fprintf(p_file, "%s,", p_source[index].last_name);
        if (successfully_written != strlen(p_source[index].last_name) + 1)
        {
            printf("An error has occured. \n");
            exit(EXIT_FAILURE);
        }
        successfully_written = fprintf(p_file, "%s,", p_source[index].first_name);
        if (successfully_written != strlen(p_source[index].first_name) + 1)
        {
            printf("An error has occured. \n");
            exit(EXIT_FAILURE);
        }
        successfully_written = fprintf(p_file, "%.2f", p_source[index].grade);
        if (successfully_written != 4 && successfully_written != 5)
        {
            printf("Error occured during grade writing. \n");
            exit(EXIT_FAILURE);
        }
        fprintf(p_file, "\n");
        ++index;
    }
    fclose(p_file);
}

And these two are designed for binary files:

void Read_From_Binary(const char *file_name, Student *p_destination, const unsigned int number_of_students)
{
    if (!strstr(file_name, ".bin"))
    {
        printf("This method only works for binary files. \n");
        return;
    }

    FILE *p_file = fopen(file_name, "rb");
    if (!p_file)
    {
        perror("Error opening file. \n");
        exit(EXIT_FAILURE);
    }

    size_t successfully_read;
    unsigned int width = 0;
    for (unsigned int index = 0; index < number_of_students; ++index)
    {
        successfully_read = fread(&width, sizeof(width), 1, p_file);
        if (successfully_read != 1)
        {
            printf("Error reading from the file. \n");
            exit(EXIT_FAILURE);
        }

        p_destination[index].last_name = (char*)malloc(width);
        if (!p_destination[index].last_name)
        {
            printf("Could not allocate memory. \n");
            exit(EXIT_FAILURE);
        }
        successfully_read = fread(p_destination[index].last_name, width, 1, p_file);
        if (successfully_read != 1)
        {
            printf("Error reading from the file. \n");
            exit(EXIT_FAILURE);
        }

        successfully_read = fread(&width, sizeof(width), 1, p_file);
        if (successfully_read != 1)
        {
            printf("Error reading from the file. \n");
            exit(EXIT_FAILURE);
        }

        p_destination[index].first_name = (char*)malloc(width);
        if (!p_destination[index].first_name)
        {
            printf("Could not allocate memory. \n");
            exit(EXIT_FAILURE);
        }
        successfully_read = fread(p_destination[index].first_name, width, 1, p_file);
        if (successfully_read != 1)
        {
            printf("Error reading from the file. \n");
            exit(EXIT_FAILURE);
        }

        successfully_read = fread(&p_destination[index].grade, sizeof(float), 1, p_file);
        if (successfully_read != 1)
        {
            printf("Error reading from the file. \n");
            exit(EXIT_FAILURE);
        }
    }

    fclose(p_file);
}

void Write_To_Binary(const char *file_name, Student *p_source, const unsigned int number_of_students)
{
    if (!strstr(file_name, ".bin"))
    {
        printf("This method only works for binary files. \n");
        return;
    }

    FILE *p_file = fopen(file_name, "wb");
    if (!p_file)
    {
        perror("Error opening the file. \n");
        exit(EXIT_FAILURE);
    }

    size_t successfully_written = 0;
    unsigned int width = 0;
    for (unsigned int index = 0; index < number_of_students; ++index)
    {
        width = strlen(p_source[index].last_name) + 1;

        successfully_written = fwrite(&width, sizeof(width), 1, p_file);
        if (successfully_written != 1)
        {
            printf("Error writing to the file. \n");
            exit(EXIT_FAILURE);
        }

        successfully_written = fwrite(p_source[index].last_name, width, 1, p_file);
        if (successfully_written != 1)
        {
            printf("Error writing to the file. \n");
            exit(EXIT_FAILURE);
        }

        width = strlen(p_source[index].first_name) + 1;

        successfully_written = fwrite(&width, sizeof(width), 1, p_file);
        if (successfully_written != 1)
        {
            printf("Error writing to the file. \n");
            exit(EXIT_FAILURE);
        }

        successfully_written = fwrite(p_source[index].first_name, width, 1, p_file);
        if (successfully_written != 1)
        {
            printf("Error writing to the file. \n");
            exit(EXIT_FAILURE);
        }

        successfully_written = fwrite(&p_source[index].grade, sizeof(float), 1, p_file);
        if (successfully_written != 1)
        {
            printf("Error writing to the file. \n");
            exit(EXIT_FAILURE);
        }
    }

    fclose(p_file);
}

UPDATE

As I've been suggested, I'll add a driver code that exemplifies the usage of the earlier defined functions.

Suppose that we have a Students.csv file which contains the following registrations:

Wayne,Bruce,8.50

Kent,Clark,6.60

Dent,Harvey,5.50

Stark,Daniel,7

The driver code will import data from the CSV file. Then it will print the data on the console, write another CSV file and a binary file. The students array is cleared and then we import data from the binary file that was earlier created. Please consider that the structure and the functions for file handling are already defined.

#include <stdio.h>
#include <stdlib.h>

void Print_Students(const Student *p_first, const Student *p_last)
{
    unsigned int index = 0;
    for (; p_first < p_last; ++p_first)
    {
        printf("Student %d\n", index);
        printf("Last name: %s \n", p_first->last_name);
        printf("First name: %s \n", p_first->first_name);
        printf("Grade: %.2f \n", p_first->grade);
        ++index;
    }
    printf("\n");
}

void main()
{
    Student *students = (Student*)malloc(4 * sizeof(Student));
    if (!students)
    {
        printf("Couldn't allocate memory.\n");
        exit(EXIT_FAILURE);
    }
    Read_From_CSV("Students.csv", students);
    printf("Reading data from CSV. \n");
    Print_Students(students, students + 4);
    Write_To_CSV("Students1.csv", students, 4);
    Write_To_Binary("Students1.bin", students, 4);
    free(students);
    students = (Student*)malloc(4 * sizeof(Student));
    if (!students)
    {
        printf("Couldn't allocate memory.\n");
        exit(EXIT_FAILURE);
    }
    Read_From_Binary("Students1.bin", students, 4);
    printf("Reading data from binary file. \n");
    Print_Students(students, students + 4);
    free(students);
}

My main questions are the following:

  1. Are the variables named properly?
  2. When working with files, did I correctly check the return values from functions like fscanf, fwrite, fread etc.?
  3. Is there a better way to extract data from a CSV file?
  4. Is the memory allocation part done properly?
\$\endgroup\$
4
  • \$\begingroup\$ (Welcome to CR!) Do you have a test scaffold, can you provide a minimum environment to tinker with the code presented? \$\endgroup\$
    – greybeard
    Commented Feb 10, 2018 at 18:59
  • \$\begingroup\$ Thanks for your answer. Of course I can provide that. Should I update my original post and add the driver code? \$\endgroup\$
    – I. S.
    Commented Feb 11, 2018 at 11:41
  • \$\begingroup\$ Should I update my original post and add the driver code? I think that the way to go. \$\endgroup\$
    – greybeard
    Commented Feb 11, 2018 at 11:56
  • \$\begingroup\$ OK, I've got it updated. \$\endgroup\$
    – I. S.
    Commented Feb 11, 2018 at 13:47

3 Answers 3

2
\$\begingroup\$

In addition to other good answers:

Allow spaces in names

while (fscanf(p_file,"%s",buffer)==1) stops scanning with any white-space that follows other characters.

// Messes up input
Wayne,Mary Jo,8.50

Better to use fgets(buffer, sizeof buffer, p_file). This also prevents buffer overrun.

Avoid naked magic numbers

Why 30, 256? Use constants to allow for graceful code updates and self documentation.

Be more tolerant of long names. e.g. and their construction should they contain unexpected characters like "'- ,.", etc.

See also Falsehoods Programmers Believe About Names

// unsigned char buffer_lname[30], buffer_fname[30], buffer[256];
#define NAME_LAST_SIZE 64
#define NAME_FIRST_SIZE 60
#define RECORD_FIRST_SIZE (NAME_LAST_SIZE + 1 + NAME_FIRST_SIZE + 1 + 5 + 2];
unsigned char buffer_lname[NAME_LAST_SIZE];
unsigned char buffer_fname[NAME_FIRST_SIZE];
unsigned char buffer[RECORD_FIRST_SIZE * 2];  // I like 2x max expected size

Enforce limits

sscanf(buffer, "%[^,],%[^,],%f" allows buffer overruns leading to undefined bevaior. Consider "%29[^,],%29[^,],%f".

strdup()

The common strdup() or its equivalent would nicely replace repetitive code. Sample code

    // p_destination[count_students].last_name = (char*)malloc(strlen(buffer_lname) + 1);
    // if (!p_destination[count_students].last_name)
    // {
    //     printf("Couldn't allocate memory. \n");
    //     exit(EXIT_FAILURE);
    // }
    // strcpy(p_destination[count_students].last_name, buffer_lname);
    p_destination[count_students].last_name = strdup(buffer_lname);
    if (p_destination[count_students].last_name == NULL) {
      printf("Couldn't allocate memory for last name.\n");
      exit(EXIT_FAILURE);
    }

Form unique error messages.

Rather than 2 printf("Couldn't allocate memory. \n");. Better for error messages to use stderr.

    fprintf(stderr, "Couldn't allocate memory. Line: %d\n", __LINE__);
    // or  
    fprintf(stderr, "Couldn't allocate memory last name\n");
    fprintf(stderr, "Couldn't allocate memory first name\n");

Create file independence.

OP's code makes the file dependent on the size of unsigned and and this code's endianness. Integer size and endian-ness varies amongst platforms.

Instead choose a wide fixed type and convert to a fixed endian, then write. Reading will need to reverse the process.

// unsigned int width = 0;
// width = strlen(p_source[index].last_name) + 1;
// successfully_written = fwrite(&width, sizeof(width), 1, p_file);

uint32_t width = (uint32_t) (strlen(p_source[index].last_name) + 1);
width = htobe32(width);
successfully_written = fwrite(&width, sizeof width, 1, p_file);

I would create a helper function, maybe My_write_uint32(FILE*, uint32_t) to allow code re-use. Ref: be64toh()

\$\endgroup\$
5
  • \$\begingroup\$ Thanks for the review! @Harald Scheirich suggested me to use fread() instead of fscanf(), and you suggest me to use fgets(). What would be the overall better choice? Taking account of every method's limitations and benefits. Concerning magic numbers, I'll try to avoid them at all, but I have a question. What would be the way to go between const int last_size = 64 and #define NAME_LAST_SIZE 64? About file independence, well, is it possible that sizeof(int) is different on other platforms? For example, on my 64-bit machine, it has 4 bytes. On a 16-bit machine, would it have 2? \$\endgroup\$
    – I. S.
    Commented Feb 13, 2018 at 16:08
  • \$\begingroup\$ @I.Silviu To read binary, use fread(). To read text as lines, use fgets() \$\endgroup\$ Commented Feb 13, 2018 at 16:10
  • \$\begingroup\$ @I.Silviu "What would be the way to go between const int last_size = 64 and #define NAME_LAST_SIZE 64?" both document well enough \$\endgroup\$ Commented Feb 13, 2018 at 16:11
  • 1
    \$\begingroup\$ @I.Silviu An int must be at least 16-bit. The int size is often that same as the native processor size. It could be more or less. Code should avoid making unnecessary assumptions. uint32_t vs int as a convention for everyday programming may be useful. \$\endgroup\$ Commented Feb 13, 2018 at 16:15
  • \$\begingroup\$ OK, thanks for the advises. I'm looking to give my program a revamp based on the suggestions you and your colleagues gave to me. \$\endgroup\$
    – I. S.
    Commented Feb 13, 2018 at 16:24
2
\$\begingroup\$

Allocation and deallocation

When allocating memory, don't cast the result of malloc() family. Additionally, we can keep the freedom to change the type of what's allocated if we calculate using sizeof on the result value:

Student *students = malloc(sizeof *students * 4);

We write the sizeof expression first to guarantee that all calculation is done in terms of size_t - that makes no difference here, but is useful when allocating storage for 2-d arrays (malloc(sizeof *array * x * y)).

When we free the students array, it's important to free each individual member first:

free(students);  // leaks every first_name and every last_name

I recommend you write a Free_Student() function to do the cleanup, and also one to clean up an entire array.


Filename checking

I consider it an abomination for a data consumer to enforce a file naming policy, but if you insist on having it, you might want to check only the end of the file name for a match - assuming you want data.csv.gz to be a non-matching file:

char const suffix[] = ".csv";
size_t const suffixlength = sizeof suffix - 1;  /* ignoring the NUL */
size_t const filenamelength = strlen(file_name);
if (filenamelength < suffixlength
    || strcmp(suffix, file_name + filenamelength - suffixlength)
{ /* handle bad filename */ }

perror doesn't want a newline

Consider

    perror("File not found or corrupted file. \n");

It's more informative to show which file is referenced:

    perror(file_name);

This avoids duplicating the information (e.g. "not found", or "permission denied") that perror gives us.


Over-checking of printf() results

In this code, we check the number of characters actually written:

    successfully_written = fprintf(p_file, "%.2f", p_source[index].grade);
    if (successfully_written != 4 && successfully_written != 5)

Really, we're just interested in whether fprintf() was successful or not, and don't mind if we wrote 100.00 (6 chars). So it's better to simply test for the error case, where the function will return a negative value:

    if (fprintf(p_file, "%.2f", p_source[index].grade) < 0)

And we could combine all the prints into one:

    const Student *const p = p_source+index;
    if (fprintf(p_file, "%s,%s,%.2f\n",
                p->last_name, p->first_name, p->grade)
        < 0)

Think about const Student where appropriate

The write functions don't need to modify the pointed-to students, so they should take a pointer to const student:

void Write_To_CSV(const char *file_name,
                  Student const *p_source,
                  const unsigned int number_of_students)

Sanitize your strings

What happens if you have a student with , in their name? Or "? If that's not allowed, then you need to validate at entry and when reading from file. If those are allowed¹, then you'll need to do a bit more formatting to create and read valid CSV. That's an exercise larger in scope than this single paragraph!

¹ You might not have them in the languages of your students, but do be aware that ' is used in some of the Khoisan languages, and that can sometimes require special attention.

\$\endgroup\$
4
  • \$\begingroup\$ Thank you for the review. OK, if I don't have to cast the result of malloc, then I won't have to cast the result of realloc either, right? About those naming exceptions, could you provide an example that contains , or "? For the start, validation seems easier to implement. Once that is done, I'll think about the other possibility. Offtopic: I just can't figure out how to put a newline (I've read this).. \$\endgroup\$
    – I. S.
    Commented Feb 12, 2018 at 15:58
  • \$\begingroup\$ Correct - the advice applies to the malloc() family, including calloc() and realloc(). As for names including ,, that's something you have to choose whether or not to accept. I do work with someone whose name contains a ' and who is often frustrated at assumptions made in software (and I know someone else who has a similar problem with systems that assume everyone has a surname/family-name). I recommend you read the eye-opening list of Falsehoods Programmers Believe About Names. \$\endgroup\$ Commented Feb 12, 2018 at 16:06
  • \$\begingroup\$ I forgot that sizeof will include the NUL character. Fixed by edit. \$\endgroup\$ Commented Feb 12, 2018 at 17:58
  • 1
    \$\begingroup\$ @TobySpeight For fun, I looked at an alternative: char *dot = strrchr(file_name, '.'); if (dot == NULL || strcmp(dot, ".csv")) handle_bad_filename(file_name);. \$\endgroup\$ Commented Feb 12, 2018 at 18:19
2
\$\begingroup\$

Here is a review for the first part, additionally I'm trying a different format, all my comments are inline after the line. This is only the csv read/write part. I'll update this with the binary in a while.

void Read_From_CSV(const char *file_name, Student *p_destination)
{

    if (!strstr(file_name, ".csv"))
    {
        printf("This method only works for CSV files. \n");
        return;
    }
    // Inconsistent Error Handling:
    // Here you print out an error message and return without an errorcode
    // the caller won't know nothing was done, in the function below 
    // the program is just aborted. What is the difference between the two ?

    FILE *p_file = fopen(file_name, "r");
    if (!p_file)
    {
        perror("File not found or corrupted file. \n");
        exit(EXIT_FAILURE);
    }

    unsigned char buffer_lname[30], buffer_fname[30], buffer[256];
    unsigned int count_students = 0, successfully_read = 0
    // if you have it size_t is the type to use for all indices 

    while (fscanf(p_file,"%s",buffer)==1)
    // Assuming memory size:
    // if you're line is longer than 256 this will overwrite ...
    // fscanf use:
    // if there is a space in the line the %s will stop parsing, this of course 
    // is not so much an issue if you only use this to read data from that you
    //  created with this program, if you want to read other peoples data this would be a problem. 
    // Alternative: fread() will read a whole line up to '\n' and also lets you limit 
    // the amount of characters read
    {
        successfully_read = sscanf(buffer, "%[^,],%[^,],%f", buffer_lname, buffer_fname, &p_destination
        [count_students].grade);
        // Assuming memory sizes: 
        // You're assuming that there p_destination has room for count_students, this usually is not a safe
        // assumption to make, the called doesn't know how many students are in the file.
        // To fix this you'd want either to tell the parser how many students you can take in p_destination
        // or allocate the space dynamically in a list or an array that extends
        // You're also assuming that the names are only 30 characters long if they are longer sscanf
        // will be writing outside of the buffer to be safe here you'd have to make the name buffers 
        // as long as the line buffer
        // Parsing:
        // If this is the only format that you need to parse then this looks fine
        // strtok() would be another option to use but that would require more logic

        if (successfully_read != 3)
        {
            printf("Error reading from CSV.\n");
            exit(EXIT_FAILURE);
        }

        p_destination[count_students].last_name = (char*)malloc(strlen(buffer_lname) + 1);
        // There is no need to cast the result of malloc to the target type

        if (!p_destination[count_students].last_name)
        {
            printf("Couldn't allocate memory. \n");
            // Inexact error message:
            // For debugging you will usually want to say what exactly failed
            // if you have twenty of these in you're program it's hard to see 
            // what actually happened
            exit(EXIT_FAILURE);
        }
        strcpy(p_destination[count_students].last_name, buffer_lname);

        p_destination[count_students].first_name = (char*)malloc(strlen(buffer_fname) + 1);
        if (!p_destination[count_students].first_name)
        {
            printf("Couldn't allocate memory. \n");
            exit(EXIT_FAILURE);
        }
        strcpy(p_destination[count_students].first_name, buffer_fname);

        ++count_students;
    }
    fclose(p_file);
}


void Write_To_CSV(const char *file_name, Student *p_source, const unsigned int number_of_students)
// Naming: 
// i'd probably use p_students, source is generic 
{
    if (!strstr(file_name, ".csv"))
    {
        printf("This method only works for CSV files. \n");
        return;
    }

    FILE *p_file = fopen(file_name, "w");
    if (!p_file)
    {
        perror("An error has occured. \n");
        exit(EXIT_FAILURE);
    }

    unsigned int index = 0;
    int successfully_written = 0;
    while (index < number_of_students)
    // Safer expression is available:
    // if you use a for(size_t index = 0; index < number_of_students; ++index) {}
    // it's much harder to forget to write the ++index at the end of the loop
    {

        successfully_written = fprintf(p_file, "%s,", p_source[index].last_name);
        if (successfully_written != strlen(p_source[index].last_name) + 1)
        {
            printf("An error has occured. \n");
            exit(EXIT_FAILURE);
        }
        successfully_written = fprintf(p_file, "%s,", p_source[index].first_name);
        if (successfully_written != strlen(p_source[index].first_name) + 1)
        {
            printf("An error has occured. \n");
            exit(EXIT_FAILURE);
        }
        successfully_written = fprintf(p_file, "%.2f", p_source[index].grade);
        if (successfully_written != 4 && successfully_written != 5)
        {
            printf("Error occured during grade writing. \n");
            exit(EXIT_FAILURE);
        }
        fprintf(p_file, "\n");
        // Very verbose code:
        // These lines can be change to use: 
        // fprintf(p_file, "%s,%s,%.2f\n", p_source[index].last_name, ... )
        // Makes this much easier to read
        ++index;
    }
    fclose(p_file);
}
\$\endgroup\$
7
  • \$\begingroup\$ I'm not a big fan of the review in comments; I'd much prefer a quote from the original with the review in plain text separately. For one thing, much of this review can only be reached by scrolling through the code block, rather than reading in the normal way. \$\endgroup\$ Commented Feb 12, 2018 at 14:09
  • \$\begingroup\$ Just posted a question of meta with regard to this ... \$\endgroup\$ Commented Feb 12, 2018 at 14:11
  • \$\begingroup\$ Here's the discussion on Meta. \$\endgroup\$ Commented Feb 12, 2018 at 14:42
  • \$\begingroup\$ First of all, thanks for your review. Inconsistent Error Handling Is it possible to actually return an errorcode from a void function through return? I assumed that the second error is more serious than the first one, so I exited the program immediately. Should I treat all the errors in the same manner to ensure consistency throughout the code? Assuming memory size In this case, should I increase the buffer size to what value? Considering that I can't really predict how long every line will be.. Do you consider that using fread is the safer approach? \$\endgroup\$
    – I. S.
    Commented Feb 12, 2018 at 15:38
  • 1
    \$\begingroup\$ No you can't predict how long a line will be but you will want to know when the line that you want to read is longer than the space that you have available. The error handling is actually a similar problem, if you return from your function the caller should be able to observe whether you came back because of an error or because all the work was done \$\endgroup\$ Commented Feb 12, 2018 at 23:02

Not the answer you're looking for? Browse other questions tagged or ask your own question.