C for csv files

I want to post some C code I have written to take a large number of .csv files and read them into memory. The code is a single function call that will ascertain how many files you have and the dimensions of your .csv files.

It assumes your .csv files are all identical in format (number of rows and columns). It assumes you are using a comma as the separator. There is no support for text fields, but it wouldn't be too hard to implement.

The code therefore automates the laborious process of importing separate .csv files, or writing routines to open each and parse them individually. The files are stored in a 3D array, which you can simply loop through to create averages, run other stats on, or simply merge somehow and write out to another .csv for something like MatLab.

When writing your program, #include "collate_csvs.h". You need to declare a typedef'd struct I have made the same way you would any other variable. You pass the address of this to the function call, providing your base file name string. Therefore, in the following brief example your .csv files would be out0.csv, out1.csv, out2.csv... etc.  When you call collateCSVs, it fills in all the member variables of the csv_struct_t for you to use.  If the call returns 0, something went wrong and the function aborted.

// Example program: imports out0.csv, out1.csv, ... and prints every value,
// one per line, file by file, column by column, row by row.
// Returns 0 on success and 1 if the files could not be imported.
int main(void) {

    // Variable declaration
    csv_struct_t my_csv_files;
    int file, row, col;

    // Function call, imports your files.
    // collateCSVs returns 0 when it aborted; the struct must not be
    // used in that case, so stop here instead of looping over it.
    if( collateCSVs( &my_csv_files, "out") == 0 ) {
        fprintf(stderr, "Could not import the .csv files\n");
        return 1;
    }

    // Do some loops...
    for( file = 0; file < my_csv_files.num_files; file++ ) {
        for( col = 0; col < my_csv_files.max_cols; col++ ) {
            for( row = 0; row < my_csv_files.max_rows; row++ ) {
                printf("%f\n", my_csv_files.csv_data[file][col][row]);
            }
        }
    }

    return 0;
}



Compile the files with -c, and link to your program like:
gcc -c collate_csvs.c -o collate_csvs.o
gcc -c your_source.c -o your_source.o
gcc collate_csvs.o your_source.o -o COLLATE_CSVS

Lastly, I'm sorry if you find any bugs, or if it is not commented enough for you.
It is missing a routine to free the 3D array from memory: I only use this code in one-shot programs, where the memory is released automatically when the program exits.
It is a starting point at least :)
The source code is hidden behind this jump (a bit bulky):



collate_csvs.h:

#ifndef COLLATE_CSVS_H
#define COLLATE_CSVS_H

// Everything collateCSVs() learns about a set of .csv files.
// All members are filled in by collateCSVs(); the caller only reads them.
typedef struct csv_details {
    int num_files;          // Number of consecutive files found, starting at index 0
    int max_rows;           // Row count, sampled from file 0
    int max_cols;           // Largest column count of any row, sampled from file 0
    double *** csv_data;    // Access as 3d array csv_data[FILE][COL][ROW]
} csv_struct_t;


// Function call, provide base file name and pointer to struct.
// The files read are "<base_file>0.csv", "<base_file>1.csv", ... until one
// cannot be opened. Returns 1 on success, 0 if the import was aborted.
int collateCSVs( csv_struct_t * csv_struct_ptr, char * base_file );

#endif /* COLLATE_CSVS_H */



collate_csvs.c:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "collate_csvs.h"
#define NEWLINE 10
#define COMMA    44

// Free the top-level table and its first `full_slabs` slabs, each of which
// holds `y` allocated rows.
static void release3dArray( double *** array_ptr, int full_slabs, int y ) {
    int i,j;

    for( i = 0; i < full_slabs; i++ ) {
        for( j = 0; j < y; j++ ) {
            free( array_ptr[i][j] );
        }
        free( array_ptr[i] );
    }
    free( array_ptr );
}

// Allocate a 3D array of doubles that is accessed as array[i][j][k] with
// 0 <= i < x, 0 <= j < y and 0 <= k < z.
//
// Every cell starts out zero-filled (calloc), so cells that are never
// written still read as 0.0 on IEEE-754 platforms.
//
// Returns the array, or NULL if a dimension is negative or memory runs out.
// On failure everything allocated so far is released again.
// The caller owns the result; each array[i][j], each array[i] and the array
// itself were allocated separately and must be freed separately.
double *** make3dArray( int x, int y, int z ) {
    double *** array_ptr;
    int i,j;

    // A negative count would be converted to an enormous size_t below
    if( x < 0 || y < 0 || z < 0 ) return NULL;

    // Ask for at least one element so that a NULL result always means
    // "out of memory" (a zero-sized request may legally return NULL).
    array_ptr = calloc( (size_t)( x > 0 ? x : 1 ), sizeof *array_ptr );

    // Check for not enough memory
    if( array_ptr == NULL ) return NULL;

    for( i = 0; i < x; i++ ) {

        array_ptr[i] = calloc( (size_t)( y > 0 ? y : 1 ), sizeof *array_ptr[i] );

        // Check for not enough memory
        if( array_ptr[i] == NULL ) {
            release3dArray( array_ptr, i, y );
            return NULL;
        }

        for( j = 0; j < y; j++ ) {

            array_ptr[i][j] = calloc( (size_t)( z > 0 ? z : 1 ), sizeof *array_ptr[i][j] );

            // Check for not enough memory
            if( array_ptr[i][j] == NULL ) {
                // Undo the partly built slab i, then everything before it
                while( j-- > 0 ) free( array_ptr[i][j] );
                free( array_ptr[i] );
                release3dArray( array_ptr, i, y );
                return NULL;
            }
        }
    }

    return array_ptr;
}

// Write "<base_file><index>.csv" into dst.
// Returns 1 on success, 0 if the name does not fit in dst_size bytes.
static int buildCsvPath( char * dst, size_t dst_size, const char * base_file, int index ) {
    int n = snprintf( dst, dst_size, "%s%d.csv", base_file, index );
    return n >= 0 && (size_t) n < dst_size;
}

// Release an array obtained from make3dArray( files, cols, rows ).
static void freeCsvData( double *** data, int files, int cols ) {
    int f,c;

    if( data == NULL ) return;

    for( f = 0; f < files; f++ ) {
        for( c = 0; c < cols; c++ ) {
            free( data[f][c] );
        }
        free( data[f] );
    }
    free( data );
}

// Convert text to a double and store it at data[file][col][row].
// Returns 1 if stored, 0 if (col,row) lies outside max_cols x max_rows,
// in which case nothing is written.
static int storeField( double *** data, int file, int col, int row,
                       int max_cols, int max_rows, const char * text ) {
    if( col >= max_cols || row >= max_rows ) return 0;
    data[file][col][row] = strtod( text, NULL );
    return 1;
}

// Import "<base_file>0.csv", "<base_file>1.csv", ... into memory.
//
// Files are counted upwards from index 0 until one cannot be opened.
// File 0 is sampled to decide the number of rows and the largest number
// of columns; every file is then parsed as comma separated numbers into
// csv_struct_ptr->csv_data[FILE][COL][ROW].
//
//  - A last line without a trailing newline is counted and read as well.
//  - Values that fall outside the layout of file 0 are ignored (a warning
//    is printed) instead of being written past the end of the array.
//  - Cells that a file does not provide are left at 0.0.
//
// Progress messages are printed to stdout.
//
// Returns 1 on success with every member of *csv_struct_ptr filled in.
// Returns 0 if the import was aborted; the struct then holds zero counts
// and a NULL csv_data, and nothing is left allocated.
int collateCSVs( csv_struct_t * csv_struct_ptr, char * base_file) {

    int file;
    int col;
    int row;
    int i,j;
    int num_files;
    int max_cols;
    int max_rows;
    int skipped;
    size_t len;
    char path[FILENAME_MAX];
    char field[64];
    double *** data;
    int c;
    FILE * fp;

    if( csv_struct_ptr == NULL || base_file == NULL ) return 0;

    // Keep the struct in a safe, empty state until everything has worked
    csv_struct_ptr->num_files = 0;
    csv_struct_ptr->max_rows = 0;
    csv_struct_ptr->max_cols = 0;
    csv_struct_ptr->csv_data = NULL;

    printf("\n Collating .csv files with base filename '%s'\n", base_file);
    printf("\n Attempting to find out how many files you have...\n");

    // We try to open file zero.
    file = 0;
    if( !buildCsvPath( path, sizeof path, base_file, file ) ) {
        printf(" - File name \"%s\" is too long, abort\n", base_file);
        return 0;
    }
    fp = fopen( path, "r");
    if( fp == NULL ) {
        printf(" - Could not open file 0! Assuming no files. Abort\n");
        return 0;
    }

    // We keep opening files until we run out
    do {
        file++;

        // The previous file opened ok, close it.
        fclose( fp );

        // try to open the next file; a name that no longer fits ends the search.
        fp = buildCsvPath( path, sizeof path, base_file, file ) ? fopen( path, "r" ) : NULL;

    } while ( fp != NULL );

    // file is now the index of the first missing file, i.e. the file count
    // (always at least 1, because file 0 was opened above).
    num_files = file;
    printf(" - I found %d files\n", num_files);

    // We open the first file and use it to judge how many rows and
    // columns we will need.
    buildCsvPath( path, sizeof path, base_file, 0 );
    fp = fopen( path, "r" );
    if( fp == NULL ) {
        printf(" I can't open the first file \"%s\", abort\n", path);
        return 0;
    }


    printf("\n Attempting to find out row and column dimensions...\n");
    row = 0;
    col = 0;
    i = 0;      // fields completed on the current line
    j = 0;      // characters seen in the current field
    while( ( c = fgetc( fp )) != EOF ) {
        if( c == COMMA ) {
            i++;
            j = 0;
        } else if ( c == NEWLINE ) {
            if( j != 0 ) i++;
            if( i > col ) col = i;

            i = 0;
            j = 0;
            row++;
        } else {
            j++;
        }
    }
    // A last line that is not terminated by a newline still counts
    if( i != 0 || j != 0 ) {
        if( j != 0 ) i++;
        if( i > col ) col = i;
        row++;
    }
    fclose( fp );

    printf(" - Sampled file 0, I found a max of %d columns, %d rows\n", col, row);

    // Create an array large enough to hold all of our data
    max_cols = col;
    max_rows = row;
    data = make3dArray( num_files, max_cols, max_rows );

    if( data == NULL ) {
        printf(" Not enough memory :(\n");
        printf(" abort\n");
        return 0;
    }

    // Cells that a file never provides (short rows, fewer rows than
    // file 0) must not be left holding garbage.
    for( file = 0; file < num_files; file++ ) {
        for( i = 0; i < max_cols; i++ ) {
            for( j = 0; j < max_rows; j++ ) {
                data[file][i][j] = 0.0;
            }
        }
    }

    // Finally, read in the data from each file and store
    // in our array.
    for( file = 0; file < num_files; file++ ) {

        // Format the filename string; every index below num_files
        // already fitted while counting.
        buildCsvPath( path, sizeof path, base_file, file );

        printf(" Opening \"%s\" to read data: ", path);

        fp = fopen( path, "r" );
        if( fp == NULL ) {
            printf(" Error, could not open %s, abort\n", path);
            freeCsvData( data, num_files, max_cols );
            return 0;
        }

        // Reset variables that index the arrays
        len = 0;
        col = 0;
        row = 0;
        skipped = 0;

        // Read in the file data.
        while( ( c = fgetc( fp ) ) != EOF )  {

            if( c != COMMA && c != NEWLINE ) {

                // We have a character. Keep one byte free for the
                // terminator; anything beyond that is dropped.
                if( len < sizeof field - 1 ) {
                    field[len] = (char) c;
                    len++;
                }
                continue;
            }

            // A comma always ends a field (possibly an empty one);
            // a newline only does so if characters are pending.
            if( c == COMMA || len != 0 ) {
                field[len] = '\0';
                if( !storeField( data, file, col, row, max_cols, max_rows, field ) ) {
                    skipped++;
                }
                len = 0;
            }

            if( c == COMMA ) {
                // jump a column
                col++;
            } else {
                // We want to drop a line.
                col = 0;
                row++;
            }
        }

        // Last value of a file that does not end with a newline
        if( len != 0 ) {
            field[len] = '\0';
            if( !storeField( data, file, col, row, max_cols, max_rows, field ) ) {
                skipped++;
            }
        }

        // Close that file, ready for the next
        fclose( fp );
        printf(" - Done\n");
        if( skipped > 0 ) {
            printf("   Warning: ignored %d value(s) outside the %d column x %d row layout of file 0\n",
                   skipped, max_cols, max_rows);
        }
    }

    csv_struct_ptr->num_files = num_files;
    csv_struct_ptr->max_cols = max_cols;
    csv_struct_ptr->max_rows = max_rows;
    csv_struct_ptr->csv_data = data;

    return 1;

}