1、每一次只能读取同一种数据类型,不能读取字符串。
2、每次读取会返回一个 CSV 数据结构,其中包含源数据、二维数组以及行列数信息
3、可以转换二维数组,但总大小不能变
123.csv
Month,"CO (ppm) mauna loa, 1965-1980",,,CO2 (ppm) mauna loa
Jan-,319.32,1,,,
Feb-,320.36,2,,,
Mar-,320.82,3,,,
Apr-,322.06,4,,,
May-,322.17,5,,,
Jun-,321.95,6,,,
头文件 read_csv_data.h
#include <limits.h> // for INT_MAX
#include <stdio.h>
#include <memory.h> // for memset
#include <stdlib.h> // for malloc, free
#include <string.h> // for strtok
#include <unistd.h> // for access
// Size in bytes of the line buffer handed to fgets(), including the
// terminating NUL. A physical line longer than MAX_LINE_SIZE - 1 characters
// is returned by fgets() in several pieces.
#define MAX_LINE_SIZE 1024
// Result of splitting one string on a single-character delimiter.
// Filled in by str_split_func() and released by str_split_free().
struct str_split
{
int count; // number of tokens referenced by str_array
char *str; // heap copy of the source string; delimiters in it are overwritten with NUL
char **str_array; // heap array of pointers to the start of each token inside str
};
// Variant cell type: one CSV value stored in any of the supported numeric
// representations. The union does not record which member is valid; that is
// decided by the data-type selector passed to the reader.
// Member names follow the selector names; note that "s32"/"u32" are declared
// as long, whose width is platform dependent.
typedef union dtype_u{
    long int s32;
    char s8;
    short int s16;
    unsigned char u8;
    unsigned short int u16;
    unsigned long int u32;
    float f32;
    double f64;
} Dtype;
// In-memory CSV data set: a header followed by the values themselves, all in
// one allocation (data is a flexible array member).
struct csv_s{
Dtype **darray; // row table: darray[i] points at the first value of row i inside data
int drow; // number of rows in the current 2D view
int dcol; // number of columns in the current 2D view
int dnum; // total number of values (drow * dcol)
Dtype data[]; // the values, stored row after row
};
// Data-type selectors accepted by the reader (the dclass argument).
// Each one names the Dtype member a parsed value is stored into.
enum{
    S8,         // char
    S16,        // short int
    S32,        // long int
    U8,         // unsigned char
    U16,        // unsigned short int
    U32,        // unsigned long int
    F32,        // float
    F64,        // double
    DEF = S32   // default selector: long int
};
/*
Releases a CSV structure together with its row table and sets *csv_ptr to
NULL. See testFile() for a usage example.
*/
void FreeCsvData(struct csv_s **csv_ptr);
/*
Re-shapes the 2D view of the CSV data: builds a new row table of row x col
over the same values. row * col must equal the number of stored values.
See testFile() for a usage example.
*/
int setNewArray( struct csv_s *tt, int row, int col);
/*
Reads numeric data from a CSV file and builds a CSV structure.
See testFile() for a usage example.
csvFilePath: path of the file to read
delimiter:   field separator character
skiprows:    number of leading lines to skip
ColumnList:  comma-separated list of the columns to load, in the order given;
             each number is used directly as an index into the fields of a
             line; NULL loads every column
dclass:      data-type selector applied to every loaded value
*/
struct csv_s *ReadCsvData(char* csvFilePath, char delimiter, int skiprows, char *ColumnList, int dclass);
源码 read_csv_data.c
#include "read_csv_data.h"
/*
 * Splits src on every occurrence of delimiter.
 *
 * split:     receives the result; it is zeroed first, so it must not hold
 *            allocations from an earlier call that were not yet released.
 * src:       NUL-terminated text to split; it is copied, never modified.
 * delimiter: separator character. Quoting is not recognized, so a delimiter
 *            inside a quoted field still splits it. Empty tokens are kept.
 *
 * On success returns 0; split->count is the number of tokens and
 * split->str_array[i] points at token i inside the private copy split->str.
 * Returns -1 (leaving *split zeroed) when split or src is NULL, src is empty,
 * or memory cannot be allocated. Release the result with str_split_free().
 */
int str_split_func(struct str_split *split, char * src, char delimiter)
{
    size_t len;
    int count;
    char *cursor;
    char **slot;

    if (split == NULL) {
        return -1;
    }
    memset(split, 0, sizeof(struct str_split));
    if (src == NULL || src[0] == '\0') {
        return -1;
    }

    /* Private writable copy: delimiters are overwritten with NUL below. */
    len = strlen(src);
    split->str = malloc(len + 1);
    if (split->str == NULL) {
        return -1;
    }
    memcpy(split->str, src, len + 1);

    /* N delimiters produce N + 1 tokens. */
    count = 1;
    for (cursor = split->str; *cursor != '\0'; cursor++) {
        if (*cursor == delimiter) {
            count++;
        }
    }

    split->str_array = malloc((size_t)count * sizeof *split->str_array);
    if (split->str_array == NULL) {
        /* Do not leave a half-built result behind. */
        free(split->str);
        split->str = NULL;
        return -1;
    }

    split->count = count;
    slot = split->str_array;
    *slot = split->str;
    for (cursor = split->str; *cursor != '\0'; cursor++) {
        if (*cursor == delimiter) {
            *cursor = '\0';
            *++slot = cursor + 1;
        }
    }
    return 0;
}
/*
 * Releases the buffers owned by a split result and resets it to the empty
 * state, so calling it again on the same object is harmless.
 * Returns 0 on success, -1 when split is NULL.
 */
int str_split_free(struct str_split *split)
{
    if (split == NULL) {
        return -1;
    }

    /* free(NULL) is a no-op, so no per-field guards are needed. */
    free(split->str);
    split->str = NULL;
    free(split->str_array);
    split->str_array = NULL;
    split->count = 0;
    return 0;
}
// Returns the size of the file behind fp in bytes, or -1 when fp is NULL,
// seeking fails, or the size does not fit in an int.
// The stream is left positioned at the end of the file.
int GetTotalSize(FILE * fp)
{
    long size;

    if (fp == NULL) {
        return -1;
    }
    if (fseek(fp, 0L, SEEK_END) != 0) {
        return -1;
    }
    size = ftell(fp);
    if (size < 0 || size > INT_MAX) {
        return -1;
    }
    return (int)size;
}
// Counts the lines of the file behind fp, or returns -1 when fp is NULL.
// A "line" is one successful fgets() read of at most MAX_LINE_SIZE - 1
// characters, so a longer physical line is counted once per piece.
// The stream is rewound to the beginning before and after counting.
int GetTotalLineCount(FILE * fp)
{
    char strLine[MAX_LINE_SIZE];
    int lines = 0;

    if (fp == NULL) {
        return -1;
    }

    fseek(fp, 0L, SEEK_SET);
    while (fgets(strLine, MAX_LINE_SIZE, fp) != NULL) {
        lines++;
    }
    fseek(fp, 0L, SEEK_SET);
    return lines;
}
// Returns the number of delimiter-separated fields on the first line of the
// file, 0 when the file has no first line, or -1 when fp is NULL.
// The stream is rewound first and left positioned after the line just read.
int GetTotalColCount(FILE * fp, char delimiter)
{
    char strLine[MAX_LINE_SIZE];
    struct str_split tss;
    int columns = 0;

    if (fp == NULL) {
        return -1;
    }

    fseek(fp, 0L, SEEK_SET);
    if (fgets(strLine, MAX_LINE_SIZE, fp) != NULL) {
        /* str_split_func zeroes tss first, so count is 0 if splitting fails. */
        str_split_func(&tss, strLine, delimiter);
        columns = tss.count;
        str_split_free(&tss);
    }
    return columns;
}
// Parses the text in str and stores it in *d using the representation chosen
// by dtpye (one of the S8 .. F64 selectors).
// Integers are parsed as base 10 and floating-point values with strtod().
// Text that does not start with a number is stored as 0, and out-of-range
// values are narrowed by the cast; neither case is reported.
// Returns 0 on success, -1 when d or str is NULL or the selector is unknown.
int strToData(Dtype *d, char *str, int dtpye)
{
    if (d == NULL || str == NULL) {
        return -1;
    }

    switch (dtpye) {
    case S8:
        *((char *)d) = (char)strtol(str, NULL, 10);
        break;
    case S16:
        *((short int *)d) = (short int)strtol(str, NULL, 10);
        break;
    case S32:
        *((long int *)d) = strtol(str, NULL, 10);
        break;
    case U8:
        *((unsigned char *)d) = (unsigned char)strtol(str, NULL, 10);
        break;
    case U16:
        *((unsigned short int *)d) = (unsigned short int)strtol(str, NULL, 10);
        break;
    case U32:
        /* strtoul covers the upper half of the unsigned range. */
        *((unsigned long int *)d) = strtoul(str, NULL, 10);
        break;
    case F32:
        *((float *)d) = (float)strtod(str, NULL);
        break;
    case F64:
        *((double *)d) = strtod(str, NULL);
        break;
    default:
        printf("读取数据类型不对\n");
        return -1;
    }
    return 0;
}
// Re-shapes the 2D view of a CSV structure to row x col.
// The values in tt->data are untouched; only the row table tt->darray is
// rebuilt so that darray[i] points at value i * col.
// tt->darray must be NULL or a table built by an earlier call, because the
// old table is freed here.
// Returns 0 on success. Returns -1 and leaves tt unchanged when tt is NULL,
// a dimension is negative, row * col differs from tt->dnum, or the table
// cannot be allocated.
int setNewArray( struct csv_s *tt, int row, int col)
{
    Dtype **rows;
    int i;

    if (tt == NULL) {
        return -1;
    }
    if (row < 0 || col < 0) {
        return -1;
    }
    /* Multiply in a wider type so the check itself cannot overflow. */
    if ((long long)row * (long long)col != (long long)tt->dnum) {
        return -1;
    }

    /* Allocate at least one slot so an empty view still gets a valid table. */
    rows = malloc((row > 0 ? (size_t)row : (size_t)1) * sizeof *rows);
    if (rows == NULL) {
        return -1;
    }

    free(tt->darray);
    tt->darray = rows;
    tt->drow = row;
    tt->dcol = col;
    for (i = 0; i < row; i++) {
        rows[i] = tt->data + (size_t)i * (size_t)col;
    }
    return 0;
}
// Frees a CSV structure and its row table, then sets *csv_ptr to NULL.
// Passing NULL, or a pointer to a NULL pointer, is allowed and does nothing.
void FreeCsvData(struct csv_s **csv_ptr)
{
    if (csv_ptr == NULL || *csv_ptr == NULL) {
        return;
    }

    free((*csv_ptr)->darray);
    free(*csv_ptr);
    *csv_ptr = NULL;
}
// Reads numeric data from a CSV file.
//
// csvFilePath: path of the file to read.
// delimiter:   field separator character.
// skiprows:    number of leading lines to skip (for example a header line).
// ColumnList:  comma-separated, zero-based field indices to load, in the
//              order given (for example "2,1" loads field 2 and then field 1
//              of every line); NULL loads every column. The column count of
//              the file is taken from its first line.
// dclass:      data-type selector (S8 .. F64) applied to every loaded value.
//
// Returns a newly allocated structure that the caller releases with
// FreeCsvData(), or NULL after printing a message when the file cannot be
// read, an argument is invalid, a line has too few fields, or memory runs
// out.
struct csv_s *ReadCsvData(char* csvFilePath, char delimiter, int skiprows, char *ColumnList, int dclass)
{
    FILE *fCsv = NULL;
    struct csv_s *temp_csv = NULL;
    int *t_column_list_ptr = NULL;
    int t_column_list = 0;
    struct str_split tss;
    char strLine[MAX_LINE_SIZE];
    int rowTotal = 0;
    int colTotal = 0;
    int rowsRead = 0;
    int cur_ptr = 0;
    int i;
    int j;

    /* Start from the empty state so label_error can always release tss. */
    memset(&tss, 0, sizeof tss);

    if (csvFilePath == NULL) {
        printf("open file %s failed", "(null)");
        goto label_error;
    }
    if (access(csvFilePath, F_OK) < 0) {
        printf("%s 文件不存在\n", csvFilePath);
        goto label_error;
    }

    fCsv = fopen(csvFilePath, "r");
    if (fCsv == NULL) {
        printf("open file %s failed", csvFilePath);
        goto label_error;
    }

    rowTotal = GetTotalLineCount(fCsv);
    colTotal = GetTotalColCount(fCsv, delimiter);

    /* Validate the file dimensions before anything is sized from them. */
    if (rowTotal < 1 || colTotal < 1 || skiprows < 0 || skiprows > rowTotal) {
        printf("数据不对,有%d行,%d列\n", rowTotal, colTotal);
        goto label_error;
    }

    /* Build the list of field indices to load. */
    if (ColumnList == NULL) {
        t_column_list = colTotal;
        t_column_list_ptr = malloc((size_t)t_column_list * sizeof *t_column_list_ptr);
        if (t_column_list_ptr == NULL) {
            printf("分配内存失败 \n");
            goto label_error;
        }
        for (i = 0; i < t_column_list; i++) {
            t_column_list_ptr[i] = i;
        }
    } else {
        str_split_func(&tss, ColumnList, ',');
        if (tss.count < 1) {
            printf("[%s] invalid column list\n", ColumnList);
            goto label_error;
        }
        t_column_list = tss.count;
        t_column_list_ptr = malloc((size_t)t_column_list * sizeof *t_column_list_ptr);
        if (t_column_list_ptr == NULL) {
            printf("分配内存失败 \n");
            goto label_error;
        }
        for (i = 0; i < t_column_list; i++) {
            t_column_list_ptr[i] = (int)strtol(tss.str_array[i], NULL, 10);
        }
        str_split_free(&tss);

        if (t_column_list > colTotal) {
            printf("[%s] 超过最大列数 %d\n", ColumnList, colTotal);
            goto label_error;
        }
        for (i = 0; i < t_column_list; i++) {
            /* Indices are zero-based, so colTotal itself is out of range. */
            if (t_column_list_ptr[i] < 0 || t_column_list_ptr[i] >= colTotal) {
                printf("[%s] 超过最大列数 %d\n", ColumnList, colTotal);
                goto label_error;
            }
        }
    }

    /* Header and values share one allocation (flexible array member). */
    rowTotal = rowTotal - skiprows;
    if ((long long)rowTotal * (long long)t_column_list > (long long)INT_MAX) {
        printf("分配内存失败 \n");
        goto label_error;
    }
    temp_csv = malloc(sizeof *temp_csv
                      + (size_t)rowTotal * (size_t)t_column_list * sizeof(Dtype));
    if (temp_csv == NULL) {
        printf("分配内存失败 \n");
        goto label_error;
    }
    temp_csv->darray = NULL;

    /* Skip the leading lines, then parse one line per data row. */
    fseek(fCsv, 0L, SEEK_SET);
    for (i = 0; i < skiprows; i++) {
        if (fgets(strLine, MAX_LINE_SIZE, fCsv) == NULL) {
            break;
        }
    }
    while (rowsRead < rowTotal && fgets(strLine, MAX_LINE_SIZE, fCsv) != NULL) {
        str_split_func(&tss, strLine, delimiter);
        for (j = 0; j < t_column_list; j++) {
            int field = t_column_list_ptr[j];

            /* A short line must not be indexed past its last field. */
            if (field >= tss.count) {
                printf("read error\n");
                goto label_error;
            }
            if (strToData(temp_csv->data + cur_ptr, tss.str_array[field], dclass) < 0) {
                printf("str to data error\n");
                goto label_error;
            }
            cur_ptr++;
        }
        str_split_free(&tss);
        rowsRead++;
    }

    /* Describe only the rows that were actually filled in. */
    temp_csv->drow = rowsRead;
    temp_csv->dcol = t_column_list;
    temp_csv->dnum = rowsRead * t_column_list;
    if (setNewArray(temp_csv, temp_csv->drow, temp_csv->dcol) < 0) {
        printf("分配内存失败 \n");
        goto label_error;
    }

    fclose(fCsv);
    free(t_column_list_ptr);
    return temp_csv;

label_error:
    str_split_free(&tss);
    if (fCsv != NULL) {
        fclose(fCsv);
    }
    free(temp_csv);
    free(t_column_list_ptr);
    return NULL;
}
// Self-contained demo of setNewArray() on hand-made data: fills a structure
// with doubles, views it as 3 x 4, then re-shapes it to 4 x 3 and prints the
// last row of each view. The sizes are illustrative; any row * col equal to
// the element count works.
// Returns 0 on success, -1 on failure.
int testData(void)
{
    const int size = 12;
    struct csv_s *stdata;
    int row;
    int col;
    int i;
    int j;

    /* Header and values live in one allocation. */
    stdata = malloc(sizeof *stdata + (size_t)size * sizeof(Dtype));
    if (stdata == NULL) {
        printf("分配内存失败 \n");
        return -1;
    }
    /* setNewArray frees the previous row table, so it must start out NULL. */
    stdata->darray = NULL;

    for (i = 0; i < size; i++) {
        stdata->data[i].f64 = i * 1.0;
    }
    /* Overwrite one arbitrary element to show direct access. */
    stdata->data[5].f64 = 10 * 1.0;
    printf("%f asdfasfasdf\n", stdata->data[5].f64);

    row = 3;
    col = 4;
    stdata->drow = row;
    stdata->dcol = col;
    stdata->dnum = row * col;

    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray %d error\n", j);
        FreeCsvData(&stdata);
        return -1;
    }
    printf("显示定义结构最后一行数据 \n");
    for (j = 0; j < col; j++) {
        printf("%f\n", stdata->darray[row - 1][j].f64);
    }

    /* Same values, different shape. */
    row = 4;
    col = 3;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
        FreeCsvData(&stdata);
        return -1;
    }
    printf("显示结构转换最后一行数据 \n");
    for (j = 0; j < col; j++) {
        printf("%f\n", stdata->darray[row - 1][j].f64);
    }

    FreeCsvData(&stdata);
    return 0;
}
// Floating-point demo: loads fields 2 and 1 of 123.csv as float (skipping the
// header line), prints the last row, re-shapes the data, and finally shows
// that a re-shape with a mismatching element count is rejected.
// Passing "1,2" instead of "2,1" loads the same fields in the other order.
void testFile1(void)
{
    struct csv_s *stdata;
    Dtype **show;
    int row;
    int col;
    int j;

    stdata = ReadCsvData("123.csv", ',', 1, "2,1", F32);
    if (stdata == NULL) {
        printf("ReadCsvData failed\n");
        return;
    }
    printf("testFile row %d col %d\n", stdata->drow, stdata->dcol);

    /* darray can be used like Dtype show[drow][dcol]. */
    show = stdata->darray;
    printf("显示最后一行数据 \n");
    if (stdata->drow > 0) {
        for (j = 0; j < stdata->dcol; j++) {
            printf("%f\n", show[stdata->drow - 1][j].f32);
        }
    }

    /* Swap the dimensions: always a valid shape for the same values. */
    row = stdata->dcol;
    col = stdata->drow;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    } else {
        printf("显示结构转换最后一行数据 \n");
        if (row > 0) {
            for (j = 0; j < col; j++) {
                printf("%f\n", stdata->darray[row - 1][j].f32);
            }
        }
    }

    printf("转换失败测试\n");
    /* One extra row changes the element count, so this must be rejected. */
    row = stdata->drow + 1;
    col = stdata->dcol;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    }

    FreeCsvData(&stdata);
    if (stdata == NULL)
        printf("ok t is null\n");
    else
        printf("ok t is not null\n");
}
// Integer demo: same steps as the floating-point demo, but the values are
// loaded with the default selector DEF (long int) and printed through the
// s32 member.
void testFile2(void)
{
    struct csv_s *stdata;
    Dtype **show;
    int row;
    int col;
    int j;

    stdata = ReadCsvData("123.csv", ',', 1, "2,1", DEF);
    if (stdata == NULL) {
        printf("ReadCsvData failed\n");
        return;
    }
    printf("testFile row %d col %d\n", stdata->drow, stdata->dcol);

    /* darray can be used like Dtype show[drow][dcol]. */
    show = stdata->darray;
    printf("显示最后一行数据 \n");
    if (stdata->drow > 0) {
        for (j = 0; j < stdata->dcol; j++) {
            /* Print the long member explicitly; a union cannot be passed to %d. */
            printf("%ld\n", show[stdata->drow - 1][j].s32);
        }
    }

    /* Swap the dimensions: always a valid shape for the same values. */
    row = stdata->dcol;
    col = stdata->drow;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    } else {
        printf("显示结构转换最后一行数据 \n");
        if (row > 0) {
            for (j = 0; j < col; j++) {
                printf("%ld\n", stdata->darray[row - 1][j].s32);
            }
        }
    }

    printf("转换失败测试\n");
    /* One extra row changes the element count, so this must be rejected. */
    row = stdata->drow + 1;
    col = stdata->dcol;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    }

    FreeCsvData(&stdata);
    if (stdata == NULL)
        printf("ok t is null\n");
    else
        printf("ok t is not null\n");
}
// Entry point: runs the floating-point and the integer file demos.
// testData() is an additional in-memory demo that is not run by default.
int main(int args, char *argv[])
{
    (void)args;
    (void)argv;

    testFile1();
    testFile2();
    return 0;
}