CubeWorldMods/LocalizationMod/CCSVParse.cpp

474 lines
13 KiB
C++

//
// CCSVParse.cpp
// CPPAlgorithm
//
// Created by xujw on 16/2/26.
// Copyright © 2016年 xujw. All rights reserved.
//
#include "CCSVParse.h"
#include <assert.h>
// Constructs a parser with simple mode switched off, so parseCsvFile() uses
// its quote- and escape-aware path unless the flag is changed later.
CCSVParse::CCSVParse()
    : _useSimpleModel(false)
{
}
// Nothing to release explicitly: the destructor body is empty and members
// are cleaned up by their own destructors.
CCSVParse::~CCSVParse() = default;
// Splits `str` on every occurrence of `separator`.
//
// @param str        Text to split. An empty string yields an empty vector.
// @param separator  Delimiter; may be longer than one character. An empty
//                   separator cannot split anything, so `str` is returned as
//                   the only element.
// @return The pieces in order. Empty pieces are preserved, so a trailing
//         separator produces a trailing empty string ("a,b," -> "a","b","").
std::vector<std::string> CCSVParse::splitString(const std::string &str, const std::string &separator)
{
    std::vector<std::string> resVec;
    if (str.empty())
    {
        return resVec;
    }
    if (separator.empty())
    {
        // find("") matches at every position and would never make progress.
        resVec.push_back(str);
        return resVec;
    }

    // Walk the input with a moving start offset instead of repeatedly copying
    // the remainder, and advance by the full separator length.
    size_t fieldStart = 0;
    size_t pos = str.find(separator, fieldStart);
    while (pos != std::string::npos)
    {
        resVec.push_back(str.substr(fieldStart, pos - fieldStart));
        fieldStart = pos + separator.size();
        pos = str.find(separator, fieldStart);
    }

    // Whatever follows the last separator (possibly nothing) is the final piece.
    resVec.push_back(str.substr(fieldStart));
    return resVec;
}
// Reads the whole file at `fileName` in binary mode.
//
// @param fileName  Path handed to fopen().
// @return The file's bytes, or an empty string when the file cannot be
//         opened, its size cannot be determined, or it is empty. If fewer
//         bytes than the reported size are read, only the bytes actually
//         read are returned.
std::string CCSVParse::loadCsvFile(const std::string &fileName)
{
    FILE *pFile = fopen(fileName.c_str(), "rb");
    if (nullptr == pFile)
    {
        return "";
    }

    std::string strRead;
    // Seek to the end to learn the size; every step is checked because
    // ftell() reports failure as -1, which must never be used as a length.
    if (0 == fseek(pFile, 0, SEEK_END))
    {
        const long len = ftell(pFile);
        if (len > 0 && 0 == fseek(pFile, 0, SEEK_SET))
        {
            // Read straight into the string's own storage: no separate
            // heap buffer to leak if something throws.
            strRead.resize(static_cast<size_t>(len));
            const size_t readCount = fread(&strRead[0], 1, strRead.size(), pFile);
            // Drop any tail that a short read left unfilled.
            strRead.resize(readCount);
        }
    }
    fclose(pFile);
    return strRead;
}
// Loads `fileName` and parses it into a grid of rows and fields. The grid is
// stored in _gridData (replacing any previous content) and returned by value.
//
// @param fileName   Path of the CSV file, read through loadCsvFile().
// @param separator  Field separator; must be exactly one character (asserted).
// @return One vector of fields per parsed row; empty when the file could not
//         be loaded or is empty.
//
// The text is first split into lines on '\n'.
//
// Simple mode (_useSimpleModel == true): each line is split on the separator
// with no quote or escape handling.
//
// Standard mode: each non-empty line runs through a small state machine.
//   - A field may be wrapped in double quotes and may then contain the
//     separator; inside quotes a doubled quote ("") is a literal quote.
//   - Backslash escapes \" \n \\ are recognised both inside and outside quotes.
//   - A '\r' ends the row; anything after it on the same line is ignored.
//   - Lines that are empty, or contain only a row terminator, are skipped.
//   - A line that ends inside an open quoted field or right after a backslash
//     is dropped.
//   - Syntax errors print a message and assert.
std::vector<std::vector<std::string>> CCSVParse::parseCsvFile(const std::string &fileName,const std::string &separator)
{
    _gridData.clear();
    std::string strAllData = loadCsvFile(fileName);
    if (strAllData.empty())
    {
        return _gridData;
    }

    // The separator must be exactly one character.
    assert(separator.size() == 1);

    // Simple mode: fields cannot contain the separator.
    if (_useSimpleModel)
    {
        const std::vector<std::string> lines = splitString(strAllData, "\n");
        for (size_t i = 0; i < lines.size(); ++i)
        {
            _gridData.push_back(splitString(lines[i], separator));
        }
        return _gridData;
    }

    // Standard mode: fields may contain the separator when quoted.
    enum StateType
    {
        kNewFieldStart = 0,       // at the start of the row's first field
        kNonQuotesField,          // inside an unquoted field
        kQuotesField,             // inside a quoted field
        kFieldSeparator,          // just consumed a field separator
        kQuoteInQuotesField,      // saw a quote while inside a quoted field
        kBackSlash,               // saw a backslash in an unquoted field
        kBackSlashInQuotesField,  // saw a backslash in a quoted field
        kRowSeparator             // saw the row terminator (carriage return)
    };

    const char sep = separator.at(0);

    const std::vector<std::string> vecRows = splitString(strAllData, "\n");
    for (size_t i = 0; i < vecRows.size(); ++i)
    {
        const std::string &strRowData = vecRows[i];
        if (strRowData.empty())
        {
            continue;
        }

        std::vector<std::string> vecFields;
        std::string strField;
        StateType state = kNewFieldStart;

        // Stop scanning once the row terminator has been seen. The row is
        // emitted exactly once by the switch after this loop; emitting it here
        // as well would duplicate it for every character following the '\r'.
        for (size_t j = 0; j < strRowData.size() && state != kRowSeparator; ++j)
        {
            const char ch = strRowData[j];
            switch (state)
            {
                case kNewFieldStart:
                {
                    if (ch == '"')
                    {
                        state = kQuotesField;
                    }
                    else if (ch == sep)
                    {
                        vecFields.push_back("");
                        state = kFieldSeparator;
                    }
                    else if (ch == '\r' || ch == '\n')
                    {
                        state = kRowSeparator;
                    }
                    else if (ch == '\\')
                    {
                        state = kBackSlash;
                    }
                    else
                    {
                        strField.push_back(ch);
                        state = kNonQuotesField;
                    }
                }
                break;

                case kNonQuotesField:
                {
                    if (ch == sep)
                    {
                        vecFields.push_back(strField);
                        strField.clear();
                        state = kFieldSeparator;
                    }
                    else if (ch == '\r' || ch == '\n')
                    {
                        vecFields.push_back(strField);
                        state = kRowSeparator;
                    }
                    else if (ch == '\\')
                    {
                        state = kBackSlash;
                    }
                    else
                    {
                        strField.push_back(ch);
                    }
                }
                break;

                case kQuotesField:
                {
                    if (ch == '"')
                    {
                        state = kQuoteInQuotesField;
                    }
                    else if (ch == '\\')
                    {
                        state = kBackSlashInQuotesField;
                    }
                    else
                    {
                        strField.push_back(ch);
                    }
                }
                break;

                case kFieldSeparator:
                {
                    if (ch == sep)
                    {
                        // Two separators in a row: an empty field.
                        vecFields.push_back("");
                    }
                    else if (ch == '"')
                    {
                        strField.clear();
                        state = kQuotesField;
                    }
                    else if (ch == '\r' || ch == '\n')
                    {
                        vecFields.push_back("");
                        state = kRowSeparator;
                    }
                    else if (ch == '\\')
                    {
                        // A field may begin with an escape, exactly as the
                        // first field of the row may.
                        state = kBackSlash;
                    }
                    else
                    {
                        strField.push_back(ch);
                        state = kNonQuotesField;
                    }
                }
                break;

                case kQuoteInQuotesField:
                {
                    if (ch == sep)
                    {
                        // The quote closed the field.
                        vecFields.push_back(strField);
                        strField.clear();
                        state = kFieldSeparator;
                    }
                    else if (ch == '\r' || ch == '\n')
                    {
                        vecFields.push_back(strField);
                        state = kRowSeparator;
                    }
                    else if (ch == '"')
                    {
                        // Doubled quote: a literal quote inside the field.
                        strField.push_back(ch);
                        state = kQuotesField;
                    }
                    else
                    {
                        // A quote inside a quoted field must be doubled, and the
                        // closing quote must be directly followed by a separator.
                        std::cout<<"语法错误: 转义字符 \" 不能完成转义 或 引号字段结尾引号没有紧贴字段分隔符"<<std::endl;
                        assert(false);
                    }
                }
                break;

                case kBackSlash:
                {
                    if (ch == '"')
                    {
                        // Escaped quote.
                        strField.push_back(ch);
                        state = kNonQuotesField;
                    }
                    else if (ch == 'n')
                    {
                        // Escaped newline.
                        strField.push_back('\n');
                        state = kNonQuotesField;
                    }
                    else if (ch == '\\')
                    {
                        // Escaped backslash.
                        strField.push_back('\\');
                        state = kNonQuotesField;
                    }
                    else
                    {
                        // Unsupported escape sequence.
                        std::cout<<"语法错误: 转义字符 \" 不能完成转义 或 引号字段结尾引号没有紧贴字段分隔符"<<std::endl;
                        assert(false);
                    }
                }
                break;

                case kBackSlashInQuotesField:
                {
                    if (ch == '"')
                    {
                        // Escaped quote.
                        strField.push_back(ch);
                        state = kQuotesField;
                    }
                    else if (ch == 'n')
                    {
                        // Escaped newline.
                        strField.push_back('\n');
                        state = kQuotesField;
                    }
                    else if (ch == '\\')
                    {
                        // Escaped backslash.
                        strField.push_back('\\');
                        state = kQuotesField;
                    }
                    else
                    {
                        // Unsupported escape sequence.
                        std::cout<<"语法错误: 转义字符 \" 不能完成转义 或 引号字段结尾引号没有紧贴字段分隔符"<<std::endl;
                        assert(false);
                    }
                }
                break;

                default:
                    break;
            }
        }

        // Flush whatever the end of the line leaves pending.
        switch (state)
        {
            case kNonQuotesField:
            case kQuoteInQuotesField:
            {
                // The line ended inside or right after the last field.
                vecFields.push_back(strField);
                _gridData.push_back(vecFields);
            }
            break;

            case kFieldSeparator:
            {
                // A trailing separator means a final empty field.
                vecFields.push_back("");
                _gridData.push_back(vecFields);
            }
            break;

            case kRowSeparator:
            {
                // A line holding only the terminator (blank CRLF line) has no
                // fields; skip it just like a completely empty line.
                if (!vecFields.empty())
                {
                    _gridData.push_back(vecFields);
                }
            }
            break;

            default:
                // Line ended inside an open quote or after a dangling
                // backslash: the row is dropped.
                break;
        }
    }
    return _gridData;
}
// Writes the grid held in _gridData to standard output: a heading, the row
// count, then each row with every field followed by a tab and each row
// followed by a blank line.
void CCSVParse::printParseData() const
{
    std::cout << "以下是解析的csv数据:" << std::endl;
    std::cout << "row counts:" << _gridData.size() << std::endl;

    for (const std::vector<std::string> &fields : _gridData)
    {
        for (const std::string &cell : fields)
        {
            std::cout << cell << "\t";
        }
        std::cout << "\n" << std::endl;
    }
}
// Parses a CSV file into a two-level dictionary.
//
// The first parsed row supplies the column keys. Every later row becomes a
// map from column key to that row's value, and is stored under the row's
// first field (its id). A later row with the same id replaces an earlier one.
//
// @param fileName   Path of the CSV file, forwarded to parseCsvFile().
// @param separator  Field separator, forwarded to parseCsvFile().
// @return id -> (column key -> value). Empty when the file has no data rows.
//         Rows without any field are skipped; a row with fewer fields than
//         the header gets an empty string for each missing column.
std::map<std::string, std::map<std::string, std::string> > CCSVParse::parseCsvFileToMap(const std::string &fileName,const std::string &separator)
{
    // Get the raw row/column grid first.
    const std::vector<std::vector<std::string>> allData = parseCsvFile(fileName, separator);

    std::map<std::string, std::map<std::string, std::string> > mapAllData;
    if (allData.empty())
    {
        return mapAllData;
    }

    // The first row names the columns; bind it once instead of per data row.
    const std::vector<std::string> &keyData = allData.front();

    for (size_t row = 1; row < allData.size(); ++row)
    {
        const std::vector<std::string> &rowData = allData[row];
        if (rowData.empty())
        {
            // No id column to key on (e.g. a blank line in simple mode).
            continue;
        }

        std::map<std::string, std::string> mapRow;
        for (size_t col = 0; col < keyData.size(); ++col)
        {
            // Short rows are padded with empty values rather than throwing.
            mapRow[keyData[col]] = (col < rowData.size()) ? rowData[col] : std::string();
        }

        // The first column of each row is its id.
        mapAllData[rowData.front()] = mapRow;
    }
    return mapAllData;
}
#pragma mark--全局函数 类型转换
// Converts the leading integer in `source` to an int using stream extraction.
//
// @param source  Text to parse; leading whitespace is skipped and parsing
//                stops at the first character that is not part of the number.
// @return The parsed value, or 0 when `source` is empty, whitespace-only, or
//         does not start with a number.
int conToInt(std::string &source)
{
    // Initialised because extraction leaves the target untouched when the
    // stream has nothing to read (empty or all-whitespace input).
    int res = 0;
    std::istringstream iss(source);
    iss >> res;
    return res;
}
// Converts the leading number in `source` to a float using stream extraction.
//
// @param source  Text to parse; leading whitespace is skipped and parsing
//                stops at the first character that is not part of the number.
// @return The parsed value, or 0 when `source` is empty, whitespace-only, or
//         does not start with a number.
float conToFloat(std::string &source)
{
    // Initialised because extraction leaves the target untouched when the
    // stream has nothing to read (empty or all-whitespace input).
    float res = 0.0f;
    std::istringstream iss(source);
    iss >> res;
    return res;
}
// Converts the leading number in `source` to a double using stream extraction.
//
// @param source  Text to parse; leading whitespace is skipped and parsing
//                stops at the first character that is not part of the number.
// @return The parsed value, or 0 when `source` is empty, whitespace-only, or
//         does not start with a number.
double conToDouble(std::string &source)
{
    // Initialised because extraction leaves the target untouched when the
    // stream has nothing to read (empty or all-whitespace input).
    double res = 0.0;
    std::istringstream iss(source);
    iss >> res;
    return res;
}
// Formats an int as text using default stream formatting.
//
// @param s  Value to format.
// @return Exactly the characters the stream produced for `s`. The text is
//         taken from the stream buffer directly; re-extracting it with
//         operator>> would stop at the first whitespace and truncate output
//         when the active locale groups digits with a space.
std::string conToString(int s)
{
    std::ostringstream oss;
    oss << s;
    return oss.str();
}
// Formats a float as text using default stream formatting (default
// precision and notation).
//
// @param s  Value to format.
// @return Exactly the characters the stream produced for `s`. The text is
//         taken from the stream buffer directly; re-extracting it with
//         operator>> would stop at the first whitespace and truncate output
//         when the active locale groups digits with a space.
std::string conToString(float s)
{
    std::ostringstream oss;
    oss << s;
    return oss.str();
}
// Formats a double as text using default stream formatting (default
// precision and notation).
//
// @param s  Value to format.
// @return Exactly the characters the stream produced for `s`. The text is
//         taken from the stream buffer directly; re-extracting it with
//         operator>> would stop at the first whitespace and truncate output
//         when the active locale groups digits with a space.
std::string conToString(double s)
{
    std::ostringstream oss;
    oss << s;
    return oss.str();
}