C# 分析用引号分隔的csv逗号
假设我有一个输入字符串:C# 分析用引号分隔的csv逗号,c#,csv,C#,Csv,假设我有一个输入字符串: String test = "\"item one\",\"item,2\",12345"; String[] arr = test.Split(','); 结果是: [0]: "item one" [1]: "item" [2]: "2" [3]: "12345" 但我想: [0]: "item one" [1]: "item, 2" [2]: "12345" 基本上,我的输入字符串将有引号,以允许引号之间使用逗号,而不是分隔符 将该行解析为数组的最佳方法是
String test = "\"item one\",\"item,2\",12345";
String[] arr = test.Split(',');
结果是:
[0]: "item one"
[1]: "item"
[2]: "2"
[3]: "12345"
但我想:
[0]: "item one"
[1]: "item,2"
[2]: "12345"
基本上,我的输入字符串中会带有引号,引号之间的逗号应被视为字段内容,而不是分隔符
将该行解析为数组的最佳方法是什么? 下面是一个简单而完整的 CsvHelper,它实现了把字段数组编码为一行 CSV 数据,以及把一行 CSV 数据解码为字段数组。在我的上一个项目中,我本想使用 FastCSVReader,但它的内部实现似乎相当复杂;而且我并不需要尽可能快、最健壮的解决方案,所以我编写了这个小助手:
/// <summary>
/// Minimal CSV helper: encodes an array of fields into one comma-separated line
/// and decodes such a line back into its fields.
/// Fields are separated by commas; a field may be wrapped in double quotes, in which
/// case it can contain commas and line breaks, and a literal double quote is written
/// as two consecutive double quotes. Whitespace is never trimmed.
/// </summary>
public static class CsvHelper
{
    /// <summary>
    /// Characters that force a field to be wrapped in double quotes when encoding.
    /// CR and LF are included so that a field with an embedded line break does not
    /// split the record when the output is later read line by line.
    /// </summary>
    private static readonly char[] CharsRequiringQuotes = { '"', ',', '\r', '\n' };

    /// <summary>
    /// Parser states used by <see cref="ReadField"/>.
    /// </summary>
    private enum FieldState
    {
        /// <summary>No character of the field has been consumed yet.</summary>
        Start,
        /// <summary>Inside a field that did not start with a double quote.</summary>
        Unquoted,
        /// <summary>Inside a quoted field, after the opening double quote.</summary>
        Quoted,
        /// <summary>Just after the closing double quote of a quoted field.</summary>
        AfterClosingQuote
    }

    #region Public methods

    /// <summary>
    /// Encodes fields as a single line for a CSV file.
    /// </summary>
    /// <param name="fields">
    /// Field values in output order. A <c>null</c> element is written as an empty field.
    /// </param>
    /// <returns>
    /// The encoded line without a line terminator, or <c>null</c> when
    /// <paramref name="fields"/> is <c>null</c> or empty.
    /// </returns>
    public static string CodeLine(string[] fields)
    {
        if (fields == null || fields.Length == 0)
        {
            return null;
        }

        var sb = new StringBuilder();
        for (int i = 0; i < fields.Length; i++)
        {
            if (i > 0)
            {
                sb.Append(',');
            }

            string value = fields[i];
            if (value == null)
            {
                continue; // null is emitted as an empty field
            }

            if (value.IndexOfAny(CharsRequiringQuotes) >= 0)
            {
                // Wrap in quotes and double every embedded quote.
                sb.Append('"');
                sb.Append(value.Replace("\"", "\"\""));
                sb.Append('"');
            }
            else
            {
                sb.Append(value);
            }
        }

        return sb.ToString();
    }

    /// <summary>
    /// Decodes a line from a CSV file into its fields.
    /// </summary>
    /// <param name="line">The line to decode.</param>
    /// <param name="fields">
    /// Receives the decoded fields on success; set to <c>null</c> on failure.
    /// </param>
    /// <returns>
    /// <c>true</c> if decoding was successful; <c>false</c> when <paramref name="line"/>
    /// is <c>null</c> or empty, or when it is malformed (a double quote inside an
    /// unquoted field, an unterminated quoted field, or any character other than a
    /// comma directly after a closing quote).
    /// </returns>
    public static bool DecodeLine(string line, out string[] fields)
    {
        fields = null;
        if (string.IsNullOrEmpty(line))
        {
            return false;
        }

        var result = new List<string>();
        int index = 0;
        while (index != line.Length)
        {
            string field;
            if (!ReadField(line, ref index, out field))
            {
                return false;
            }

            result.Add(field);
        }

        // ReadField consumes the separator that ends a field, so a line whose last
        // character is a comma still has one more (empty) field after that comma.
        // Decoding succeeded, so that comma cannot be inside an open quoted field.
        if (line[line.Length - 1] == ',')
        {
            result.Add(string.Empty);
        }

        fields = result.ToArray();
        return true;
    }

    #endregion

    #region Other methods

    /// <summary>
    /// Reads one field starting at <paramref name="index"/> and advances
    /// <paramref name="index"/> past the field and the comma that ends it, if any.
    /// </summary>
    /// <param name="line">The line being decoded.</param>
    /// <param name="index">
    /// Position of the first character of the field; updated as characters are consumed.
    /// </param>
    /// <param name="field">
    /// Receives the decoded field text on success; <c>null</c> on failure.
    /// </param>
    /// <returns>
    /// <c>true</c> if a field was read; <c>false</c> if <paramref name="index"/> is at
    /// or past the end of the line or the field is malformed.
    /// </returns>
    private static bool ReadField(string line, ref int index, out string field)
    {
        field = null;
        if (index >= line.Length)
        {
            return false;
        }

        var sb = new StringBuilder();
        FieldState state = FieldState.Start;

        while (index < line.Length)
        {
            char c = line[index++];
            switch (state)
            {
                case FieldState.Start:
                    if (c == '"')
                    {
                        // Opening quote: the field text is everything up to the closing quote.
                        state = FieldState.Quoted;
                    }
                    else if (c == ',')
                    {
                        // Separator right away: empty field.
                        field = sb.ToString();
                        return true;
                    }
                    else
                    {
                        sb.Append(c);
                        state = FieldState.Unquoted;
                    }
                    break;

                case FieldState.Unquoted:
                    if (c == '"')
                    {
                        // A quote may not appear in the middle of an unquoted field.
                        return false;
                    }
                    if (c == ',')
                    {
                        field = sb.ToString();
                        return true;
                    }
                    sb.Append(c);
                    break;

                case FieldState.Quoted:
                    if (c != '"')
                    {
                        sb.Append(c);
                    }
                    else if (index < line.Length && line[index] == '"')
                    {
                        // Two consecutive quotes encode one literal quote; consume both.
                        sb.Append('"');
                        index++;
                    }
                    else
                    {
                        state = FieldState.AfterClosingQuote;
                    }
                    break;

                case FieldState.AfterClosingQuote:
                    if (c != ',')
                    {
                        // Only a separator may follow the closing quote.
                        return false;
                    }
                    field = sb.ToString();
                    return true;
            }
        }

        // End of line: valid only if the field was unquoted or its quotes were closed.
        if (state == FieldState.Unquoted || state == FieldState.AfterClosingQuote)
        {
            field = sb.ToString();
            return true;
        }

        return false;
    }

    #endregion
}
//
///CsvHelper
///
公共静态类CsvHelper
{
#区域公共方法
///
///将字段编码为csv文件的一行
///
///
///
公共静态字符串代码行(字符串[]字段)
{
if(fields==null | | fields.Length==0)
返回null;
var sb=新的StringBuilder(1024);
for(int i=0;i=行长度)
返回false;
var sb=新的StringBuilder(512);
int state=0;
while(true)
{
char c=行[索引];
char?c1=(索引+1<行长度-1)?(char?)行[索引+1]:空;
索引++;
开关(状态)
{
案例0://开始
如果(c==“”)//字段中的文本以引号开头,则字段中的文本以引号开头
{
状态=4;
}
else if(c==',')//字段的空文本
{
field=sb.ToString();
返回true;
}
其他的
{
状态=1;
sb.附加(c);
}
打破
案例1://字段中未引用文本
如果(c=='')/ /错误,则不能包含在字段的中间
{
返回false;
}
else如果(c==',')
{
field=sb.ToString();
返回true;
}
其他的
{
sb.附加(c);
}
打破
案例3://转义引号
if(c==“”)//上一个引号是此引号的转义字符
{
状态=4;
sb.附加(c);
}
else//错误,不能包含任何其他字符
{
返回false;
}
打破
案例4://引号之间的文本
if(c==“”)//结束带引号的文本或转义字符,用于后面的Qatation标记-基于后面的字符
{
if(c1!=null&&c1.Value==“”)//当前引号是以下引号的转义字符
{
状态=3;
}
其他的
{
状态=5;
}
}
其他的
{
sb.附加(c);
}
打破
案例5://在引用文本的结束引号之后
if(c==',')//结束带引号的文本
{
field=sb.ToString();
返回true;
}
else//错误,不能包含任何其他字符
{
返回false;
}
打破
}
如果(索引==行长度)
{
如果(状态==1 | |状态==5)
{
field=sb.ToString();
返回true;
}
返回false;
}
}
}
#端区
}
我可能会使用正则表达式,如<代码>([])[^ ] ** 1(\d+)但当然也有可能需要考虑其他格式。