C# 如何提高代码的性能和速度(特别是在double.Parse中)?
我有testFile.txt文件(大约400mg)。它包含OHLC股票价格,时间范围为1分钟 它的结构是:“股票名称、日期、时间、开盘价、高价、低价、收盘价、成交量”->“其他,20010102230100,1.9007,1.9007,1.9007,1.9007,4”(这只是一个例子) 我的主要问题-这段代码非常慢。我测量了速度,发现关键部分是double.Parse部分。是否可以更改代码以提高性能? 我的c#解析代码:C# 如何提高代码的性能和速度(特别是在double.Parse中)?,c#,C#,我有testFile.txt文件(大约400mg)。它包含OHLC股票价格,时间范围为1分钟 它的结构是:“股票名称、日期、时间、开盘价、高价、低价、收盘价、成交量”->“其他,20010102230100,1.9007,1.9007,1.9007,1.9007,4”(这只是一个例子) 我的主要问题-这段代码非常慢。我测量了速度,发现关键部分是double.Parse部分。是否可以更改代码以提高性能? 我的c#解析代码: using System; using System.Collection
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Globalization;
namespace ConsoleApplication3
{
class Program
{
static void Main(string[] args)
{
string sourceDir = "D:\\testFile.txt",
outDir = "D:\\result.txt";
Thread.CurrentThread.CurrentCulture = System.Globalization.CultureInfo.InvariantCulture;
using (StreamReader sr = new StreamReader(sourceDir))
{
int divider = 5;
string line = sr.ReadLine();
StreamWriter sw = new StreamWriter(outDir);
List<string> listLine = new List<string>();
List<double> listOpen = new List<double>();
List<double> listHigh = new List<double>();
List<double> listLow = new List<double>();
List<double> listClose = new List<double>();
List<double> listVolume = new List<double>();
DateTime dateTimeOut = new DateTime();
string formatDate = "yyyyMMddHHmmss";
string newLine = "";
double priceOpen, priceHigh, priceLow, priceClose, volume;
//read first line, but don't write it
line = sr.ReadLine();
while (line != null)
{
listLine = line.Split(',').ToList();
dateTimeOut = DateTime.ParseExact(listLine[1] + listLine[2], formatDate, null);
double.TryParse(listLine[3], out priceOpen);
double.TryParse(listLine[4], out priceHigh);
double.TryParse(listLine[5], out priceLow);
double.TryParse(listLine[6], out priceClose);
double.TryParse(listLine[7], out volume);
listOpen.Add(priceOpen);
listHigh.Add(priceHigh);
listLow.Add(priceLow);
listClose.Add(priceClose);
listVolume.Add(volume);
if (dateTimeOut.Minute % divider == 0)
{
newLine = dateTimeOut + "," + listOpen[0] + "," + listHigh.Max() + "," + listLow.Min() + "," + listClose[4] + "," + listVolume.Max();
sw.WriteLine(newLine);
}
line = sr.ReadLine();
}
sr.Close();
}
}
}
}
我不认为
Double.Parse()
是瓶颈
我编写了一个测试程序(如下所示)。发布版本在不到二十秒的时间内解析了一亿个翻倍:
using System;
using System.Diagnostics;
namespace Demo
{
internal class Program
{
private void run()
{
string s = "12345.6789";
double result;
Stopwatch sw = Stopwatch.StartNew();
for (int i = 0; i < 100000000; ++i)
double.TryParse(s, out result);
Console.WriteLine("Took " + sw.Elapsed);
}
private static void Main()
{
new Program().run();
}
}
}
使用系统;
使用系统诊断;
名称空间演示
{
内部课程计划
{
私家车
{
字符串s=“12345.6789”;
双重结果;
秒表sw=Stopwatch.StartNew();
对于(int i=0;i<100000000;++i)
双锥虫(s,输出结果);
控制台写入线(“take”+sw.appeased);
}
私有静态void Main()
{
新程序().run();
}
}
}
double.Parse非常慢,因为有很多方法可以表示双值:1000;1000.1; 1e3、1.353e+34、-23.24e-123等。
如果您只有一种预定义格式(很可能有),比如10394.324,而不支持指数形式,那么您可以实现更高效的自定义解析器:从流中逐个字符读取,检查它是空格、数字还是点,并累加结果或相应地处理结果。它的实现相对简单,并将提供更好的性能。我想,如果您的硬盘允许读取速度如此之快,那么400MB的文件可以在不到10秒的时间内解析出来=)
另外,我不建议使用string.Split来处理如此大量的字符串,因为它会消耗您所有的内存,并使垃圾收集频繁发生,这可能会使您的代码速度降低到double.Parse以上。Instread逐字节读取流
还要提到的一点是,ToList()创建新列表并将源集合的所有元素复制(引用)到其中。这也是一个耗费大量时间和内存的不必要操作
最后,字符串连接不应该使用“+”运算符完成
因此,我认为你的问题可能在于:
line.Split(',').ToList();
newLine = dateTimeOut + "," + listOpen[0] + "," + listHigh.Max() + "," + listLow.Min() + "," + listClose[4] + "," + listVolume.Max();
如果运行你的程序消耗了所有的机器内存,那么99%的人认为问题就在这里
尝试用对sw.Write()的少量后续调用替换第二行代码>,以减轻“+”运算符的负担,并实现不需要字符串拆分的流式双解析器。您使用的是迭代整个集合的LINQ Max()和Min()函数。由于它们在一个循环中被调用了数千次,并且集合包含数百万个元素,因此效率非常低。而是在循环外部存储最小值和最大值,并在每次迭代时更新它们:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Globalization;
namespace ConsoleApplication3
{
class Program
{
static void Main(string[] args)
{
string sourceDir = "D:\\testFile.txt",
outDir = "D:\\result.txt";
Thread.CurrentThread.CurrentCulture = System.Globalization.CultureInfo.InvariantCulture;
using (StreamReader sr = new StreamReader(sourceDir))
{
int divider = 5;
string line = sr.ReadLine();
StreamWriter sw = new StreamWriter(outDir);
List<string> listLine = new List<string>();
List<double> listOpen = new List<double>();
List<double> listHigh = new List<double>();
List<double> listLow = new List<double>();
List<double> listClose = new List<double>();
List<double> listVolume = new List<double>();
DateTime dateTimeOut = new DateTime();
string formatDate = "yyyyMMddHHmmss";
string newLine = "";
double priceOpen, priceHigh, priceLow, priceClose, volume;
//read first line, but don't write it
line = sr.ReadLine();
double highMax = double.MinValue;
double lowMin = double.MaxValue;
double volumeMax = double.MinValue;
while (line != null)
{
listLine = line.Split(',').ToList();
dateTimeOut = DateTime.ParseExact(listLine[1] + listLine[2], formatDate, null);
double.TryParse(listLine[3], out priceOpen);
double.TryParse(listLine[4], out priceHigh);
double.TryParse(listLine[5], out priceLow);
double.TryParse(listLine[6], out priceClose);
double.TryParse(listLine[7], out volume);
listOpen.Add(priceOpen);
listHigh.Add(priceHigh);
listLow.Add(priceLow);
listClose.Add(priceClose);
listVolume.Add(volume);
/*Here is implementation of accumulative max/min calculation*/
if (highMax < priceHigh)
{
highMax = priceHigh;
}
if (lowMin > priceLow)
{
lowMin = priceLow;
}
if (volumeMax < volume)
{
volumeMax = volume;
}
if (dateTimeOut.Minute % divider == 0)
{
newLine = dateTimeOut + "," + listOpen[0] + "," + highMax + "," + lowMin + "," + listClose[4] + "," + volumeMax;
sw.WriteLine(newLine);
}
line = sr.ReadLine();
}
sr.Close();
}
}
}
}
使用系统;
使用System.Collections.Generic;
使用System.IO;
使用System.Linq;
使用系统文本;
使用系统线程;
使用System.Threading.Tasks;
利用制度全球化;
命名空间控制台应用程序3
{
班级计划
{
静态void Main(字符串[]参数)
{
string sourceDir=“D:\\testFile.txt”,
outDir=“D:\\result.txt”;
Thread.CurrentThread.CurrentCulture=System.Globalization.CultureInfo.InvariantCulture;
使用(StreamReader sr=新的StreamReader(sourceDir))
{
整数除法器=5;
字符串行=sr.ReadLine();
StreamWriter sw=新StreamWriter(outDir);
List listLine=新列表();
List listOpen=新列表();
List listHigh=新列表();
List listLow=新列表();
List listClose=新列表();
List listVolume=新列表();
DateTime dateTimeOut=新的日期时间();
字符串formatDate=“yyyyMMddHHmmss”;
字符串换行=”;
双倍价格开盘、高价、低价、收盘、成交量;
//读第一行,但不要写
line=sr.ReadLine();
double highMax=double.MinValue;
double lowMin=double.MaxValue;
double volumeMax=double.MinValue;
while(行!=null)
{
listLine=line.Split(',').ToList();
dateTimeOut=DateTime.ParseExact(listLine[1]+listLine[2],formatDate,null);
double.TryParse(列表行[3],价格公开);
double.TryParse(列表行[4],价格过高);
double.TryParse(列表行[5],价格较低);
double.TryParse(列表行[6],价格接近);
double.TryParse(列表行[7],输出量);
添加(priceOpen);
添加(priceHigh);
添加(priceLow);
添加(priceClose);
添加(卷);
/*这里是累计最大/最小计算的实现*/
如果(最高值<最高价格)
{
highMax=价格高;
}
如果(低分钟>低价格)
{
lowMin=价格低;
}
如果(体积最大<体积)
{
体积最大=体积;
}
if(dateTimeOut.Minute%divider==0)
{
换行符=dateTimeOut+”、“+listOpen[0]+”、“+highMax+”、“+lowMin+”、“+listClose[4]+”、”
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Globalization;
namespace ConsoleApplication3
{
class Program
{
static void Main(string[] args)
{
string sourceDir = "D:\\testFile.txt",
outDir = "D:\\result.txt";
Thread.CurrentThread.CurrentCulture = System.Globalization.CultureInfo.InvariantCulture;
using (StreamReader sr = new StreamReader(sourceDir))
{
int divider = 5;
string line = sr.ReadLine();
StreamWriter sw = new StreamWriter(outDir);
List<string> listLine = new List<string>();
List<double> listOpen = new List<double>();
List<double> listHigh = new List<double>();
List<double> listLow = new List<double>();
List<double> listClose = new List<double>();
List<double> listVolume = new List<double>();
DateTime dateTimeOut = new DateTime();
string formatDate = "yyyyMMddHHmmss";
string newLine = "";
double priceOpen, priceHigh, priceLow, priceClose, volume;
//read first line, but don't write it
line = sr.ReadLine();
double highMax = double.MinValue;
double lowMin = double.MaxValue;
double volumeMax = double.MinValue;
while (line != null)
{
listLine = line.Split(',').ToList();
dateTimeOut = DateTime.ParseExact(listLine[1] + listLine[2], formatDate, null);
double.TryParse(listLine[3], out priceOpen);
double.TryParse(listLine[4], out priceHigh);
double.TryParse(listLine[5], out priceLow);
double.TryParse(listLine[6], out priceClose);
double.TryParse(listLine[7], out volume);
listOpen.Add(priceOpen);
listHigh.Add(priceHigh);
listLow.Add(priceLow);
listClose.Add(priceClose);
listVolume.Add(volume);
/*Here is implementation of accumulative max/min calculation*/
if (highMax < priceHigh)
{
highMax = priceHigh;
}
if (lowMin > priceLow)
{
lowMin = priceLow;
}
if (volumeMax < volume)
{
volumeMax = volume;
}
if (dateTimeOut.Minute % divider == 0)
{
newLine = dateTimeOut + "," + listOpen[0] + "," + highMax + "," + lowMin + "," + listClose[4] + "," + volumeMax;
sw.WriteLine(newLine);
}
line = sr.ReadLine();
}
sr.Close();
}
}
}
}