C# 阅读IMDB电影列表的最有效方法
我正在从我的硬盘上的文本文件中阅读IMDB电影列表(最初可从IMDB网站上获得) 在我的机器上(基本信息:Win7 x64bit、16GB RAM、500 GB SATA硬盘7200 RPM)使用下面的代码逐行读取此文件大约需要5分钟 我有两个问题:C# 阅读IMDB电影列表的最有效方法,c#,.net,filestream,streamreader,imdb,C#,.net,Filestream,Streamreader,Imdb,我正在从我的硬盘上的文本文件中阅读IMDB电影列表(最初可从IMDB网站上获得) 在我的机器上(基本信息:Win7 x64bit、16GB RAM、500 GB SATA硬盘7200 RPM)使用下面的代码逐行读取此文件大约需要5分钟 我有两个问题: 有什么方法可以优化代码以提高读取时间吗 数据访问不需要按顺序进行,因为我不介意从上到下/从下到上或任何顺序读取数据,只要它一次读取一行。我想知道有没有一种方法可以在多个方向阅读,以提高阅读时间 该应用程序是一个Windows控制台应用程序 更新:许
//代码块
// Reads the IMDB movie list line by line and echoes every line to the console.
string file = @"D:\movies.list";
// Use a 64 KB buffer and the SequentialScan hint: the file is read exactly once,
// front to back, so a tiny 8-byte FileStream buffer only adds overhead.
// Both the stream and the reader sit in using statements so the file handle is
// released even if constructing the reader or reading a line throws.
using (FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 65536, FileOptions.SequentialScan))
using (StreamReader sr = new StreamReader(fs))
{
    string line;
    // ReadLine returns null at end of stream, which is the canonical loop
    // condition and avoids a separate Peek() call per line.
    while ((line = sr.ReadLine()) != null)
    {
        Console.WriteLine(line);
    }
}
我不确定这是否更有效,但另一种方法是使用:
我不确定这是否更有效,但另一种方法是使用:
在.net 4中,您可以使用File.ReadLines进行延迟计算,从而在处理大型文件时降低RAM使用率 您可以直接对文件执行linq操作,这与File.ReadLines一起可以缩短加载时间 为了更好地理解,您可以查看 您也可以进行比较,但要设置时间间隔
但是,如果您制作web应用程序,您可以在应用程序启动事件时读取整个文件,并将其缓存在应用程序池中以获得更好的性能。在.net 4中,您可以使用file.ReadLines进行延迟评估,从而在处理大文件时降低RAM使用率 您可以直接对文件执行linq操作,这与File.ReadLines一起可以缩短加载时间 为了更好地理解,您可以查看 您也可以进行比较,但要设置时间间隔
但是,如果您制作web应用程序,您可以在应用程序启动事件时读取整个文件,并将其缓存在应用程序池中以获得更好的性能。我不是c#开发人员,但如何使用该文件(这将是一次)将其批量插入数据库中。然后,您可以重用数据并进行导出 我不是一名c#开发人员,但如何使用该文件向数据库中进行大容量插入(这将是一次)。然后,您可以重用数据并进行导出 这个问题的答案实际上取决于你将如何处理这些数据。如果您真正的目的是读取文件并将内容转储到控制台屏幕,那么最好使用 StringBuilder 类创建一个包含(例如)1000行的字符串,然后将内容转储到屏幕,重置字符串,然后再读取1000行,转储它们,等等 但是,如果您正在尝试构建属于较大项目一部分的内容,并且正在使用.NET 4.0,则可以使用 MemoryMappedFile 类来读取文件,并创建一个仅对部分数据进行操作的“窗口”(视图访问器),而不是读取整个文件 另一种选择是使用多个线程同时读取文件的不同部分,最后再将各部分拼接在一起 如果你能更具体地说明你打算用这些数据做什么,我可以帮你更多。希望这有帮助 编辑: 试试这段代码。我能够使用线程在3秒内读取整个列表:
using System;
using System.IO;
using System.Text;
using System.Threading;
namespace ConsoleApplication36
{
    /// <summary>
    /// Loads an entire file into memory by reading fixed-size blocks on several
    /// threads per pass, then prints a sample slice of the decoded text.
    /// </summary>
    class Program
    {
        private const string FileName = @"C:\Users\Public\movies.list";
        // Number of bytes each worker thread is asked to read.
        private const long ThreadReadBlockSize = 50000;
        // Number of worker threads started per pass.
        private const int NumberOfThreads = 4;
        // Receives the raw bytes of the whole file.
        private static byte[] _inputString;

        /// <summary>
        /// Reads <see cref="FileName"/> block by block, assembles the blocks in
        /// file order and writes a sample of the ASCII-decoded text to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var fi = new FileInfo(FileName);
            long totalBytesRead = 0;
            long fileLength = fi.Length;
            long readPosition = 0L;
            Console.WriteLine("Reading Lines From {0}", FileName);
            var threads = new Thread[NumberOfThreads];
            var instances = new ReadThread[NumberOfThreads];
            _inputString = new byte[fileLength];
            while (totalBytesRead < fileLength)
            {
                // Only start workers for blocks that lie inside the file, and trim
                // the last block so it never extends past the measured length.
                int started = 0;
                for (int i = 0; i < NumberOfThreads && readPosition < fileLength; i++)
                {
                    long blockSize = Math.Min(ThreadReadBlockSize, fileLength - readPosition);
                    var rt = new ReadThread { StartPosition = readPosition, BlockSize = blockSize };
                    instances[i] = rt;
                    threads[i] = new Thread(rt.Read);
                    threads[i].Start();
                    readPosition += blockSize;
                    started++;
                }
                for (int i = 0; i < started; i++)
                {
                    threads[i].Join();
                }
                long bytesThisPass = 0;
                for (int i = 0; i < started; i++)
                {
                    // After Read() has run, BlockSize holds the number of bytes actually read.
                    if (instances[i].BlockSize > 0)
                    {
                        Array.Copy(instances[i].Output, 0L, _inputString, instances[i].StartPosition,
                            instances[i].BlockSize);
                        bytesThisPass += instances[i].BlockSize;
                    }
                }
                totalBytesRead += bytesThisPass;
                if (bytesThisPass == 0)
                {
                    // No progress (e.g. the file shrank after its length was measured):
                    // stop instead of spinning forever.
                    break;
                }
            }
            string finalString = Encoding.ASCII.GetString(_inputString);
            // Clamp the sample window so files shorter than the requested offset
            // do not make Substring throw ArgumentOutOfRangeException.
            int sampleStart = Math.Min(104250000, finalString.Length);
            int sampleLength = Math.Min(50000, finalString.Length - sampleStart);
            Console.WriteLine(finalString.Substring(sampleStart, sampleLength));
        }

        /// <summary>
        /// Work item for one thread: reads up to <see cref="BlockSize"/> bytes of
        /// the file starting at <see cref="StartPosition"/>.
        /// </summary>
        private class ReadThread
        {
            /// <summary>Byte offset in the file where this block starts.</summary>
            public long StartPosition { get; set; }

            /// <summary>
            /// Requested byte count before <see cref="Read"/> runs; the number of
            /// bytes actually read afterwards.
            /// </summary>
            public long BlockSize { get; set; }

            /// <summary>Buffer holding the bytes read by <see cref="Read"/>.</summary>
            public byte[] Output { get; private set; }

            /// <summary>
            /// Opens the file, seeks to <see cref="StartPosition"/> and fills
            /// <see cref="Output"/>, storing the byte count read in <see cref="BlockSize"/>.
            /// </summary>
            public void Read()
            {
                int wanted = (int)BlockSize;
                int total = 0;
                Output = new byte[wanted];
                // using guarantees the handle is released even when Seek/Read throws.
                using (var inStream = new FileStream(FileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                {
                    inStream.Seek(StartPosition, SeekOrigin.Begin);
                    // Stream.Read may return fewer bytes than requested, so keep
                    // reading until the block is full or end of file is reached.
                    while (total < wanted)
                    {
                        int n = inStream.Read(Output, total, wanted - total);
                        if (n == 0)
                        {
                            break;
                        }
                        total += n;
                    }
                }
                BlockSize = total;
            }
        }
    }
}
使用系统;
使用System.IO;
使用系统文本;
使用系统线程;
命名空间控制台应用程序36
{
班级计划
{
private const string FileName=@“C:\Users\Public\movies.list”;
private const long threadblocksize=50000;
私有常量int NumberOfThreads=4;
专用静态字节[]_输入字符串;
静态void Main(字符串[]参数)
{
var fi=新文件信息(文件名);
长totalBytesRead=0;
long fileLength=fi.Length;
长读取位置=0升;
WriteLine(“从{0}读取行”,文件名);
var threads=新线程[NumberOfThreads];
var instances=newreadthread[NumberOfThreads];
_inputString=新字节[fileLength];
while(totalBytesRead0)
{
Array.Copy(实例[i].Output,0L,_inputString,实例[i].StartPosition,
实例[i]。块大小);
totalBytesRead+=实例[i]。块大小;
}
}
}
string finalString=Encoding.ASCII.GetString(_inputString);
控制台写入线(最终字符串子字符串(104250000,50000));
}
私有类读线程
{
公共长起始位置{get;set;}
公共长块大小{get;set;}
公共字节[]输出{get;private set;}
公共无效读取()
{
输出=新字节[块大小];
var inStream=newfilestream(文件名,FileMode.Open,FileAccess.Read,FileShare.ReadWrite);
在流内搜索(起始位置,见Korigin.Begin);
using System;
using System.IO;
using System.Text;
using System.Threading;
namespace ConsoleApplication36
{
    /// <summary>
    /// Loads an entire file into memory by reading fixed-size blocks on several
    /// threads per pass, then prints a sample slice of the decoded text.
    /// </summary>
    class Program
    {
        private const string FileName = @"C:\Users\Public\movies.list";
        // Number of bytes each worker thread is asked to read.
        private const long ThreadReadBlockSize = 50000;
        // Number of worker threads started per pass.
        private const int NumberOfThreads = 4;
        // Receives the raw bytes of the whole file.
        private static byte[] _inputString;

        /// <summary>
        /// Reads <see cref="FileName"/> block by block, assembles the blocks in
        /// file order and writes a sample of the ASCII-decoded text to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var fi = new FileInfo(FileName);
            long totalBytesRead = 0;
            long fileLength = fi.Length;
            long readPosition = 0L;
            Console.WriteLine("Reading Lines From {0}", FileName);
            var threads = new Thread[NumberOfThreads];
            var instances = new ReadThread[NumberOfThreads];
            _inputString = new byte[fileLength];
            while (totalBytesRead < fileLength)
            {
                // Only start workers for blocks that lie inside the file, and trim
                // the last block so it never extends past the measured length.
                int started = 0;
                for (int i = 0; i < NumberOfThreads && readPosition < fileLength; i++)
                {
                    long blockSize = Math.Min(ThreadReadBlockSize, fileLength - readPosition);
                    var rt = new ReadThread { StartPosition = readPosition, BlockSize = blockSize };
                    instances[i] = rt;
                    threads[i] = new Thread(rt.Read);
                    threads[i].Start();
                    readPosition += blockSize;
                    started++;
                }
                for (int i = 0; i < started; i++)
                {
                    threads[i].Join();
                }
                long bytesThisPass = 0;
                for (int i = 0; i < started; i++)
                {
                    // After Read() has run, BlockSize holds the number of bytes actually read.
                    if (instances[i].BlockSize > 0)
                    {
                        Array.Copy(instances[i].Output, 0L, _inputString, instances[i].StartPosition,
                            instances[i].BlockSize);
                        bytesThisPass += instances[i].BlockSize;
                    }
                }
                totalBytesRead += bytesThisPass;
                if (bytesThisPass == 0)
                {
                    // No progress (e.g. the file shrank after its length was measured):
                    // stop instead of spinning forever.
                    break;
                }
            }
            string finalString = Encoding.ASCII.GetString(_inputString);
            // Clamp the sample window so files shorter than the requested offset
            // do not make Substring throw ArgumentOutOfRangeException.
            int sampleStart = Math.Min(104250000, finalString.Length);
            int sampleLength = Math.Min(50000, finalString.Length - sampleStart);
            Console.WriteLine(finalString.Substring(sampleStart, sampleLength));
        }

        /// <summary>
        /// Work item for one thread: reads up to <see cref="BlockSize"/> bytes of
        /// the file starting at <see cref="StartPosition"/>.
        /// </summary>
        private class ReadThread
        {
            /// <summary>Byte offset in the file where this block starts.</summary>
            public long StartPosition { get; set; }

            /// <summary>
            /// Requested byte count before <see cref="Read"/> runs; the number of
            /// bytes actually read afterwards.
            /// </summary>
            public long BlockSize { get; set; }

            /// <summary>Buffer holding the bytes read by <see cref="Read"/>.</summary>
            public byte[] Output { get; private set; }

            /// <summary>
            /// Opens the file, seeks to <see cref="StartPosition"/> and fills
            /// <see cref="Output"/>, storing the byte count read in <see cref="BlockSize"/>.
            /// </summary>
            public void Read()
            {
                int wanted = (int)BlockSize;
                int total = 0;
                Output = new byte[wanted];
                // using guarantees the handle is released even when Seek/Read throws.
                using (var inStream = new FileStream(FileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                {
                    inStream.Seek(StartPosition, SeekOrigin.Begin);
                    // Stream.Read may return fewer bytes than requested, so keep
                    // reading until the block is full or end of file is reached.
                    while (total < wanted)
                    {
                        int n = inStream.Read(Output, total, wanted - total);
                        if (n == 0)
                        {
                            break;
                        }
                        total += n;
                    }
                }
                BlockSize = total;
            }
        }
    }
}
/// <summary>
/// Micro-benchmark harness that times several ways of reading a text file and
/// writing its contents to the console.
/// </summary>
class Program
{
    private static readonly string file = @"movies.list";
    // Tests whose index lies in [testStart, numOfTests) are executed and reported.
    private static readonly int testStart = 1;
    private static readonly int numOfTests = 2;
    // Minimum total elapsed milliseconds before a timing run is accepted.
    private static readonly int MinTimingVal = 1000;
    // Display names, index-aligned with the action table built in Main.
    private static string[] testNames = new string[] {
        "Naive",
        "OneCallToWrite",
        "SomeCallsToWrite",
        "InParallel",
        "InParallelBlocks",
        "IceManMinds",
        "TestTiming"
    };
    private static double[] avgSecs = new double[numOfTests];
    private static int[] testIterations = new int[numOfTests];

    /// <summary>
    /// Shrinks the console window, runs the selected tests and prints the
    /// average time per iteration for each of them.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static void Main(string[] args)
    {
        Console.WriteLine("Starting tests...");
        Debug.WriteLine("Starting tests...");
        Console.WriteLine("");
        Debug.WriteLine("");
        //*****************************
        //The console is the bottle-neck, so we can
        //speed-up redrawing it by only showing 1 line at a time.
        Console.WindowHeight = 1;
        Console.WindowWidth = 50;
        Console.BufferHeight = 100;
        Console.BufferWidth = 50;
        //******************************
        // The table always holds every test; allocating it with numOfTests slots
        // and then assigning seven entries overran the array.
        Action[] actionArray = new Action[]
        {
            naive,
            oneCallToWrite,
            someCallsToWrite,
            inParallel,
            inParallelBlocks,
            iceManMinds,
            testTiming
        };
        // avgSecs/testIterations have numOfTests slots, so never index past that.
        int lastTest = Math.Min(numOfTests, actionArray.Length);
        for (int i = testStart; i < lastTest; i++)
        {
            DoTiming(actionArray[i], i);
        }
        printResults();
        Console.WriteLine("");
        Debug.WriteLine("");
        Console.WriteLine("Tests complete.");
        Debug.WriteLine("Tests complete.");
        Console.WriteLine("Press Enter to Close Console...");
        Debug.WriteLine("Press Enter to Close Console...");
        Console.ReadLine();
    }

    /// <summary>
    /// Times <paramref name="a"/>, doubling the iteration count until a run
    /// lasts at least <see cref="MinTimingVal"/> milliseconds, and stores the
    /// average seconds per iteration in slot <paramref name="num"/>.
    /// </summary>
    /// <param name="a">The action to measure.</param>
    /// <param name="num">Index of the result slot in avgSecs/testIterations.</param>
    private static void DoTiming(Action a, int num)
    {
        // One untimed call first, so one-off start-up costs are not measured.
        a.Invoke();
        Stopwatch watch = new Stopwatch();
        bool shouldRetry = false;
        int numOfIterations = 2;
        do
        {
            watch.Start();
            for (int i = 0; i < numOfIterations; i++)
            {
                a.Invoke();
            }
            watch.Stop();
            shouldRetry = false;
            if (watch.ElapsedMilliseconds < MinTimingVal) //if the time was less than the minimum, increase load and re-time.
            {
                shouldRetry = true;
                numOfIterations *= 2;
                watch.Reset();
            }
        } while (shouldRetry);
        long totalTime = watch.ElapsedMilliseconds;
        double avgTime = ((double)totalTime) / (double)numOfIterations;
        avgSecs[num] = avgTime / 1000.00;
        testIterations[num] = numOfIterations;
    }

    /// <summary>
    /// Writes the average elapsed time of every executed test to the console
    /// and to the debug output.
    /// </summary>
    private static void printResults()
    {
        Console.WriteLine("");
        Debug.WriteLine("");
        int lastTest = Math.Min(numOfTests, testNames.Length);
        for (int i = testStart; i < lastTest; i++)
        {
            TimeSpan t = TimeSpan.FromSeconds(avgSecs[i]);
            // Format once and pass a finished string to both sinks:
            // Debug.WriteLine(string, string) is the (message, category) overload,
            // so it would not apply the format string.
            string line = string.Format("ElapsedTime: {0}, test: {1}", t.ToString(), testNames[i]);
            Console.WriteLine(line);
            Debug.WriteLine(line);
        }
    }

    /// <summary>
    /// Baseline: reads the file line by line and writes each line separately.
    /// Kept deliberately unoptimized (8-byte stream buffer) as the reference case.
    /// </summary>
    public static void naive()
    {
        FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
        using (StreamReader sr = new StreamReader(fs))
        {
            while (sr.Peek() >= 0)
            {
                Console.WriteLine( sr.ReadLine() );
            }
        }
    }

    /// <summary>
    /// Collects every line in one StringBuilder and writes it to the console
    /// with a single call.
    /// </summary>
    public static void oneCallToWrite()
    {
        FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
        using (StreamReader sr = new StreamReader(fs))
        {
            StringBuilder sb = new StringBuilder();
            while (sr.Peek() >= 0)
            {
                string s = sr.ReadLine();
                sb.Append("\n" + s);
            }
            Console.Write(sb);
        }
    }

    /// <summary>
    /// Collects lines in a StringBuilder and flushes it to the console every
    /// 10000 lines, plus once more at the end for the remainder.
    /// </summary>
    public static void someCallsToWrite()
    {
        FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None, 8, FileOptions.None);
        using (StreamReader sr = new StreamReader(fs))
        {
            StringBuilder sb = new StringBuilder();
            int count = 0;
            int mod = 10000;
            while (sr.Peek() >= 0)
            {
                count++;
                string s = sr.ReadLine();
                sb.Append("\n" + s);
                if (count % mod == 0)
                {
                    Console.Write(sb);
                    sb = new StringBuilder();
                }
            }
            Console.Write( sb );
        }
    }

    /// <summary>
    /// Reads all lines up front, then writes them from a Parallel.For loop,
    /// one console call per line.
    /// </summary>
    public static void inParallel()
    {
        string[] wordsFromFile = File.ReadAllLines( file );
        int length = wordsFromFile.Length;
        Parallel.For( 0, length, i => {
            Console.WriteLine( wordsFromFile[i] );
        });
    }

    /// <summary>
    /// Reads all lines up front, accumulates them into one StringBuilder per
    /// Parallel.For local state and writes each builder when that state finishes.
    /// </summary>
    public static void inParallelBlocks()
    {
        string[] wordsFromFile = File.ReadAllLines(file);
        int length = wordsFromFile.Length;
        Parallel.For<StringBuilder>(0, length,
            () => { return new StringBuilder(); },
            (i, loopState, sb) =>
            {
                sb.Append("\n" + wordsFromFile[i]);
                return sb;
            },
            (x) => { Console.Write(x); }
        );
    }

    #region iceManMinds
    /// <summary>
    /// Reads the whole file as fixed-size blocks on several threads per pass,
    /// assembles the blocks in file order and writes the ASCII-decoded text.
    /// </summary>
    public static void iceManMinds()
    {
        string FileName = file;
        long ThreadReadBlockSize = 50000;
        int NumberOfThreads = 4;
        byte[] _inputString;
        var fi = new FileInfo(FileName);
        long totalBytesRead = 0;
        long fileLength = fi.Length;
        long readPosition = 0L;
        Console.WriteLine("Reading Lines From {0}", FileName);
        var threads = new Thread[NumberOfThreads];
        var instances = new ReadThread[NumberOfThreads];
        _inputString = new byte[fileLength];
        while (totalBytesRead < fileLength)
        {
            // Only start workers for blocks that lie inside the file, and trim
            // the last block so it never extends past the measured length.
            int started = 0;
            for (int i = 0; i < NumberOfThreads && readPosition < fileLength; i++)
            {
                long blockSize = Math.Min(ThreadReadBlockSize, fileLength - readPosition);
                var rt = new ReadThread { StartPosition = readPosition, BlockSize = blockSize };
                instances[i] = rt;
                threads[i] = new Thread(rt.Read);
                threads[i].Start();
                readPosition += blockSize;
                started++;
            }
            for (int i = 0; i < started; i++)
            {
                threads[i].Join();
            }
            long bytesThisPass = 0;
            for (int i = 0; i < started; i++)
            {
                // After Read() has run, BlockSize holds the number of bytes actually read.
                if (instances[i].BlockSize > 0)
                {
                    Array.Copy(instances[i].Output, 0L, _inputString, instances[i].StartPosition,
                        instances[i].BlockSize);
                    bytesThisPass += instances[i].BlockSize;
                }
            }
            totalBytesRead += bytesThisPass;
            if (bytesThisPass == 0)
            {
                // No progress (e.g. the file shrank after its length was measured):
                // stop instead of spinning forever.
                break;
            }
        }
        string finalString = Encoding.ASCII.GetString(_inputString);
        Console.WriteLine(finalString);//.Substring(104250000, 50000));
    }

    /// <summary>
    /// Work item for one thread: reads up to <see cref="BlockSize"/> bytes of
    /// the file starting at <see cref="StartPosition"/>.
    /// </summary>
    private class ReadThread
    {
        /// <summary>Byte offset in the file where this block starts.</summary>
        public long StartPosition { get; set; }

        /// <summary>
        /// Requested byte count before <see cref="Read"/> runs; the number of
        /// bytes actually read afterwards.
        /// </summary>
        public long BlockSize { get; set; }

        /// <summary>Buffer holding the bytes read by <see cref="Read"/>.</summary>
        public byte[] Output { get; private set; }

        /// <summary>
        /// Opens the file, seeks to <see cref="StartPosition"/> and fills
        /// <see cref="Output"/>, storing the byte count read in <see cref="BlockSize"/>.
        /// </summary>
        public void Read()
        {
            int wanted = (int)BlockSize;
            int total = 0;
            Output = new byte[wanted];
            // using guarantees the handle is released even when Seek/Read throws.
            using (var inStream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                inStream.Seek(StartPosition, SeekOrigin.Begin);
                // Stream.Read may return fewer bytes than requested, so keep
                // reading until the block is full or end of file is reached.
                while (total < wanted)
                {
                    int n = inStream.Read(Output, total, wanted - total);
                    if (n == 0)
                    {
                        break;
                    }
                    total += n;
                }
            }
            BlockSize = total;
        }
    }
    #endregion

    /// <summary>
    /// Calibration test: sleeps for 500 ms so the harness can be checked
    /// against a known duration.
    /// </summary>
    public static void testTiming()
    {
        Thread.Sleep(500);
    }
}