比较两个文件：为什么Java代码比C++代码快？我需要逐字节比较两个文件。例如，比较两个大小为650MB的文件时，C++需要40秒，而Java只需要10秒。_Java_C++_File_Ifstream

比较两个文件：为什么Java代码比C++代码快？我需要逐字节比较两个文件。例如，比较两个大小为650MB的文件时，C++需要40秒，而Java只需要10秒。

java c++ file

比较两个文件或者为什么Java中的代码比C++；？为什么java中的代码比C++快？我需要逐字节比较两个文件。例如，当比较两个文件大小时，650MB需要40秒到C++，10秒为java。,java,c++,file,ifstream,Java,C++,File,Ifstream,C++代码： //bufferSize = 8mb std::ifstream lFile(lFilePath.c_str(), std::ios::in | std::ios::binary); std::ifstream rFile(rFilePath.c_str(), std::ios::in | std::ios::binary); std::streamsize lReadBytesCount = 0; std::streamsize rReadBytesCount = 0; do

C++代码：

// Compares two files chunk by chunk using std::ifstream.
// Author's note: bufferSize = 8 MB. bufferSize is dereferenced below, so it is
// pointer-like; p_lBuffer / p_rBuffer and bufferSize are declared outside this
// fragment.
// NOTE(review): neither stream is checked for a successful open before reading.
std::ifstream lFile(lFilePath.c_str(), std::ios::in | std::ios::binary);
std::ifstream rFile(rFilePath.c_str(), std::ios::in | std::ios::binary);

std::streamsize lReadBytesCount = 0;
std::streamsize rReadBytesCount = 0;

do {
    // Request up to *bufferSize bytes from each file; gcount() reports how
    // many bytes the preceding read() actually extracted.
    lFile.read(p_lBuffer, *bufferSize);
    rFile.read(p_rBuffer, *bufferSize);
    lReadBytesCount = lFile.gcount();
    rReadBytesCount = rFile.gcount();

    // A different chunk length or any differing byte within the chunk means
    // the files are not identical.
    if (lReadBytesCount != rReadBytesCount ||
        std::memcmp(p_lBuffer, p_rBuffer, lReadBytesCount) != 0)
    {
        return false;
    }
// Keep going while at least one stream is still in the good() state.
} while (lFile.good() || rFile.good());

// Every chunk matched.
return true;

和Java代码：

// Compares the contents of f1 and f2 in 64-byte chunks. `result` is set to
// false at the first difference; `countItr` counts the non-empty chunks that
// compared equal. (f1, f2, result and countItr are declared outside this
// fragment.)
InputStream is1 = new BufferedInputStream(new FileInputStream(f1));
InputStream is2 = new BufferedInputStream(new FileInputStream(f2));

byte[] buffer1 = new byte[64];
byte[] buffer2 = new byte[64];

int readBytesCount1 = 0, readBytesCount2 = 0;

while (true) {
    // Fill each buffer completely (or up to end of stream). A single read()
    // may return fewer bytes than requested, which would misalign the chunks.
    readBytesCount1 = 0;
    while (readBytesCount1 < buffer1.length) {
        int n = is1.read(buffer1, readBytesCount1, buffer1.length - readBytesCount1);
        if (n == -1) {
            break;
        }
        readBytesCount1 += n;
    }
    readBytesCount2 = 0;
    while (readBytesCount2 < buffer2.length) {
        int n = is2.read(buffer2, readBytesCount2, buffer2.length - readBytesCount2);
        if (n == -1) {
            break;
        }
        readBytesCount2 += n;
    }

    // Different chunk lengths mean one file ended before the other.
    if (readBytesCount1 != readBytesCount2) {
        result = false;
        break;
    }

    // Compare only the bytes actually read; the rest of the arrays may hold
    // stale data from a previous iteration.
    boolean chunkEqual = true;
    for (int i = 0; i < readBytesCount1; i++) {
        if (buffer1[i] != buffer2[i]) {
            chunkEqual = false;
            break;
        }
    }
    if (!chunkEqual) {
        result = false;
        break;
    }

    if (readBytesCount1 > 0) {
        countItr++;
    }
    // A partially filled (or empty) chunk means both streams reached the end.
    if (readBytesCount1 < buffer1.length) {
        break;
    }
}

一个可能的答案是：C++代码使用8 MB的缓冲区，而Java版本使用64字节的缓冲区。如果差异出现在前几个字节内，会发生什么情况？这时Java版本只需要读取64字节就能发现差异，而C++版本需要读取800万字节。如果您希望比较两者，就应该使用相同的缓冲区大小。

此外，如果文件相同，则可能还有其他原因造成差异。考虑分配8 MB数据（甚至跨越多个页面）所需的时间，而不是简单分配64个字节所需的时间。由于您正在逐次读取，开销实际上在内存方面。

C++代码使用8 MB的缓冲区，而Java版本使用64字节的缓冲区。如果差异出现在前几个字节内，会发生什么情况？这时Java版本只需要读取64字节就能发现差异，而C++版本需要读取800万字节。如果您希望比较两者，就应该使用相同的缓冲区大小。

此外，如果文件相同，则可能还有其他原因造成差异。考虑分配8 MB数据（甚至跨越多个页面）所需的时间，而不是简单分配64个字节所需的时间。由于您是按顺序读取，因此开销实际上在内存方面。

虽然关于缓冲区大小的回答非常好，而且这一点可能非常重要，但问题的另一个可能来源是使用了 iostream 库。我一般不会用这个库来做这类工作。例如，它可能带来的一个问题是额外的拷贝，因为 iostream 会替您做缓冲。我会直接使用原始的 read 和 write 调用。

例如，在Linux C++11平台上，我会这样做：

#include <array>
#include <algorithm>
#include <string>
#include <stdexcept>

// Needed for open and close on a Linux platform
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

using ::std::string;

/// Returns true when the two named files have byte-identical contents.
///
/// Both files are opened read-only and compared in 4096-byte chunks.
///
/// @param fname1 path of the first file
/// @param fname2 path of the second file
/// @return true if both files have the same length and the same bytes
/// @throws std::runtime_error if either file cannot be opened or read
bool same_contents(const string &fname1, const string &fname2)
{
   using size_type = ::std::size_t;

   // Closes the wrapped descriptor on every exit path (return or throw).
   struct FdGuard {
      int fd;
      explicit FdGuard(int f) : fd(f) {}
      FdGuard(const FdGuard &) = delete;
      FdGuard &operator=(const FdGuard &) = delete;
      ~FdGuard() { if (fd >= 0) ::close(fd); }
   };

   const FdGuard file1(::open(fname1.c_str(), O_RDONLY));
   if (file1.fd < 0) {
      throw ::std::runtime_error("Open of " + fname1 + " failed.");
   }
   const FdGuard file2(::open(fname2.c_str(), O_RDONLY));
   if (file2.fd < 0) {
      throw ::std::runtime_error("Open of " + fname2 + " failed.");
   }

   // Reads until `capacity` bytes have arrived or end-of-file is reached.
   // read() may legitimately return fewer bytes than requested before EOF,
   // so a single short read must not be treated as the end of the file.
   const auto fill = [](int fd, char *dest, size_type capacity,
                        const string &name) -> size_type {
      size_type total = 0;
      while (total < capacity) {
         const ssize_t got = ::read(fd, dest + total, capacity - total);
         if (got < 0) {
            throw ::std::runtime_error("Error reading " + name);
         }
         if (got == 0) {
            break;  // end of file
         }
         total += static_cast<size_type>(got);
      }
      return total;
   };

   ::std::array<char, 4096> buf1;
   ::std::array<char, 4096> buf2;

   for (;;) {
      const size_type len1 = fill(file1.fd, buf1.data(), buf1.size(), fname1);
      const size_type len2 = fill(file2.fd, buf2.data(), buf2.size(), fname2);

      // After full fills, differing lengths mean one file ended first.
      if (len1 != len2) {
         return false;
      }
      if (!::std::equal(buf1.data(), buf1.data() + len1, buf2.data())) {
         return false;
      }
      // A chunk that is not full means both files hit EOF together.
      if (len1 < buf1.size()) {
         return true;
      }
   }
}

#包括
#包括
#包括
#包括
//需要在Linux平台上打开和关闭
#包括
#包括
#包括
#包括
使用：：std：：string；
bool相同的内容（常量字符串和fname1、常量字符串和fname2）
{
int fd1=：：open（fname1.c_str（），O_RDONLY）；
if（fd1<0）{
throw:：std:：runtime_错误（“打开“+fname1+”失败”）；
}
int fd2=：：open（fname2.c_str（），O_RDONLY）；
if（fd2<0）{
：：关闭（fd1）；
fd1=-1；
throw:：std:：runtime_错误（“打开“+fname2+”失败”）；
}
布尔相同=正确；
试一试{
：：std：：数组buf1；
：：std：：数组buf2；
bool done=false；
而（！完成）{
int read1=：：read（fd1，buf1.data（），buf1.size（））；
如果（读1<0）{
throw:：std:：runtime_错误（“错误读取”+fname1）；
}
int read2=：：read（fd2，buf2.data（），buf2.size（））；
如果（读数2<0）{
throw:：std:：runtime_错误（“错误读取”+fname2）；
}
如果（read1！=read2）{
相同=错误；
完成=正确；
}
如果（相同&&read1>0）{
常量自动比较结果=：：std:：不匹配（buf1.begin（），
buf1.begin（）+read1，
buf2.begin（））；
如果（首先比较结果！=（buf1.begin（）+read1））{
相同=错误；
}
}
如果（！same | |（buf1.size（）>read1））{
完成=正确；
}
}
}捕获（…）{
如果（fd1>=0）：：关闭（fd1）；
如果（fd2>=0）：：关闭（fd2）；
投掷；
}
如果（fd1>=0）：：关闭（fd1）；
如果（fd2>=0）：：关闭（fd2）；
返回相同的值；
}

虽然关于缓冲区大小的回答非常好，而且这一点可能非常重要，但问题的另一个可能来源是使用了 iostream 库。我一般不会用这个库来做这类工作。例如，它可能带来的一个问题是额外的拷贝，因为 iostream 会替您做缓冲。我会直接使用原始的 read 和 write 调用。

例如，在Linux C++11平台上，我会这样做：

#include <array>
#include <algorithm>
#include <string>
#include <stdexcept>

// Needed for open and close on a Linux platform
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

using ::std::string;

/// Returns true when the two named files have byte-identical contents.
///
/// Both files are opened read-only and compared in 4096-byte chunks.
///
/// @param fname1 path of the first file
/// @param fname2 path of the second file
/// @return true if both files have the same length and the same bytes
/// @throws std::runtime_error if either file cannot be opened or read
bool same_contents(const string &fname1, const string &fname2)
{
   using size_type = ::std::size_t;

   // Closes the wrapped descriptor on every exit path (return or throw).
   struct FdGuard {
      int fd;
      explicit FdGuard(int f) : fd(f) {}
      FdGuard(const FdGuard &) = delete;
      FdGuard &operator=(const FdGuard &) = delete;
      ~FdGuard() { if (fd >= 0) ::close(fd); }
   };

   const FdGuard file1(::open(fname1.c_str(), O_RDONLY));
   if (file1.fd < 0) {
      throw ::std::runtime_error("Open of " + fname1 + " failed.");
   }
   const FdGuard file2(::open(fname2.c_str(), O_RDONLY));
   if (file2.fd < 0) {
      throw ::std::runtime_error("Open of " + fname2 + " failed.");
   }

   // Reads until `capacity` bytes have arrived or end-of-file is reached.
   // read() may legitimately return fewer bytes than requested before EOF,
   // so a single short read must not be treated as the end of the file.
   const auto fill = [](int fd, char *dest, size_type capacity,
                        const string &name) -> size_type {
      size_type total = 0;
      while (total < capacity) {
         const ssize_t got = ::read(fd, dest + total, capacity - total);
         if (got < 0) {
            throw ::std::runtime_error("Error reading " + name);
         }
         if (got == 0) {
            break;  // end of file
         }
         total += static_cast<size_type>(got);
      }
      return total;
   };

   ::std::array<char, 4096> buf1;
   ::std::array<char, 4096> buf2;

   for (;;) {
      const size_type len1 = fill(file1.fd, buf1.data(), buf1.size(), fname1);
      const size_type len2 = fill(file2.fd, buf2.data(), buf2.size(), fname2);

      // After full fills, differing lengths mean one file ended first.
      if (len1 != len2) {
         return false;
      }
      if (!::std::equal(buf1.data(), buf1.data() + len1, buf2.data())) {
         return false;
      }
      // A chunk that is not full means both files hit EOF together.
      if (len1 < buf1.size()) {
         return true;
      }
   }
}

#包括
#包括
#包括
#包括
//需要在Linux平台上打开和关闭
#包括
#包括
#包括
#包括
使用：：std：：string；
bool相同的内容（常量字符串和fname1、常量字符串和fname2）
{
int fd1=：：open（fname1.c_str（），O_RDONLY）；
if（fd1<0）{
throw:：std:：runtime_错误（“打开“+fname1+”失败”）；
}
int fd2=：：open（fname2.c_str（），O_RDONLY）；
if（fd2<0）{
：：关闭（fd1）；
fd1=-1；
throw:：std:：runtime_错误（“打开“+fname2+”失败”）；
}
布尔相同=正确；
试一试{
：：std：：数组buf1；
：：std：：数组buf2；
bool done=false；
而（！完成）{
int read1=：：read（fd1，buf1.data（），buf1.size（））；
如果（读1<0）{
throw:：std:：runtime_错误（“错误读取”+fname1）；
}
int read2=：：read（fd2，buf2.data（），buf2.size（））；
如果（读数2<0）{
throw:：std:：runtime_错误（“错误读取”+fname2）；
}
如果（read1！=read2）{
相同=错误；
完成=正确；
}
如果（相同&&read1>0）{
常量自动比较结果=：：std:：不匹配（buf1.begin（），
buf1.begin（）+read1，
buf2.begin（））；
如果（首先比较结果！=（buf1.begin（）+read1））{
相同=错误；
}
}
如果（！same | |（buf1.size（）>read1））{
完成=正确；
}
}
}捕获（…）{
如果（fd1>=0）
//bufferSize = 8mb

#include <iostream>
#include <fstream>
#include <cstring>

const size_t N = 8 * 1024 * 1024;
char buf1[N], buf2[N];

/// Compares the two files named by argv[1] and argv[2] byte by byte.
///
/// Exit status: 0 when the files are identical, 1 when they differ,
/// 2 on a usage error or when a file cannot be opened or read.
int main(int argc, char **argv)
{
    if (argc < 3) {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "compare")
                  << " FILE1 FILE2\n";
        return 2;
    }

    std::ios_base::sync_with_stdio(false);

    // Binary mode: the comparison must see the raw bytes.
    std::ifstream f1(argv[1], std::ios::in | std::ios::binary);
    std::ifstream f2(argv[2], std::ios::in | std::ios::binary);
    if (!f1 || !f2) {
        std::cerr << "cannot open input file\n";
        return 2;
    }

    for (;;) {
        // read() sets failbit when fewer than sizeof(buf) bytes are available,
        // so the stream state cannot be the loop condition: the final partial
        // chunk must still be compared using gcount().
        f1.read(buf1, sizeof(buf1));
        f2.read(buf2, sizeof(buf2));
        const std::streamsize n1 = f1.gcount();
        const std::streamsize n2 = f2.gcount();

        if (n1 != n2 ||
            std::memcmp(buf1, buf2, static_cast<std::size_t>(n1)) != 0)
            return 1;

        // Either stream leaving the good state ends the comparison. With
        // equal chunk lengths this normally means both reached EOF.
        if (!f1 || !f2)
            break;
    }

    // Distinguish a genuine I/O error from a normal end-of-file.
    if (f1.bad() || f2.bad())
        return 2;

    return 0;
}