String http Gzip响应的精确副本压缩为字符串_String_Http_Gzip_D

如何将 HTTP gzip 压缩响应的内容原样（逐字节）复制到字符串中

string http d

String http Gzip响应的精确副本压缩为字符串,string,http,gzip,d,String,Http,Gzip,D,我需要帮助我正在尝试获取内容编码为gzip的网站内容，在Windows上使用dmd v2.066.1。这是我的测试网址：“http://diaboli.pl/test2.html” 我的HTTP请求是： GET /test2.html HTTP/1.1 Host: diaboli.pl Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 Accept-Language: pl,en-US;q=0.7,

我需要帮助

我正在尝试获取内容编码为gzip的网站内容，在Windows上使用dmd v2.066.1。这是我的测试网址：“

http://diaboli.pl/test2.html

”

我的HTTP请求是：

GET /test2.html HTTP/1.1
Host: diaboli.pl
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language: pl,en-US;q=0.7,en;q=0.3
Accept-Encoding: gzip, deflate
User-Agent: My Browser
Referer: http://google.pl
DNT: 1

服务器响应为：

HTTP/1.1 200 OK
Date: Sat, 24 Jan 2015 23:02:00 GMT
Server: Apache
Last-Modified: Sat, 24 Jan 2015 22:48:44 GMT
ETag: "5c468ad-83f-50d6db511eb00"
Accept-Ranges: bytes
Vary: Accept-Encoding,User-Agent
Content-Encoding: gzip
Content-Length: 942
Content-Type: text/html

.)┘R!SĽ╣ň┌KRB:éş^»{█ĺ.ç}aOě_DźŢ░▼'dĘ$ëĚk\|j\pý§Ěí▀k║Ź■ß♠┐}ú2žŢ  ´dĹĺńMłÎ▒└╚‼/§B⌂Ĺ▬°'˘uŕNá☺■█Ór↓m(┘đ▬Ţ┼ńĺ╦⌂
§gŰůqýä╗˘%p▬■&B♂M]§Üú3ý^ý-ÎD`x!Ő╔&M♥~╣y╬uşëňZ@▒]˘ä2}Ś╣xdÄyWüm§?ąě░Äd4,d‼î-▬
┬♣Bön°6{őu└♀☺█UĂ└,aF˘├☼☻OŔ˛mţË▄▀Čó¸ö31ÎňEÖKŮţĄîÔŐ←ôň¸HÉ┌bŤ}Dnń'ń9┌
Îă♠¶U♣VI^▲hËőŃ└_zďĆ6┬6█¨}{╝╦ÄřeđŠoŤčů¤űU´öěŁ*ŠxĂ☻(,─AôlZ»Ú^ßćş¸ő╬↓M`¬PË═qí¨Ýç▼7╣§y♫<J╬ÓŇëb#PćR§bˇĽ>Ěz╣┴âž7uř┐ `$SřítR¶╗u ź☻‼ĘXçf☺°NH▄˛☻ şp─RĄ►¬w╬\758GN║K)     ;ĺ\ÝŇľ♫╩┼╬|ABYÍţ∟═Yů+╔y?ťkVĐ┼
nş║☼jv¶ĐSô9Dů♠▓Ç˙üK╬2\˝d[☼ <ľ┘Ń↓ü╠âG ˇ¸
ľyŇđd■ß▲e☼Â¸♣e_ÂśúQ÷śń,ÖĹ¬[N╝b┼Ř└ŕ↓ÚcS┴3╗╠w▀[ş↕ĺŽCňđś↕⌂═őç˛ţHW∟d=╩║Y►│Ô]sČšX§_ˇ↔ĹCČŤI┬y┤ŕ▲╬Ő↕╩§┌}í m\∟Öç#<W*Ű┐h˘g2SęćĐqš►EËý üXđ.S▀kš2←↑►â☼Ň5Ę╬♀6∟\←B|fđşÚ*ZŽ%▀Î↓@ěEŕ♦TNgcż,→‼│→p-←î˘ă☻p$Ř%ôe
♠♀ŻýŁ8JiŔ▒"L■♀óą↨Č┘´☻«┌:ŰńĹ>♣§╝×░♂öĄT`=BÂ|5mˇ|Ňs)ŐRĹ═▒é┴\yru▬ć=Rďĺ]↔ŰýÉĆ☼─ć↑¬pZÇ▓9PC§ę4 ×@ş Ź☺╬ňLj█Á¨uĄ:│§Bšš∟ďŃ?▼nvO!0↔}î*╠aŢ ţh
Ľ*7Îĺ$vn ŔIŘM¸♀˙¶ÎŞŞb⌂♫äý"´♂çK}⌂Y♀ ♣XŽëM

我可以确定内容的长度，它与HTTP内容长度头的值相等，但我可以看到，它与原始的字符串不同

同样有趣的是，我可以用zlib uncompress（）函数解压坏的内容字符串，它不会返回zlib数据错误，而是解压后的内容。当然，像FF或IE这样的浏览器可以毫无问题地显示完整的解压缩内容

我正在连接到服务器，如下所示：

import std.stdio, import std.string, std.conv, std.socket, std.stream, std.socketstream, std.zlib;

// Script overview: opens a TCP connection to `domain`:`port`, sends a hand-built
// HTTP GET request, reads the response one character at a time while splitting it
// into header and body buffers, then feeds the body to std.zlib for decompression.

// Connection target and working state.
// NOTE(review): `request_uri` is declared here but never assigned anywhere in this
// excerpt, and `pos` is not used by the visible script.
ushort port=80; string domain="diaboli.pl"; 
string request_uri; int[] pos; string request; string buffer; string znak; string line; 
// `contentlength` stays -1 until a Content-Length header is parsed.
// `readingbody` is a small state flag: 0 = still in headers, 1 = the blank line
// ending the headers was just seen, 2 = every further character belongs to the body.
int contentlength=-1; int[] postab; string bodybuffer; string headerbuffer; int readingbody=0; 
std.zlib.UnCompress u; const(void)[] udata;

// Connect and wrap the socket in a stream so it can be read/written as text.
Socket sock = new TcpSocket(new InternetAddress(domain, port));
Stream ss   = new SocketStream(sock);

// Build the request line and headers; every line ends with CRLF and the empty
// line at the end terminates the header section.
// NOTE(review): the request advertises HTTP/1.1; a server may then answer with a
// chunked transfer encoding, which the read loop below does not decode — confirm.
request="GET " ~ request_uri ~ " HTTP/1.1\r\n";
request~="Host: " ~ domain ~ "\r\n";
request~="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n";
request~="Accept-Language: pl,en-US;q=0.7,en;q=0.3\r\n";
request~="Accept-Encoding: gzip, deflate\r\n";
request~="User-Agent: My Browser\r\n";
request~="Referer: http://google.pl\r\n";
request~="DNT: 1\r\n";
request~="\r\n";

// Echo the request to stdout for debugging.
writeln("HTTP request:\n---");
writeln(request);
writeln("---");

// Send the request over the socket stream.
ss.writeString(request);

writeln("\nAll response from the server character by character:\n---");
line="";
// Read loop: runs until the stream reports end-of-file.
while (1)
{
    // Promote state 1 -> 2 at the start of the iteration AFTER the blank line was
    // seen, so the "\n" of the blank line itself is still counted as header data.
    if (readingbody==1) readingbody=2; //the way to separate headers and the content - first part.

    // Read one character and convert it to a one-character string.
    // NOTE(review): Stream.getc is reported to perform newline translation, which
    // would corrupt binary (gzip) payload bytes — confirm against std.stream docs.
    znak = to!string(ss.getc());
    if (ss.eof()) break;
    line~=znak;
    //if (readingbody==2) 
    // Echo every received character to stdout (headers and body alike).
    write(znak);

    // A line feed completes the current line; inspect it, then reset `line`.
    if (znak=="\n")
    {
        if (strpos(line,"Content-Length: ")>-1) 
        {
            // Collect the positions of "\r" and "\n"; the smallest one marks the end
            // of the numeric value. `postab` is never cleared between matches.
            postab ~= strpos(line,"\r");
            postab ~= strpos(line,"\n");
            // 16 is the length of "Content-Length: "; the value is taken from that
            // fixed offset, i.e. the header name is assumed to start the line.
            contentlength=to!int(substr(line,16,postab.sort[0]-16));
        }

        // A line consisting only of CRLF ends the header section.
        if (readingbody==0 && line=="\r\n") readingbody=1;
        line="";
    }

    // `buffer` accumulates the complete raw response (headers + body).
    buffer ~= znak;

    //the way to separate headers and the content - second part.
    // NOTE(review): `line` was already reset above whenever it ended in "\n", so
    // this second check looks redundant — verify before removing.
    if (readingbody==0 && line=="\r\n") readingbody=1;
    // Route the character to the body buffer only once state 2 has been reached.
    if (readingbody==2) bodybuffer ~= znak;
    else headerbuffer ~= znak;
}

sock.close();

writeln("\n---");

// Compare the advertised length with the number of body characters collected.
write("Content-Length="); writeln(contentlength); //This is the Content-Length determined from the HTTP Content-Length header.
write("bodybuffer.length="); writeln(bodybuffer.length); //This the length of the content string

writeln("\nAll response copied into the string:\n---");
writeln(buffer);

writeln("---\nOnly content:\n---");
writeln(bodybuffer);

// Decompress the collected body and print it as text.
// `determineFromData` presumably lets zlib auto-detect a gzip vs. zlib header — confirm.
// NOTE(review): only the output of uncompress() is printed; any data the
// decompressor still holds internally would require a flush() call — confirm.
writeln("---\nUncompressed:\n---");
u = new UnCompress(HeaderFormat.determineFromData);
udata = u.uncompress(bodybuffer);
writeln(cast(string)udata);

//These are my simple text processing functions similar to php.
/**
 * Finds the first occurrence of `tofind` inside `str`, PHP-strpos style.
 *
 * Params:
 *   str             = text to search in
 *   tofind          = text to look for
 *   caseinsensitive = pass 1 to compare both operands after upper-casing them
 *
 * Returns: zero-based index of the first match, or -1 when there is none
 *          (an empty `str` always yields -1).
 */
int strpos(string str,string tofind,int caseinsensitive=0)
{
    // Fold both operands to upper case when a case-insensitive match is requested.
    if (caseinsensitive==1)
    {
        str=toUpper(str);
        tofind=toUpper(tofind);
    }

    // A needle longer than the haystack can never match.
    if (tofind.length>str.length) return -1;

    // Slide a window of the needle's length across the haystack and stop at the
    // first position where the window equals the needle.
    int start=0;
    while (start<str.length && start+tofind.length<=str.length)
    {
        if (str[start..start+tofind.length]==tofind) return start;
        start++;
    }
    return -1;
}

/**
 * Returns a slice of `str`, similar to PHP's substr().
 *
 * Params:
 *   str    = source text
 *   pos    = zero-based start index (in code units); negative values yield ""
 *   offset = maximum number of code units to return; values <= 0 yield ""
 *
 * Returns: the requested slice, shortened to the end of `str` when
 *          `pos + offset` runs past it, or "" when nothing can be returned.
 */
string substr(string str,int pos, int offset)
{
    // Reject empty input, negative starts, non-positive lengths and starts at or
    // beyond the end, instead of letting the slice below raise a RangeError.
    if (str.length==0 || pos<0 || offset<=0 || pos>=str.length) return "";

    // Do the arithmetic in size_t so that pos+offset cannot overflow int.
    size_t begin=pos;
    size_t end=begin+offset;

    // Clamp to the end of the string, as PHP's substr() does.
    if (end>str.length) end=str.length;
    return str[begin..end];
}

导入std.stdio、导入std.string、std.conv、std.socket、std.stream、std.socketstream、std.zlib；
ushort端口=80；string domain=“diaboli.pl”；
字符串请求uri；int[]pos；字符串请求；字符串缓冲区；字符串znak；弦线；
int contentlength=-1；国际邮政局；字符串缓冲区；线头缓冲器；int readingbody=0；
标准zlib.uncompressu；常量（无效）[]udata；
Socket sock=新的TcpSocket（新的InternetAddress（域、端口））；
流ss=新的SocketStream（sock）；
request=“GET”~request\u uri~“HTTP/1.1\r\n”；
请求~=“主机：”~domain~“\r\n”；
请求~=“接受：text/html，application/xhtml+xml，application/xml；q=0.9，*/*；q=0.8\r\n”；
请求~=“接受语言：pl，en-US；q=0.7，en；q=0.3\r\n”；
请求~=“接受编码：gzip，deflate\r\n”；
请求~=“用户代理：我的浏览器\r\n”；
请求~=”参考者：http://google.pl\r\n“；
请求~=“DNT:1\r\n”；
请求~=“\r\n”；
writeln（“HTTP请求：\n--”）；
书面（请求）；
书面形式（“--”）；
ss.书面限制（请求）；
writeln（“\n来自服务器的所有响应字符：\n---”；
第“”行；
而(1)
{
if（readingbody==1）readingbody=2；//分离标题和内容的方法-第一部分。
znak=to！字符串（ss.getc（））；
如果（ss.eof（））中断；
line~=znak；
//if（readingbody==2）
写入（znak）；
如果（znak==“\n”）
{
if（strpos（第行，“内容长度：”）>-1）
{
postab~=strpos（行“\r”）；
postab~=strpos（行“\n”）；
contentlength=to！int（substr（行，16，postab.sort[0]-16））；
}
如果（readingbody==0&&line==“\r\n”）readingbody=1；
第“”行；
}
缓冲液~=znak；
//分离标题和内容的方法-第二部分。
如果（readingbody==0&&line==“\r\n”）readingbody=1；
如果（readingbody==2）bodybuffer~=znak；
else-headerbuffer~=znak；
}
sock.close（）；
writeln（“\n--”）；
写入（“内容长度=”）；writeln（contentlength）//这是根据HTTP内容长度标头确定的内容长度。
写入（“bodybuffer.length=”）；writeln（bodybuffer.length）//这是内容字符串的长度
writeln（“\n将所有响应复制到字符串：\n---”；
写（缓冲区）；
writeln（“--\n仅内容：\n--”）；
writeln（bodybuffer）；
writeln（“--\n未压缩：\n--”）；
u=新解压缩（HeadPerformat.determineFromData）；
udata=u.解压（bodybuffer）；
writeln（cast（string）udata）；
//这些是我的简单文本处理函数，类似于php。
int strpos（字符串str，字符串tofind，int不区分大小写=0）
{
int pos=-1；
if（不区分大小写==1）
{
str=toUpper（str）；
tofind=toUpper（tofind）；
}
如果（str.length>=tofind.length）
{
对于（int i=0；istr.length）中断；
if（str[i..i+tofind.length]==tofind）
{
pos=i；
打破
}
}
}
返回pos；
}
字符串子字符串（字符串str、int pos、int offset）
{
字符串子字符串=”；
如果（str.length>0&&pos>-1&&offset>0）
{
子字符串=str[pos..pos+offset]；
}
返回子串；
}

您的代码有三个问题：

您使用了会执行换行转换的

Stream.getc

。这将损坏二进制数据。您可以通过替换以下内容来解决此问题：

znak = to!string(ss.getc());

替换为：

char c; ss.readBlock(&c, 1); znak = to!string(c);

不过，最好完全避免使用

std.stream

，因为它是一个有待替换的陈旧模块。

您指定了HTTP版本的1.1，因此服务器使用

Transfer-Encoding: chunked

发回内容。您的程序无法处理此传输编码。您可以将协议版本更改为1.0

使用

std.zlib

类时，必须在管道传输所有数据后调用

flush

。添加此行：

udata ~= u.flush();

通过这些更改，您的程序对我来说运行良好。

我怀疑您的控制台打印（或未打印）了特定字节（例如控制字符），因此您没有复制发送给您的确切字节。尝试重定向到某个文件，然后在十六进制编辑器中打开该文件。

char c; ss.readBlock(&c, 1); znak = to!string(c);

udata ~= u.flush();