C 返回跟踪字符的http套接字请求_C_Sockets_Http

C 返回跟踪字符的http套接字请求

c sockets http

C 返回跟踪字符的http套接字请求,c,sockets,http,C,Sockets,Http,我正在学习C语言的网络编程，并试图创建一个玩具版的wget 但是，当我运行程序时，我得到的页面在开头和结尾都有一些尾随字符（本例中为0&f43）该程序包含两个.c和两个.h文件一个用于解析（天真地）地址，另一个用于发出网络请求并转储数据以下是用于解析输入的文件： url.h #ifndef URL_H #define URL_H /* information of an URL*/ struct url_info { char* url; //full url char

我正在学习C语言的网络编程，并试图创建一个玩具版的wget

但是，当我运行程序时，我得到的页面在开头和结尾都有一些尾随字符（本例中为0&f43）

该程序包含两个.c和两个.h文件

一个用于解析（天真地）地址，另一个用于发出网络请求并转储数据

以下是用于解析输入的文件：

url.h

#ifndef URL_H
#define URL_H

/* information of an URL*/
struct url_info
{
    char* url; //full url
    char* protocol; // protocol type: http, ftp, etc...
    char* host; // host name
    int port;   //port number
    char* path; //path
};
typedef struct url_info url_info;

static const char P_HTTP[] = "http";

void parse_url(char* url, url_info *info);

void exit_with_error(char* message);

void print_url_info(url_info info);

#endif //URL_H

#ifndef WGETX_H_
#define WGETX_H_

#define B_SIZE 1024 * 5000

void write_data(const char *path, const char *data);

char* download_page(url_info info, char *buff);

char* http_get_request(char* path, char* host);

char* read_http_reply(char* recv_buf_t);

unsigned long ipfromhost(const char *host);

#endif

url.c

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"url.h"


void parse_url(char* url, url_info *info)
{
    // url format: [http://]<hostname>[:<port>]/<path>

    char *full_url = malloc((strlen(url) + 1) * sizeof(char));
  char *protocol;
  char *path;
  char *host;
  int port;

  strcpy(full_url, url);
    info->url = full_url;

  char *protocol_token = strstr(url, "://");
  if (protocol_token){
    protocol = url;
    *protocol_token = '\0';
    url = protocol_token + 3;
  } else {
    protocol = "http";
  }
    info->protocol = protocol;

  char *port_token = strstr(url, ":");
  char *path_token = strstr(url, "/");

  if (port_token && port_token < path_token){
        port = atoi(port_token + 1);
        *port_token = '\0';
  } else {
    port = 80;
  }
    info->port = port;

  if (path_token){
    *path_token = '\0';
    host = url;
    path = path_token + 1;
        info->host = host;
        info->path = path;
  } else {
    exit_with_error("No trailing /.");
  }
}


void print_url_info(url_info info){
    printf("The URL contains following information: \n");
    printf("Full url:\t%s\n", info.url);
    printf("Protocol type:\t%s\n", info.protocol);
    printf("Host name:\t%s\n", info.host);
    printf("Port No.:\t%d\n", info.port);
    printf("Path:\t\t%s\n", info.path);
}


void exit_with_error(char *message)
{
    fprintf(stderr, "%s\n", message);
    exit(EXIT_FAILURE);
}

 #include <sys/types.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <unistd.h>
 #include <netdb.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
 #include <string.h>

#include "url.h"
#include "wgetX.h"

int main(int argc, char* argv[])
{

    url_info info;


    if (argc != 2) {
        exit_with_error("The wgetX must have exactly 1 parameter as input. \n");
    }
    char *url = argv[1];
    parse_url(url, &info);

    char *buf;
    buf = malloc(sizeof(char)*B_SIZE);
    bzero(buf, B_SIZE);

    download_page(info, buf);
  printf("%s", buf);

    free(buf);
    return (EXIT_SUCCESS);
}

char* download_page(url_info info, char *buf)
{
    struct sockaddr_in dest;
    int len, sz, mysocket;
    char *request = http_get_request(info.path, info.host);

    mysocket = socket(AF_INET, SOCK_STREAM, 0);
    memset(&dest, 0, sizeof(dest));
    dest.sin_family = AF_INET;
    dest.sin_addr.s_addr = ipfromhost(info.host);
    dest.sin_port = htons(info.port);
    connect(mysocket, (struct sockaddr *)&dest, sizeof(struct sockaddr));
    send(mysocket, request, strlen(request), 0);

    len = 0;
    sz = 0;
    do {
        len = recv(mysocket, buf + sz, B_SIZE - sz, 0);
        if (len == -1) {continue;}
        sz += len;
    } while (len > 0);

    *(buf + sz) = '\0';

  close(mysocket);
    return buf;
}


char* http_get_request(char* path, char* host) {
    char * request_buffer = (char *) malloc(1024);
    memset(request_buffer, 0, sizeof(*request_buffer));
    snprintf(request_buffer, 1024, "GET /%s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n",
            path, host);
    return request_buffer;
}

unsigned long ipfromhost(const char *host){
  struct in_addr **addr_list;
  struct hostent *he;
  if ((he = gethostbyname(host)) != NULL){
    addr_list = (struct in_addr **) he->h_addr_list;
    int i;
    for (i = 0; addr_list[i] != NULL; i++){
      return addr_list[i]->s_addr;
    }
    exit_with_error("Couldn't resolve host to ip adress\n");
    return 0;
  } else {
    exit_with_error("Couldn't resolve host to ip adress\n");
    return 0;
  }
}

wgetX.c

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"url.h"


void parse_url(char* url, url_info *info)
{
    // url format: [http://]<hostname>[:<port>]/<path>

    char *full_url = malloc((strlen(url) + 1) * sizeof(char));
  char *protocol;
  char *path;
  char *host;
  int port;

  strcpy(full_url, url);
    info->url = full_url;

  char *protocol_token = strstr(url, "://");
  if (protocol_token){
    protocol = url;
    *protocol_token = '\0';
    url = protocol_token + 3;
  } else {
    protocol = "http";
  }
    info->protocol = protocol;

  char *port_token = strstr(url, ":");
  char *path_token = strstr(url, "/");

  if (port_token && port_token < path_token){
        port = atoi(port_token + 1);
        *port_token = '\0';
  } else {
    port = 80;
  }
    info->port = port;

  if (path_token){
    *path_token = '\0';
    host = url;
    path = path_token + 1;
        info->host = host;
        info->path = path;
  } else {
    exit_with_error("No trailing /.");
  }
}


void print_url_info(url_info info){
    printf("The URL contains following information: \n");
    printf("Full url:\t%s\n", info.url);
    printf("Protocol type:\t%s\n", info.protocol);
    printf("Host name:\t%s\n", info.host);
    printf("Port No.:\t%d\n", info.port);
    printf("Path:\t\t%s\n", info.path);
}


void exit_with_error(char *message)
{
    fprintf(stderr, "%s\n", message);
    exit(EXIT_FAILURE);
}

 #include <sys/types.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <unistd.h>
 #include <netdb.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
 #include <string.h>

#include "url.h"
#include "wgetX.h"

int main(int argc, char* argv[])
{

    url_info info;


    if (argc != 2) {
        exit_with_error("The wgetX must have exactly 1 parameter as input. \n");
    }
    char *url = argv[1];
    parse_url(url, &info);

    char *buf;
    buf = malloc(sizeof(char)*B_SIZE);
    bzero(buf, B_SIZE);

    download_page(info, buf);
  printf("%s", buf);

    free(buf);
    return (EXIT_SUCCESS);
}

char* download_page(url_info info, char *buf)
{
    struct sockaddr_in dest;
    int len, sz, mysocket;
    char *request = http_get_request(info.path, info.host);

    mysocket = socket(AF_INET, SOCK_STREAM, 0);
    memset(&dest, 0, sizeof(dest));
    dest.sin_family = AF_INET;
    dest.sin_addr.s_addr = ipfromhost(info.host);
    dest.sin_port = htons(info.port);
    connect(mysocket, (struct sockaddr *)&dest, sizeof(struct sockaddr));
    send(mysocket, request, strlen(request), 0);

    len = 0;
    sz = 0;
    do {
        len = recv(mysocket, buf + sz, B_SIZE - sz, 0);
        if (len == -1) {continue;}
        sz += len;
    } while (len > 0);

    *(buf + sz) = '\0';

  close(mysocket);
    return buf;
}


char* http_get_request(char* path, char* host) {
    char * request_buffer = (char *) malloc(1024);
    memset(request_buffer, 0, sizeof(*request_buffer));
    snprintf(request_buffer, 1024, "GET /%s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n",
            path, host);
    return request_buffer;
}

unsigned long ipfromhost(const char *host){
  struct in_addr **addr_list;
  struct hostent *he;
  if ((he = gethostbyname(host)) != NULL){
    addr_list = (struct in_addr **) he->h_addr_list;
    int i;
    for (i = 0; addr_list[i] != NULL; i++){
      return addr_list[i]->s_addr;
    }
    exit_with_error("Couldn't resolve host to ip adress\n");
    return 0;
  } else {
    exit_with_error("Couldn't resolve host to ip adress\n");
    return 0;
  }
}

输出

我收到了http回复消息，其中包含状态代码和所有内容，就在“之前” 我得到的是垃圾字符：在html的末尾是零，在html的开头是“f43”

欢迎来到HTTP的奇妙世界。请注意，HTTP并不是一个微不足道的协议，尽管它看起来可能是这样的。它应该说，最初在中发布的HTTP/1.1标准有176页的文本

您可能在这里看到的是内容的分块传输编码。在这种编码中，内容不是作为一个单独的片段传输的，而是以几个块传输的，每个块都以长度（十六进制）作为前缀。例如：

 HTTP/1.1 200 ok
 Transfer-Encoding: chunked

 a
 0123456789
 12
 These are 18 bytes
 0

在您的特定情况下，初始f43“就在html开始之前”是以下区块的长度（f43十六进制，十进制为3907），“结尾处的零”是最后区块的长度（0）

有关更多信息，请参阅或。

这太多的代码了，我敢打赌你没有正确地在某个地方对字符串进行零终止。此外，如果你想与http服务器通信，为什么不直接使用libcurl或类似的工具呢？我想这会更容易。这是为了学校，所以我必须以这种方式使用套接字。这是为了学习目的！关于：

cc-g-Wall-o$@-c$OT：关于函数：exit_with_error（）
，语句：fprintf（stderr，“%s\n”，message）；
这不会告诉用户发生错误的原因。建议使用peror（）
where appropriate.谢谢！我可以强制传输编码不分块吗？@aripy887:只要你声称支持HTTP/1.1
你就必须支持分块编码。如果你在请求中只使用HTTP/1.0
，那么服务器就不能使用分块编码，因为这只是用HTTP/1.1 。