以c语言输出文件,同时丢弃类似的字符组
我有一个包含以下文本的输入文件以c语言输出文件,同时丢弃类似的字符组,c,C,我有一个包含以下文本的输入文件 <html> <head><title>My web page</title></head> <body> <p>Foo bar<br /> Hi there!<br /> How is it going? </p> <p> I'm fine. And you? </p> <p> Here is a <
<html>
<head><title>My web page</title></head>
<body>
<p>Foo bar<br />
Hi there!<br />
How is it going?
</p>
<p>
I'm fine. And you?
</p>
<p>
Here is a <a href="somelink.html">link</a> to follow.
</p>
</body>
</html>
建议:
),即使有,您也只对
和
感兴趣if..else if..else..
结构。HTML可以用相当简洁的有限状态机表示。出于您的目的,您需要两个部分:读取和写入纯文本,以及解析任何HTML命令。纯文本可以按字符处理;对于HTML命令,您至少需要它的完整标记名。另外,HTML中的空白(空格、制表符和\n
序列)定义得不太严格。此问题在文本字符串的开头和结尾表现为“意外空格”
(中间的圆点表示空格)。请(a)正确设置代码的格式,并且(b)不要使用void main
我试图正确设置它的格式以便发布,我的导师更喜欢void main,所以我使用了它。如果您指的是visual studio中的代码格式,我总是在完成后将其清理干净。您认为上面的代码格式正确吗?另外,如果你的讲师更喜欢void main
,那么听起来他需要读一本关于C的好书。除了过多的空白,是的,所有的东西似乎都正确地缩进、嵌套和注释了。也许您愿意解释一下格式不正确的地方。我想您可能指的是html代码,那是从文本文件中复制粘贴过来的。您的代码在
中一次检查一个字符,这就是您捕获
的原因。它还将捕获
和
中的“p”和
中的“br”。您可能希望在
/*
 * Copies "prog1in1.txt" to "prog1in1out.txt" while dropping everything
 * between '<' and '>'.  Inside a tag the characters are examined one at a
 * time: a 'b' immediately followed by 'r' writes one newline, any 'p'
 * writes two newlines, and every other character bumps discardTag.
 *
 * NOTE(review): the fopen() results are not checked, ofp is never closed,
 * and the tag loop has no EOF test.
 */
void main(){
FILE *ifp, *ofp;// input/output file pointers
int c, c1, c2;// c holds the current character; c1 and c2 are not used in this function
int n = 0;// not used in this function
int count = 0;// running count of characters read (also bumped when newlines are written for tags)
int putCount = 0;// number of characters copied through by the final putc below
int discardTag = 0; // incremented once per non-matching character inside a tag
float charDiff = 0;// intended for the file size difference; not used in this function
int br = 0; // number of "br" sequences seen inside tags
int p = 0;// number of 'p' characters seen inside tags
ifp = fopen("prog1in1.txt", "r");
ofp = fopen("prog1in1out.txt", "w");
do{
c = getc(ifp);
count ++;
// a '<' starts a tag: consume input until the closing '>'
if(c == '<'){
// NOTE(review): only '>' ends this loop; EOF inside a tag is not tested for
while( c != '>'){
count ++;
c = getc(ifp);
/* a 'b' directly followed by 'r' is treated as a <br /> tag:
write one newline and count it */
if(c == 'b' ){
c = getc(ifp);
count ++;
if( /*c == 'b' &&*/ c == 'r'){
br ++;
// c is overwritten with '\n', so the loop condition next sees '\n', not 'r'
c = '\n';
putc( c , ofp);
count += 1;
}
}//end br if
/* any 'p' inside the tag is treated as a <p> tag: write two newlines
and count it (this matches every 'p', e.g. the one in "</p>" as well) */
else if ( c == 'p' ){
p ++;
c = '\n';
putc( c ,ofp);
putc( c, ofp);
count +=2;
}//end p if
// every other character inside the tag, including the closing '>', lands here
else{ //if ( c2 != 'b' && c1 != 'r' || c1 != 'p'){
discardTag ++;
}// end discard tag
}//end while
}
/* copy the current character to the output unless it is '>'.
Note that c == EOF also takes this branch, so EOF is passed to putc
once before the loop ends */
if( c != '>'){
putc( c , ofp);
putCount++;
}
else if( c == EOF){
// only reached when c == '>', so this condition can never be true
}
}while(c != EOF);
fclose(ifp);
}//end main
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Convert the HTML in "prog1in1.txt" to plain text in "prog1in1out.txt".
 *
 * Text outside tags is copied through.  Runs of spaces, tabs and newlines
 * collapse to a single space unless a <PRE> block is open, in which case
 * they are copied verbatim.  <P> writes two newlines, <BR> writes one, and
 * every other tag is dropped.  Each tag name is also echoed to stdout as
 * "<NAME>" (upper-cased) as a diagnostic trace.
 *
 * Returns 0 on success and 1 if a file cannot be opened or the output
 * cannot be written completely.
 */
int main (void)
{
    /* 31 characters of tag name plus the terminating NUL */
    enum { TAG_BUF_SIZE = 32 };

    FILE *ifp, *ofp;        /* input/output file pointers */
    int c;                  /* current input character, or EOF */

    /* NOTE(review): these counters are accumulated but never reported in
       this block; presumably a later step of the exercise prints them. */
    int count = 0;          /* characters read from the input (EOF excluded) */
    int putCount = 0;       /* characters of plain text written */
    int discardTag = 0;     /* tags dropped without producing output */
    int br = 0;             /* number of <br> tags */
    int p = 0;              /* number of <p> tags */

    char html_tag_buf[TAG_BUF_SIZE];
    int len_html_tag;
    int inside_pre_block = 0;
    int wrote_a_space = 0;  /* set once a separator has been emitted for the current run */
    int failed;

    ifp = fopen("prog1in1.txt", "r");
    if (ifp == NULL) {
        perror("prog1in1.txt");
        return 1;
    }
    ofp = fopen("prog1in1out.txt", "w");
    if (ofp == NULL) {
        perror("prog1in1out.txt");
        fclose(ifp);
        return 1;
    }

    do {
        c = getc(ifp);
        if (c != EOF)
            count++;

        switch (c) {
        case EOF:
            break;

        /* newline and tab both count as a single space in HTML, and
           repeated whitespace is collapsed, except inside PRE../PRE */
        case '\n': case '\t': case ' ':
            if (inside_pre_block) {
                putc(c, ofp);
                putCount++;
            } else if (!wrote_a_space) {
                wrote_a_space = 1;
                putc(' ', ofp);
                putCount++;
            }
            break;

        case '<':
            wrote_a_space = 0;
            len_html_tag = 0;

            /* collect the tag name: everything up to whitespace or '>' */
            for (;;) {
                c = getc(ifp);
                if (c == EOF)
                    break;
                count++;
                if (c == '>' || c == ' ' || c == '\n' || c == '\t')
                    break;
                /* over-long names are truncated rather than overflowing */
                if (len_html_tag < TAG_BUF_SIZE - 1) {
                    html_tag_buf[len_html_tag] = (char)toupper((unsigned char)c);
                    len_html_tag++;
                }
            }

            /* skip attributes; stop at EOF too so a truncated file
               cannot keep this loop spinning forever */
            while (c != '>' && c != EOF) {
                c = getc(ifp);
                if (c != EOF)
                    count++;
            }
            if (c == EOF)
                break;      /* unterminated tag: drop it, outer loop ends */

            html_tag_buf[len_html_tag] = '\0';
            printf ("<%s>", html_tag_buf);

            if (strcmp(html_tag_buf, "P") == 0) {
                /* the newlines already separate words, so swallow
                   whitespace that follows the tag */
                wrote_a_space = 1;
                p++;
                putc('\n', ofp);
                putc('\n', ofp);
            } else if (strcmp(html_tag_buf, "BR") == 0) {
                wrote_a_space = 1;
                br++;
                putc('\n', ofp);
            } else {
                if (strcmp(html_tag_buf, "PRE") == 0)
                    inside_pre_block = 1;
                if (strcmp(html_tag_buf, "/PRE") == 0)
                    inside_pre_block = 0;
                /* counts the tags that are neither <p> nor <br> */
                discardTag++;
            }
            break;

        default:
            wrote_a_space = 0;
            putc(c, ofp);
            putCount++;
            break;
        }
    } while (c != EOF);

    fclose(ifp);

    /* fclose flushes buffered output, so a write error may only show up here */
    failed = ferror(ofp);
    if (fclose(ofp) != 0)
        failed = 1;
    if (failed) {
        fprintf(stderr, "prog1in1out.txt: write error\n");
        return 1;
    }
    return 0;
} //end main