C++:解码电子邮件的主题
我用 Poco/Net/POP3ClientSession 下载了邮件,我想将电子邮件主题转换为人类可读的形式,所以我尝试使用这里的解决方案(原文链接缺失)。不幸的是,它不起作用(标签:c++, email, decoding, pop3, poco-libraries):
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/Net/MailMessage.h>
#include <iostream>
#include <string>
using namespace std;
using namespace Poco::Net;
#include <iconv.h>
const size_t BUF_SIZE=1024;
/// Minimal owning wrapper around an iconv(3) conversion descriptor.
///
/// The descriptor is opened in the constructor and closed in the destructor.
/// Copying is disabled because two copies would close the same descriptor.
class IConv {
    iconv_t ic_;
public:
    /// Opens a conversion from charset `from` to charset `to`.
    /// If the pair is unsupported the object is still constructed, but every
    /// call to convert() returns false.
    IConv(const char* to, const char* from)
        : ic_(iconv_open(to, from)) { }

    /// Closes the descriptor if it was opened successfully.
    ~IConv() {
        // iconv_open() reports failure with (iconv_t)-1; the type is opaque,
        // so the C-style cast is the portable way to spell that sentinel.
        if (ic_ != (iconv_t)-1)
            iconv_close(ic_);
    }

    IConv(const IConv&) = delete;
    IConv& operator=(const IConv&) = delete;

    /// Converts the NUL-terminated string `input` into `output`.
    ///
    /// @param input    NUL-terminated source text; the terminator is converted
    ///                 as well, so `output` ends up NUL-terminated on success.
    /// @param output   Destination buffer.
    /// @param out_size In: capacity of `output` in bytes. Out: bytes left unused.
    /// @return true on success, false if the descriptor is invalid or iconv()
    ///         reported an error (see errno).
    bool convert(char* input, char* output, size_t& out_size) {
        if (ic_ == (iconv_t)-1)
            return false;
        size_t inbufsize = strlen(input) + 1;
        // iconv() returns (size_t)-1 on error and a non-negative count on
        // success, so the result must be compared, not converted to bool.
        return iconv(ic_, &input, &inbufsize, &output, &out_size) != static_cast<size_t>(-1);
    }
};
/// Downloads the first message of the POP3 mailbox and passes its Subject
/// header through iconv (ISO-8859-2 -> UTF-8), printing the header before and
/// after the conversion.
///
/// Note: iconv only transcodes between character sets. A subject that arrives
/// as a MIME encoded-word ("=?charset?Q?...?=") is plain ASCII at this point,
/// so it passes through iconv unchanged; the "=XX" escapes have to be decoded
/// separately.
int main()
{
    POP3ClientSession session("poczta.o2.pl");
    session.login("my mail", "my password");
    POP3ClientSession::MessageInfoVec messages;
    session.listMessages(messages);
    // Indexing messages[0] on an empty mailbox would be undefined behaviour.
    if (messages.empty())
    {
        cout << "No messages" << endl;
        return 0;
    }
    cout << "id: " << messages[0].id << " size: " << messages[0].size << endl;
    MailMessage message;
    session.retrieveMessage(messages[0].id, message);
    const string subject = message.getSubject();
    cout << "Original subject: " << subject << endl;
    IConv iconv_("UTF8","ISO-8859-2");
    // Example content: "=?ISO-8859-2?Q?Re: M=F3j sen o JP II?="
    // string::copy() does not append a terminating NUL and IConv::convert()
    // measures its input with strlen(), so zero-fill the buffer and keep the
    // last byte free for the terminator.
    char from[BUF_SIZE] = {};
    subject.copy(from, sizeof(from) - 1);
    char to[BUF_SIZE] = "bye";
    size_t outsize = BUF_SIZE; // in: capacity of `to`; out: bytes left unused
    iconv_.convert(from, to, outsize);
    cout << "converted: " << to << endl;
}
有趣的是,当我尝试用POCO转换主题时,它失败了:
// Opposite direction: ask POCO to build an encoded-word labelled ISO-8859-2.
// The printed result (quoted at the end of the line) contains "=C3=B3", i.e. the
// two UTF-8 bytes of "ó", rather than the single ISO-8859-2 byte "=F3".
// NOTE(review): presumably the literal is stored as UTF-8 in the source file and
// encodeWord() escapes the bytes it is given without transcoding — confirm.
cout << "Encoded with POCO: " << MailMessage::encodeWord("Re: Mój sen o JP II", "ISO-8859-2") << endl; // output: Encoded with POCO: =?ISO-8859-2?q?Re=3A_M=C3=B3j_sen_o_JP_II?=
与您的情况相关的RFC是 RFC 2047。该RFC规定了如何在邮件头中对非ASCII数据进行编码。基本要点是:在Q编码中,除可打印ASCII字符外,所有字节都以“=”字符后跟两个十六进制数字的形式转义。由于“ó”在ISO-8859-2中由字节0xF3表示,并且0xF3不是可打印的ASCII字符,因此将其编码为“=F3”。您需要对邮件中的所有编码字符进行解码。
我找到了解决问题的方法(我不确定这是否是100%正确的解决方案),但它似乎足以使用:
Poco::UTF8Encoding::convert to convert from characterCode to utf8:
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
#include <Poco/Latin2Encoding.h>
#include <Poco/Net/MailMessage.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/UTF8Encoding.h>
using namespace std;
using namespace Poco::Net;
class EncoderLatin2
{
public:
EncoderLatin2(const string& encodedSubject)
{
/// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
int charsetBeginPosition = strlen("=?");
int charsetEndPosition = encodedSubject.find("?", charsetBeginPosition);
charset = encodedSubject.substr(charsetBeginPosition, charsetEndPosition-charsetBeginPosition);
int encodingPosition = charsetEndPosition + strlen("?");
encoding = encodedSubject[encodingPosition];
if ("ISO-8859-2" != charset)
throw std::invalid_argument("Invalid encoding!");
const int lenghtOfEncodedText = encodedSubject.length() - encodingPosition-strlen("?=")-2;
extractedEncodedSubjectToConvert = encodedSubject.substr(encodingPosition+2, lenghtOfEncodedText);
}
string convert()
{
size_t positionOfAssignment = -1;
while (true)
{
positionOfAssignment = extractedEncodedSubjectToConvert.find('=', positionOfAssignment+1);
if (string::npos != positionOfAssignment)
{
const string& charHexCode = extractedEncodedSubjectToConvert.substr(positionOfAssignment + 1, 2);
replaceAllSubstringsWithUnicode(extractedEncodedSubjectToConvert, charHexCode);
}
else
break;
}
return extractedEncodedSubjectToConvert;
}
void replaceAllSubstringsWithUnicode(string& s, const string& charHexCode)
{
const int charCode = stoi(charHexCode, nullptr, 16);
char buffer[10] = {};
encodingConverter.convert(charCode, (unsigned char*)buffer, sizeof(buffer));
replaceAll(s, '=' + charHexCode, buffer);
}
void replaceAll(string& s, const string& replaceFrom, const string& replaceTo)
{
size_t needlePosition = -1;
while (true)
{
needlePosition = s.find(replaceFrom, needlePosition + 1);
if (string::npos == needlePosition)
break;
s.replace(needlePosition, replaceFrom.length(), replaceTo);
}
}
private:
string charset;
char encoding;
Poco::UTF8Encoding encodingConverter;
string extractedEncodedSubjectToConvert;
};
/// Downloads the first message of the POP3 mailbox and prints its Subject
/// header both as received and after decoding with EncoderLatin2.
int main()
{
    POP3ClientSession session("poczta.o2.pl");
    session.login("my mail", "my password");
    POP3ClientSession::MessageInfoVec messages;
    session.listMessages(messages);
    // Indexing messages[0] on an empty mailbox would be undefined behaviour.
    if (messages.empty())
    {
        cout << "No messages" << endl;
        return 0;
    }
    MessageHeader header;
    MailMessage message;
    auto currentMessage = messages[0];
    // The header is fetched as well, but only `message` is used below.
    session.retrieveHeader(currentMessage.id, header);
    session.retrieveMessage(currentMessage.id, message);
    const string subject = message.getSubject();
    // The EncoderLatin2 constructor throws std::invalid_argument for subjects
    // whose charset is not ISO-8859-2.
    EncoderLatin2 encoder(subject);
    cout << "Original subject: " << subject << endl;
    cout << "Encoded: " << encoder.convert() << endl;
}
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/MailMessage.h>
#include <Poco/UTF8Encoding.h>
#include <iostream>
#include <string>
using namespace std;
using namespace Poco::Net;
class EncoderLatin2
{
public:
EncoderLatin2(const string& encodedSubject)
{
/// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
int charsetBeginPosition = strlen("=?");
int charsetEndPosition = encodedSubject.find("?", charsetBeginPosition);
charset = encodedSubject.substr(charsetBeginPosition, charsetEndPosition-charsetBeginPosition);
int encodingPosition = charsetEndPosition + strlen("?");
encoding = encodedSubject[encodingPosition];
if ("ISO-8859-2" != charset)
throw std::invalid_argument("Invalid encoding!");
const int lenghtOfEncodedText = encodedSubject.length() - encodingPosition-strlen("?=")-2;
extractedEncodedSubjectToConvert = encodedSubject.substr(encodingPosition+2, lenghtOfEncodedText);
}
string convert()
{
size_t positionOfAssignment = -1;
while (true)
{
positionOfAssignment = extractedEncodedSubjectToConvert.find('=', positionOfAssignment+1);
if (string::npos != positionOfAssignment)
{
const string& charHexCode = extractedEncodedSubjectToConvert.substr(positionOfAssignment + 1, 2);
replaceAllSubstringsWithUnicode(extractedEncodedSubjectToConvert, charHexCode);
}
else
break;
}
return extractedEncodedSubjectToConvert;
}
void replaceAllSubstringsWithUnicode(string& s, const string& charHexCode)
{
const int charCode = stoi(charHexCode, nullptr, 16);
char buffer[10] = {};
encodingConverter.convert(charCode, (unsigned char*)buffer, sizeof(buffer));
replaceAll(s, '=' + charHexCode, buffer);
}
void replaceAll(string& s, const string& replaceFrom, const string& replaceTo)
{
size_t needlePosition = -1;
while (true)
{
needlePosition = s.find(replaceFrom, needlePosition + 1);
if (string::npos == needlePosition)
break;
s.replace(needlePosition, replaceFrom.length(), replaceTo);
}
}
private:
string charset;
char encoding;
Poco::UTF8Encoding encodingConverter;
string extractedEncodedSubjectToConvert;
};
int main()
{
POP3ClientSession session("poczta.o2.pl");
session.login("my mail", "my password");
POP3ClientSession::MessageInfoVec messages;
session.listMessages(messages);
MessageHeader header;
MailMessage message;
auto currentMessage = messages[0];
session.retrieveHeader(currentMessage.id, header);
session.retrieveMessage(currentMessage.id, message);
const string subject = message.getSubject();
EncoderLatin2 encoder(subject);
我找到了另一个比以前更好的解决办法。
我注意到,不同邮件的主题会使用不同的编码:
- Latin2(ISO-8859-2)加Q编码,形如:=?ISO-8859-2?Q?…?=
- UTF-8 加 Base64,形如:
=?utf-8?B?WM9IYWN6Y2llignvigrsBxyxMgChJ6EWDVDG93YWxPzTesAviHn0EWN6ZCwCGTGFzzxWyxJRDQ===
- UTF-8 加 Quoted-Printable(Q编码),形如:
=?utf-8?Q?…?=
- 没有编码(如果只有ASCII字符),即主题就是普通文本。
因此,使用POCO(Base64解码器、Latin2编码、UTF8编码、QuotedPrintableDecoder),我成功地转换了所有情况:
#include <iostream>
#include <string>
#include <sstream>
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/MailMessage.h>
#include <Poco/Base64Decoder.h>
#include <Poco/Latin2Encoding.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Net/QuotedPrintableDecoder.h>
using namespace std;
class Encoder
{
public:
Encoder(const string& encodedText)
{
isStringEncoded = isEncoded(encodedText);
if (!isStringEncoded)
{
extractedEncodedSubjectToConvert = encodedText;
return;
}
splitEncodedText(encodedText);
}
string convert()
{
if (isStringEncoded)
{
if (Poco::Latin2Encoding().isA(charset))
return decodeFromLatin2();
if (Poco::UTF8Encoding().isA(charset))
return decodeFromUtf8();
}
return extractedEncodedSubjectToConvert;
}
private:
void splitEncodedText(const string& encodedText)
{
/// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
const int charsetBeginPosition = strlen(sequenceBeginEncodedText);
const int charsetEndPosition = encodedText.find("?", charsetBeginPosition);
charset = encodedText.substr(charsetBeginPosition, charsetEndPosition-charsetBeginPosition);
const int encodingPosition = charsetEndPosition + strlen("?");
encoding = encodedText[encodingPosition];
const int lenghtOfEncodedText = encodedText.length() - encodingPosition-strlen(sequenceBeginEncodedText)-strlen(sequenceEndEncodedText);
extractedEncodedSubjectToConvert = encodedText.substr(encodingPosition+2, lenghtOfEncodedText);
}
bool isEncoded(const string& encodedSubject)
{
if (encodedSubject.size() < 4)
return false;
if (0 != encodedSubject.find(sequenceBeginEncodedText))
return false;
const unsigned positionOfLastTwoCharacters = encodedSubject.size() - strlen(sequenceEndEncodedText);
return positionOfLastTwoCharacters == encodedSubject.rfind(sequenceEndEncodedText);
}
string decodeFromLatin2()
{
size_t positionOfAssignment = -1;
while (true)
{
positionOfAssignment = extractedEncodedSubjectToConvert.find('=', positionOfAssignment+1);
if (string::npos != positionOfAssignment)
{
const string& charHexCode = extractedEncodedSubjectToConvert.substr(positionOfAssignment + 1, 2);
replaceAllSubstringsWithUnicode(extractedEncodedSubjectToConvert, charHexCode);
}
else
break;
}
return extractedEncodedSubjectToConvert;
}
void replaceAllSubstringsWithUnicode(string& s, const string& charHexCode)
{
static Poco::UTF8Encoding encodingConverter;
const int charCode = stoi(charHexCode, nullptr, 16);
char buffer[10] = {};
encodingConverter.convert(charCode, (unsigned char*)buffer, sizeof(buffer));
replaceAll(s, '=' + charHexCode, buffer);
}
void replaceAll(string& s, const string& replaceFrom, const string& replaceTo)
{
size_t needlePosition = -1;
while (true)
{
needlePosition = s.find(replaceFrom, needlePosition + 1);
if (string::npos == needlePosition)
break;
s.replace(needlePosition, replaceFrom.length(), replaceTo);
}
}
string decodeFromUtf8()
{
if('B' == toupper(encoding))
{
return decodeFromBase64();
}
else // if Q:
{
return decodeFromQuatedPrintable();
}
}
string decodeFromBase64()
{
istringstream is(extractedEncodedSubjectToConvert);
Poco::Base64Decoder e64(is);
extractedEncodedSubjectToConvert.clear();
string buffer;
while(getline(e64, buffer))
extractedEncodedSubjectToConvert += buffer;
return extractedEncodedSubjectToConvert;
}
string decodeFromQuatedPrintable()
{
replaceAll(extractedEncodedSubjectToConvert, "_", " ");
istringstream is(extractedEncodedSubjectToConvert);
Poco::Net::QuotedPrintableDecoder qp(is);
extractedEncodedSubjectToConvert.clear();
string buffer;
while(getline(qp, buffer))
extractedEncodedSubjectToConvert += buffer;
return extractedEncodedSubjectToConvert;
}
private:
string charset;
char encoding;
string extractedEncodedSubjectToConvert;
bool isStringEncoded;
static constexpr const char* sequenceBeginEncodedText = "=?";
static constexpr const char* sequenceEndEncodedText = "?=";
};
/// Downloads the first message of the POP3 mailbox and prints its Subject
/// header both as received and after decoding with Encoder.
int main()
{
    Poco::Net::POP3ClientSession session("poczta.o2.pl");
    session.login("my mail", "my password");
    Poco::Net::POP3ClientSession::MessageInfoVec messages;
    session.listMessages(messages);
    // Indexing messages[0] on an empty mailbox would be undefined behaviour.
    if (messages.empty())
    {
        cout << "No messages" << endl;
        return 0;
    }
    Poco::Net::MessageHeader header;
    Poco::Net::MailMessage message;
    auto currentMessage = messages[0];
    // The header is fetched as well, but only `message` is used below.
    session.retrieveHeader(currentMessage.id, header);
    session.retrieveMessage(currentMessage.id, message);
    const string subject = message.getSubject();
    Encoder encoder(subject);
    cout << "Original subject: " << subject << endl;
    cout << "Encoded: " << encoder.convert() << endl;
}
#include <iostream>
#include <string>
#include <sstream>
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/MailMessage.h>
#include <Poco/Base64Decoder.h>
#include <Poco/Latin2Encoding.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Net/QuotedPrintableDecoder.h>
using namespace std;
class Encoder
{
public:
Encoder(const string& encodedText)
{
isStringEncoded = isEncoded(encodedText);
if (!isStringEncoded)
{
extractedEncodedSubjectToConvert = encodedText;
return;
}
splitEncodedText(encodedText);
}
string convert()
{
if (isStringEncoded)
{
if (Poco::Latin2Encoding().isA(charset))
return decodeFromLatin2();
if (Poco::UTF8Encoding().isA(charset))
return decodeFromUtf8();
}
return extractedEncodedSubjectToConvert;
}
private:
void splitEncodedText(const string& encodedText)
{
/// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
const int charsetBeginPosition = strlen(sequenceBeginEncodedText);
const int charsetEndPosition = encodedText.find("?", charsetBeginPosition);
charset = encodedText.substr(charsetBeginPosition, charsetEndPosition-charsetBeginPosition);
const int encodingPosition = charsetEndPosition + strlen("?");
encoding = encodedText[encodingPosition];
const int lenghtOfEncodedText = encodedText.length() - encodingPosition-strlen(sequenceBeginEncodedText)-strlen(sequenceEndEncodedText);
extractedEncodedSubjectToConvert = encodedText.substr(encodingPosition+2, lenghtOfEncodedText);
}
bool isEncoded(const string& encodedSubject)
{
#include <iostream>
#include <string>
#include <sstream>
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/MailMessage.h>
#include <Poco/Base64Decoder.h>
#include <Poco/Latin2Encoding.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Net/QuotedPrintableDecoder.h>
using namespace std;
class Encoder
{
public:
Encoder(const string& encodedText)
{
isStringEncoded = isEncoded(encodedText);
if (!isStringEncoded)
{
extractedEncodedSubjectToConvert = encodedText;
return;
}
splitEncodedText(encodedText);
}
string convert()
{
if (isStringEncoded)
{
if (Poco::Latin2Encoding().isA(charset))
return decodeFromLatin2();
if (Poco::UTF8Encoding().isA(charset))
return decodeFromUtf8();
}
return extractedEncodedSubjectToConvert;
}
private:
void splitEncodedText(const string& encodedText)
{
/// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
const int charsetBeginPosition = strlen(sequenceBeginEncodedText);
const int charsetEndPosition = encodedText.find("?", charsetBeginPosition);
charset = encodedText.substr(charsetBeginPosition, charsetEndPosition-charsetBeginPosition);
const int encodingPosition = charsetEndPosition + strlen("?");
encoding = encodedText[encodingPosition];
const int lenghtOfEncodedText = encodedText.length() - encodingPosition-strlen(sequenceBeginEncodedText)-strlen(sequenceEndEncodedText);
extractedEncodedSubjectToConvert = encodedText.substr(encodingPosition+2, lenghtOfEncodedText);
}
bool isEncoded(const string& encodedSubject)
{
if (encodedSubject.size() < 4)
return false;
if (0 != encodedSubject.find(sequenceBeginEncodedText))
return false;
const unsigned positionOfLastTwoCharacters = encodedSubject.size() - strlen(sequenceEndEncodedText);
return positionOfLastTwoCharacters == encodedSubject.rfind(sequenceEndEncodedText);
}
string decodeFromLatin2()
{
size_t positionOfAssignment = -1;
while (true)
{
positionOfAssignment = extractedEncodedSubjectToConvert.find('=', positionOfAssignment+1);
if (string::npos != positionOfAssignment)
{
const string& charHexCode = extractedEncodedSubjectToConvert.substr(positionOfAssignment + 1, 2);
replaceAllSubstringsWithUnicode(extractedEncodedSubjectToConvert, charHexCode);
}
else
break;
}
return extractedEncodedSubjectToConvert;
}
void replaceAllSubstringsWithUnicode(string& s, const string& charHexCode)
{
static Poco::UTF8Encoding encodingConverter;
const int charCode = stoi(charHexCode, nullptr, 16);
char buffer[10] = {};
encodingConverter.convert(charCode, (unsigned char*)buffer, sizeof(buffer));
replaceAll(s, '=' + charHexCode, buffer);
}
void replaceAll(string& s, const string& replaceFrom, const string& replaceTo)
{
size_t needlePosition = -1;
while (true)
{
needlePosition = s.find(replaceFrom, needlePosition + 1);
if (string::npos == needlePosition)
break;
s.replace(needlePosition, replaceFrom.length(), replaceTo);
}
}
string decodeFromUtf8()
{
if('B' == toupper(encoding))
{
return decodeFromBase64();
}
else // if Q:
{
return decodeFromQuatedPrintable();
}
}
string decodeFromBase64()
{
istringstream is(extractedEncodedSubjectToConvert);
Poco::Base64Decoder e64(is);
extractedEncodedSubjectToConvert.clear();
string buffer;
while(getline(e64, buffer))
extractedEncodedSubjectToConvert += buffer;
return extractedEncodedSubjectToConvert;
}
string decodeFromQuatedPrintable()
{
replaceAll(extractedEncodedSubjectToConvert, "_", " ");
istringstream is(extractedEncodedSubjectToConvert);
Poco::Net::QuotedPrintableDecoder qp(is);
extractedEncodedSubjectToConvert.clear();
string buffer;
while(getline(qp, buffer))
extractedEncodedSubjectToConvert += buffer;
return extractedEncodedSubjectToConvert;
}
private:
string charset;
char encoding;
string extractedEncodedSubjectToConvert;
bool isStringEncoded;
static constexpr const char* sequenceBeginEncodedText = "=?";
static constexpr const char* sequenceEndEncodedText = "?=";
};
/// Downloads the first message of the POP3 mailbox and prints its Subject
/// header both as received and after decoding with Encoder.
int main()
{
    Poco::Net::POP3ClientSession session("poczta.o2.pl");
    session.login("my mail", "my password");
    Poco::Net::POP3ClientSession::MessageInfoVec messages;
    session.listMessages(messages);
    // Indexing messages[0] on an empty mailbox would be undefined behaviour.
    if (messages.empty())
    {
        cout << "No messages" << endl;
        return 0;
    }
    Poco::Net::MessageHeader header;
    Poco::Net::MailMessage message;
    auto currentMessage = messages[0];
    // The header is fetched as well, but only `message` is used below.
    session.retrieveHeader(currentMessage.id, header);
    session.retrieveMessage(currentMessage.id, message);
    const string subject = message.getSubject();
    Encoder encoder(subject);
    cout << "Original subject: " << subject << endl;
    cout << "Encoded: " << encoder.convert() << endl;
}