解析邮件头中的MIME日期(C#但与语言无关)
我正在开发一个小型的本地c#应用程序来处理电子邮件。我使用的是S22/Imap,它可以下载和分离标题。当我试图获取电子邮件的日期时,它会返回字符串版本,如MIME标题中所示 对于大多数日期,DateTime.TryParse运行良好,但对于某些日期,它失败。以下是每一个示例,我找不到如何在RFC中处理它们:解析邮件头中的MIME日期(C#但与语言无关),c#,email,parsing,mime,C#,Email,Parsing,Mime,我正在开发一个小型的本地c#应用程序来处理电子邮件。我使用的是S22/Imap,它可以下载和分离标题。当我试图获取电子邮件的日期时,它会返回字符串版本,如MIME标题中所示 对于大多数日期,DateTime.TryParse运行良好,但对于某些日期,它失败。以下是每一个示例,我找不到如何在RFC中处理它们: 2016年1月15日星期五20:21:44-0600-0700 2011年1月3日星期一20:32:19+0000(GMT+00:00) 2012年6月12日星期二19:22:280200
- Fri, 15 Jan 2016 20:21:44 -0600 -0700
- Mon, 3 Jan 2011 20:32:19 +0000 (GMT+00:00)
- Tue, 12 Jun 2012 19:22:28 0200(这里是否隐含了"+"?)
- Mon, 11 Jan 2010 17:28:39 3600(RFC 中没有这种写法)
- Thu, 7 Oct 2010 17:31:20 7200
- Fri, 24 Jul 2009 21:13:28 +0100 (Paris, Madrid)(我必须忽略 +0100 之后的内容吗?这样做会不会丢失信息?)
- Thu, 28 May 2015 10:58:24 +0200 (Paris, Madrid (heure d'été))(同样的问题)
提前感谢您的帮助。
(回答)我不得不自己编写一个类来解析这些日期(如果您感兴趣:我有一个与 S22.Imap 竞争的库,名为 MailKit,它使用我的 MimeKit 库来解析消息、日期等)。我冒昧地将我的日期解析器从 MimeKit 中分离出来,以便把它作为一个独立的组件粘贴到这里:
using System;
using System.Text;
using System.Collections.Generic;
namespace DateParserUtils {
/// <summary>
/// Classification bits accumulated over the characters of a date token.
/// </summary>
/// <remarks>
/// The <c>Non*</c> bits are "negative" flags: a bit is set as soon as the token contains
/// a character that does not belong to the corresponding character class. The two
/// <c>Has*</c> bits are "positive" flags set when a colon or a sign character is seen.
/// </remarks>
[Flags]
enum DateTokenFlags : byte
{
    /// <summary>No bits set.</summary>
    None = 0x00,

    /// <summary>Contains a character other than a decimal digit.</summary>
    NonNumeric = 0x01,

    /// <summary>Contains a character that does not occur in any weekday name.</summary>
    NonWeekday = 0x02,

    /// <summary>Contains a character that does not occur in any month name.</summary>
    NonMonth = 0x04,

    /// <summary>Contains a character other than a digit or a colon.</summary>
    NonTime = 0x08,

    /// <summary>Contains a character other than an upper-case ASCII letter.</summary>
    NonAlphaZone = 0x10,

    /// <summary>Contains a character other than a digit, '+' or '-'.</summary>
    NonNumericZone = 0x20,

    /// <summary>Contains a ':' character.</summary>
    HasColon = 0x40,

    /// <summary>Contains a '+' or '-' character.</summary>
    HasSign = 0x80,
}
/// <summary>
/// A slice of the input buffer together with the classification flags
/// accumulated over its characters.
/// </summary>
class DateToken
{
    /// <summary>
    /// Creates a token covering <paramref name="length"/> bytes starting at
    /// <paramref name="startIndex"/>.
    /// </summary>
    /// <param name="flags">The OR-ed flags of every character in the token.</param>
    /// <param name="startIndex">Offset of the first byte of the token.</param>
    /// <param name="length">Number of bytes in the token.</param>
    public DateToken (DateTokenFlags flags, int startIndex, int length)
    {
        Flags = flags;
        StartIndex = startIndex;
        Length = length;
    }

    /// <summary>The accumulated character-class flags.</summary>
    public DateTokenFlags Flags { get; private set; }

    /// <summary>Offset of the first byte of the token.</summary>
    public int StartIndex { get; private set; }

    /// <summary>Number of bytes in the token.</summary>
    public int Length { get; private set; }

    // True when at least one of the given bits is present in Flags.
    bool Has (DateTokenFlags bits)
    {
        return (Flags & bits) != 0;
    }

    /// <summary>True when no non-digit character was seen.</summary>
    public bool IsNumeric {
        get { return !Has (DateTokenFlags.NonNumeric); }
    }

    /// <summary>True when every character occurs in some weekday name.</summary>
    public bool IsWeekday {
        get { return !Has (DateTokenFlags.NonWeekday); }
    }

    /// <summary>True when every character occurs in some month name.</summary>
    public bool IsMonth {
        get { return !Has (DateTokenFlags.NonMonth); }
    }

    /// <summary>True when the token holds only digits and colons, with at least one colon.</summary>
    public bool IsTimeOfDay {
        get { return !Has (DateTokenFlags.NonTime) && Has (DateTokenFlags.HasColon); }
    }

    /// <summary>True when the token holds only digits and signs, with at least one sign.</summary>
    public bool IsNumericZone {
        get { return !Has (DateTokenFlags.NonNumericZone) && Has (DateTokenFlags.HasSign); }
    }

    /// <summary>True when the token holds only upper-case ASCII letters.</summary>
    public bool IsAlphaZone {
        get { return !Has (DateTokenFlags.NonAlphaZone); }
    }

    /// <summary>True when the token qualifies as either kind of time zone.</summary>
    public bool IsTimeZone {
        get { return IsNumericZone || IsAlphaZone; }
    }
}
/// <summary>
/// Utility methods to parse and format rfc822 date strings.
/// </summary>
/// <remarks>
/// Utility methods to parse and format rfc822 date strings.
/// </remarks>
public static class DateUtils
{
// 1 January 1970, 00:00:00. Not referenced by any other member visible in this file.
internal static readonly DateTime UnixEpoch = new DateTime (1970, 1, 1, 0, 0, 0, 0);
// The English month names run together; the static constructor tests each letter
// (in either case) against this string to decide the NonMonth flag.
const string MonthCharacters = "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember";
// The English weekday names run together; used the same way for the NonWeekday flag.
const string WeekdayCharacters = "SundayMondayTuesdayWednesdayThursdayFridaySaturday";
// Characters allowed in an alphabetic time zone name (upper-case only).
const string AlphaZoneCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
// Characters allowed in a signed numeric time zone such as "-0600".
const string NumericZoneCharacters = "+-0123456789";
// Characters allowed in a purely numeric token.
const string NumericCharacters = "0123456789";
// Characters allowed in a time-of-day token such as "20:21:44".
const string TimeCharacters = "0123456789:";
// Three-letter month abbreviations; array index + 1 is the month number.
static readonly string[] Months = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
// Three-letter weekday abbreviations; the array index is cast directly to DayOfWeek.
static readonly string[] WeekDays = {
"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};
// Alphabetic zone name -> offset written as a signed hhmm integer (e.g. -500).
// Populated by the static constructor.
static readonly Dictionary<string, int> timezones;
// Character classification table indexed by byte value (256 entries).
// Populated by the static constructor.
static readonly DateTokenFlags[] datetok;
/// <summary>
/// Builds the time zone name table and the per-byte character classification table.
/// </summary>
static DateUtils ()
{
    // Offsets are stored as signed hhmm integers, exactly as they would be
    // written in a numeric zone (e.g. "EST" -> -500).
    timezones = new Dictionary<string, int> {
        { "UT", 0 }, { "UTC", 0 }, { "GMT", 0 },
        { "EDT", -400 }, { "EST", -500 },
        { "CDT", -500 }, { "CST", -600 },
        { "MDT", -600 }, { "MST", -700 },
        { "PDT", -700 }, { "PST", -800 },
        // Note: rfc822 got the signs backwards for the military
        // timezones so some sending clients may mistakenly use the
        // wrong values.
        { "A", 100 }, { "B", 200 }, { "C", 300 },
        { "D", 400 }, { "E", 500 }, { "F", 600 },
        { "G", 700 }, { "H", 800 }, { "I", 900 },
        { "K", 1000 }, { "L", 1100 }, { "M", 1200 },
        { "N", -100 }, { "O", -200 }, { "P", -300 },
        { "Q", -400 }, { "R", -500 }, { "S", -600 },
        { "T", -700 }, { "U", -800 }, { "V", -900 },
        { "W", -1000 }, { "X", -1100 }, { "Y", -1200 },
        { "Z", 0 },
    };

    datetok = new DateTokenFlags[256];

    // Holds the upper- and lower-case forms of the current character so that
    // weekday and month names are matched case-insensitively.
    var any = new char[2];

    for (int c = 0; c < 256; c++) {
        if (c >= 0x41 && c <= 0x5a) {
            // 'A'..'Z': probe with the letter and its lower-case form
            any[0] = (char) c;
            any[1] = (char) (c + 0x20);
        } else if (c >= 0x61 && c <= 0x7a) {
            // 'a'..'z': probe with the letter and its upper-case form
            any[0] = (char) (c - 0x20);
            any[1] = (char) c;
        } else {
            // Not a letter: probe with the character itself instead of
            // whatever letter a previous iteration left in the buffer.
            any[0] = (char) c;
            any[1] = (char) c;
        }

        if (NumericZoneCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonNumericZone;
        if (AlphaZoneCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonAlphaZone;
        if (WeekdayCharacters.IndexOfAny (any) == -1)
            datetok[c] |= DateTokenFlags.NonWeekday;
        if (NumericCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonNumeric;
        if (MonthCharacters.IndexOfAny (any) == -1)
            datetok[c] |= DateTokenFlags.NonMonth;
        if (TimeCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonTime;
    }

    // Positive flags: these mark the presence of a character rather than
    // the absence of a character class.
    datetok[':'] |= DateTokenFlags.HasColon;
    datetok['+'] |= DateTokenFlags.HasSign;
    datetok['-'] |= DateTokenFlags.HasSign;
}
/// <summary>
/// Interprets a token as an English weekday name.
/// </summary>
/// <remarks>
/// Only the first three characters are compared (case-insensitively) against the
/// abbreviations in <c>WeekDays</c>, so both "Fri" and "Friday" match.
/// </remarks>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="weekday">The matched weekday; <c>Sunday</c> when nothing matched.</param>
/// <returns><c>true</c> if the token names a weekday.</returns>
static bool TryGetWeekday (DateToken token, byte[] text, out DayOfWeek weekday)
{
    weekday = DayOfWeek.Sunday;

    if (token.Length < 3 || !token.IsWeekday)
        return false;

    string abbrev = Encoding.ASCII.GetString (text, token.StartIndex, token.Length);
    if (abbrev.Length > 3)
        abbrev = abbrev.Substring (0, 3);

    // The position in WeekDays doubles as the DayOfWeek value.
    int found = Array.FindIndex (WeekDays, w => w.Equals (abbrev, StringComparison.OrdinalIgnoreCase));
    if (found < 0)
        return false;

    weekday = (DayOfWeek) found;
    return true;
}
/// <summary>
/// Parses a run of ASCII decimal digits into a non-negative 32-bit integer.
/// </summary>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">
/// On entry, the position to start at; on exit, the position of the first byte that
/// was not consumed.
/// </param>
/// <param name="endIndex">The exclusive upper bound for reading.</param>
/// <param name="value">The value of the digits consumed so far.</param>
/// <returns>
/// <c>true</c> if at least one digit was consumed; <c>false</c> if there were no
/// digits or the number does not fit in an <see cref="System.Int32"/>.
/// </returns>
static bool TryParseInt32 (byte[] text, ref int index, int endIndex, out int value)
{
    const int Ceiling = int.MaxValue / 10;
    const int CeilingLastDigit = int.MaxValue % 10;
    int first = index;

    value = 0;

    for (; index < endIndex; index++) {
        byte b = text[index];

        if (b < (byte) '0' || b > (byte) '9')
            break;

        int digit = b - (byte) '0';

        // Appending this digit would overflow: stop without consuming it.
        if (value > Ceiling || (value == Ceiling && digit > CeilingLastDigit))
            return false;

        value = (value * 10) + digit;
    }

    return index > first;
}
/// <summary>
/// Interprets an all-digit token as a day of the month.
/// </summary>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="day">The parsed number.</param>
/// <returns><c>true</c> if the token is numeric and its value lies in 1..31.</returns>
static bool TryGetDayOfMonth (DateToken token, byte[] text, out int day)
{
    day = 0;

    if (!token.IsNumeric)
        return false;

    int pos = token.StartIndex;
    int stop = token.StartIndex + token.Length;

    return TryParseInt32 (text, ref pos, stop, out day) && day >= 1 && day <= 31;
}
/// <summary>
/// Interprets a token as an English month name.
/// </summary>
/// <remarks>
/// Only the first three characters are compared (case-insensitively) against the
/// abbreviations in <c>Months</c>, so both "Jan" and "January" match.
/// </remarks>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="month">The month number (1..12); 0 when nothing matched.</param>
/// <returns><c>true</c> if the token names a month.</returns>
static bool TryGetMonth (DateToken token, byte[] text, out int month)
{
    month = 0;

    if (token.Length < 3 || !token.IsMonth)
        return false;

    string abbrev = Encoding.ASCII.GetString (text, token.StartIndex, token.Length);
    if (abbrev.Length > 3)
        abbrev = abbrev.Substring (0, 3);

    int found = Array.FindIndex (Months, m => m.Equals (abbrev, StringComparison.OrdinalIgnoreCase));
    if (found < 0)
        return false;

    // Months is zero-based, month numbers are one-based.
    month = found + 1;
    return true;
}
/// <summary>
/// Interprets an all-digit token as a year.
/// </summary>
/// <remarks>
/// Values below 70 have 2000 added and values from 70 to 99 have 1900 added.
/// The resulting year is accepted only if it is 1969 or later.
/// </remarks>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="year">The parsed (and possibly expanded) year.</param>
/// <returns><c>true</c> if the token yields a year of at least 1969.</returns>
static bool TryGetYear (DateToken token, byte[] text, out int year)
{
    year = 0;

    if (!token.IsNumeric)
        return false;

    int pos = token.StartIndex;
    int stop = token.StartIndex + token.Length;

    if (!TryParseInt32 (text, ref pos, stop, out year))
        return false;

    // Expand abbreviated years.
    if (year < 70)
        year += 2000;
    else if (year < 100)
        year += 1900;

    return year >= 1969;
}
/// <summary>
/// Interprets a token as a time of day in <c>hh:mm</c> or <c>hh:mm:ss</c> form.
/// </summary>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="hour">The parsed hour (0..23).</param>
/// <param name="minute">The parsed minute (0..59).</param>
/// <param name="second">The parsed second (0..59), or 0 when omitted.</param>
/// <returns><c>true</c> if the token is a valid time of day.</returns>
static bool TryGetTimeOfDay (DateToken token, byte[] text, out int hour, out int minute, out int second)
{
    int pos = token.StartIndex;
    int stop = token.StartIndex + token.Length;

    hour = 0;
    minute = 0;
    second = 0;

    if (!token.IsTimeOfDay)
        return false;

    bool hourOk = TryParseInt32 (text, ref pos, stop, out hour) && hour <= 23;
    if (!hourOk)
        return false;

    // A colon must separate the hour from the minute.
    if (pos >= stop || text[pos] != (byte) ':')
        return false;
    pos++;

    bool minuteOk = TryParseInt32 (text, ref pos, stop, out minute) && minute <= 59;
    if (!minuteOk)
        return false;

    // The seconds are optional: accept a bare hh:mm.
    if (pos >= stop || text[pos] != (byte) ':')
        return true;
    pos++;

    bool secondOk = TryParseInt32 (text, ref pos, stop, out second) && second <= 59;
    if (!secondOk)
        return false;

    // Nothing may follow the seconds.
    return pos == stop;
}
/// <summary>
/// Reads a time zone offset from a token.
/// </summary>
/// <remarks>
/// Three spellings are recognised: a signed number such as <c>-0600</c>, an upper-case
/// alphabetic name of at most three letters listed in <c>timezones</c> such as
/// <c>GMT</c>, and a bare unsigned number such as <c>0200</c>. A token that fits none
/// of these shapes yields an offset of zero and still reports success.
/// </remarks>
/// <param name="token">The token to examine.</param>
/// <param name="text">The buffer the token refers to.</param>
/// <param name="tzone">
/// The offset as a signed hhmm integer exactly as written (e.g. -600 for "-0600").
/// No range checking or normalisation is applied here.
/// </param>
/// <returns>
/// <c>false</c> if the token has the shape of a time zone but cannot be parsed
/// (bad sign, trailing characters, overflow, or unknown zone name); otherwise <c>true</c>.
/// </returns>
static bool TryGetTimeZone (DateToken token, byte[] text, out int tzone)
{
    tzone = 0;

    if (token.IsNumericZone) {
        int endIndex = token.StartIndex + token.Length;
        int index = token.StartIndex;
        int sign;

        // The sign must be the very first character of the token.
        if (text[index] == (byte) '-')
            sign = -1;
        else if (text[index] == (byte) '+')
            sign = 1;
        else
            return false;

        index++;

        // The remainder of the token must be digits only.
        if (!TryParseInt32 (text, ref index, endIndex, out tzone) || index != endIndex)
            return false;

        tzone *= sign;
    } else if (token.IsAlphaZone) {
        // No name in the table is longer than three letters.
        if (token.Length > 3)
            return false;

        var name = Encoding.ASCII.GetString (text, token.StartIndex, token.Length);

        if (!timezones.TryGetValue (name, out tzone))
            return false;
    } else if (token.IsNumeric) {
        // Unsigned offset: taken as written, which makes it positive.
        int endIndex = token.StartIndex + token.Length;
        int index = token.StartIndex;

        if (!TryParseInt32 (text, ref index, endIndex, out tzone) || index != endIndex)
            return false;
    }

    return true;
}
/// <summary>
/// Tells whether a byte is a space or a horizontal tab.
/// </summary>
/// <param name="c">The byte to test.</param>
/// <returns><c>true</c> for ' ' and '\t', <c>false</c> for anything else.</returns>
static bool IsWhiteSpace (byte c)
{
    switch (c) {
    case (byte) ' ':
    case (byte) '\t':
        return true;
    default:
        return false;
    }
}
/// <summary>
/// Tells whether a byte terminates a date token.
/// </summary>
/// <param name="c">The byte to test.</param>
/// <returns>
/// <c>true</c> for '-', '/', ',', space and horizontal tab; <c>false</c> otherwise.
/// </returns>
static bool IsTokenDelimeter (byte c)
{
    switch (c) {
    case (byte) '-':
    case (byte) '/':
    case (byte) ',':
    case (byte) ' ':
    case (byte) '\t':
        return true;
    default:
        return false;
    }
}
/// <summary>
/// Advances past any run of spaces and horizontal tabs.
/// </summary>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">The current position; moved to the first non-blank byte.</param>
/// <param name="endIndex">The exclusive upper bound for reading.</param>
/// <returns><c>true</c> if at least one byte was skipped.</returns>
static bool SkipWhiteSpace (byte[] text, ref int index, int endIndex)
{
    int first = index;

    for (; index < endIndex; index++) {
        byte b = text[index];

        if (b != (byte) ' ' && b != (byte) '\t')
            break;
    }

    return index > first;
}
/// <summary>
/// Advances past a parenthesised comment, honouring nesting and backslash escapes.
/// </summary>
/// <remarks>
/// The byte at <paramref name="index"/> is taken to be the opening parenthesis and is
/// skipped without being inspected.
/// </remarks>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">
/// On entry, the position of the opening parenthesis; on exit, the position just past
/// the matching closing parenthesis, or <paramref name="endIndex"/> if none was found.
/// </param>
/// <param name="endIndex">The exclusive upper bound for reading.</param>
/// <returns><c>true</c> if the comment was properly closed.</returns>
static bool SkipComment (byte[] text, ref int index, int endIndex)
{
    bool escaped = false;
    int depth = 1;

    for (index++; index < endIndex && depth > 0; index++) {
        byte b = text[index];

        if (b == (byte) '\\') {
            // A backslash escapes the next byte; two in a row cancel out.
            escaped = !escaped;
            continue;
        }

        if (escaped) {
            // An escaped byte never affects the nesting depth.
            escaped = false;
            continue;
        }

        if (b == (byte) '(')
            depth++;
        else if (b == (byte) ')')
            depth--;
    }

    return depth == 0;
}
/// <summary>
/// Advances past any mixture of blanks and parenthesised comments.
/// </summary>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">
/// The current position; moved to the first byte that is neither a blank nor the
/// start of a comment.
/// </param>
/// <param name="endIndex">The exclusive upper bound for reading.</param>
/// <returns>
/// <c>false</c> if a comment was opened but never closed; otherwise <c>true</c>.
/// </returns>
static bool SkipCommentsAndWhiteSpace (byte[] text, ref int index, int endIndex)
{
    SkipWhiteSpace (text, ref index, endIndex);

    while (index < endIndex && text[index] == (byte) '(') {
        if (!SkipComment (text, ref index, endIndex))
            return false;

        // Comments may be separated (or followed) by further blanks.
        SkipWhiteSpace (text, ref index, endIndex);
    }

    return true;
}
/// <summary>
/// Splits a region of the buffer into date tokens.
/// </summary>
/// <remarks>
/// Blanks and parenthesised comments between tokens are skipped. A token starts at the
/// first remaining byte and runs until the next delimiter byte; the delimiter itself is
/// then skipped. Tokenizing stops early if a comment is left unterminated.
/// </remarks>
/// <param name="text">The buffer to read from.</param>
/// <param name="startIndex">The position of the first byte to tokenize.</param>
/// <param name="length">The number of bytes to tokenize.</param>
/// <returns>The tokens in the order they occur.</returns>
static IEnumerable<DateToken> TokenizeDate (byte[] text, int startIndex, int length)
{
    int stop = startIndex + length;
    int pos = startIndex;

    while (pos < stop && SkipCommentsAndWhiteSpace (text, ref pos, stop) && pos < stop) {
        // The first byte always belongs to the token, even when it is a
        // delimiter character such as the '-' of a numeric time zone.
        DateTokenFlags flags = datetok[text[pos]];

        if (flags != DateTokenFlags.None) {
            int tokenStart = pos;

            // Accumulate the flags of every byte up to the next delimiter.
            for (pos++; pos < stop && !IsTokenDelimeter (text[pos]); pos++)
                flags |= datetok[text[pos]];

            yield return new DateToken (flags, tokenStart, pos - tokenStart);
        }

        // Step over the delimiter that ended the token.
        pos++;
    }
}
/// <summary>
/// Parses tokens laid out as <c>[weekday] day month year time zone</c>.
/// </summary>
/// <remarks>
/// The weekday is optional. If the zone token cannot be parsed, an offset of zero is
/// used. Zone values outside -1400..1400 are shifted by multiples of 2400 until they
/// fall inside that range.
/// </remarks>
/// <param name="tokens">The tokens produced by the tokenizer.</param>
/// <param name="text">The buffer the tokens refer to.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <returns><c>true</c> if the tokens form a valid date in the expected layout.</returns>
static bool TryParseStandardDateFormat (IList<DateToken> tokens, byte[] text, out DateTimeOffset date)
{
    date = new DateTimeOffset ();

    // day, month, year, time and zone are all required
    if (tokens.Count < 5)
        return false;

    int next = 0;
    DayOfWeek weekday;

    if (TryGetWeekday (tokens[0], text, out weekday)) {
        // a leading weekday pushes the requirement up to six tokens
        if (tokens.Count < 6)
            return false;

        next = 1;
    }

    int day, month, year, hour, minute, second;

    if (!TryGetDayOfMonth (tokens[next], text, out day)
        || !TryGetMonth (tokens[next + 1], text, out month)
        || !TryGetYear (tokens[next + 2], text, out year)
        || !TryGetTimeOfDay (tokens[next + 3], text, out hour, out minute, out second))
        return false;

    int tzone;

    if (!TryGetTimeZone (tokens[next + 4], text, out tzone))
        tzone = 0;

    // Bring out-of-range zone values back into -1400..1400.
    while (tzone < -1400)
        tzone += 2400;

    while (tzone > 1400)
        tzone -= 2400;

    // The zone is an hhmm integer: split it into its two halves.
    var offset = new TimeSpan (tzone / 100, tzone % 100, 0);

    try {
        date = new DateTimeOffset (year, month, day, hour, minute, second, offset);
        return true;
    } catch (ArgumentOutOfRangeException) {
        return false;
    }
}
/// <summary>
/// Best-effort parser for dates that do not follow the standard token layout.
/// </summary>
/// <remarks>
/// Every token is examined in order and assigned to the first field it can fill:
/// weekday, month name, time of day, time zone, and finally the numeric fields
/// (year, month, day). Tokens that fit nowhere are ignored. A missing time defaults
/// to midnight and a missing time zone to an offset of zero.
/// </remarks>
/// <param name="tokens">The tokens produced by the tokenizer.</param>
/// <param name="text">The buffer the tokens refer to.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <returns>
/// <c>true</c> if a year, a month and a day were found and together with the time and
/// offset form a valid <see cref="System.DateTimeOffset"/>; otherwise <c>false</c>.
/// </returns>
static bool TryParseUnknownDateFormat (IList<DateToken> tokens, byte[] text, out DateTimeOffset date)
{
int? day = null, month = null, year = null, tzone = null;
int hour = 0, minute = 0, second = 0;
// true while the current month value was guessed from a bare number
bool numericMonth = false;
bool haveWeekday = false;
bool haveTime = false;
DayOfWeek weekday;
TimeSpan offset;
for (int i = 0; i < tokens.Count; i++) {
int value;
// The weekday is recognised but its value is not used afterwards.
if (!haveWeekday && tokens[i].IsWeekday) {
if (TryGetWeekday (tokens[i], text, out weekday)) {
haveWeekday = true;
continue;
}
}
// A month name wins over a month guessed from a number: the earlier
// number is then reinterpreted as the day.
if ((month == null || numericMonth) && tokens[i].IsMonth) {
if (TryGetMonth (tokens[i], text, out value)) {
if (numericMonth) {
numericMonth = false;
day = month;
}
month = value;
continue;
}
}
if (!haveTime && tokens[i].IsTimeOfDay) {
if (TryGetTimeOfDay (tokens[i], text, out hour, out minute, out second)) {
haveTime = true;
continue;
}
}
// Signed numeric zones and alphabetic zone names.
if (tzone == null && tokens[i].IsTimeZone) {
if (TryGetTimeZone (tokens[i], text, out value)) {
tzone = value;
continue;
}
}
if (tokens[i].IsNumeric) {
// A four-digit number is the year the first time one is seen;
// a later one is tried as an unsigned time zone.
if (tokens[i].Length == 4) {
if (year == null) {
if (TryGetYear (tokens[i], text, out value))
year = value;
} else if (tzone == null) {
if (TryGetTimeZone (tokens[i], text, out value))
tzone = value;
}
continue;
}
// Numbers of three digits, or of five or more, are ignored.
if (tokens[i].Length > 2)
continue;
// Note: we likely have either YYYY[-/]MM[-/]DD or MM[-/]DD[-/]YY
int endIndex = tokens[i].StartIndex + tokens[i].Length;
int index = tokens[i].StartIndex;
// The token is one or two digits, so the result need not be checked.
TryParseInt32 (text, ref index, endIndex, out value);
// The first number in 1..12 is provisionally taken as the month.
if (month == null && value > 0 && value <= 12) {
numericMonth = true;
month = value;
continue;
}
if (day == null && value > 0 && value <= 31) {
day = value;
continue;
}
// Two-digit years are only accepted from 69 upwards and map to 19xx.
if (year == null && value >= 69) {
year = 1900 + value;
continue;
}
}
// Unrecognised token: ignore it and move on.
}
// A date needs at least a year, a month and a day.
if (year == null || month == null || day == null) {
date = new DateTimeOffset ();
return false;
}
if (!haveTime)
hour = minute = second = 0;
if (tzone != null) {
// The zone is an hhmm integer: split it into its two halves.
int minutes = tzone.Value % 100;
int hours = tzone.Value / 100;
offset = new TimeSpan (hours, minutes, 0);
} else {
offset = new TimeSpan (0);
}
try {
date = new DateTimeOffset (year.Value, month.Value, day.Value, hour, minute, second, offset);
} catch (ArgumentOutOfRangeException) {
// e.g. a day that does not exist in the month, or an offset out of range
date = new DateTimeOffset ();
return false;
}
return true;
}
/// <summary>
/// Attempts to read an rfc822 date and time from a region of a byte buffer.
/// </summary>
/// <remarks>
/// The region starts at <paramref name="startIndex"/> and spans
/// <paramref name="length"/> bytes. The standard token layout is tried first and a
/// more lenient parser is used as a fallback.
/// </remarks>
/// <returns><c>true</c> if a date could be parsed; otherwise <c>false</c>.</returns>
/// <param name="buffer">The input buffer.</param>
/// <param name="startIndex">The position of the first byte to parse.</param>
/// <param name="length">The number of bytes to parse.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="buffer"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> and <paramref name="length"/> do not specify
/// a valid range in the byte array.
/// </exception>
public static bool TryParse (byte[] buffer, int startIndex, int length, out DateTimeOffset date)
{
    if (buffer == null)
        throw new ArgumentNullException ("buffer");

    if (startIndex < 0 || startIndex > buffer.Length)
        throw new ArgumentOutOfRangeException ("startIndex");

    if (length < 0 || length > (buffer.Length - startIndex))
        throw new ArgumentOutOfRangeException ("length");

    IList<DateToken> tokens = new List<DateToken> (TokenizeDate (buffer, startIndex, length));

    bool parsed = TryParseStandardDateFormat (tokens, buffer, out date)
        || TryParseUnknownDateFormat (tokens, buffer, out date);

    if (!parsed)
        date = new DateTimeOffset ();

    return parsed;
}
/// <summary>
/// Attempts to read an rfc822 date and time from the tail of a byte buffer.
/// </summary>
/// <remarks>
/// Parsing covers everything from <paramref name="startIndex"/> to the end of the
/// buffer. The standard token layout is tried first and a more lenient parser is
/// used as a fallback.
/// </remarks>
/// <returns><c>true</c> if a date could be parsed; otherwise <c>false</c>.</returns>
/// <param name="buffer">The input buffer.</param>
/// <param name="startIndex">The position of the first byte to parse.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="buffer"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> is not within the range of the byte array.
/// </exception>
public static bool TryParse (byte[] buffer, int startIndex, out DateTimeOffset date)
{
    if (buffer == null)
        throw new ArgumentNullException ("buffer");

    if (startIndex < 0 || startIndex > buffer.Length)
        throw new ArgumentOutOfRangeException ("startIndex");

    int remaining = buffer.Length - startIndex;
    IList<DateToken> tokens = new List<DateToken> (TokenizeDate (buffer, startIndex, remaining));

    bool parsed = TryParseStandardDateFormat (tokens, buffer, out date)
        || TryParseUnknownDateFormat (tokens, buffer, out date);

    if (!parsed)
        date = new DateTimeOffset ();

    return parsed;
}
/// <summary>
/// Attempts to read an rfc822 date and time from an entire byte buffer.
/// </summary>
/// <remarks>
/// The standard token layout is tried first and a more lenient parser is used as a
/// fallback.
/// </remarks>
/// <returns><c>true</c> if a date could be parsed; otherwise <c>false</c>.</returns>
/// <param name="buffer">The input buffer.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="buffer"/> is <c>null</c>.
/// </exception>
public static bool TryParse (byte[] buffer, out DateTimeOffset date)
{
    if (buffer == null)
        throw new ArgumentNullException ("buffer");

    IList<DateToken> tokens = new List<DateToken> (TokenizeDate (buffer, 0, buffer.Length));

    bool parsed = TryParseStandardDateFormat (tokens, buffer, out date)
        || TryParseUnknownDateFormat (tokens, buffer, out date);

    if (!parsed)
        date = new DateTimeOffset ();

    return parsed;
}
/// <summary>
/// Attempts to read an rfc822 date and time from a string.
/// </summary>
/// <remarks>
/// The text is converted to UTF-8 bytes and then parsed. The standard token layout is
/// tried first and a more lenient parser is used as a fallback.
/// </remarks>
/// <returns><c>true</c> if a date could be parsed; otherwise <c>false</c>.</returns>
/// <param name="text">The input text.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="text"/> is <c>null</c>.
/// </exception>
public static bool TryParse (string text, out DateTimeOffset date)
{
    if (text == null)
        throw new ArgumentNullException ("text");

    byte[] bytes = Encoding.UTF8.GetBytes (text);
    IList<DateToken> tokens = new List<DateToken> (TokenizeDate (bytes, 0, bytes.Length));

    bool parsed = TryParseStandardDateFormat (tokens, bytes, out date)
        || TryParseUnknownDateFormat (tokens, bytes, out date);

    if (!parsed)
        date = new DateTimeOffset ();

    return parsed;
}
}
}
使用系统;
使用系统文本;
使用System.Collections.Generic;
芸苔{
[旗帜]
枚举DateTokenFlags:字节
{
无=0,
非数字=(1=endIndex | | text[index++]!=(字节):)
返回false;
如果(!TryParseInt32(文本,参考索引,结束索引,输出分钟)| |分钟>59)
返回false;
//仅允许hh:mm(即不带:ss?)
如果(索引>=endIndex | | text[index++]!=(字节)“:”)
返回true;
如果(!TryParseInt32(文本,参考索引,结束索引,输出秒)| |秒>59)
返回false;
返回索引==结束索引;
}
静态bool-TryGetTimeZone(DateToken令牌,字节[]文本,out-int-tzone)
{
tzone=0;
if(令牌.IsNumericZone){
int endIndex=token.StartIndex+token.Length;
int index=token.StartIndex;
整数符号;
如果(文本[索引]==(字节)-)
符号=-1;
else if(文本[索引]==(字节)“+”)
符号=1;
其他的
返回false;
索引++;
如果(!TryParseInt32(text,ref index,endIndex,out tzone)| | index!=endIndex)
返回false;
tzone*=符号;
}else if(令牌.IsAlphaZone){
如果(token.Length>3)
返回false;
var name=Encoding.ASCII.GetString(text,token.StartIndex,token.Length);
如果(!timezones.TryGetValue(名称,out-tzone))
返回false;
}else if(token.IsNumeric){
int endIndex=token.StartIndex+token.Length;
int index=token.StartIndex;
如果(!ParseUtils.TryParseInt32(text,ref index,endIndex,out tzone)| | index!=endIndex)
返回false;
}
返回true;
}
静态bool IsWhiteSpace(字节c)
{
返回c=''| | c='\t';
}
静态布尔指数计(字节c)
{
返回c==(字节)“-”| c==(字节)/“| c==(字节)”,| IsWhiteSpace(c);
}
静态bool SkipWhiteSpace(字节[]文本,ref int index,int endIndex)
{
int startIndex=索引;
while(index起始指数;
}
静态bool SkipComment(字节[]文本,ref int index,int endIndex)
{
布尔逃逸=假;
int深度=1;
索引++;
而(索引0){
如果(文本[索引]==(字节)\\\){
逃脱=!逃脱;
}否则,如果(!已转义){
如果(文本[索引]==(字节)'('))
深度++;
else if(文本[索引]==(字节)')
深度--;
逃逸=假;
}否则{
逃逸=假;
}
索引++;
}
返回深度==0;
}
静态bool skipcommentsandwitespace(字节[]文本,ref int索引,int endIndex)
{
SkipWhiteSpace(文本、参考索引、结束索引);
而(索引=endIndex)
打破
//获取此令牌的初始掩码
if((mask=datetok[text[index]])!=DateTokenFlags.None){
开始=索引++;
//查找此令牌的结尾
而(索引