C# 如何在C#中将RTF字符串转换为文本
有没有一种简单的方法可以从Rtf字符串中提取文本而无需使用 例如:C# 如何在C中将rtf字符串转换为文本#,c#,C#,有没有一种简单的方法可以从Rtf字符串中提取文本而无需使用 例如: {\rtf1\ansi\ansicpg1252\uc1\htmautsp\deff2{\fonttbl{\f0\fcharset0 Times New Roman;}{\f2\fcharset0 Segoe UI;}}{\colortbl\red0\green0\blue0;\red255\green255\blue255;}\loch\hich\dbch\pard\plain\ltrpar\itap0{\lang1033\fs
{\rtf1\ansi\ansicpg1252\uc1\htmautsp\deff2{\fonttbl{\f0\fcharset0 Times New Roman;}{\f2\fcharset0 Segoe UI;}}{\colortbl\red0\green0\blue0;\red255\green255\blue255;}\loch\hich\dbch\pard\plain\ltrpar\itap0{\lang1033\fs18\f2\cf0 \cf0\ql{\f2 {\lang2070\ltrch foo}\li0\ri0\sa0\sb0\fi0\ql\par}
{\f2 {\lang2070\ltrch bar }\li0\ri0\sa0\sb0\fi0\ql\par}
}
}
应返回:
foo
bar
MSDN上有一篇简单的文章可以实现您的目标。 如何在纯C#中执行此操作,而无需引用其他库: 有人写了一个类,按照提问者的要求将RTF剥离为纯文本。 这是链接。 这是他的代码:
/// <summary>
/// Rich Text Stripper: extracts the plain text from an RTF string using only
/// a regular expression and a small state machine (no UI control required).
/// </summary>
/// <remarks>
/// Translated from Python located at:
/// http://stackoverflow.com/a/188877/448
/// </remarks>
public static class RichTextStripper
{
    /// <summary>
    /// Parser state saved when a group opens ("{") and restored when it closes ("}").
    /// </summary>
    private class StackEntry
    {
        public int NumberOfCharactersToSkip { get; set; }
        public bool Ignorable { get; set; }

        public StackEntry(int numberOfCharactersToSkip, bool ignorable)
        {
            NumberOfCharactersToSkip = numberOfCharactersToSkip;
            Ignorable = ignorable;
        }
    }

    // Alternatives, in order: control word with optional numeric argument (groups 1, 2),
    // \'xx hex escape (group 3), control symbol (group 4), brace (group 5),
    // line breaks (no group, dropped), any other single character (group 6).
    private static readonly Regex _rtfRegex = new Regex(@"\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'([0-9a-f]{2})|\\([^a-z])|([{}])|[\r\n]+|(.)", RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Control words that start a destination whose content is not emitted.
    // A hash set keeps the per-control-word membership test constant time.
    private static readonly HashSet<string> destinations = new HashSet<string>(StringComparer.Ordinal)
    {
        "aftncn","aftnsep","aftnsepc","annotation","atnauthor","atndate","atnicn","atnid",
        "atnparent","atnref","atntime","atrfend","atrfstart","author","background",
        "bkmkend","bkmkstart","blipuid","buptim","category","colorschememapping",
        "colortbl","comment","company","creatim","datafield","datastore","defchp","defpap",
        "do","doccomm","docvar","dptxbxtext","ebcend","ebcstart","factoidname","falt",
        "fchars","ffdeftext","ffentrymcr","ffexitmcr","ffformat","ffhelptext","ffl",
        "ffname","ffstattext","field","file","filetbl","fldinst","fldrslt","fldtype",
        "fname","fontemb","fontfile","fonttbl","footer","footerf","footerl","footerr",
        "footnote","formfield","ftncn","ftnsep","ftnsepc","g","generator","gridtbl",
        "header","headerf","headerl","headerr","hl","hlfr","hlinkbase","hlloc","hlsrc",
        "hsv","htmltag","info","keycode","keywords","latentstyles","lchars","levelnumbers",
        "leveltext","lfolevel","linkval","list","listlevel","listname","listoverride",
        "listoverridetable","listpicture","liststylename","listtable","listtext",
        "lsdlockedexcept","macc","maccPr","mailmerge","maln","malnScr","manager","margPr",
        "mbar","mbarPr","mbaseJc","mbegChr","mborderBox","mborderBoxPr","mbox","mboxPr",
        "mchr","mcount","mctrlPr","md","mdeg","mdegHide","mden","mdiff","mdPr","me",
        "mendChr","meqArr","meqArrPr","mf","mfName","mfPr","mfunc","mfuncPr","mgroupChr",
        "mgroupChrPr","mgrow","mhideBot","mhideLeft","mhideRight","mhideTop","mhtmltag",
        "mlim","mlimloc","mlimlow","mlimlowPr","mlimupp","mlimuppPr","mm","mmaddfieldname",
        "mmath","mmathPict","mmathPr","mmaxdist","mmc","mmcJc","mmconnectstr",
        "mmconnectstrdata","mmcPr","mmcs","mmdatasource","mmheadersource","mmmailsubject",
        "mmodso","mmodsofilter","mmodsofldmpdata","mmodsomappedname","mmodsoname",
        "mmodsorecipdata","mmodsosort","mmodsosrc","mmodsotable","mmodsoudl",
        "mmodsoudldata","mmodsouniquetag","mmPr","mmquery","mmr","mnary","mnaryPr",
        "mnoBreak","mnum","mobjDist","moMath","moMathPara","moMathParaPr","mopEmu",
        "mphant","mphantPr","mplcHide","mpos","mr","mrad","mradPr","mrPr","msepChr",
        "mshow","mshp","msPre","msPrePr","msSub","msSubPr","msSubSup","msSubSupPr","msSup",
        "msSupPr","mstrikeBLTR","mstrikeH","mstrikeTLBR","mstrikeV","msub","msubHide",
        "msup","msupHide","mtransp","mtype","mvertJc","mvfmf","mvfml","mvtof","mvtol",
        "mzeroAsc","mzeroDesc","mzeroWid","nesttableprops","nextfile","nonesttables",
        "objalias","objclass","objdata","object","objname","objsect","objtime","oldcprops",
        "oldpprops","oldsprops","oldtprops","oleclsid","operator","panose","password",
        "passwordhash","pgp","pgptbl","picprop","pict","pn","pnseclvl","pntext","pntxta",
        "pntxtb","printim","private","propname","protend","protstart","protusertbl","pxe",
        "result","revtbl","revtim","rsidtbl","rxe","shp","shpgrp","shpinst",
        "shppict","shprslt","shptxt","sn","sp","staticval","stylesheet","subject","sv",
        "svb","tc","template","themedata","title","txe","ud","upr","userprops",
        "wgrffmtfilter","windowcaption","writereservation","writereservhash","xe","xform",
        "xmlattrname","xmlattrvalue","xmlclose","xmlname","xmlnstbl",
        "xmlopen"
    };

    // Control words that are replaced by literal text in the output.
    private static readonly Dictionary<string, string> specialCharacters = new Dictionary<string, string>
    {
        { "par", "\n" },
        { "sect", "\n\n" },
        { "page", "\n\n" },
        { "line", "\n" },
        { "tab", "\t" },
        { "emdash", "\u2014" },
        { "endash", "\u2013" },
        { "emspace", "\u2003" },
        { "enspace", "\u2002" },
        { "qmspace", "\u2005" },
        { "bullet", "\u2022" },
        { "lquote", "\u2018" },
        { "rquote", "\u2019" },
        { "ldblquote", "\u201C" },
        { "rdblquote", "\u201D" },
    };

    /// <summary>
    /// Strip RTF Tags from RTF Text
    /// </summary>
    /// <param name="inputRtf">RTF formatted text</param>
    /// <returns>
    /// Plain text from RTF, or <c>null</c> when <paramref name="inputRtf"/> is <c>null</c>.
    /// Malformed input (unmatched "}", <c>\u</c> or <c>\uc</c> without a usable number)
    /// is tolerated instead of raising an exception.
    /// </returns>
    public static string StripRichTextFormat(string inputRtf)
    {
        if (inputRtf == null)
        {
            return null;
        }

        var stack = new Stack<StackEntry>();
        bool ignorable = false; // Whether this group (and all inside it) are "ignorable".
        int ucskip = 1;         // Number of ASCII characters to skip after a unicode character.
        int curskip = 0;        // Number of ASCII characters left to skip
        var output = new System.Text.StringBuilder(); // Output buffer.

        foreach (Match match in _rtfRegex.Matches(inputRtf))
        {
            string word = match.Groups[1].Value;
            string arg = match.Groups[2].Value;
            string hex = match.Groups[3].Value;
            string character = match.Groups[4].Value;
            string brace = match.Groups[5].Value;
            string tchar = match.Groups[6].Value;

            if (brace.Length > 0)
            {
                curskip = 0;
                if (brace == "{")
                {
                    // Push state
                    stack.Push(new StackEntry(ucskip, ignorable));
                }
                else if (stack.Count > 0)
                {
                    // Pop state
                    StackEntry entry = stack.Pop();
                    ucskip = entry.NumberOfCharactersToSkip;
                    ignorable = entry.Ignorable;
                }
                // An unmatched "}" has no state to restore and is skipped.
            }
            else if (character.Length > 0) // \x (not a letter)
            {
                curskip = 0;
                if (character == "~")
                {
                    if (!ignorable)
                    {
                        output.Append('\u00A0');
                    }
                }
                else if ("{}\\".Contains(character))
                {
                    if (!ignorable)
                    {
                        output.Append(character);
                    }
                }
                else if (character == "*")
                {
                    ignorable = true;
                }
            }
            else if (word.Length > 0) // \foo
            {
                curskip = 0;
                string replacement;
                int number;
                if (destinations.Contains(word))
                {
                    ignorable = true;
                }
                else if (ignorable)
                {
                    // Inside an ignorable group every other control word is dropped.
                }
                else if (specialCharacters.TryGetValue(word, out replacement))
                {
                    output.Append(replacement);
                }
                else if (word == "uc")
                {
                    // Invariant culture: the argument is machine-written ASCII, and some
                    // cultures use a negative sign other than '-'.
                    if (Int32.TryParse(arg, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out number))
                    {
                        ucskip = number;
                    }
                }
                else if (word == "u")
                {
                    if (Int32.TryParse(arg, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out number))
                    {
                        if (number < 0)
                        {
                            number += 0x10000;
                        }

                        if (number >= 0 && number <= 0xFFFF)
                        {
                            // Appended as a raw UTF-16 code unit so that a surrogate pair
                            // written as two consecutive \u words recombines into one
                            // character; Char.ConvertFromUtf32 rejects surrogate halves.
                            output.Append((char)number);
                        }
                        else if (number <= 0x10FFFF)
                        {
                            output.Append(Char.ConvertFromUtf32(number));
                        }
                        else
                        {
                            // Not a Unicode code point at all.
                            output.Append('?');
                        }

                        curskip = ucskip;
                    }
                }
            }
            else if (hex.Length > 0) // \'xx
            {
                if (curskip > 0)
                {
                    curskip -= 1;
                }
                else if (!ignorable)
                {
                    // Two hex digits, so the value is always in 0..255.
                    int c = Int32.Parse(hex, System.Globalization.NumberStyles.HexNumber, System.Globalization.CultureInfo.InvariantCulture);
                    output.Append((char)c);
                }
            }
            else if (tchar.Length > 0)
            {
                if (curskip > 0)
                {
                    curskip -= 1;
                }
                else if (!ignorable)
                {
                    output.Append(tchar);
                }
            }
        }

        return output.ToString();
    }
}
//
///富文本剥离器
///
///
///从Python翻译而来,位于:
/// http://stackoverflow.com/a/188877/448
///
公共静态类RichTextStripper
{
私有类StackEntry
{
public int NumberOfCharactersToSkip{get;set;}
公共布尔可忽略{get;set;}
公共堆栈条目(int numberOfCharactersToSkip,bool ignorable)
{
NumberOfCharactersToSkip=NumberOfCharactersToSkip;
可忽略的=可忽略的;
}
}
私有静态只读Regex\u rtfRegex=newregex(@“\\([a-z]{1,32})(\d{1,10})[??\”([0-9a-f]{2})([^a-z])([{}])[\r\n]+{124;(”,RegexOptions.Singleline;RegexOptions.IgnoreCase);
私有静态只读列表目的地=新列表
{
“aftncn”、“AFTNSP”、“AFTNESPC”、“注释”、“ATNAUTOR”、“atndate”、“atnicn”、“atnid”,
“ATNPART”、“atnref”、“atntime”、“atrfend”、“atrfstart”、“作者”、“背景”,
“bkmkend”、“bkmkstart”、“blipuid”、“buptim”、“category”、“colorschemapping”,
“colortbl”、“comment”、“company”、“creatim”、“datafield”、“datastore”、“defchp”、“defpap”,
“do”、“doccomm”、“docvar”、“dptxbxtext”、“ebcend”、“ebcstart”、“factoidname”、“falt”,
“fchars”、“ffdeftext”、“ffentrymcr”、“FFXITMCR”、“ffformat”、“ffhelptext”、“ffl”,
“ffname”、“ffstattext”、“field”、“file”、“filetbl”、“fldinst”、“fldrslt”、“fldtype”,
“fname”、“fontemb”、“fontfile”、“fonttbl”、“footer”、“footerf”、“footerl”、“footer”,
“脚注”、“formfield”、“ftncn”、“ftnsep”、“ftnsepc”、“g”、“生成器”、“gridtbl”,
“header”、“headerf”、“headerl”、“headerr”、“hl”、“hlfr”、“hlinkbase”、“hlloc”、“hlsrc”,
“hsv”、“htmltag”、“信息”、“键码”、“关键字”、“最新样式”、“lchars”、“级别编号”,
“leveltext”、“lfolevel”、“linkval”、“list”、“listlevel”、“listname”、“listoverride”,
“listoverridetable”、“listpicture”、“liststylename”、“listtable”、“listtext”,
“lsdlockedexcept”、“macc”、“maccPr”、“mailmerge”、“maln”、“malnScr”、“manager”、“margPr”,
“mbar”、“mbarPr”、“mbaseJc”、“mbegChr”、“mborderBox”、“mborderBoxPr”、“mbox”、“mboxPr”,
“mchr”、“mcount”、“mctrlPr”、“md”、“mdeg”、“mdegHide”、“mden”、“mdiff”、“mdPr”、“me”,
“mendChr”、“meqArr”、“meqArrPr”、“mf”、“mfName”、“mfPr”、“mfunc”、“mfuncPr”、“mgroupChr”,
“mgroupChrPr”、“mgrow”、“mhideBot”、“mhideLeft”、“mhideRight”、“mhideTop”、“mhtmltag”,
“mlim”、“mlimloc”、“mlimlow”、“mlimlowPr”、“mlimupp”、“mlimuppr”、“mm”、“mmaddfieldname”,
“mmath”、“mmathPict”、“mmathPr”、“mmaxdist”、“mmc”、“mmcJc”、“mmconnectstr”,
“mmconnectstrdata”、“mmcPr”、“mmcs”、“mmdatasource”、“mmheadersource”、“mmmailsubject”,
“mmodso”、“mmodsofilter”、“mmodsofdmpdata”、“mmodsomappedname”、“mmodsoname”,
“mmodsorecipdata”、“mmodsoort”、“mmodsosrc”、“mmodsotable”、“mmodsoudl”,
“mmodsoudldata”、“mmodsouniquetag”、“mmPr”、“mmquery”、“mmr”、“mnary”、“mnaryPr”,
“mnoBreak”、“mnum”、“mobjDist”、“moMath”、“moMathPara”、“momathparpar”、“mopEmu”,
“MPANT”、“MPANTPR”、“mplcHide”、“mpos”、“mr”、“mrad”、“mradPr”、“mrPr”、“msepChr”,
“mshow”、“mshp”、“msPre”、“MSPREP”、“msSubPr”、“msSubPr”、“msSubSup”、“msSubSupPr”、“MSSUPSUP”,
“msSupPr”、“mstrikeBLTR”、“mstrikeH”、“mstrikeTLBR”、“mstrikeV”、“msub”、“msubHide”,
“msup”、“msupHide”、“MTTransp”、“mtype”、“mvertJc”、“mvfmf”、“mvfml”、“mvtof”、“mvtol”,
“mzeroAsc”、“mzeroDesc”、“mzeroWid”、“nestableprops”、“nextfile”、“nonesttables”,
“objalias”、“objclass”、“objdata”、“object”、“objname”、“objsect”、“objtime”、“oldcrops”,
“oldpprops”、“oldsprops”、“oldtprops”、“oleclsid”、“运算符”、“panose”、“密码”,
“密码哈希”、“pgp”、“pgptbl”、“picprop”、“pict”、“pn”、“pnseclvl”、“pntext”、“pntxta”,
“pntxtb”、“printim”、“private”、“propname”、“protend”、“protstart”、“protusertbl”、“pxe”,
“结果”、“revtbl”、“revtim”、“rsidtbl”、“rxe”、“shp”、“shpgrp”、“shpinst”,
“shppict”、“shprslt”、“shptxt”、“sn”、“sp”、“staticval”、“样式表”、“主题”、“sv”,
“svb”、“tc”、“模板”、“主题数据”、“标题”、“txe”、“ud”、“upr”、“用户道具”,
“wgrffmtfilter”、“windowcaption”、“WriterServation”、“WriterServHash”、“xe”、“xform”,
“xmlattrname”、“xmlattrvalue”、“xmlclose”、“xmlname”、“xmlnstbl”,
“xmlopen”
};
专用静态只读词典specialCharacters=新词典
{
{“par”,“\n”},
{“sect”,“\n\n”},
{“page”,“\n\n”},
{“行”,“\n”},
{“tab”,“\t”},
{“emdash”,“\u2014”},
{“endash”,“\u2013”},
{“emspace”,“\u2003”},
{“enspace”,“\u2002”},
{“qmspace”,“\u2005”},
{“bullet”,“\u2022”},
{“lquote”,“\u2018”},
{“rquote”,“\u2019”},
{“ldblquote”,“\u201C”},
{“rdblquote”,“\u201D”},
};
///
///从RTF文本中剥离RTF标记
///
///RTF格式文本
///来自RTF的纯文本
公共静态字符串StripRichTextFormat(字符串inpurtf)
{
if(inpurtf==null)
{
返回null;
}
字符串返回字符串;
var stack=新堆栈();
bool ignorable=false;//此组(及其内部的所有组)是否“可忽略”。
int ucskip=1;//在unicode字符之后要跳过的ASCII字符数。
int curskip=0;//要跳过的ASCII字符数
var outList=new List();//输出缓冲区。
MatchCollection matches=\u rtfRegex.Matc
/// <summary>
/// Rich Text Stripper: extracts the plain text from an RTF string using only
/// a regular expression and a small state machine (no UI control required).
/// </summary>
/// <remarks>
/// Translated from Python located at:
/// http://stackoverflow.com/a/188877/448
/// </remarks>
public static class RichTextStripper
{
    /// <summary>
    /// Parser state saved when a group opens ("{") and restored when it closes ("}").
    /// </summary>
    private class StackEntry
    {
        public int NumberOfCharactersToSkip { get; set; }
        public bool Ignorable { get; set; }

        public StackEntry(int numberOfCharactersToSkip, bool ignorable)
        {
            NumberOfCharactersToSkip = numberOfCharactersToSkip;
            Ignorable = ignorable;
        }
    }

    // Alternatives, in order: control word with optional numeric argument (groups 1, 2),
    // \'xx hex escape (group 3), control symbol (group 4), brace (group 5),
    // line breaks (no group, dropped), any other single character (group 6).
    private static readonly Regex _rtfRegex = new Regex(@"\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'([0-9a-f]{2})|\\([^a-z])|([{}])|[\r\n]+|(.)", RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Control words that start a destination whose content is not emitted.
    // A hash set keeps the per-control-word membership test constant time.
    private static readonly HashSet<string> destinations = new HashSet<string>(StringComparer.Ordinal)
    {
        "aftncn","aftnsep","aftnsepc","annotation","atnauthor","atndate","atnicn","atnid",
        "atnparent","atnref","atntime","atrfend","atrfstart","author","background",
        "bkmkend","bkmkstart","blipuid","buptim","category","colorschememapping",
        "colortbl","comment","company","creatim","datafield","datastore","defchp","defpap",
        "do","doccomm","docvar","dptxbxtext","ebcend","ebcstart","factoidname","falt",
        "fchars","ffdeftext","ffentrymcr","ffexitmcr","ffformat","ffhelptext","ffl",
        "ffname","ffstattext","field","file","filetbl","fldinst","fldrslt","fldtype",
        "fname","fontemb","fontfile","fonttbl","footer","footerf","footerl","footerr",
        "footnote","formfield","ftncn","ftnsep","ftnsepc","g","generator","gridtbl",
        "header","headerf","headerl","headerr","hl","hlfr","hlinkbase","hlloc","hlsrc",
        "hsv","htmltag","info","keycode","keywords","latentstyles","lchars","levelnumbers",
        "leveltext","lfolevel","linkval","list","listlevel","listname","listoverride",
        "listoverridetable","listpicture","liststylename","listtable","listtext",
        "lsdlockedexcept","macc","maccPr","mailmerge","maln","malnScr","manager","margPr",
        "mbar","mbarPr","mbaseJc","mbegChr","mborderBox","mborderBoxPr","mbox","mboxPr",
        "mchr","mcount","mctrlPr","md","mdeg","mdegHide","mden","mdiff","mdPr","me",
        "mendChr","meqArr","meqArrPr","mf","mfName","mfPr","mfunc","mfuncPr","mgroupChr",
        "mgroupChrPr","mgrow","mhideBot","mhideLeft","mhideRight","mhideTop","mhtmltag",
        "mlim","mlimloc","mlimlow","mlimlowPr","mlimupp","mlimuppPr","mm","mmaddfieldname",
        "mmath","mmathPict","mmathPr","mmaxdist","mmc","mmcJc","mmconnectstr",
        "mmconnectstrdata","mmcPr","mmcs","mmdatasource","mmheadersource","mmmailsubject",
        "mmodso","mmodsofilter","mmodsofldmpdata","mmodsomappedname","mmodsoname",
        "mmodsorecipdata","mmodsosort","mmodsosrc","mmodsotable","mmodsoudl",
        "mmodsoudldata","mmodsouniquetag","mmPr","mmquery","mmr","mnary","mnaryPr",
        "mnoBreak","mnum","mobjDist","moMath","moMathPara","moMathParaPr","mopEmu",
        "mphant","mphantPr","mplcHide","mpos","mr","mrad","mradPr","mrPr","msepChr",
        "mshow","mshp","msPre","msPrePr","msSub","msSubPr","msSubSup","msSubSupPr","msSup",
        "msSupPr","mstrikeBLTR","mstrikeH","mstrikeTLBR","mstrikeV","msub","msubHide",
        "msup","msupHide","mtransp","mtype","mvertJc","mvfmf","mvfml","mvtof","mvtol",
        "mzeroAsc","mzeroDesc","mzeroWid","nesttableprops","nextfile","nonesttables",
        "objalias","objclass","objdata","object","objname","objsect","objtime","oldcprops",
        "oldpprops","oldsprops","oldtprops","oleclsid","operator","panose","password",
        "passwordhash","pgp","pgptbl","picprop","pict","pn","pnseclvl","pntext","pntxta",
        "pntxtb","printim","private","propname","protend","protstart","protusertbl","pxe",
        "result","revtbl","revtim","rsidtbl","rxe","shp","shpgrp","shpinst",
        "shppict","shprslt","shptxt","sn","sp","staticval","stylesheet","subject","sv",
        "svb","tc","template","themedata","title","txe","ud","upr","userprops",
        "wgrffmtfilter","windowcaption","writereservation","writereservhash","xe","xform",
        "xmlattrname","xmlattrvalue","xmlclose","xmlname","xmlnstbl",
        "xmlopen"
    };

    // Control words that are replaced by literal text in the output.
    private static readonly Dictionary<string, string> specialCharacters = new Dictionary<string, string>
    {
        { "par", "\n" },
        { "sect", "\n\n" },
        { "page", "\n\n" },
        { "line", "\n" },
        { "tab", "\t" },
        { "emdash", "\u2014" },
        { "endash", "\u2013" },
        { "emspace", "\u2003" },
        { "enspace", "\u2002" },
        { "qmspace", "\u2005" },
        { "bullet", "\u2022" },
        { "lquote", "\u2018" },
        { "rquote", "\u2019" },
        { "ldblquote", "\u201C" },
        { "rdblquote", "\u201D" },
    };

    /// <summary>
    /// Strip RTF Tags from RTF Text
    /// </summary>
    /// <param name="inputRtf">RTF formatted text</param>
    /// <returns>
    /// Plain text from RTF, or <c>null</c> when <paramref name="inputRtf"/> is <c>null</c>.
    /// Malformed input (unmatched "}", <c>\u</c> or <c>\uc</c> without a usable number)
    /// is tolerated instead of raising an exception.
    /// </returns>
    public static string StripRichTextFormat(string inputRtf)
    {
        if (inputRtf == null)
        {
            return null;
        }

        var stack = new Stack<StackEntry>();
        bool ignorable = false; // Whether this group (and all inside it) are "ignorable".
        int ucskip = 1;         // Number of ASCII characters to skip after a unicode character.
        int curskip = 0;        // Number of ASCII characters left to skip
        var output = new System.Text.StringBuilder(); // Output buffer.

        foreach (Match match in _rtfRegex.Matches(inputRtf))
        {
            string word = match.Groups[1].Value;
            string arg = match.Groups[2].Value;
            string hex = match.Groups[3].Value;
            string character = match.Groups[4].Value;
            string brace = match.Groups[5].Value;
            string tchar = match.Groups[6].Value;

            if (brace.Length > 0)
            {
                curskip = 0;
                if (brace == "{")
                {
                    // Push state
                    stack.Push(new StackEntry(ucskip, ignorable));
                }
                else if (stack.Count > 0)
                {
                    // Pop state
                    StackEntry entry = stack.Pop();
                    ucskip = entry.NumberOfCharactersToSkip;
                    ignorable = entry.Ignorable;
                }
                // An unmatched "}" has no state to restore and is skipped.
            }
            else if (character.Length > 0) // \x (not a letter)
            {
                curskip = 0;
                if (character == "~")
                {
                    if (!ignorable)
                    {
                        output.Append('\u00A0');
                    }
                }
                else if ("{}\\".Contains(character))
                {
                    if (!ignorable)
                    {
                        output.Append(character);
                    }
                }
                else if (character == "*")
                {
                    ignorable = true;
                }
            }
            else if (word.Length > 0) // \foo
            {
                curskip = 0;
                string replacement;
                int number;
                if (destinations.Contains(word))
                {
                    ignorable = true;
                }
                else if (ignorable)
                {
                    // Inside an ignorable group every other control word is dropped.
                }
                else if (specialCharacters.TryGetValue(word, out replacement))
                {
                    output.Append(replacement);
                }
                else if (word == "uc")
                {
                    // Invariant culture: the argument is machine-written ASCII, and some
                    // cultures use a negative sign other than '-'.
                    if (Int32.TryParse(arg, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out number))
                    {
                        ucskip = number;
                    }
                }
                else if (word == "u")
                {
                    if (Int32.TryParse(arg, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out number))
                    {
                        if (number < 0)
                        {
                            number += 0x10000;
                        }

                        if (number >= 0 && number <= 0xFFFF)
                        {
                            // Appended as a raw UTF-16 code unit so that a surrogate pair
                            // written as two consecutive \u words recombines into one
                            // character; Char.ConvertFromUtf32 rejects surrogate halves.
                            output.Append((char)number);
                        }
                        else if (number <= 0x10FFFF)
                        {
                            output.Append(Char.ConvertFromUtf32(number));
                        }
                        else
                        {
                            // Not a Unicode code point at all.
                            output.Append('?');
                        }

                        curskip = ucskip;
                    }
                }
            }
            else if (hex.Length > 0) // \'xx
            {
                if (curskip > 0)
                {
                    curskip -= 1;
                }
                else if (!ignorable)
                {
                    // Two hex digits, so the value is always in 0..255.
                    int c = Int32.Parse(hex, System.Globalization.NumberStyles.HexNumber, System.Globalization.CultureInfo.InvariantCulture);
                    output.Append((char)c);
                }
            }
            else if (tchar.Length > 0)
            {
                if (curskip > 0)
                {
                    curskip -= 1;
                }
                else if (!ignorable)
                {
                    output.Append(tchar);
                }
            }
        }

        return output.ToString();
    }
}
/// <summary>
/// Strip RTF Tags from RTF Text
/// </summary>
/// <param name="inputRtf">RTF formatted text</param>
/// <returns>
/// Plain text from RTF, or <c>null</c> when <paramref name="inputRtf"/> is <c>null</c>.
/// Malformed input (unmatched "}", <c>\u</c> or <c>\uc</c> without a usable number)
/// is tolerated instead of raising an exception.
/// </returns>
public static string StripRichTextFormat(string inputRtf)
{
    if (inputRtf == null)
    {
        return null;
    }

    var stack = new Stack<StackEntry>();
    bool ignorable = false; // Whether this group (and all inside it) are "ignorable".
    int ucskip = 1;         // Number of ASCII characters to skip after a unicode character.
    int curskip = 0;        // Number of ASCII characters left to skip
    var output = new System.Text.StringBuilder(); // Output buffer.

    foreach (Match match in _rtfRegex.Matches(inputRtf))
    {
        string word = match.Groups[1].Value;
        string arg = match.Groups[2].Value;
        string hex = match.Groups[3].Value;
        string character = match.Groups[4].Value;
        string brace = match.Groups[5].Value;
        string tchar = match.Groups[6].Value;

        if (brace.Length > 0)
        {
            curskip = 0;
            if (brace == "{")
            {
                // Push state
                stack.Push(new StackEntry(ucskip, ignorable));
            }
            else if (stack.Count > 0)
            {
                // Pop state
                StackEntry entry = stack.Pop();
                ucskip = entry.NumberOfCharactersToSkip;
                ignorable = entry.Ignorable;
            }
            // An unmatched "}" has no state to restore and is skipped.
        }
        else if (character.Length > 0) // \x (not a letter)
        {
            curskip = 0;
            if (character == "~")
            {
                if (!ignorable)
                {
                    output.Append('\u00A0');
                }
            }
            else if ("{}\\".Contains(character))
            {
                if (!ignorable)
                {
                    output.Append(character);
                }
            }
            else if (character == "*")
            {
                ignorable = true;
            }
        }
        else if (word.Length > 0) // \foo
        {
            curskip = 0;
            string replacement;
            int number;
            if (destinations.Contains(word))
            {
                ignorable = true;
            }
            else if (ignorable)
            {
                // Inside an ignorable group every other control word is dropped.
            }
            else if (specialCharacters.TryGetValue(word, out replacement))
            {
                output.Append(replacement);
            }
            else if (word == "uc")
            {
                // Invariant culture: the argument is machine-written ASCII, and some
                // cultures use a negative sign other than '-'.
                if (Int32.TryParse(arg, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out number))
                {
                    ucskip = number;
                }
            }
            else if (word == "u")
            {
                if (Int32.TryParse(arg, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out number))
                {
                    if (number < 0)
                    {
                        number += 0x10000;
                    }

                    // A valid UTF-32 value lies between 0x000000 and 0x10ffff (inclusive)
                    // and Char.ConvertFromUtf32 rejects the surrogate range
                    // (0x00d800 ~ 0x00dfff). Values up to 0xFFFF are therefore appended
                    // as raw UTF-16 code units, so a surrogate pair written as two
                    // consecutive \u words recombines into one character instead of
                    // being replaced by "?".
                    if (number >= 0 && number <= 0xFFFF)
                    {
                        output.Append((char)number);
                    }
                    else if (number <= 0x10FFFF)
                    {
                        output.Append(Char.ConvertFromUtf32(number));
                    }
                    else
                    {
                        // Not a Unicode code point at all.
                        output.Append("?");
                    }

                    curskip = ucskip;
                }
            }
        }
        else if (hex.Length > 0) // \'xx
        {
            if (curskip > 0)
            {
                curskip -= 1;
            }
            else if (!ignorable)
            {
                // Two hex digits, so the value is always in 0..255.
                int c = Int32.Parse(hex, System.Globalization.NumberStyles.HexNumber, System.Globalization.CultureInfo.InvariantCulture);
                output.Append((char)c);
            }
        }
        else if (tchar.Length > 0)
        {
            if (curskip > 0)
            {
                curskip -= 1;
            }
            else if (!ignorable)
            {
                output.Append(tchar);
            }
        }
    }

    return output.ToString();
}
/// <summary>
/// Loads RTF markup into a fresh <c>FlowDocument</c> and returns the text of a
/// <c>TextRange</c> that spans the document from start to end.
/// </summary>
/// <param name="rtf">RTF markup; <c>null</c> is treated as an empty string.</param>
/// <returns>The <c>Text</c> of the range after the RTF has been loaded into it.</returns>
public string RtfToPlainText(string rtf)
{
    // The markup is handed to the loader as a UTF-8 encoded in-memory stream.
    byte[] rtfBytes = Encoding.UTF8.GetBytes(rtf ?? string.Empty);

    var document = new FlowDocument();
    var wholeDocument = new TextRange(document.ContentStart, document.ContentEnd);

    using (var rtfStream = new MemoryStream(rtfBytes))
    {
        wholeDocument.Load(rtfStream, DataFormats.Rtf);
    }

    return wholeDocument.Text;
}