HTML差异工具API

HTML差异工具API,html,api,diff,compare,double-byte,Html,Api,Diff,Compare,Double Byte,我正在寻找一个api,它可以直观地显示html在结构、字符/单词和样式方面的差异。这个工具还必须支持双字节字符,并足够灵活,我可以将其添加到我现有的网站上,以便轻松显示比较结果。我目前正在使用组件软件COM实现,它不支持双字节字符,大约六年没有更新过。我发现只有两个工具可以做到这一点:和。两者都提供了指定url和监视更改的功能。这就是我使用的: [http://code.google.com/p/google-diff-match-patch/][1] 我不得不写我自己的方法来做比较,但经过一点

我正在寻找一个api,它可以直观地显示html在结构、字符/单词和样式方面的差异。这个工具还必须支持双字节字符,并足够灵活,我可以将其添加到我现有的网站上,以便轻松显示比较结果。我目前正在使用组件软件COM实现,它不支持双字节字符,大约六年没有更新过。

我发现只有两个工具可以做到这一点:和。两者都提供了指定url和监视更改的功能。

这就是我使用的:

[http://code.google.com/p/google-diff-match-patch/][1]

我不得不自己编写方法来进行比较,但经过一番调整,效果看起来不错。这个实现比较的是传入的文本,所以如果您只是比较两个文本字符串,它可以正常工作。我把diff_prettyHtml方法改成了:

/// <summary>
/// Renders a list of diffs as an HTML fragment in which the diffed text itself
/// is HTML-escaped, so markup in the compared strings is shown literally.
/// </summary>
/// <param name="diffs">The diffs to render, in order.</param>
/// <returns>
/// The concatenated fragment: insertions wrapped in &lt;ins class='diff'&gt;,
/// deletions in &lt;del class='diff'&gt;, and unchanged text in &lt;span&gt;.
/// </returns>
public string diff_prettyHtml(List<Diff> diffs)
{
    StringBuilder output = new StringBuilder();

    foreach (Diff piece in diffs)
    {
        // Escape '&' first so the entities produced below are not escaped again;
        // the "<br>" is inserted last and is therefore left intact.
        string escaped = piece.text;
        escaped = escaped.Replace("&", "&amp;");
        escaped = escaped.Replace("<", "&lt;");
        escaped = escaped.Replace(">", "&gt;");
        escaped = escaped.Replace("\n", "<br>");

        if (piece.operation == Operation.INSERT)
        {
            output.Append("<ins class='diff'>");
            output.Append(escaped);
            output.Append("</ins>");
        }
        else if (piece.operation == Operation.DELETE)
        {
            output.Append("<del class='diff'>");
            output.Append(escaped);
            output.Append("</del>");
        }
        else if (piece.operation == Operation.EQUAL)
        {
            output.Append("<span>");
            output.Append(escaped);
            output.Append("</span>");
        }
        // Any other operation value contributes nothing to the output.
    }

    return output.ToString();
}
public string diff\u prettyHtml(列表差异)
{
StringBuilder html=新的StringBuilder();
foreach(差异中的差异aDiff)
{
string text=aDiff.text.Replace(“&”、“&;”).Replace(“,”).Replace(“\n”、“
”); 开关(自动断开操作) { 案例操作.插入: html.Append(“”).Append(文本) .附加(“”); 打破 案例操作。删除: html.Append(“”).Append(文本) .附加(“”); 打破 case Operation.EQUAL: html.Append(“”).Append(text.Append(“”); 打破 } } 返回html.ToString(); }
现在,如果您想对2个html字符串进行比较预览,这有点不同。这就是我所做的:

// Diff the two HTML strings, then render the result with diff_previewHtml,
// which leaves the markup unescaped.
DiffMatchPatch.diff_match_patch differ = new DiffMatchPatch.diff_match_patch();
List<DiffMatchPatch.Diff> changes = differ.diff_main(oldHtml, newHtml);
return differ.diff_previewHtml(changes);


/// <summary>
/// Renders a list of diffs as an HTML fragment without escaping the diffed
/// text, so markup contained in the diffs is emitted as-is.
/// </summary>
/// <param name="diffs">The diffs to render, in order.</param>
/// <returns>
/// The concatenated fragment: insertions wrapped in &lt;ins class='diff'&gt;,
/// deletions in &lt;del class='diff'&gt;, and unchanged text emitted bare.
/// </returns>
public string diff_previewHtml(List<Diff> diffs)
{
    StringBuilder output = new StringBuilder();

    foreach (Diff piece in diffs)
    {
        string raw = piece.text;

        if (piece.operation == Operation.INSERT)
        {
            output.Append("<ins class='diff'>");
            output.Append(raw);
            output.Append("</ins>");
        }
        else if (piece.operation == Operation.DELETE)
        {
            output.Append("<del class='diff'>");
            output.Append(raw);
            output.Append("</del>");
        }
        else if (piece.operation == Operation.EQUAL)
        {
            output.Append(raw);
        }
        // Any other operation value contributes nothing to the output.
    }

    return output.ToString();
}
DiffMatchPatch.diff_match_patch diff=新的DiffMatchPatch.diff_match_patch();
列表差异=diff.diff_main(oldHtml,
新HTML);
返回diff.diff_previewHtml(差异);
公共字符串差异(列表差异){
StringBuilder html=新的StringBuilder();
foreach(差异中的差异aDiff){
字符串文本=aDiff.text;
开关(自动断开操作){
案例操作.插入:
html.Append(“”).Append(文本)
.附加(“”);
打破
案例操作。删除:
html.Append(“”).Append(文本)
.附加(“”);
打破
case Operation.EQUAL:
html.Append(文本);
打破
}
}
返回html.ToString();
}
Unicoder类如下所示:

using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Linq;

namespace HtmlCompare
{
    /// <summary>
    /// Replaces every HTML tag in a string with a single placeholder character
    /// (<see cref="html2plain"/>) and later swaps the placeholders back for the
    /// tags (<see cref="plain2html"/>). After restoring the tags, ins/del diff
    /// tags that ended up wrapping unbalanced markup are rearranged so that
    /// unpaired tags sit outside the ins/del element.
    /// </summary>
    class Unicoder
    {
        private const string _htmlPattern = @"<(S*?)[^>]*>.*?|<.*?\/>";
        private const RegexOptions _regexOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;

        // Characters that end the element name inside a tag.
        private static readonly char[] _tagNameTerminators = { ' ', '\t', '\r', '\n', '/', '>' };

        // Maps each distinct tag text to the placeholder string that stands in for it.
        private readonly Dictionary<string, string> _htmlHash = new Dictionary<string, string>();

        // Element names that are always kept inside the diff tag.
        private readonly List<string> _blockElements = "img,br".Split(',').ToList<string>();

        // Code point of the next placeholder to hand out.
        // NOTE(review): 44032 is U+AC00, the start of the Hangul Syllables block.
        // Input that already contains these characters would be rewritten to tags
        // by plain2html; confirm whether a private-use range should be used instead.
        private int _currentHash = 44032;

        /// <summary>
        /// Returns the placeholder for <paramref name="tag"/>, allocating the next
        /// free code point the first time a given tag text is seen.
        /// </summary>
        /// <param name="tag">The exact tag text to look up or register.</param>
        /// <returns>The placeholder string assigned to the tag.</returns>
        public string pushHash(string tag)
        {
            string placeholder;
            if (!_htmlHash.TryGetValue(tag, out placeholder))
            {
                placeholder = char.ConvertFromUtf32(_currentHash);
                _htmlHash[tag] = placeholder;
                _currentHash++;
            }
            return placeholder;
        }

        // Regex callback: maps a matched tag to its placeholder.
        private string tagMatch(Match tag)
        {
            return pushHash(tag.Value);
        }

        /// <summary>
        /// Replaces every tag matched by the tag pattern with its placeholder.
        /// </summary>
        /// <param name="html">The HTML to convert.</param>
        /// <returns>The input with each tag replaced by a placeholder.</returns>
        public string html2plain(string html)
        {
            MatchEvaluator tagEvaluator = new MatchEvaluator(tagMatch);
            return Regex.Replace(html, _htmlPattern, tagEvaluator, _regexOptions);
        }

        /// <summary>
        /// Splits the content of one ins/del element into text and tag parts and
        /// rebuilds it via <see cref="GoThroughDiffParts"/>.
        /// </summary>
        /// <param name="tagStart">The opening ins/del tag.</param>
        /// <param name="tagEnd">The closing ins/del tag.</param>
        /// <param name="contents">The markup between the two diff tags.</param>
        /// <returns>The rebuilt fragment.</returns>
        private string ProcessDiffTag(string tagStart, string tagEnd, string contents)
        {
            MatchCollection matches = Regex.Matches(contents, _htmlPattern, _regexOptions);
            if (matches.Count == 0)
            {
                return string.Concat(tagStart, contents, tagEnd);
            }

            List<TagDefinition> diffTagParts = new List<TagDefinition>();
            int consumed = 0; // index just past the last part already recorded

            foreach (Match currentMatch in matches)
            {
                // Text between the previous tag (or the start) and this tag.
                if (currentMatch.Index > consumed)
                {
                    AddTagDefinition(diffTagParts,
                        CreateTextPart(contents.Substring(consumed, currentMatch.Index - consumed)));
                }

                TagDefinition tagPart = new TagDefinition();
                tagPart.Tag = true;
                tagPart.OpeningTag = !IsClosingTag(currentMatch.Value);
                tagPart.TagType = GetTagType(currentMatch.Value);
                tagPart.Text = currentMatch.Value;
                AddTagDefinition(diffTagParts, tagPart);

                consumed = currentMatch.Index + currentMatch.Length;
            }

            // Text after the last tag. Recording it here, after the loop, keeps it
            // in document order and ensures it is never skipped.
            if (consumed < contents.Length)
            {
                AddTagDefinition(diffTagParts, CreateTextPart(contents.Substring(consumed)));
            }

            return GoThroughDiffParts(diffTagParts, tagStart, tagEnd);
        }

        // Builds a part that represents plain text rather than a tag.
        private static TagDefinition CreateTextPart(string text)
        {
            TagDefinition part = new TagDefinition();
            part.Tag = false;
            part.Text = text;
            return part;
        }

        /// <summary>
        /// Extracts the lower-cased element name from a tag, e.g. "p" for
        /// "&lt;/P class='x'&gt;" and "br" for "&lt;br/&gt;".
        /// </summary>
        private string GetTagType(string tag)
        {
            // Skip "<" or "</".
            int startIndex = tag.StartsWith("</", System.StringComparison.Ordinal) ? 2 : 1;
            if (startIndex >= tag.Length)
            {
                return string.Empty;
            }

            int endIndex = tag.IndexOfAny(_tagNameTerminators, startIndex);
            if (endIndex == -1)
            {
                endIndex = tag.Length;
            }

            return tag.Substring(startIndex, endIndex - startIndex).ToLowerInvariant();
        }

        /// <summary>
        /// Rebuilds the content of a diff element from its parts: unpaired opening
        /// tags are moved in front of the diff tag, unpaired closing tags behind
        /// it, and everything else (text, block elements, paired tags) stays inside.
        /// </summary>
        /// <param name="parts">The parts in document order.</param>
        /// <param name="startTag">The opening ins/del tag.</param>
        /// <param name="endTag">The closing ins/del tag.</param>
        /// <returns>[unpaired opening tags][startTag][inside][endTag][unpaired closing tags].</returns>
        private string GoThroughDiffParts(List<TagDefinition> parts, string startTag, string endTag)
        {
            StringBuilder before = new StringBuilder();
            StringBuilder middle = new StringBuilder();
            StringBuilder after = new StringBuilder();

            foreach (TagDefinition definition in parts)
            {
                bool staysInside =
                    !definition.Tag ||                                // plain text
                    _blockElements.Contains(definition.TagType) ||    // img / br
                    definition.MatchingIndex != -1;                   // tag with a partner

                if (staysInside)
                {
                    middle.Append(definition.Text);
                }
                else if (definition.OpeningTag)
                {
                    before.Append(definition.Text);
                }
                else
                {
                    after.Append(definition.Text);
                }
            }

            string inner = middle.ToString();

            // We don't want the ins/del tag around nothing.
            bool includeDiffTag = !string.IsNullOrEmpty(inner);

            // Whitespace-only content is kept visible.
            if (includeDiffTag && string.IsNullOrWhiteSpace(inner))
            {
                inner = "&nbsp;" + inner.Replace("\n", "<br />");
            }

            StringBuilder result = new StringBuilder();
            result.Append(before.ToString());
            if (includeDiffTag)
            {
                result.Append(startTag);
            }
            result.Append(inner);
            if (includeDiffTag)
            {
                result.Append(endTag);
            }
            result.Append(after.ToString());

            return result.ToString();
        }

        // Regex callback for one ins/del element: group 1 is the opening diff tag,
        // group 4 its content and group 5 the closing diff tag.
        private string DiffTagMatch(Match tag)
        {
            string tagStart = tag.Groups[1].Value;
            string tagEnd = tag.Groups[5].Value;
            string contents = tag.Groups[4].Value;

            if (string.IsNullOrEmpty(contents))
            {
                return string.Empty; // we don't want the ins/del tag around nothing
            }

            if (string.IsNullOrWhiteSpace(contents)) // spacing should be kept
            {
                return string.Concat(tagStart, "&nbsp;", contents.Replace("\n", "<br />"), tagEnd);
            }

            return ProcessDiffTag(tagStart, tagEnd, contents);
        }

        // A tag counts as closing when it contains "</" and mentions neither
        // "<img" nor "<br". Comparisons are ordinal so the result does not depend
        // on the current culture's casing rules.
        private bool IsClosingTag(string tag)
        {
            return tag.Contains("</")
                && tag.IndexOf("<img", System.StringComparison.OrdinalIgnoreCase) < 0
                && tag.IndexOf("<br", System.StringComparison.OrdinalIgnoreCase) < 0;
        }

        /// <summary>
        /// Rewrites every ins/del element in <paramref name="html"/> through
        /// <see cref="DiffTagMatch"/>.
        /// </summary>
        /// <param name="html">HTML containing ins/del diff markup.</param>
        /// <returns>The HTML with each matched ins/del element rebuilt.</returns>
        public string CleanUpMisplacedDiffTags(string html)
        {
            // NOTE(review): without RegexOptions.Singleline, '.' does not match '\n',
            // so a diff element whose content spans lines is not matched here;
            // confirm whether that is intended.
            return Regex.Replace(html, @"(\<((ins|del).*?)\>)(.*?)(\<\/((ins|del).*?)\>)", DiffTagMatch, _regexOptions);
        }

        /// <summary>
        /// Replaces every placeholder registered so far with its original tag and
        /// then cleans up the ins/del markup.
        /// </summary>
        /// <param name="plain">Text containing placeholders and diff tags.</param>
        /// <returns>The restored and cleaned-up HTML.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="plain"/> is null.</exception>
        public string plain2html(string plain)
        {
            if (plain == null)
            {
                throw new System.ArgumentNullException("plain");
            }

            foreach (KeyValuePair<string, string> entry in _htmlHash)
            {
                // Literal replacement: a tag may contain '$' sequences that
                // Regex.Replace would expand as substitutions.
                plain = plain.Replace(entry.Value, entry.Key);
            }
            return CleanUpMisplacedDiffTags(plain);
        }

        /// <summary>
        /// Appends <paramref name="tag"/> to <paramref name="list"/>. A closing tag
        /// is first paired with the nearest preceding opening tag of the same type
        /// that has no partner yet; both then record each other's index.
        /// </summary>
        private void AddTagDefinition(List<TagDefinition> list, TagDefinition tag)
        {
            if (tag.Tag && !tag.OpeningTag)
            {
                int insertingIndex = list.Count;
                for (int index = list.Count - 1; index >= 0; index--)
                {
                    TagDefinition candidate = list[index];
                    if (candidate.Tag &&
                        candidate.OpeningTag &&
                        candidate.MatchingIndex == -1 &&
                        candidate.TagType == tag.TagType)
                    {
                        tag.MatchingIndex = index;
                        candidate.MatchingIndex = insertingIndex;
                        break;
                    }
                }
            }

            list.Add(tag);
        }

        // One piece of the content of a diff element: either a tag or a run of text.
        private class TagDefinition
        {
            // True for a tag, false for plain text.
            public bool Tag { get; set; }
            // Lower-cased element name; empty for text.
            public string TagType { get; set; }
            // The exact source text of this part.
            public string Text { get; set; }
            // Index of the paired tag in the parts list, or -1 when unpaired.
            public int MatchingIndex { get; set; }
            // True when the tag is not a closing tag.
            public bool OpeningTag { get; set; }

            public TagDefinition()
            {
                this.Tag = false;
                this.Text = string.Empty;
                this.TagType = string.Empty;
                this.MatchingIndex = -1;
                this.OpeningTag = false;
            }
        }
    }
}
使用系统集合;
使用System.Collections.Generic;
使用系统文本;
使用System.Text.RegularExpressions;
使用System.Linq;
名称空间HtmlCompare
{
类独角兽
{
私有哈希表_htmlHash=新哈希表();
私有常量字符串_htmlPattern=@“]*>*?|”;
私有列表_blockElements=“img,br”.Split(',').ToList();
私有int_currentHash=44032;
公共字符串pushHash(字符串标记)
{
如果(_htmlHash[tag]==null)
{
//_htmlHash[tag]=char.Parse(\\u“+Convert.ToString(\\u currentHash,16));
_htmlHash[tag]=char.ConvertFromUtf32(\u currentHash);
_currentHash++;
}
返回_htmlHash[tag].ToString();
}
私有字符串标记匹配(匹配标记)
{
返回pushHash(tag.Value);
}
公共字符串html2plain(字符串html)
{
MatchEvaluator tagEvaluator=新的MatchEvaluator(tagMatch);
返回Regex.Replace(html、_htmlPattern、tagEvaluator、RegexOptions.IgnoreCase | RegexOptions.Multiline);
}
私有字符串ProcessDiffTag(字符串标记开始、字符串标记结束、字符串内容)
{
ArrayList diffTagParts=新的ArrayList();
MatchCollection matches=Regex.matches(内容,
_htmlPattern,
RegexOptions.IgnoreCase | RegexOptions.Multiline);
如果(matches.Count>0)
{
int contentsStringIndex=0;
int contentsstringendex=0;
int lastContentStringIndex=0;
bool lastTag=false;
标记定义;
foreach(在匹配项中匹配currentMatch)
{
contentsStringIndex=currentMatch.Index;
contentsStringIndex=contentsStringIndex+currentMatch.Length;
lastTag=(currentMatch==matches[matches.Count-1]);
//我们错过了不是标签的文本了吗?
如果(contentsStringIndex>lastContentStringIndex)
{
定义=新标记定义();
definition.Tag=false;
definition.Text=contents.Substring(lastContentStringIndex,contentsStringIndex-lastContentStringIndex);
AddTagDefinition(diffTagParts,定义);
}
else if(lastTag&&contents.Length>contentsstringendex)//最后一个标记后面的内容?
{
定义=新标记定义();
definition.Tag=false;
definition.Text=contents.Substring(contentsstringendex,contents.Length-contentsstringendex);
AddTagDefinition(diffTagParts,定义);
}
//在当前标签上工作
定义=新标记定义();
definition.Tag=true;
definition.OpeningTag=!IsClosingTag(currentMatch.Value);
definition.TagType=GetTagType(currentMatch.Value);
definition.Text=currentMatc