Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/320.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 获取字符串的编码_C#_.net_Encoding_Html Agility Pack - Fatal编程技术网

C# 获取字符串的编码

C# 获取字符串的编码,c#,.net,encoding,html-agility-pack,C#,.net,Encoding,Html Agility Pack,我从另一个页面获取html,在编码方面有问题。 例如: 我得到: aparelho nas sa??das 原文是: aparelho nas saídas 如何获得编码并转换为原始字符串 我的代码: var GetResponse = API_GET("..."); //this returns html of an http request. HtmlDocument doc = new HtmlDocument(); //the html-parsing doc.LoadHtml

我从另一个页面获取 HTML 时遇到了编码问题。例如,我得到的是:

aparelho nas sa??das
原文是:

aparelho nas saídas
如何检测出文本的编码,并将其转换回原始字符串?

我的代码:

var GetResponse = API_GET("..."); //this returns html of an http request. 
HtmlDocument doc = new HtmlDocument();  //the html-parsing 
doc.LoadHtml(GetResponse);
var body = doc.DocumentNode.SelectNodes("//div[@class='para']"); 
... 
var para = body[i].InnerHtml; //Here's the problem,it returns the output like: sa??das
我该怎么做?


提前感谢

可以使用下面这个类,或者参照它自行实现类似的检查。它只检查 UTF-8;我认为要可靠地检测任意编码是很困难的。

如有疑问,请将其转换为UTF-8或编码=WTF

这是部分源代码。看看IsUtf8方法。这可能非常有用

using System;
using System.IO;

namespace Unicode
{
    /// <summary>
    /// Checks whether a file, a stream or a byte buffer consists solely of
    /// well-formed UTF-8 byte sequences.
    /// </summary>
    /// <remarks>
    /// Accepted sequences (lead byte, then allowed range of the second byte;
    /// all further bytes must be 0x80..0xBF):
    /// <list type="bullet">
    /// <item><description>00..7F (single byte)</description></item>
    /// <item><description>C2..DF, 80..BF</description></item>
    /// <item><description>E0, A0..BF (rejects overlong forms)</description></item>
    /// <item><description>E1..EC and EE..EF, 80..BF</description></item>
    /// <item><description>ED, 80..9F (rejects UTF-16 surrogates)</description></item>
    /// <item><description>F0, 90..BF (rejects overlong forms)</description></item>
    /// <item><description>F1..F3, 80..BF</description></item>
    /// <item><description>F4, 80..8F (rejects values above U+10FFFF)</description></item>
    /// </list>
    /// Background material:
    /// http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335,
    /// http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-8.html,
    /// http://www.unicode.org/versions/corrigendum1.html,
    /// http://www.ietf.org/rfc/rfc2279.txt
    /// </remarks>
    public class Utf8Checker : IUtf8Checker
    {
        /// <summary>
        /// Checks whether the content of a file is well-formed UTF-8.
        /// </summary>
        /// <param name="fileName">Path of the file to open for reading.</param>
        /// <returns>True if the whole file is well-formed UTF-8.</returns>
        public bool Check(string fileName)
        {
            using (BufferedStream fstream = new BufferedStream(File.OpenRead(fileName)))
            {
                return this.IsUtf8(fstream);
            }
        }

        /// <summary>
        /// Check if stream is utf8 encoded.
        /// Notice: stream is read completely in memory!
        /// </summary>
        /// <param name="stream">
        /// Stream to read from. A seekable stream is rewound to its beginning
        /// first; a non-seekable stream is read from its current position.
        /// </param>
        /// <returns>True if the whole stream is utf8 encoded.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public bool IsUtf8(Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            if (stream.CanSeek)
            {
                stream.Seek(0, SeekOrigin.Begin);
            }

            // A single Stream.Read call may return fewer bytes than requested
            // without being at the end of the stream, so a short read must not
            // be taken as "everything was read". CopyTo loops until Read
            // returns 0, which is the only reliable end-of-stream signal.
            using (MemoryStream content = new MemoryStream())
            {
                stream.CopyTo(content);

                // GetBuffer() may be larger than the data; pass the real length.
                return IsUtf8(content.GetBuffer(), (int)content.Length);
            }
        }

        /// <summary>
        /// Checks whether the first <paramref name="length"/> bytes of
        /// <paramref name="buffer"/> are well-formed UTF-8.
        /// </summary>
        /// <param name="buffer">Bytes to validate.</param>
        /// <param name="length">Number of bytes, counted from index 0, to validate.</param>
        /// <returns>
        /// True if every byte sequence is well-formed; true as well when
        /// <paramref name="length"/> is zero or negative (nothing to check).
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="buffer"/> is null and <paramref name="length"/> is positive.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="length"/> exceeds the buffer size.
        /// </exception>
        public static bool IsUtf8(byte[] buffer, int length)
        {
            if (length <= 0)
            {
                return true;
            }

            if (buffer == null)
            {
                throw new ArgumentNullException("buffer");
            }

            int position = 0;
            int bytes = 0;
            while (position < length)
            {
                if (!IsValid(buffer, position, length, ref bytes))
                {
                    return false;
                }
                position += bytes;
            }
            return true;
        }

        /// <summary>
        /// Checks whether a single well-formed UTF-8 sequence starts at
        /// <paramref name="position"/>.
        /// </summary>
        /// <param name="buffer">Bytes to inspect.</param>
        /// <param name="position">Index of the lead byte of the sequence.</param>
        /// <param name="length">Number of usable bytes in <paramref name="buffer"/>.</param>
        /// <param name="bytes">
        /// Receives the size of the sequence (1 to 4) on success, and 0 on
        /// failure or when <paramref name="position"/> is at or past the end.
        /// </param>
        /// <returns>
        /// True if the sequence is well-formed or if <paramref name="position"/>
        /// is at or past <paramref name="length"/>; otherwise false.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="length"/> exceeds the buffer size.
        /// </exception>
        public static bool IsValid(byte[] buffer, int position, int length, ref int bytes)
        {
            if (buffer == null)
            {
                throw new ArgumentNullException("buffer");
            }

            if (length > buffer.Length)
            {
                throw new ArgumentException("Invalid length");
            }

            if (position > length - 1)
            {
                bytes = 0;
                return true;
            }

            byte lead = buffer[position];

            if (lead <= 0x7F)
            {
                bytes = 1;
                return true;
            }

            // Sequence size and the allowed range of the second byte depend on
            // the lead byte; the narrowed ranges exclude overlong encodings,
            // surrogates and code points above U+10FFFF.
            int size;
            int secondMin = 0x80;
            int secondMax = 0xBF;

            if (lead >= 0xC2 && lead <= 0xDF)
            {
                size = 2;
            }
            else if (lead >= 0xE0 && lead <= 0xEF)
            {
                size = 3;
                if (lead == 0xE0)
                {
                    secondMin = 0xA0;
                }
                else if (lead == 0xED)
                {
                    secondMax = 0x9F;
                }
            }
            else if (lead >= 0xF0 && lead <= 0xF4)
            {
                size = 4;
                if (lead == 0xF0)
                {
                    secondMin = 0x90;
                }
                else if (lead == 0xF4)
                {
                    secondMax = 0x8F;
                }
            }
            else
            {
                // 0x80..0xC1 and 0xF5..0xFF can never start a sequence.
                bytes = 0;
                return false;
            }

            // The sequence may end exactly at the end of the buffer; it is
            // only truncated when it would extend beyond it.
            if (position + size > length)
            {
                bytes = 0;
                return false;
            }

            byte second = buffer[position + 1];
            if (second < secondMin || second > secondMax)
            {
                bytes = 0;
                return false;
            }

            for (int offset = 2; offset < size; offset++)
            {
                byte trail = buffer[position + offset];
                if (trail < 0x80 || trail > 0xBF)
                {
                    bytes = 0;
                    return false;
                }
            }

            bytes = size;
            return true;
        }
    }
}
使用系统;
使用System.IO;
名称空间Unicode
{
/// 
/// http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335
/// 
/// http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-8.html
/// 
/// http://www.unicode.org/versions/corrigendum1.html
/// 
/// http://www.ietf.org/rfc/rfc2279.txt
/// 
/// 
公共类Utf8Checker:IUtf8Checker
{
公共布尔检查(字符串文件名)
{
使用(BufferedStream fstream=new BufferedStream(File.OpenRead(fileName)))
{
返回此.IsUtf8(fstream);
}
}
/// 
///检查流是否为utf8编码。
///注意:流在内存中完全读取!
/// 
///要从中读取的流。
///如果整个流是utf8编码的,则为True。
公共布尔IsUtf8(流)
{
整数计数=4*1024;
字节[]缓冲区;
int-read;
while(true)
{
缓冲区=新字节[计数];
stream.Seek(0,SeekOrigin.Begin);
read=stream.read(缓冲区,0,计数);
如果(读取<计数)
{
打破
}
缓冲区=空;
计数*=2;
}
返回IsUtf8(缓冲区,读取);
}
/// 
/// 
/// 
/// 
/// 
/// 
公共静态bool IsUtf8(字节[]缓冲区,整数长度)
{
int位置=0;
int字节=0;
while(位置<长度)
{
如果(!IsValid(缓冲区、位置、长度、引用字节))
{
返回false;
}
位置+=字节;
}
返回true;
}
/// 
/// 
/// 
/// 
/// 
/// 
/// 
/// 
公共静态bool有效(字节[]缓冲区,int位置,int长度,ref int字节)
{
if(长度>缓冲区长度)
{
抛出新ArgumentException(“无效长度”);
}
如果(位置>长度-1)
{
字节=0;
返回true;
}
字节ch=缓冲器[位置];
if(ch=0xc2&&ch=length-2)
{
字节=0;
返回false;
}
如果(缓冲器[位置+1]<0x80 | |缓冲器[位置+1]>0xbf)
{
字节=0;
返回false;
}
字节=2;
返回true;
}
if(ch==0xe0)
{
如果(位置>=长度-3)
{
字节=0;
返回false;
}
如果(缓冲器[位置+1]<0xa0 | |缓冲器[位置+1]>0xbf||
缓冲区[位置+2]<0x80 | |缓冲区[位置+2]>0xbf)
{
字节=0;
返回false;
}
字节=3;
返回true;
}
如果(ch>=0xe1&&ch=length-3)
{
字节=0;
返回false;
}
如果(缓冲器[位置+1]<0x80 | |缓冲器[位置+1]>0xbf||
缓冲区[位置+2]<0x80 | |缓冲区[位置+2]>0xbf)
{
字节=0;
返回false;
}
字节=3;
返回true;
}
如果(ch==0xf0)
{
如果(位置>=长度-4)
{
字节=0;
返回false;
}
如果(缓冲器[位置+1]<0x90 | |缓冲器[位置+1]>0xbf||
缓冲区[位置+2]<0x80 | |缓冲区[位置+2]>0xbf||
缓冲区[位置+3]<0x80 | |缓冲区[位置+3]>0xbf)
{
字节=0;
返回false;
}
字节=4;
返回true;
}
如果(ch==0xf4)
{
如果(位置>=长度-4)
{
字节=0;
返回false;
}
如果(缓冲器[位置+1]<0x80 | |缓冲器[位置+1]>0x8f||
缓冲区[位置+2]<0x80 | |缓冲区[位置+2]>0xbf||
缓冲区[位置+3]<0x80 | |缓冲区[位置+3]>0xbf)
{
字节=0;
返回false;
}
字节=4;
返回true;
}
如果(ch>=0xf1&&ch=length-4)
{
字节=0;
返回false;
}
如果(缓冲器[位置+1]<0x80 | |缓冲器[位置+1]>0xbf||
缓冲区[位置+2]<0x80 | |缓冲区[位置+2]>0xbf||
缓冲区[位置+3]<0x80 | |缓冲区[位置+3]>0xbf)
{
字节=0;
返回false;