C# 将html拆分为单词
假设我有以下字符串:C# 将html拆分为单词,c#,html,split,C#,Html,Split,假设我有以下字符串: Hellotoevryone<img height="115" width="150" alt="" src="/Content/Edt/image/b4976875-8dfb-444c-8b32-cc b47b2d81e0.jpg" />Iamsogladtoseeall. HellotoEvryoneiamsoGladtoseall。 using System; using System.Text.RegularExpressions; using Sys
Hellotoevryone<img height="115" width="150" alt="" src="/Content/Edt/image/b4976875-8dfb-444c-8b32-cc b47b2d81e0.jpg" />Iamsogladtoseeall.
HellotoEvryoneiamsoGladtoseall。
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
/// <summary>
/// Console driver: reads the text file named by the first command-line
/// argument and prints every line twice - once verbatim and once after
/// <c>EnsureWordLength(10)</c> has been applied to it.
/// </summary>
public static class CustomSplit {
    /// <summary>Program entry point.</summary>
    /// <param name="args">
    /// <c>args[0]</c> must be the path of an existing text file; any further
    /// arguments are ignored.
    /// </param>
    public static void Main(String[] args) {
        // Tell the user why nothing happens instead of exiting silently.
        if (args.Length == 0 || !File.Exists(args[0])) {
            Console.Error.WriteLine("Usage: CustomSplit <path-to-text-file>");
            return;
        }

        // File.ReadAllLines opens and closes the file itself (no leaked
        // reader) and accepts \r\n, \n and \r line endings, so the input does
        // not have to match Environment.NewLine of the machine running this.
        // Unlike splitting on the newline string, a trailing line terminator
        // does not produce an extra empty last line.
        String[] lines = File.ReadAllLines(args[0]);

        int counter = 0;
        foreach (String line in lines) {
            counter++;
            Console.WriteLine("########### Line {0} ###########", counter);
            Console.WriteLine(line);
            Console.WriteLine(line.EnsureWordLength(10));
        }
    }
}
/// <summary>
/// String extension that breaks a run of text into space-separated "words"
/// of a fixed number of visible characters, treating HTML tags as zero-width.
/// </summary>
public static class EnsureWordLengthExtension {
    /// <summary>
    /// Inserts a single space after every <paramref name="length"/> visible
    /// characters of <paramref name="target"/>. Everything from a '&lt;' up to
    /// and including the next '&gt;' is copied through unchanged and does not
    /// count towards the word length.
    /// </summary>
    /// <param name="target">Text to split; may contain HTML tags.</param>
    /// <param name="length">Number of visible characters per word.</param>
    /// <returns>
    /// The text with separating spaces inserted. No trailing space is added
    /// when the text ends exactly on a word boundary.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="target"/> is null.
    /// </exception>
    /// <remarks>
    /// Tags are recognised purely lexically ('&lt;' ... first '&gt;'); element
    /// content between an opening and a closing tag is counted as normal text.
    /// A '&lt;' that is never closed is treated as an ordinary visible
    /// character instead of reading past the end of the string.
    /// </remarks>
    public static String EnsureWordLength(this String target, int length) {
        if (target == null) {
            throw new ArgumentNullException("target");
        }

        System.Text.StringBuilder output = new System.Text.StringBuilder(target.Length + 16);
        int visibleInWord = 0;     // visible (non-tag) characters in the current word
        bool wordIsFull = false;   // separator is owed, but only if more input follows

        int i = 0;
        while (i < target.Length) {
            // Start the next word lazily so a full final word gets no trailing space.
            if (wordIsFull) {
                output.Append(' ');
                visibleInWord = 0;
                wordIsFull = false;
            }

            int close = -1;
            if (target[i] == '<') {
                close = target.IndexOf('>', i + 1);
            }

            if (close >= 0) {
                // Complete tag: copy it verbatim, it has zero width.
                output.Append(target, i, close - i + 1);
                i = close + 1;
            } else {
                // Ordinary character, or a '<' with no matching '>'.
                output.Append(target[i]);
                visibleInWord++;
                i++;
            }

            if (visibleInWord == length) {
                wordIsFull = true;
            }
        }

        return output.ToString();
    }
}
这个字符串表示一系列字符,这些字符之间没有空格,并且字符串中还插入了一个html图像标记。现在我想把字符串分成单词,每个单词的长度为10个字符,因此输出应该是:
1)Hellotoevr
2)yone<img height="115" width="150" alt="" src="/Content/Edt/image/b4976875-8dfb-444c-8b32-cc b47b2d81e0.jpg" />Iamsog
3)ladtoseeal
4)l.
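1)Hellotoevr
2)yone<img height="115" width="150" alt="" src="/Content/Edt/image/b4976875-8dfb-444c-8b32-cc b47b2d81e0.jpg" />Iamsog
3)ladtoseeal
4)l.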
因此,我们的想法是将任何html标记内容保留为0长度的字符
我编写了这样一个方法,但它没有考虑html标记:
/// <summary>
/// Breaks every space-separated word of <paramref name="target"/> that is
/// longer than <paramref name="length"/> into pieces of at most
/// <paramref name="length"/> characters, separated by single spaces.
/// HTML tags are NOT recognised: their characters are counted like any other.
/// </summary>
/// <param name="target">Text to process.</param>
/// <param name="length">Maximum characters per word; must be at least 1.</param>
/// <returns>The text with the extra separating spaces inserted.</returns>
/// <exception cref="ArgumentNullException"><paramref name="target"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is less than 1 (a length of 0 could never make progress).
/// </exception>
public static string EnsureWordLength(this string target, int length)
{
    if (target == null)
    {
        throw new ArgumentNullException("target");
    }
    if (length < 1)
    {
        throw new ArgumentOutOfRangeException("length", length, "Word length must be at least 1.");
    }

    string[] words = target.Split(' ');
    for (int i = 0; i < words.Length; i++)
    {
        string word = words[i];
        if (word.Length <= length)
        {
            continue;
        }

        // Cut the word into consecutive slices once, instead of repeatedly
        // inserting into (and re-copying) the growing string.
        var pieces = new System.Collections.Generic.List<string>();
        for (int start = 0; start < word.Length; start += length)
        {
            pieces.Add(word.Substring(start, Math.Min(length, word.Length - start)));
        }
        words[i] = string.Join(" ", pieces.ToArray());
    }
    return string.Join(" ", words);
}
publicstaticstringensurewordlength(这个字符串目标,int-length)
{
string[]words=target.Split(“”);
for(int i=0;i长度)
{
var可能=真;
var-ord=1;
做
{
var lengthTmp=长度*ord+ord-1;
如果(lengthmp
我希望看到一个代码,它可以执行我所描述的拆分。谢谢。下面的代码将处理您提供的案例,但对于任何更复杂的情况,都会中断。另外,由于您没有指定它应该如何处理带有内部文本或HTML的长格式标记,因此它将所有标记都视为短格式标记(运行代码以了解我的意思) 使用此输入: Hellotoevryone<img height="115" width="150" alt="" src="/Content/Edt/image/b4976875-8dfb-444c-8b32-cc b47b2d81e0.jpg" />Iamsogladtoseeall. Hellotoevryone<img src="/Content/Edt/image/b4976875-8dfb-444c-8b32-cc b47b2d81e0.jpg" />Iamsoglad<img src="baz.jpeg" />toseeall. Hello<span class="foo">toevryone</span>Iamso<em>glad</em>toseeallTheQuickBrown<img src="bar.jpeg" />FoxJumpsOverTheLazyDog. Hello<span class="foo">toevryone</span>Iamso<em>glad</em>toseeall. Loremipsumdolorsitamet,consecteturadipiscingelit.Nullamacnibhelit,quisvolutpatnunc.Donecultrices,ipsumquisaccumsanconvallis,tortortortorgravidaante,etsollicitudinipsumnequeeulorem. 大家好,我很高兴见到你们。
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
大家好,我很高兴见到你们。
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
他每个人都很高兴看到所有的快速浏览信息跳转。
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
大家好,我很高兴见到你们。
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
知识产权保护、公民权利保护、公民权利保护、公民权利保护、公民权利保护、侵权行为保护、公民权利保护等。
使用以下输入时代码会出错(请注意其中不完整的标记):
Hellotoevryone
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
使用系统;
使用System.Text.RegularExpressions;
使用System.IO;
使用System.Collections.Generic;
公共静态类CustomSplit{
公共静态void Main(字符串[]args){
如果(args.Length>0&&File.Exists(args[0])){
StreamReader sr=新的StreamReader(args[0]);
String[]lines=sr.ReadToEnd().Split(新字符串[]{Environment.NewLine},StringSplitOptions.None);
int计数器=0;
foreach(行中的字符串行){
控制台.WriteLine(“{0}行、++计数器”);
控制台写入线(行);
控制台写入线(行长度(10));
}
}
}
}
publicstatic类确保了扩展{
公共静态字符串长度(此字符串目标,int-length){
列表单词=新列表();
添加(新列表());
for(int i=0;i
以下代码将处理您提供的案例,但对于任何更复杂的情况,都会中断。另外,由于您没有指定它应该如何处理带有内部文本或HTML的长格式标记,因此它将所有标记都视为短格式标记(运行代码以了解我的意思)
使用此输入:
Hellotoevryone<img height="115" width="150" alt="" src="/Content/Edt/image/b4976875-8dfb-444c-8b32-cc b47b2d81e0.jpg" />Iamsogladtoseeall.
Hellotoevryone<img src="/Content/Edt/image/b4976875-8dfb-444c-8b32-cc b47b2d81e0.jpg" />Iamsoglad<img src="baz.jpeg" />toseeall.
Hello<span class="foo">toevryone</span>Iamso<em>glad</em>toseeallTheQuickBrown<img src="bar.jpeg" />FoxJumpsOverTheLazyDog.
Hello<span class="foo">toevryone</span>Iamso<em>glad</em>toseeall.
Loremipsumdolorsitamet,consecteturadipiscingelit.Nullamacnibhelit,quisvolutpatnunc.Donecultrices,ipsumquisaccumsanconvallis,tortortortorgravidaante,etsollicitudinipsumnequeeulorem.
大家好,我很高兴见到你们。
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
大家好,我很高兴见到你们。
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
他每个人都很高兴看到所有的快速浏览信息跳转。
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
大家好,我很高兴见到你们。
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
知识产权保护、公民权利保护、公民权利保护、公民权利保护、公民权利保护、侵权行为保护、公民权利保护等。
使用以下输入时代码会出错(请注意其中不完整的标记):
Hellotoevryone
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
使用系统;
使用System.Text.RegularExpressions;
使用System.IO;
使用System.Collections.Generic;
公共静态类CustomSplit{
公共静态void Main(字符串[]args){
如果(args.Length>0&&File.Exists(args[0])){
StreamReader sr=新的StreamReader(args[0]);
String[]lines=sr.ReadToEnd().Split(新字符串[]{Environment.NewLine},StringSplitOptions.None);
int计数器=0;
foreach(行中的字符串行){
控制台.WriteLine(“{0}行、++计数器”);
控制台写入线(行);
控制台写入线(行长度(10));
}
}
}
}
publicstatic类确保了扩展{
公共静态字符串长度(此字符串目标,int-length){
列表单词=新列表();
添加(新列表());
for(int i=0;i
这是一个符合您需求的正则表达式解决方案。请记住,如果您决定稍微改变一下需求,它可能就不再适用,因为它是严格按照您目前的要求量身定制的
使用System.Text.regular表达式;
字符串[]示例={
@“你好,我很高兴见到你。”,
using System;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
public static class CustomSplit {
public static void Main(String[] args) {
if (args.Length > 0 && File.Exists(args[0])) {
StreamReader sr = new StreamReader(args[0]);
String[] lines = sr.ReadToEnd().Split(new String[]{Environment.NewLine}, StringSplitOptions.None);
int counter = 0;
foreach (String line in lines) {
Console.WriteLine("########### Line {0} ###########", ++counter);
Console.WriteLine(line);
Console.WriteLine(line.EnsureWordLength(10));
}
}
}
}
public static class EnsureWordLengthExtension {
public static String EnsureWordLength(this String target, int length) {
List<List<Char>> words = new List<List<Char>>();
words.Add(new List<Char>());
for (int i = 0; i < target.Length; i++) {
words[words.Count - 1].Add(target[i]);
if (target[i] == '<') {
do {
i++;
words[words.Count - 1].Add(target[i]);
} while(target[i] != '>');
}
if ((new String(words[words.Count - 1].ToArray())).CountCharsWithoutTags() == length) {
words.Add(new List<Char>());
}
}
String[] result = new String[words.Count];
for (int j = 0; j < words.Count; j++) {
result[j] = new String(words[j].ToArray());
}
return String.Join(" ", result);
}
private static int CountCharsWithoutTags(this String target) {
return Regex.Replace(target, "<.*?>", "").Length;
}
}
“测试123你好,世界”,
@“测试”,
@“布拉布拉赫”,
@“Test”,//有空格,不匹配
“堆栈溢出”//有空格,不匹配
};
//如果不想使用正则表达式(regex),请使用这两行