Javascript 如何构造正则表达式来拆分此文本?

Javascript 如何构造正则表达式来拆分此文本?,javascript,html,regex,Javascript,Html,Regex,大家好,我正在写一个脚本,主要思想是我有一个固定结构的文本,如下所示: "RBD|X|RBD|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa" "RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa" "RBD|X|RBD|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa" "RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa" "R

大家好,我正在写一个脚本,主要思想是我有一个固定结构的文本,如下所示:

"RBD|X|RBD|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
splitWords = [RBD,X,RBD,C,92173,GJHGWO.NAYE,SAMBORNSiPOSSSTHRa]
<!DOCTYPE html>
<html>

<body>
<!-- Empty placeholder paragraph; the script below never references it. -->
<p id="demo"></p>

<!-- Sample input: six lines, each holding two double-quoted fields separated by a space. -->
<textarea cols=150 rows=15 id="texto">
"RBD|X|RBD|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
</textarea>

<!-- Splits the textarea value on "|" only (quotes, "~" and whitespace remain inside the
     pieces) and writes the comma-joined result into the document. -->
<script>
var splitWords = document.getElementById("texto").value.split("|");
document.write(splitWords.toString());
</script>

</body>
</html>
我想处理该文本,并用以下符号分割它: | " ~ (即管道符、双引号和波浪号),然后创建一个数组来存储这些值,如下所示:

"RBD|X|RBD|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
splitWords = [RBD,X,RBD,C,92173,GJHGWO.NAYE,SAMBORNSiPOSSSTHRa]
<!DOCTYPE html>
<html>

<body>
<!-- Empty placeholder paragraph; the script below never references it. -->
<p id="demo"></p>

<!-- Sample input: six lines, each holding two double-quoted fields separated by a space. -->
<textarea cols=150 rows=15 id="texto">
"RBD|X|RBD|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
</textarea>

<!-- Splits the textarea value on "|" only (quotes, "~" and whitespace remain inside the
     pieces) and writes the comma-joined result into the document. -->
<script>
var splitWords = document.getElementById("texto").value.split("|");
document.write(splitWords.toString());
</script>

</body>
</html>
为了实现这一目标,我尝试:

// Splits on the pipe character only; quotes, "~" and whitespace stay inside the pieces.
var splitWords = document.getElementById("texto").value.split("|");
// Write the array built on the previous line, comma-joined, into the document.
document.write(splitWords.toString());
我得到:

"RBD,X,RBD,C,92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa" "RBD,X,RBD,C,92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa" "RBD,X,RBD,C,92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa" "RBD,X,RBD,C,92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa" "RBD,X,RBD,C,92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa" "RBD,X,RBD,C,92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
问题是,这只是用管道分割文本,我也想用其他符号分割文本,以获得我想要的输出。 完整的代码如下所示:

"RBD|X|RBD|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
splitWords = [RBD,X,RBD,C,92173,GJHGWO.NAYE,SAMBORNSiPOSSSTHRa]
<!DOCTYPE html>
<html>

<body>
<!-- Empty placeholder paragraph; the script below never references it. -->
<p id="demo"></p>

<!-- Sample input: six lines, each holding two double-quoted fields separated by a space. -->
<textarea cols=150 rows=15 id="texto">
"RBD|X|RBD|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
</textarea>

<!-- Splits the textarea value on "|" only (quotes, "~" and whitespace remain inside the
     pieces) and writes the comma-joined result into the document. -->
<script>
var splitWords = document.getElementById("texto").value.split("|");
document.write(splitWords.toString());
</script>

</body>
</html>

"RBD|X|RBD|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
var splitWords = document.getElementById("texto").value.split("|");
document.write(splitWords.toString());

我非常感谢任何关于正则表达式的建议来实现这一点。

使用正则表达式:

// Split on any run of pipes, double quotes, tildes or whitespace. The leading and
// trailing quotes produce empty strings at both ends, which filter(Boolean) removes.
const str = '"RBD|X|RBD|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa"';
const splitWords = str.split(/[|"~\s]+/).filter(Boolean);
console.log(splitWords); // Output: ["RBD", "X", "RBD", "C", "92173", "GJHGWO.NAYE", "SAMBORNSiPOSSSTHRa"]

如果还想过滤句点,请将其添加到正则表达式的方括号中,并用反斜杠将其转义。

好的,让我们开始……先获取 textarea 的值并去除首尾空白:

// Grab the raw textarea content and drop leading/trailing whitespace.
var splitWords = document.querySelector("#texto").value.trim();
首先,您需要去掉所有的双引号(")符号:

// Remove every double-quote character from the text.
splitWords = splitWords.replaceAll('"', '');
然后分割行,因为它就像表中的行

// Turn the text into an array with one entry per line.
splitWords = splitWords.split(/\n/);
然后按空格、| 和 ~ 拆分每一行:

// Replace each row string with the array of its fields (split on "|", "~" or a
// single space) and log the row index together with the resulting fields.
splitWords.forEach((line, i, rows) => {
    rows[i] = line.split(/[|~ ]/);
    console.log(i, rows[i]);
});
Console.log输出将为:

0 ["RBD", "X", "RBD", "C", "92173", "GJHGWO.NAYE", "SAMBORNSiPOSSSTHRa"]
1 ["RBD", "X", "RBD", "C", "92173", "GJHGX4.NAYE", "SAMBORNSiPOSSSTHRa"]
2 ["RBD", "X", "RBD", "C", "92173", "GJHGX6.NAYE", "SAMBORNSiPOSSSTHRa"]
3 ["RBD", "X", "RBD", "C", "92173", "GJHGX8.NAYE", "SAMBORNSiPOSSSTHRa"]
4 ["RBD", "X", "RBD", "C", "92173", "GJHGXA.NAYE", "SAMBORNSiPOSSSTHRa"]
5 ["RBD", "X", "RBD", "C", "92173", "GJHGXC.NAYE", "SAMBORNSiPOSSSTHRa"]
然后就可以用这个二维数组做任何你想做的事。我的建议是:

// Replace each row string with the array of its fields (split on "|", "~" or a
// single space) and log the row index together with the resulting fields.
splitWords.forEach((line, i, rows) => {
    rows[i] = line.split(/[|~ ]/);
    console.log(i, rows[i]);
});

"RBD|X|RBD|C|92173~GJHGWO.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX4.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX6.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGX8.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXA.NAYE" "SAMBORNSiPOSSSTHRa"
"RBD|X|RBD|C|92173~GJHGXC.NAYE" "SAMBORNSiPOSSSTHRa"
var lines = document.getElementById("texto").value.split('\n');
var splitWords = lines.filter(function(v) { return v.length > 0; })
    .map(function(currentValue, index) {
        return currentValue.trim().replace(/^"([^"]+)"\s"([^"]+)"$/, "$1 $2").split(/[| ~]/);
    });
console.log(JSON.stringify(splitWords, null, 4));
text.split(/[|"~]/) —— split 接受正则表达式。
/\\\\\\\\\\\\\\\\\\\\\./g
您要求的是一个按特定字符分割文本的正则表达式,但这并不能解决您的问题:您想要的是一个多维数组,其中每个条目对应一行,而每行数据又由空格分隔成两个“单词”。 @user138717 您确定单个值中不会包含任何用于拆分的字符吗?例如,会不会出现 "SA BORNSiPOSSSTHRa" 而不是 "SAMBORNSiPOSSSTHRa" 这样的值,等等?如果不能确定,最好使用更精细的正则表达式匹配策略。 好的,谢谢,我会验证它。 布尔构造函数的巧妙使用。 我非常感谢支持,这非常有用。 谢谢,这非常有用,我感谢支持。