C# 从文本文件中获取某些信息_C#_Regex

C# 从文本文件中获取某些信息

c# regex

C# 从文本文件中获取某些信息,c#,regex,C#,Regex,我有一个~7mb的文本文件，我想从中提取一些信息，其中包含许多类似于以下格式的实例： "name": "Riki's Dagger", "defindex": 0, "item_class": "dota_item_wearable", "item_type_name": "#DOTA_WearableType_Daggers", "item_name": "#DOTA_Ite

我有一个~7mb的文本文件，我想从中提取一些信息，其中包含许多类似于以下格式的实例：

            "name": "Riki's Dagger",
            "defindex": 0,
            "item_class": "dota_item_wearable",
            "item_type_name": "#DOTA_WearableType_Daggers",
            "item_name": "#DOTA_Item_Rikis_Dagger",
            "proper_name": false,
            "item_quality": 0,
            "image_inventory": null,
            "min_ilevel": 1,
            "max_ilevel": 1,
            "image_url": "",
            "image_url_large": "",

我想提取名称和defindex，检查此实例是否包含一些关键字，然后将其放入新的文本文件中，以便以后使用。我的计划是在文件中搜索每个“name”实例（带引号），并将下一个“name”实例之前的所有内容设置为一个名为current的变量。然后从那里搜索我需要的信息的当前字符串。这是最好的方法吗？我该怎么做？我应该使用正则表达式还是文件太大？如果您能提供一些指导，我们将不胜感激

这就是我到目前为止所做的：

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;

namespace ConsoleApplication1
{
    /// <summary>
    /// Unfinished console program: it loads two schema text files, opens a third
    /// file for writing, and leaves the actual extraction loop as a placeholder.
    /// </summary>
    class Test
    {
        /// <summary>
        /// Entry point. Reads both input files into memory, opens the output file,
        /// and waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
            {
            // Hard-coded locations of the two input files and the output file.
            string ingameschemaFilePath = @"C:\Users\Andrew\Documents\GitHub\SteamBot\Bin\Debug\ingameschema.txt";
            string dota2schemaFilePath = @"C:\Users\Andrew\Documents\GitHub\SteamBot\Bin\Debug\dota2schema.txt";
            string schemaFilePath = @"C:\Users\Andrew\Documents\GitHub\SteamBot\Bin\Debug\schema.txt";

            // Both input files are read completely into memory, one array element per line.
            string[] ingameschema = File.ReadAllLines(ingameschemaFilePath);
            string[] dota2schema = File.ReadAllLines(dota2schemaFilePath);
            // Working variables for the extraction step; they are only declared here and never assigned.
            string[] current = null;
            string[] name = null;
            string[] defindex = null;
            string[] rarity = null;

            // The writer targets schemaFilePath; nothing is written to it yet.
            using (TextWriter textWriter = new StreamWriter(schemaFilePath))
            {
                // NOTE(review): placeholder, not valid C#. The loop header holds only a comment
                // (which also swallows the closing parenthesis), so this block does not compile as written.
                foreach (//search for "name"->"name" segment here)
                {
                    // Pseudo-code for the planned filter on each segment (not implemented):
                    //    if current.Contains("dota_item_wearable") == false, current.Contains("announcer", "courier", "ward", "egg", "costume", "HUD", "smeevil", "taunt", "bait", "lure", "bundle" ) == true, 
                    //          break
                    }
                }
            // Keep the console window open until the user presses a key.
            System.Console.WriteLine("Press any key to exit.");
            System.Console.ReadKey();
    }
    }
}

我认为您应该使用

StreamReader

逐行读取文本文件，然后在该行中找到所需的信息

只有当您在读取完成之前一直把文件的部分内容保存在内存中时，才可能遇到内存问题（不过您会惊讶地发现，列表和字典在内存耗尽之前可以变得非常大）

你需要做的是尽快保存处理过的数据，而不是把它保存在内存中（或者尽可能少地存储在内存中）。范例：

    /// <summary>
    /// Reads the source file and groups its <c>"key": value</c> lines into one
    /// attribute collection per item; every <c>name</c> key starts a new item.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        string sourcefile = @"C:\test\source.txt";
        // Declared for the output step, which this example does not implement.
        string outputfile = @"C:\test\output.txt";

        string[] source = File.ReadAllLines(sourcefile);

        // The list would represent the collection of all the items
        List<NameValueCollection> list = new List<NameValueCollection>();

        // Each nvc would represent the collection of attributes for that item
        NameValueCollection nvc = null;

        foreach (string s in source)
        {
            // Split at the first ':' only, so values that contain ':' themselves
            // (URLs, for example) are kept whole.
            int separator = s.IndexOf(':');
            if (separator < 0)
            {
                // Not a key/value line (blank line, lone brace, ...): nothing to store.
                continue;
            }

            // Strip the indentation and the surrounding quotes so the key can be compared with "name".
            string key = s.Substring(0, separator).Trim().Trim('"');

            // Strip whitespace, the trailing comma and the surrounding quotes from the value.
            string value = s.Substring(separator + 1).Trim().TrimEnd(',').Trim().Trim('"');

            // If the key is name you have finished your previous item, so add it to the list and start a new one
            if (key == "name")
            {
                if (nvc != null)
                {
                    list.Add(nvc);
                }

                nvc = new NameValueCollection();
            }

            // Lines that appear before the first "name" belong to no item, so skip them
            // instead of dereferencing a null collection.
            if (nvc == null)
            {
                continue;
            }

            // Add your attribute and value to the items attribute collection
            nvc.Add(key, value);
        }

        // The last item is not followed by another "name" line, so it has to be added here.
        if (nvc != null)
        {
            list.Add(nvc);
        }
    }

static void Main(string[] args)
{
    string sourcefile = @"C:\test\source.txt";
    string outputfile = @"C:\test\output.txt";
    string[] source = File.ReadAllLines(sourcefile);
    //该列表将代表所有项目的集合
    List<NameValueCollection> list = new List<NameValueCollection>();
    //每个nvc代表该项目的属性集合
    NameValueCollection nvc = null;
    foreach (string s in source)
    {
        //将字符串拆分为键和值
        string[] nv = s.Split(':');
        //如果键为name，则您已完成上一项，并将其添加到列表中并开始新项
        if (nv[0] == "name")
        {
            if (nvc != null)
                list.Add(nvc);
            nvc = new NameValueCollection();
        }
        //将属性和值添加到items属性集合
        nvc.Add(nv[0], nv[1]);
    }
}

7MB有点大，但以如今的内存容量，应该没有问题。如果它成为一个问题，您可以考虑使用流对象的ReadLine，每次只将一行加载到内存中，而不是一次性读取整个文件。

让我知道这是否有帮助。

您能告诉我们该文件的开头和结尾吗？我觉得这是JSON，在这种情况下，你可能有一个比正则表达式更好的选择。它看起来有点像JSON，但没有大括号{}。输入文件是实际的JSON，还是只是一个长列表，每个条目元素都没有分隔？如果是JSON，有几个很棒的库可以为您将文件解析为整洁的对象。我自己也是JSON.NET的粉丝，但也有其他好的。开始：{“结果”：{“状态”：1，“项目”游戏url:“http:\/\/media.steampowered.com\/apps\/570\/scripts\/items\/items\/items游戏。****************.txt”，“品质”：{“正常”：0，“正宗”：1，“复古”：2，“不寻常”：3，“独特”：4，“社区”：5，“开发者”：6，“自制”：7，“定制”：8，“陌生”：9，“完成”：10，“闹鬼”：11，“比赛”：12，“青睐”：13好吧，如果没有JSON。读取这么大的JSON会导致一些内存问题。既然.NET的反序列化程序设计用于帮助您在使用JSON时执行此操作，他为什么要这样做JSON@DJKRAZE我想你是对的，7mb的数据不多，但在这种情况下还是会有一些限制的。我建议你读一下

JSON

抱歉，我不知道你是如何得出这些未经检验的假设的。