Java 如何加速我的表转换算法?

Java 如何加速我的表转换算法?,java,algorithm,Java,Algorithm,我的任务是将字符串表从一种格式转换为另一种格式。 我使用此类转换表: import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.Map; class TableConverter { public String[] Entities; //here I store descriptive fields like DescField1, DescField2

我的任务是将字符串表从一种格式转换为另一种格式。

我使用此类转换表:

import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

class TableConverter
{
    public String[] Entities; //here I store descriptive fields like DescField1, DescField2...
    public ArrayList<String> ConvertedList; //here I store converted table rows as separate string lines 

    public TableConverter(ArrayList<String> lines) //we receive table rows as separate string lines
    {
        String[] splitted_first_line = lines.get(0).split("\t"); //split first row to get descriptive fields
        this.Entities = new String[splitted_first_line.length - 2]; //allocate size to hold all descriptive fields. -2 because last two columns is Date and Total
        System.arraycopy(splitted_first_line, 0, this.Entities, 0, this.Entities.length); //copy descriptive fields into my arr     

        //--

        int lines_sz = lines.size(); //save lines size to not recalculate it every iteration 
        Map<String, Integer> k_d_map = new HashMap<String, Integer>(); //map to store indecies of every Date column

        for (int i = 1; i < lines_sz; i++)
        {
            if (lines.get(i).isEmpty())
                continue;

            String[] splitted_line = lines.get(i).split("\t"); //splitted line on values    

            if (!k_d_map.containsKey(splitted_line[splitted_line.length - 2])) //if my map does not contain such date
                k_d_map.put(splitted_line[splitted_line.length - 2], 0); //then add it
        }

        String[] known_dates = k_d_map.keySet().toArray(new String[k_d_map.size()]);
        SortStrDates(known_dates); //I sort dates by ASC 
        k_d_map.clear(); //clear map to fill it again with correct indecies

        for (int i = 0; i < known_dates.length; i++) //refilling map and now we know every date index
            k_d_map.put(known_dates[i], i);

        //--

        Map<String, EntitySales> ESs_map = new HashMap<String, EntitySales>(); //map for rows

        for (int i = 1; i < lines_sz; i++)
        {
            if (lines.get(i).isEmpty())
                continue;

            String[] splitted_line = lines.get(i).split("\t"); //split row  
            String curr_entity = GetEntityFromLine(splitted_line); //I get set of descriptive fields separated by \t. It looks like this: asd\tqwe\t...\tzxc
            int dti = k_d_map.get(splitted_line[splitted_line.length - 2]); //I get date column index for Date stored in this row (if it was 02.2017 then index will be 0) 

            if (ESs_map.containsKey(curr_entity)) //I check if map contains row with such descriptive fields set
                ESs_map.get(curr_entity).SalesAmounts[dti] = splitted_line[splitted_line.length - 1]; //if contains, we set sale amount at date index (set 5 to 02.2017 column for example)
            else
            {
                EntitySales es = new EntitySales(curr_entity, known_dates.length); //else we create new object to hold row          
                es.SalesAmounts[dti] = splitted_line[splitted_line.length - 1]; //set sales amount at date
                ESs_map.put(curr_entity, es); //and add to map
            }
        }

        //--

        String first_row = ""; //here and below I build first row text representation, I add stored DescFields and unique dates
        this.ConvertedList = new ArrayList<String>();               

        for (int i = 0; i < this.Entities.length; i++)
            first_row += this.Entities[i] + "\t";

        for (int i = 0; i < known_dates.length; i++)
            first_row += i < known_dates.length - 1 ? known_dates[i] + "\t" : known_dates[i];

        this.ConvertedList.add(first_row);

        //--

        for (EntitySales es : ESs_map.values()) //Here I get rows as separate lines 
            this.ConvertedList.add(es.GetAsLine());
    }

    public String GetEntityFromLine(String[] line)
    {
        String[] entities = new String[line.length - 2];
        System.arraycopy(line, 0, entities, 0, entities.length);

        String entity = "";

        for (int i = 0; i < entities.length; i++)
            entity += i < entities.length - 1 ? entities[i] + "\t" : entities[i];

        return entity;
    }

    public void SortStrDates(String[] dates)
    {
        for (int i = 0; i < dates.length; i++)
            for (int j = i + 1; j < dates.length; j++)
            {
                Date dt_i = MyJunk.ConvertStrToDate(dates[i]);
                Date dt_j = MyJunk.ConvertStrToDate(dates[j]);

                if (dt_j.before(dt_i))
                {
                    String temp_i = dates[i];
                    dates[i] = dates[j];
                    dates[j] = temp_i;
                }
            }
    }
}

class EntitySales
{
    public String Entity;
    public String[] SalesAmounts;

    public EntitySales(String entity, int sales_amounts_size)
    {
        this.Entity = entity;
        this.SalesAmounts = new String[sales_amounts_size];
    }

    public String GetAsLine()
    {
        String line = this.Entity + "\t";

        for (int i = 0; i < this.SalesAmounts.length; i++)
        {
            String val = this.SalesAmounts[i] == null || this.SalesAmounts[i].isEmpty() ? "0" : this.SalesAmounts[i];
            line += i < this.SalesAmounts.length - 1 ? val + "\t" : val;
        }

        return line;
    }
}
import java.util.ArrayList;
导入java.util.Date;
导入java.util.HashMap;
导入java.util.Map;
类表转换器
{
public String[]Entities;//我在这里存储描述性字段,如DescField1、DescField2。。。
public ArrayList ConvertedList;//这里我将转换的表行存储为单独的字符串行
publictableconverter(arraylistlines)//我们将表行作为单独的字符串行接收
{
String[]splitted_first_line=lines.get(0.split(“\t”);//拆分第一行以获取描述性字段
this.Entities=new String[splitted_first_line.length-2];//分配大小以保存所有描述性字段。-2,因为最后两列是Date和Total
System.arraycopy(拆分的第一行,0,this.Entities,0,this.Entities.length);//将描述性字段复制到我的arr中
//--
int lines_sz=lines.size();//保存行大小以避免每次迭代都重新计算
Map k_d_Map=new HashMap();//映射以存储每个日期列的索引
对于(int i=1;i
它可以工作,但在巨大的桌子上速度非常慢。 我等了1小时20分钟才将800k行转换到表中并取消了任务。 仅在3分钟内就转换了20万行。我不知道我为什么会这么慢,但问题是如何大大加快我的算法?
我尝试将
整型
值分配给每一组描述性字段(asd\tqwe\t…\tzxc->0,something\telse->1),并比较没有
映射的整型,但速度较慢。

虽然您可以改进整体算法,但主要的减速可能是在
GetAsLine
函数中:

public String GetAsLine()
{
    String line = this.Entity + "\t";

    for (int i = 0; i < this.SalesAmounts.length; i++)
    {
        String val = this.SalesAmounts[i] == null || this.SalesAmounts[i].isEmpty() ? "0" : this.SalesAmounts[i];
        line += i < this.SalesAmounts.length - 1 ? val + "\t" : val;
    }

    return line;
}
p
StringBuilder line = new StringBuilder();
for (int i = 0; i < this.SalesAmounts.length; i++)
{
    String val = this.SalesAmounts[i] == null || this.SalesAmounts[i].isEmpty() ? "0" : this.SalesAmounts[i];
    line.append(val+"\t");
}
// remove final tab character
line.remove(line.length()-1, line.length()-1);

return line.toString();