使用Java中的group by聚合CSV数据
我需要用 Java 对 CSV 数据做类似 SQL GROUP BY 的聚合(标签:csv、opencsv)。我的 CSV 文件如下所示:
Numero, NumeroWsn, NoeudAdress, PacketRece, NoeudsRece, Hello
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b4, Hello #33
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b986, Hello #33
1436136640477415,wsn430-8,NA:b27b,Packet recevied from,RX: bc2d, Hello #33
1436136640477566,wsn430-8,NA:b27b,Packet recevied from,RX: b36b, Hello #34
1436136640477716,wsn430-8,NA:b27b,Packet recevied from,RX: bcb6, Hello #35
1436136640477995,wsn430-9,NA:bc2d,Packet recevied from,RX: 1f9e, Hello #33
1436136640478162,wsn430-9,NA:bc2d,Packet recevied from,RX: be29, Hello #33
1436136640478313,wsn430-9,NA:bc2d,Packet recevied from,RX: b61a, Hello #32
1436136640478462,wsn430-9,NA:bc2d,Packet recevied from,RX: c735, Hello #32
1436136640478612,wsn430-9,NA:bc2d,Packet recevied from,RX: bb0a, Hello #32
1436136640478760,wsn430-9,NA:bc2d,Packet recevied from,RX: b6bc, Hello #33
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b1, Hello #42
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b984, Hello #44
是否有一种方法可以用 Java 按 NoeudAdress 分组,并把每组中 NoeudsRece 的个数显示为一列,如下所示:
NoeudsAdresse,NumberOfNoeudsRece
b27b ,7
bc2d ,6
我曾想过使用 OpenCSV 将 CSV 文件加载到列表中,但对于有数百万行的 CSV 文件,这样做是否高效?您可以逐行读取文件,用逗号拆分每一行并把各个值保存在字符串数组中,然后创建一个哈希映射:键是 NoeudAdress 列的值,值是保存需要计数的那一列(NoeudsRece)的 ArrayList。这样,每个键的计数就等于对应 ArrayList 的大小。编辑:这是“用 OpenCSV 导入整个 CSV”的一种有条件的变体——我们不把整个 CSV 加载到内存,而只按需要的格式导入需要的数据,这会比原来的方法性能更好。另一种做法是使用 H2 而不是 OpenCSV:去掉标题行,把数据放入名为 DATA.CSV 的文件中;从 H2 官网(h2database.com)下载 h2 jar 文件;然后运行下面的代码:
import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Loads {@code DATA.CSV} into an H2 database and prints, for every distinct
 * {@code NOEUDADRESS} value, the number of {@code NOEUDSRECE} entries recorded for it.
 *
 * <p>The database is stored in a file named {@code storage} located in the directory
 * returned by {@link #getApplicationPath(Class)}. The H2 driver ({@code org.h2.Driver})
 * must be on the class path at run time.
 *
 * <p>{@code DATA.CSV} is expected to contain data rows only (no header row); the column
 * names are passed to {@code CSVREAD} explicitly so that the first data row is not
 * consumed as a header.
 */
public class CSVLoader {

    /** Column names of the {@code DATA} table, in the order they appear in the CSV file. */
    private static final String CSV_COLUMNS =
            "NUMERO,NUMEROWSN,NOEUDADRESS,PACKETRECE,NOEUDSRECE,HELLO";

    /**
     * Returns the absolute path of the directory that contains the code source
     * (class directory or jar) of the given class.
     *
     * @param mainClass class whose code-source location is inspected
     * @return absolute path of the parent directory of that location
     * @throws URISyntaxException if the code-source URL cannot be converted to a URI
     */
    public static final String getApplicationPath(Class<?> mainClass) throws URISyntaxException {
        return getApplicationDirectory(mainClass).getAbsolutePath();
    }

    /**
     * Runs the whole pipeline: drop the table, recreate it, import the CSV file and
     * print the grouped counts. A failure in one step is reported and the remaining
     * steps are still attempted.
     */
    public CSVLoader() {
        executeStatement(getDropTableStatement(), false);
        executeStatement(getCreateTableStatement(), false);
        executeStatement(getInsertStatement(), false);
        executeStatement(getSelectStatement(), true);
    }

    /** @return the statement that drops the {@code DATA} table if it exists */
    public static final String getDropTableStatement() {
        return "DROP TABLE DATA IF EXISTS;\n";
    }

    /** @return the query that counts {@code NOEUDSRECE} values per {@code NOEUDADRESS} */
    public static final String getSelectStatement() {
        return "SELECT NOEUDADRESS, COUNT(NOEUDSRECE) FROM DATA GROUP BY NOEUDADRESS;\n";
    }

    /** @return the statement that creates the {@code DATA} table with six VARCHAR(100) columns */
    public static final String getCreateTableStatement() {
        return "CREATE TABLE DATA(\n"
                + " NUMERO VARCHAR(100),\n"
                + " NUMEROWSN VARCHAR(100),\n"
                + " NOEUDADRESS VARCHAR(100),\n"
                + " PACKETRECE VARCHAR(100),\n"
                + " NOEUDSRECE VARCHAR(100),\n"
                + " HELLO VARCHAR(100))";
    }

    /**
     * Returns the statement that copies every row of {@code DATA.CSV} into {@code DATA}.
     *
     * <p>The column list is passed to {@code CSVREAD} explicitly: without it H2 treats
     * the first line of the file as column names, which would silently drop the first
     * data row of a header-less file.
     *
     * @return the INSERT ... SELECT statement
     */
    public static final String getInsertStatement() {
        return "INSERT INTO DATA SELECT * FROM CSVREAD('DATA.CSV', '" + CSV_COLUMNS + "')";
    }

    /**
     * Opens a connection to the H2 database, executes one SQL statement and closes
     * all JDBC resources again.
     *
     * <p>Errors are printed to standard error and not rethrown, so callers can keep
     * executing further statements.
     *
     * @param sql           the SQL text to execute
     * @param withResultSet {@code true} to run {@code sql} as a query and print the first
     *                      two columns of every row to standard output; {@code false} to
     *                      just execute it
     */
    public void executeStatement(String sql, boolean withResultSet) {
        try {
            File directory = getApplicationDirectory(CSVLoader.class);
            Class.forName("org.h2.Driver");
            String url = "jdbc:h2:" + directory.getAbsolutePath() + File.separator + "storage";

            // try-with-resources closes statement and connection even when closing an
            // earlier resource throws; the previous single try block in finally could
            // skip connection.close() in that case.
            try (Connection connection = DriverManager.getConnection(url, "sa", "secret");
                 Statement statement = connection.createStatement()) {
                if (withResultSet) {
                    try (ResultSet resultSet = statement.executeQuery(sql)) {
                        while (resultSet.next()) {
                            System.out.println("-->" + resultSet.getString(1) + "\t" + resultSet.getString(2));
                        }
                    }
                } else {
                    statement.execute(sql);
                }
            }
        } catch (URISyntaxException | ClassNotFoundException | SQLException e) {
            // Best effort by design: report the failure and let the caller continue.
            e.printStackTrace();
        }
    }

    /**
     * Resolves the parent directory of the code-source location of the given class.
     *
     * @param mainClass class whose code-source location is inspected
     * @return parent directory of the class directory or jar file
     * @throws URISyntaxException if the code-source URL cannot be converted to a URI
     */
    private static final File getApplicationDirectory(Class<?> mainClass) throws URISyntaxException {
        URL location = mainClass.getProtectionDomain().getCodeSource().getLocation();
        return new File(location.toURI()).getParentFile();
    }

    /**
     * Program entry point; constructing the loader runs the import and the query.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new CSVLoader();
    }
}
您也可以在 H2 控制台中运行:在控制台中输入下面的命令,同样可以导入上面示例中的数据:
-- Creates table TEST with six VARCHAR(100) columns and, in the same statement,
-- fills it with the rows that CSVREAD returns for the given CSV file.
-- The CSV file is referenced by its full path.
-- NOTE(review): no column list is passed to CSVREAD; check the H2 documentation
-- for how the first line of the file is treated in that case.
CREATE TABLE TEST(
NUMERO VARCHAR(100),
NUMEROWSN VARCHAR(100),
NOEUDADRESS VARCHAR(100),
PACKETRECE VARCHAR(100),
NOEUDSRECE VARCHAR(100),
HELLO VARCHAR(100)
) AS SELECT * FROM CSVREAD('C:\\ECLIPSE\\WORKSPACE\\H2\\DATA.CSV')
确保使用数据文件的完整路径。如果可以接受非 Java 的解决方案,可以在命令行中这样完成:cat FileWithData | awk -F, '{print $3}' | awk -F: '{print $2}' | sort | uniq -c。——感谢您的回答,但我对 Java 编程一无所知。——那您为什么在问题中加了 java 标签?——我也遇到过同样的问题,我从 CSV 文件导入数据时是这样做的:CREATE TABLE TEST AS SELECT * FROM CSVREAD('test.csv'); 或者 CREATE TABLE TEST(ID INT PRIMARY KEY, NAME VARCHAR(255)) AS SELECT * FROM CSVREAD('test.csv'); 这对您有用吗?我用您的数据测试过,可以正常工作。——第一步:我下载了 h2 jar 文件并打开 H2 控制台;第二步:导入 CSV 文件。在 H2 控制台中,我使用的是 Generic H2 (Embedded) 设置。很抱歉,但我该如何把我的 CSV 文件导入 H2 控制台?我试了好几次,都遇到同样的问题。再次感谢您。
-->NA:b27b 7
-->NA:bc2d 6
-- Creates table TEST with six VARCHAR(100) columns and, in the same statement,
-- fills it with the rows that CSVREAD returns for the given CSV file.
-- The CSV file is referenced by its full path.
-- NOTE(review): no column list is passed to CSVREAD; check the H2 documentation
-- for how the first line of the file is treated in that case.
CREATE TABLE TEST(
NUMERO VARCHAR(100),
NUMEROWSN VARCHAR(100),
NOEUDADRESS VARCHAR(100),
PACKETRECE VARCHAR(100),
NOEUDSRECE VARCHAR(100),
HELLO VARCHAR(100)
) AS SELECT * FROM CSVREAD('C:\\ECLIPSE\\WORKSPACE\\H2\\DATA.CSV')