Java 比较两个excel文件中的数据,并在第三个文件中写入相应的映射
我在两张 Excel 工作表中各有一份药物列表,其中包含来自两个不同数据源的相应 ID,例如: 来自世界卫生组织(WHO)的数据: 来自其他来源的数据看起来类似,但产品 ID 不同,包含的产品也不完全相同。不过,物质 ID 的分配方式是标准化的。 我必须读取这两张独立的工作表,并根据产品所含的物质比较产品是否匹配。如果匹配,我必须将两张表中对应的产品 ID 相互映射。因此,我的最终工作表如下所示: 产品ID1 产品ID2 物质1 物质2 物质3 物质4 注:一个产品可能含有 100 多种物质。 以下是我尝试解决这个问题的方法,但我需要帮助:Java 比较两个excel文件中的数据,并在第三个文件中写入相应的映射,java,excel,maps,Java,Excel,Maps,我在两张 Excel 工作表中各有一份药物列表,其中包含来自两个不同数据源的相应 ID,例如: 来自世界卫生组织(WHO)的数据: 来自其他来源的数据看起来类似,但产品 ID 不同,包含的产品也不完全相同。不过,物质 ID 的分配方式是标准化的。 我必须读取这两张独立的工作表,并根据产品所含的物质比较产品是否匹配。如果匹配,我必须将两张表中对应的产品 ID 相互映射。因此,我的最终工作表如下所示: 产品ID1 产品ID2 物质1 物质2 物质3 物质4 注:一个产品可能含有 100 多种物质。 以下是我尝试解决这个问题的方法,但我需要帮助: 从两张工作表中读取数据
publicstaticvoidmain(字符串[]args){
String readFile=“C:\\Users\\admin\\Desktop\\SampleData”;
HashMap productMapWHO=新HashMap();
HashMap productMapNDC=新HashMap();
productMapWHO=readExcel(0,readFile);
productMapNDC=readExcel(1,readFile);
Map WHOtoNDCMapping=newhashmap();
WHOtoNDCMapping=compareProductMaps(productMapWHO,productMapNDC);
String writeFile=“C:\\Users\\admin\\Desktop\\WHO\u NDC\u Mapping.xls”;
试一试{
writeToExcel(whotondMapping,writeFile);
}捕获(无效格式){
e、 printStackTrace();
}捕获(HPSFE异常){
e、 printStackTrace();
}
}
私有静态HashMap readExcel(int sheetNumber,字符串文件名){
HashMap productMap=新的HashMap();
试一试{
FileInputStream文件=新FileInputStream(新文件(文件名));
//创建包含对.xlsx文件引用的工作簿实例
XSSF工作簿=新XSSF工作簿(文件);
//从工作簿中获取第一张/所需的工作表
XSSFSheet sheet=workbook.getSheetAt(sheetNumber);
//逐个遍历每一行
迭代器rowIterator=sheet.Iterator();
while(roweiterator.hasNext()){
List substancelist=新建ArrayList();
行=行迭代器。下一步();
双键;
对象值=”;
//实体主义者;
迭代器cellIterator=row.cellIterator();
Cell=null;
while(cellIterator.hasNext()){
if(cell.getColumnIndex()==1)
key=cell.getNumericCellValue();
开关(cell.getCellType())
{
case Cell.Cell\u类型\u数值:
value=cell.getNumericCellValue();
打破
case Cell.Cell\u类型\u字符串:
value=cell.getStringCellValue().trim();
打破
}
Set list=productMap.get(键);
if(list==null)productMap.put(key,list=newhashset());
列表。添加(值);
}
}
}
捕获(例外e){
e、 printStackTrace();
}
返回productMap;
}
私有静态映射compareProductMaps(HashMapproductMap1、HashMapproductMap2){
Map finalMapping=new HashMap();
对于(Map.Entry:productMap1.entrySet()){
Double key=entry.getKey();
Map mappedIds=newhashmap();
for(设置值列表:productMap1.values()){
if(valueList.size()==productMap2.values().size()&&productMap2.values().containsAll(valueList))
{
Double productId2=productMap2.get(valueList);//在这里抛出错误。我想获取匹配的对应valueList的键。
mappedIds.put(productId2,valueList);
最终应用。放置(键,mappedIds);
}
}
}
返回最终应用程序;
}
私有静态void writeToExcel(映射最终映射,字符串xlsFilename)引发HPSFException,InvalidFormatException{
工作簿wb=null;
试一试{
wb=WorkbookFactory.create(新文件输入流(xlsFilename));
}捕获(EncryptedDocumentException e){
e、 printStackTrace();
}catch(filenotfounde异常){
e、 printStackTrace();
}捕获(IOE异常){
e、 printStackTrace();
}
Sheet Sheet=wb.createSheet(“WHOtoNDCMapping”);
int rowIdx=0;
int-cellIdx=0;
//标题
行hssfHeader=sheet.createRow(rowIdx);
rowIdx=1;
Row-Row=sheet.createRow(rowIdx++);
cellIdx=0;
for(Double productId1:finalMapping.keySet()){
Map m1=finalMapping.get(productId1);
Cell Cell=row.createCell(cellIdx++);
cell.setCellValue(productId1);
for(Double productId2:m1.keySet()){
设置substanceList=m1.get(productId2);
cell=row.createCell(cellIdx++);
cell.setCellValue(productId2);
for(对象实体:实体列表){
if(字符串的物质实例){
cell.setCellValue((字符串)物质);
}else if(物质实例编号){
cell.setCellValue(((数字)物质).doubleValue());
}否则{
抛出新的RuntimeException(“无效类型的单元格值”+实体);
}
}
}
}
试一试{
FileOutputStream out=新的FileOutputStream(xlsFilename);
wb.写(出);
out.close();
}捕获(IOE异常){
抛出新的HPSFException(例如getMessage());
}
}
我不会使用层层嵌套的 Map(Map 的 Map 的 Map 等),而是构建一个能正确表示产品的类。如果两个文件的结构相同,您可以使用类似下面的代码(已简化,部分工作留给您自己完成):
/**
 * Program entry point.
 *
 * Reads sheet 0 and sheet 1 of the sample workbook into product maps,
 * matches the two maps against each other and writes the resulting
 * mapping to the target workbook. Failures reported by the writer are
 * printed to standard error.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    final String sourceWorkbook = "C:\\Users\\admin\\Desktop\\SampleData";
    final String targetWorkbook = "C:\\Users\\admin\\Desktop\\WHO_NDC_Mapping.xls";

    // Sheet 0 is read as the WHO list, sheet 1 as the NDC list.
    HashMap<Double, Set<Object>> whoProducts = readExcel(0, sourceWorkbook);
    HashMap<Double, Set<Object>> ndcProducts = readExcel(1, sourceWorkbook);

    Map<Double, Map<Double, Set<Object>>> whoToNdc =
            compareProductMaps(whoProducts, ndcProducts);

    try {
        writeToExcel(whoToNdc, targetWorkbook);
    } catch (InvalidFormatException e) {
        e.printStackTrace();
    } catch (HPSFException e) {
        e.printStackTrace();
    }
}
/**
 * Reads one sheet of an .xlsx workbook into a map from product ID to the
 * set of remaining cell values found on that product's row(s).
 *
 * The product ID is taken from the numeric cell at column index 1. Rows
 * whose cell at that index is missing or not numeric (for example a header
 * row) are skipped. Every other numeric or string cell on the row is added
 * to the product's set; string values are trimmed. Rows that repeat a
 * product ID accumulate into the same set.
 *
 * NOTE(review): column index 1 as the ID column is carried over from the
 * previous implementation; confirm it against the real sheet layout. Cells
 * in any other column (including column 0) are treated as substances.
 *
 * Any exception while opening or reading the workbook is printed and the
 * entries collected so far are returned (best-effort behaviour kept from
 * the previous implementation).
 *
 * @param sheetNumber zero-based index of the sheet to read
 * @param fileName    path of the .xlsx workbook
 * @return map from product ID to its cell values; never {@code null}
 */
private static HashMap<Double, Set<Object>> readExcel(int sheetNumber, String fileName) {
    final int keyColumn = 1;
    HashMap<Double, Set<Object>> productMap = new HashMap<Double, Set<Object>>();
    try {
        FileInputStream file = new FileInputStream(new File(fileName));
        try {
            //Create Workbook instance holding reference to .xlsx file
            XSSFWorkbook workbook = new XSSFWorkbook(file);
            //Get first/desired sheet from the workbook
            XSSFSheet sheet = workbook.getSheetAt(sheetNumber);
            //Iterate through each rows one by one
            Iterator<Row> rowIterator = sheet.iterator();
            while (rowIterator.hasNext()) {
                Row row = rowIterator.next();

                // Resolve the product ID before touching the other cells so the
                // key is always defined, whatever order the cells come in.
                Cell keyCell = row.getCell(keyColumn);
                if (keyCell == null || keyCell.getCellType() != Cell.CELL_TYPE_NUMERIC) {
                    continue;
                }
                Double key = keyCell.getNumericCellValue();

                Set<Object> values = productMap.get(key);
                if (values == null) {
                    values = new HashSet<Object>();
                    productMap.put(key, values);
                }

                Iterator<Cell> cellIterator = row.cellIterator();
                while (cellIterator.hasNext()) {
                    Cell cell = cellIterator.next();
                    // The ID must not be part of the substance set, otherwise sets
                    // from two sources with different IDs could never be equal.
                    if (cell.getColumnIndex() == keyColumn) {
                        continue;
                    }
                    switch (cell.getCellType()) {
                    case Cell.CELL_TYPE_NUMERIC:
                        values.add(cell.getNumericCellValue());
                        break;
                    case Cell.CELL_TYPE_STRING:
                        values.add(cell.getStringCellValue().trim());
                        break;
                    default:
                        // Blank, formula, boolean and error cells carry no substance ID.
                        break;
                    }
                }
            }
        } finally {
            file.close();
        }
    }
    catch (Exception e) {
        e.printStackTrace();
    }
    return productMap;
}
/**
 * Matches the products of two sources by their substance sets.
 *
 * Two products match when their substance sets are equal (same elements,
 * order irrelevant). Products whose set is {@code null} or empty are never
 * matched, so that two products without any recorded substance are not
 * paired with each other.
 *
 * @param productMap1 product ID to substance set for the first source
 * @param productMap2 product ID to substance set for the second source
 * @return for every product ID of the first source that has at least one
 *         match: a map from each matching product ID of the second source
 *         to the shared substance set. Products without a match are absent.
 */
private static Map<Double,Map<Double,Set<Object>>> compareProductMaps (HashMap<Double, Set<Object>>productMap1, HashMap<Double, Set<Object>>productMap2) {
    // Index the second source by substance set so each product of the first
    // source is resolved with one lookup instead of a scan over all products.
    Map<Set<Object>, List<Double>> idsBySubstances = new HashMap<Set<Object>, List<Double>>();
    for (Map.Entry<Double, Set<Object>> candidate : productMap2.entrySet()) {
        Set<Object> substances = candidate.getValue();
        if (substances == null || substances.isEmpty()) {
            continue;
        }
        List<Double> ids = idsBySubstances.get(substances);
        if (ids == null) {
            ids = new ArrayList<Double>();
            idsBySubstances.put(substances, ids);
        }
        ids.add(candidate.getKey());
    }

    Map<Double,Map<Double,Set<Object>>> finalMapping = new HashMap<Double,Map<Double,Set<Object>>>();
    for (Map.Entry<Double, Set<Object>> entry : productMap1.entrySet()) {
        Set<Object> substances = entry.getValue();
        if (substances == null || substances.isEmpty()) {
            continue;
        }
        List<Double> matches = idsBySubstances.get(substances);
        if (matches == null) {
            continue;
        }
        Map<Double,Set<Object>> mappedIds = new HashMap<Double, Set<Object>>();
        for (Double productId2 : matches) {
            mappedIds.put(productId2, substances);
        }
        finalMapping.put(entry.getKey(), mappedIds);
    }
    return finalMapping;
}
/**
 * Writes the product mapping to a sheet named "WHOtoNDCMapping" in the
 * given workbook file.
 *
 * If the file already exists it is opened and the sheet is added to it,
 * replacing a sheet of the same name from an earlier run. If it does not
 * exist a new workbook is created. Row 0 is a header row; every following
 * row holds one matched pair: first-source product ID, second-source
 * product ID, then one cell per substance.
 *
 * @param finalMapping first-source product ID to (second-source product ID
 *                     to shared substance set)
 * @param xlsFilename  path of the workbook to create or update
 * @throws HPSFException          if the workbook cannot be read or written;
 *                                the underlying exception is kept as cause
 * @throws InvalidFormatException if an existing file is not a workbook the
 *                                POI factory can open
 * @throws RuntimeException       if a substance is neither a String nor a Number
 */
private static void writeToExcel(Map<Double,Map<Double,Set<Object>>> finalMapping, String xlsFilename) throws HPSFException, InvalidFormatException {
    final String sheetName = "WHOtoNDCMapping";
    Workbook wb = openOrCreateWorkbook(xlsFilename);

    // createSheet rejects duplicate names, so drop the result of a previous run.
    int previous = wb.getSheetIndex(sheetName);
    if (previous >= 0) {
        wb.removeSheetAt(previous);
    }
    Sheet sheet = wb.createSheet(sheetName);

    int rowIdx = 0;
    // Header
    Row header = sheet.createRow(rowIdx++);
    header.createCell(0).setCellValue("ProductID1");
    header.createCell(1).setCellValue("ProductID2");
    int substanceColumns = 0; // number of "SubstanceN" header cells written so far

    for (Map.Entry<Double, Map<Double, Set<Object>>> byFirstId : finalMapping.entrySet()) {
        for (Map.Entry<Double, Set<Object>> bySecondId : byFirstId.getValue().entrySet()) {
            // One row per matched pair, so pairs never overwrite each other.
            Row row = sheet.createRow(rowIdx++);
            int cellIdx = 0;
            row.createCell(cellIdx++).setCellValue(byFirstId.getKey());
            row.createCell(cellIdx++).setCellValue(bySecondId.getKey());

            Set<Object> substanceList = bySecondId.getValue();
            for (Object substance : substanceList) {
                // Each substance gets its own cell after the two ID columns.
                Cell cell = row.createCell(cellIdx++);
                if (substance instanceof String) {
                    cell.setCellValue((String) substance);
                } else if (substance instanceof Number) {
                    cell.setCellValue(((Number) substance).doubleValue());
                } else {
                    throw new RuntimeException("Cell value of invalid type " + substance);
                }
            }

            // Grow the header to cover the widest row seen so far.
            for (; substanceColumns < substanceList.size(); substanceColumns++) {
                header.createCell(2 + substanceColumns).setCellValue("Substance" + (substanceColumns + 1));
            }
        }
    }

    try {
        FileOutputStream out = new FileOutputStream(xlsFilename);
        try {
            wb.write(out);
        } finally {
            out.close();
        }
    } catch (IOException e) {
        throw new HPSFException("Could not write " + xlsFilename + ": " + e.getMessage(), e);
    }
}

/**
 * Opens the workbook at the given path, or creates an empty in-memory
 * workbook when no such file exists yet (.xlsx names get the XSSF format,
 * everything else the HSSF .xls format).
 */
private static Workbook openOrCreateWorkbook(String xlsFilename) throws HPSFException, InvalidFormatException {
    File target = new File(xlsFilename);
    if (!target.exists()) {
        boolean isXlsx = xlsFilename.regionMatches(true, xlsFilename.length() - 5, ".xlsx", 0, 5);
        if (isXlsx) {
            return new XSSFWorkbook();
        }
        return new org.apache.poi.hssf.usermodel.HSSFWorkbook();
    }
    try {
        FileInputStream in = new FileInputStream(target);
        try {
            return WorkbookFactory.create(in);
        } finally {
            // Release the file handle before the same path is opened for writing.
            in.close();
        }
    } catch (EncryptedDocumentException e) {
        throw new HPSFException("Workbook is encrypted: " + xlsFilename, e);
    } catch (IOException e) {
        throw new HPSFException("Could not read " + xlsFilename + ": " + e.getMessage(), e);
    }
}
/**
 * Plain data holder for one product row read from a spreadsheet.
 */
class ExcelProduct {

    /** Identifier of the product. */
    String productId;

    /** Name of the product. */
    String productName;

    /**
     * Identifiers of the substances of this product. A set is used on the
     * assumption that order is not relevant; otherwise use a list.
     */
    Set<String> substanceIds;
}
for( ExcelProduct leftProduct : leftMap.values() ) {
ExcelProduct rightProduct = rightMap.get(leftProduct.productId);
//product not present in right map so skip
if( rightProduct == null ) {
continue;
}
//compare products here, e.g. comparing the substance ids
if( leftProduct.substanceIds.equals( rightProduct.substanceIds) ) {
//do whatever is needed, e.g. add the product to the result list which will be written to the result excel file
//you probably don't need a result map here
}
}