package com.tudicloud.module.medcoding.service.dictionary;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import static com.tudicloud.framework.common.core.util.core.StringUtils.isBlank;
/**
 * Flattens a MedDRA ASCII distribution into a single CSV file with one row per LLT.
 *
 * <p>Reads {@code llt.asc}, {@code pt.asc}, {@code hlt.asc}, {@code hlgt.asc}, {@code soc.asc}
 * and {@code mdhier.asc} ('$'-delimited text), builds code-to-name and child-to-parent lookup
 * maps, and writes every LLT together with its PT / HLT / HLGT / SOC ancestry.
 *
 * <p>Names are taken from the level's dedicated file first; the names embedded in
 * {@code mdhier.asc} are used only as a fallback.
 *
 * <p>NOTE(review): a multi-axial PT has one {@code mdhier.asc} row per SOC path. Later rows
 * overwrite earlier ones in the maps below, so the exported path is the last one found in the
 * file, which is not necessarily the primary one - confirm this is intended.
 */
public class MedDRADataParser {

    /** Unicode byte-order mark; encoded as EF BB BF in UTF-8. */
    private static final char BOM = '\uFEFF';

    // Column layout of mdhier.asc per the MedDRA distribution file format:
    // pt_code$hlt_code$hlgt_code$soc_code$pt_name$hlt_name$hlgt_name$soc_name$
    // soc_abbrev$null_field$pt_soc_code$primary_soc_fg$
    private static final int MDHIER_PT_CODE = 0;
    private static final int MDHIER_HLT_CODE = 1;
    private static final int MDHIER_HLGT_CODE = 2;
    private static final int MDHIER_SOC_CODE = 3;
    private static final int MDHIER_PT_NAME = 4;
    private static final int MDHIER_HLT_NAME = 5;
    private static final int MDHIER_HLGT_NAME = 6;
    private static final int MDHIER_SOC_NAME = 7;
    private static final int MDHIER_PRIMARY_SOC_FLAG = 11;
    private static final int MDHIER_MIN_COLUMNS = 12;

    // Column layout of llt.asc: llt_code$llt_name$pt_code$...$llt_currency(9)$...
    private static final int LLT_CODE = 0;
    private static final int LLT_PT_CODE = 2;
    private static final int LLT_CURRENCY = 9;
    private static final int LLT_MIN_COLUMNS = 10;

    // Names from each level's dedicated .asc file.
    // lltNameMap is insertion-ordered so the export follows the order of llt.asc.
    private final Map<String, String> lltNameMap = new LinkedHashMap<>();
    private final Map<String, String> ptNameMap = new HashMap<>();
    private final Map<String, String> hltNameMap = new HashMap<>();
    private final Map<String, String> hlgtNameMap = new HashMap<>();
    private final Map<String, String> socNameMap = new HashMap<>();

    // Names taken from mdhier.asc (fallback only).
    private final Map<String, String> ptNameMdhierMap = new HashMap<>();
    private final Map<String, String> hltNameMdhierMap = new HashMap<>();
    private final Map<String, String> hlgtNameMdhierMap = new HashMap<>();
    private final Map<String, String> socNameMdhierMap = new HashMap<>();

    // Child code -> parent code.
    private final Map<String, String> lltToPtMap = new HashMap<>();
    private final Map<String, String> ptToHltMap = new HashMap<>();
    private final Map<String, String> hltToHlgtMap = new HashMap<>();
    private final Map<String, String> hlgtToSocMap = new HashMap<>();

    // Status flags.
    private final Map<String, String> lltToCurrentMap = new HashMap<>();
    private final Map<String, String> ptToMainSocMap = new HashMap<>();

    /**
     * Reads a '$'-delimited .asc file as UTF-8.
     *
     * <p>Every column is kept, including empty ones, so column indexes stay stable. Blank lines
     * are skipped, a leading byte-order mark is dropped and each field is trimmed.
     *
     * @param filePath path of the file to read
     * @return one list of fields per non-blank line; empty when the file does not exist. If
     *     reading fails part-way, the rows read so far are returned (best effort).
     */
    private List<List<String>> readAscFile(String filePath) {
        List<List<String>> rows = new ArrayList<>();
        File file = new File(filePath);
        if (!file.exists()) {
            System.out.println("⚠️ 文件不存在:" + filePath);
            return rows;
        }
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            boolean firstLine = true;
            while ((line = br.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // trim() does not remove U+FEFF, so a BOM would otherwise become part of
                    // the first code in the file.
                    if (!line.isEmpty() && line.charAt(0) == BOM) {
                        line = line.substring(1);
                    }
                }
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                // Limit -1 keeps trailing empty columns.
                String[] fields = line.split("\\$", -1);
                List<String> fieldList = new ArrayList<>(fields.length);
                for (String field : fields) {
                    fieldList.add(field.trim());
                }
                rows.add(fieldList);
            }
        } catch (IOException e) {
            System.err.println("❌ 读取文件失败:" + filePath);
            e.printStackTrace();
        }
        return rows;
    }

    /**
     * Builds the path of a file inside {@code path}.
     *
     * <p>When {@code path} is an existing directory the separator is handled by
     * {@link File#File(String, String)}, so it may be given with or without a trailing
     * separator. Otherwise the two strings are simply concatenated.
     */
    private static String resolvePath(String path, String fileName) {
        if (path != null && !path.isEmpty() && new File(path).isDirectory()) {
            return new File(path, fileName).getPath();
        }
        return path + fileName;
    }

    /** Empties every lookup map so that a reload does not mix data from two loads. */
    private void clearMappings() {
        lltNameMap.clear();
        ptNameMap.clear();
        hltNameMap.clear();
        hlgtNameMap.clear();
        socNameMap.clear();
        ptNameMdhierMap.clear();
        hltNameMdhierMap.clear();
        hlgtNameMdhierMap.clear();
        socNameMdhierMap.clear();
        lltToPtMap.clear();
        ptToHltMap.clear();
        hltToHlgtMap.clear();
        hlgtToSocMap.clear();
        lltToCurrentMap.clear();
        ptToMainSocMap.clear();
    }

    /**
     * Loads all lookup maps from the .asc files found under {@code path}, replacing anything
     * loaded by a previous call. Missing files are reported on stdout and treated as empty.
     *
     * @param path directory containing the .asc files (trailing separator optional when the
     *     directory exists)
     */
    public void loadAllMappings(String path) {
        clearMappings();

        // 1. Read every file.
        List<List<String>> lltData = readAscFile(resolvePath(path, "llt.asc"));
        List<List<String>> ptData = readAscFile(resolvePath(path, "pt.asc"));
        List<List<String>> hltData = readAscFile(resolvePath(path, "hlt.asc"));
        List<List<String>> hlgtData = readAscFile(resolvePath(path, "hlgt.asc"));
        List<List<String>> socData = readAscFile(resolvePath(path, "soc.asc"));
        List<List<String>> mdhierData = readAscFile(resolvePath(path, "mdhier.asc"));

        // 2. Code -> name from the dedicated files (column 0 = code, column 1 = name).
        loadBasicNameMap(lltData, lltNameMap, 0, 1, "LLT");
        loadBasicNameMap(ptData, ptNameMap, 0, 1, "PT");
        loadBasicNameMap(hltData, hltNameMap, 0, 1, "HLT");
        loadBasicNameMap(hlgtData, hlgtNameMap, 0, 1, "HLGT");
        loadBasicNameMap(socData, socNameMap, 0, 1, "SOC");

        // 3. Hierarchy links and fallback names from mdhier.
        loadMdhierData(mdhierData);

        // 4. LLT -> PT link and LLT currency flag.
        loadLltRelatedData(lltData);

        // Load summary.
        System.out.println("\n=== 加载验证 ===");
        System.out.println("HLT名称(专属文件):" + hltNameMap.size() + " 条");
        System.out.println("HLT名称(mdhier兜底):" + hltNameMdhierMap.size() + " 条");
        System.out.println("HLGT名称(专属文件):" + hlgtNameMap.size() + " 条");
        System.out.println("HLGT名称(mdhier兜底):" + hlgtNameMdhierMap.size() + " 条");
    }

    /**
     * Fills {@code map} with key/value pairs taken from two columns of {@code data}.
     * Rows that are too short, or whose key or value is empty, are skipped.
     *
     * @param data parsed rows
     * @param map destination map
     * @param keyIdx column index of the key
     * @param valIdx column index of the value
     * @param type label used in the progress message
     */
    private void loadBasicNameMap(List<List<String>> data, Map<String, String> map,
                                  int keyIdx, int valIdx, String type) {
        int count = 0;
        int requiredSize = Math.max(keyIdx, valIdx) + 1;
        for (List<String> row : data) {
            if (row.size() < requiredSize) {
                continue;
            }
            String key = row.get(keyIdx);
            String val = row.get(valIdx);
            if (!key.isEmpty() && !val.isEmpty()) {
                map.put(key, val);
                count++;
            }
        }
        System.out.println(type + " 名称加载:" + count + " 条");
    }

    /**
     * Loads hierarchy links, fallback names and the primary-SOC flag from mdhier rows.
     *
     * <p>Column layout: 0 = PT code, 1 = HLT code, 2 = HLGT code, 3 = SOC code, 4 = PT name,
     * 5 = HLT name, 6 = HLGT name, 7 = SOC name, 11 = primary SOC flag.
     *
     * @param mdhierData parsed rows of mdhier.asc; rows with fewer than 12 columns are skipped
     */
    private void loadMdhierData(List<List<String>> mdhierData) {
        int count = 0;
        for (List<String> row : mdhierData) {
            if (row.size() < MDHIER_MIN_COLUMNS) {
                continue;
            }
            String ptId = row.get(MDHIER_PT_CODE);
            String hltId = row.get(MDHIER_HLT_CODE);
            String hlgtId = row.get(MDHIER_HLGT_CODE);
            String socId = row.get(MDHIER_SOC_CODE);

            String ptName = row.get(MDHIER_PT_NAME);
            String hltName = row.get(MDHIER_HLT_NAME);
            String hlgtName = row.get(MDHIER_HLGT_NAME);
            String socName = row.get(MDHIER_SOC_NAME);

            String mainSoc = row.get(MDHIER_PRIMARY_SOC_FLAG);

            // Child -> parent links (a later row overwrites an earlier one).
            if (!ptId.isEmpty() && !hltId.isEmpty()) {
                ptToHltMap.put(ptId, hltId);
            }
            if (!hltId.isEmpty() && !hlgtId.isEmpty()) {
                hltToHlgtMap.put(hltId, hlgtId);
            }
            if (!hlgtId.isEmpty() && !socId.isEmpty()) {
                hlgtToSocMap.put(hlgtId, socId);
            }

            // Fallback names: the first name seen for a code is kept.
            if (!ptId.isEmpty() && !ptName.isEmpty()) {
                ptNameMdhierMap.putIfAbsent(ptId, ptName);
            }
            if (!hltId.isEmpty() && !hltName.isEmpty()) {
                hltNameMdhierMap.putIfAbsent(hltId, hltName);
            }
            if (!hlgtId.isEmpty() && !hlgtName.isEmpty()) {
                hlgtNameMdhierMap.putIfAbsent(hlgtId, hlgtName);
            }
            if (!socId.isEmpty() && !socName.isEmpty()) {
                socNameMdhierMap.putIfAbsent(socId, socName);
            }

            // Primary-SOC flag of this row, stored per PT.
            if (!ptId.isEmpty() && !mainSoc.isEmpty()) {
                ptToMainSocMap.put(ptId, mainSoc);
            }
            count++;
        }
        System.out.println("mdhier加载:" + count + " 条(含名称兜底)");
    }

    /**
     * Loads the LLT -> PT link (column 2) and the LLT currency flag (column 9) from llt rows.
     *
     * @param lltData parsed rows of llt.asc; rows with fewer than 10 columns are skipped
     */
    private void loadLltRelatedData(List<List<String>> lltData) {
        int count = 0;
        for (List<String> row : lltData) {
            if (row.size() < LLT_MIN_COLUMNS) {
                continue;
            }
            String lltId = row.get(LLT_CODE);
            String ptId = row.get(LLT_PT_CODE);
            String current = row.get(LLT_CURRENCY);
            if (!lltId.isEmpty() && !ptId.isEmpty()) {
                lltToPtMap.put(lltId, ptId);
            }
            if (!lltId.isEmpty() && !current.isEmpty()) {
                lltToCurrentMap.put(lltId, current);
            }
            count++;
        }
        System.out.println("LLT数据加载:" + count + " 条(含现行状态)");
    }

    /**
     * Looks up the name for {@code id}, preferring the dedicated-file map over the mdhier map.
     *
     * @return the name, or an empty string when {@code id} is null/empty or unknown to both maps
     */
    private String getName(Map<String, String> basicMap, Map<String, String> mdhierMap, String id) {
        if (id == null || id.isEmpty()) {
            return "";
        }
        String name = basicMap.get(id);
        if (name != null && !name.isEmpty()) {
            return name;
        }
        name = mdhierMap.get(id);
        return name == null ? "" : name;
    }

    /**
     * Writes one CSV row per loaded LLT, in the order the LLTs appeared in llt.asc.
     *
     * <p>The file is UTF-8 with a leading byte-order mark. Codes and names that cannot be
     * resolved are written as empty fields; the primary-SOC and currency flags default to
     * {@code "N"}. I/O failures are reported on stderr and not rethrown.
     *
     * @param csvPath destination file (overwritten if it exists)
     * @param meddraVersion value written to the dictionary-version column of every row
     */
    public void exportToCsv(String csvPath, String meddraVersion) {
        try (FileOutputStream fos = new FileOutputStream(csvPath);
             OutputStreamWriter osw = new OutputStreamWriter(fos, StandardCharsets.UTF_8);
             BufferedWriter bw = new BufferedWriter(osw)) {
            // Emit the BOM through the writer so it cannot be reordered relative to
            // buffered content.
            bw.write(BOM);

            String header = "LLT编号,LLT名称,PT编号,PT名称,HLT编号,HLT名称,HLGT编号,HLGT名称,SOC编号,SOC名称,主SOC,现行,字典版本";
            bw.write(header);
            bw.newLine();

            int exportCount = 0;
            for (Map.Entry<String, String> entry : lltNameMap.entrySet()) {
                // LLT level
                String lltId = entry.getKey();
                String lltName = entry.getValue();
                String current = lltToCurrentMap.getOrDefault(lltId, "N");
                // PT level
                String ptId = lltToPtMap.getOrDefault(lltId, "");
                String ptName = getName(ptNameMap, ptNameMdhierMap, ptId);
                String mainSoc = ptToMainSocMap.getOrDefault(ptId, "N");
                // HLT level
                String hltId = ptToHltMap.getOrDefault(ptId, "");
                String hltName = getName(hltNameMap, hltNameMdhierMap, hltId);
                // HLGT level
                String hlgtId = hltToHlgtMap.getOrDefault(hltId, "");
                String hlgtName = getName(hlgtNameMap, hlgtNameMdhierMap, hlgtId);
                // SOC level
                String socId = hlgtToSocMap.getOrDefault(hlgtId, "");
                String socName = getName(socNameMap, socNameMdhierMap, socId);

                String row = String.join(",",
                        wrapCsvField(lltId),
                        wrapCsvField(lltName),
                        wrapCsvField(ptId),
                        wrapCsvField(ptName),
                        wrapCsvField(hltId),
                        wrapCsvField(hltName),
                        wrapCsvField(hlgtId),
                        wrapCsvField(hlgtName),
                        wrapCsvField(socId),
                        wrapCsvField(socName),
                        wrapCsvField(mainSoc),
                        wrapCsvField(current),
                        wrapCsvField(meddraVersion)
                );
                bw.write(row);
                bw.newLine();
                exportCount++;
            }
            System.out.println("\n✅ 导出完成!");
            System.out.println("📁 文件路径:" + csvPath);
            System.out.println("📊 导出数据量:" + exportCount + " 条");
        } catch (IOException e) {
            System.err.println("❌ 导出失败:" + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Encodes one CSV field: null/blank becomes an empty field; a value containing a comma,
     * a double quote or a line break is wrapped in double quotes with embedded quotes doubled.
     */
    private String wrapCsvField(String value) {
        if (isNullOrBlank(value)) {
            return "";
        }
        boolean needsQuoting = value.indexOf(',') >= 0
                || value.indexOf('"') >= 0
                || value.indexOf('\n') >= 0
                || value.indexOf('\r') >= 0;
        if (!needsQuoting) {
            return value;
        }
        return "\"" + value.replace("\"", "\"\"") + "\"";
    }

    /** Returns true when {@code value} is null, empty or consists only of whitespace. */
    private static boolean isNullOrBlank(String value) {
        if (value == null) {
            return true;
        }
        for (int i = 0; i < value.length(); i++) {
            if (!Character.isWhitespace(value.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} = directory containing the .asc files,
     *     {@code args[1]} = dictionary version (e.g. {@code 28.1}); the built-in defaults are
     *     used for whatever is omitted. The CSV is written into the same directory as
     *     {@code MedDRA_<version>.csv}.
     */
    public static void main(String[] args) {
        String dir = args.length > 0
                ? args[0]
                : "/Users/sunda/Desktop/MedDRA28.1版本/MedDRA_28_1_ENglish/MedAscii/";
        String version = args.length > 1 ? args[1] : "28.1";

        MedDRADataParser parser = new MedDRADataParser();
        parser.loadAllMappings(dir);
        String meddraVersion = "MedDRA_" + version;
        parser.exportToCsv(resolvePath(dir, meddraVersion + ".csv"), version);
    }
}
// 最后修改:2026 年 03 月 17 日
// © 允许规范转载