package com.tudicloud.module.medcoding.service.dictionary;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static com.tudicloud.framework.common.core.util.core.StringUtils.isBlank;


/**
 * Flattens a directory of MedDRA ASCII files ({@code llt.asc}, {@code pt.asc}, {@code hlt.asc},
 * {@code hlgt.asc}, {@code soc.asc}, {@code mdhier.asc}) into one CSV file with one row per LLT.
 *
 * <p>Typical use: call {@link #loadAllMappings(String)} once, then
 * {@link #exportToCsv(String, String)}. Names are resolved from the level-specific file first and
 * fall back to the names carried in {@code mdhier.asc}.
 *
 * <p>Instances hold mutable lookup tables and perform no synchronization.
 */
public class MedDRADataParser {

    /** Regex for the {@code $} column delimiter of the .asc files. */
    private static final String FIELD_DELIMITER_REGEX = "\\$";

    /** Byte-order mark; encoded as EF BB BF in UTF-8. */
    private static final String UTF8_BOM = "\uFEFF";

    /** Header row of the exported CSV. */
    private static final String CSV_HEADER =
            "LLT编号,LLT名称,PT编号,PT名称,HLT编号,HLT名称,HLGT编号,HLGT名称,SOC编号,SOC名称,主SOC,现行,字典版本";

    /** Flag value that marks an mdhier row as the main-SOC row of its PT. */
    private static final String MAIN_SOC_YES = "Y";

    // Column layout shared by the per-level name files (llt/pt/hlt/hlgt/soc .asc).
    private static final int NAME_FILE_CODE_COL = 0;
    private static final int NAME_FILE_NAME_COL = 1;

    // Extra columns read from llt.asc.
    private static final int LLT_CODE_COL = 0;
    private static final int LLT_PT_CODE_COL = 2;
    private static final int LLT_CURRENT_COL = 9;
    private static final int LLT_MIN_COLUMNS = 10;

    // Columns read from mdhier.asc.
    // NOTE(review): these positions are carried over unchanged from the original implementation.
    // The published MedDRA distribution file-format description appears to list mdhier.asc as
    // pt_code, hlt_code, hlgt_code, soc_code, pt_name, hlt_name, hlgt_name, soc_name, soc_abbrev,
    // null_field, pt_soc_code, primary_soc_fg - which would put the HLT/HLGT/SOC codes at 1/2/3.
    // Confirm against the release documentation before relying on the exported codes.
    private static final int MDHIER_PT_CODE_COL = 0;
    private static final int MDHIER_HLT_CODE_COL = 2;
    private static final int MDHIER_HLGT_CODE_COL = 3;
    private static final int MDHIER_PT_NAME_COL = 4;
    private static final int MDHIER_HLT_NAME_COL = 5;
    private static final int MDHIER_HLGT_NAME_COL = 6;
    private static final int MDHIER_SOC_NAME_COL = 7;
    private static final int MDHIER_SOC_CODE_COL = 10;
    private static final int MDHIER_MAIN_SOC_COL = 11;
    private static final int MDHIER_MIN_COLUMNS = 12;

    // Names read from the level-specific .asc files. The LLT map keeps insertion order because the
    // export iterates over it; this makes the CSV rows follow the order of llt.asc.
    private final Map<String, String> lltNameMap = new LinkedHashMap<>();
    private final Map<String, String> ptNameMap = new HashMap<>();
    private final Map<String, String> hltNameMap = new HashMap<>();
    private final Map<String, String> hlgtNameMap = new HashMap<>();
    private final Map<String, String> socNameMap = new HashMap<>();

    // Fallback names taken from mdhier.asc (used when the level-specific file has no entry).
    private final Map<String, String> ptNameMdhierMap = new HashMap<>();
    private final Map<String, String> hltNameMdhierMap = new HashMap<>();
    private final Map<String, String> hlgtNameMdhierMap = new HashMap<>();
    private final Map<String, String> socNameMdhierMap = new HashMap<>();

    // Parent links between levels.
    private final Map<String, String> lltToPtMap = new HashMap<>();
    private final Map<String, String> ptToHltMap = new HashMap<>();
    private final Map<String, String> hltToHlgtMap = new HashMap<>();
    private final Map<String, String> hlgtToSocMap = new HashMap<>();

    // Status flags.
    private final Map<String, String> lltToCurrentMap = new HashMap<>();
    private final Map<String, String> ptToMainSocMap = new HashMap<>();

    /**
     * Reads a {@code $}-delimited .asc file as UTF-8.
     *
     * <p>Blank lines are skipped, every field is trimmed, and empty columns (including the one
     * produced by a trailing delimiter) are kept so that column indices stay stable. A leading
     * byte-order mark is removed so it cannot end up inside the first code of the file.
     *
     * @param filePath path of the file to read
     * @return the parsed rows; empty when the file does not exist, and possibly partial when an
     *         I/O error interrupts reading (the error is reported on stderr, not thrown)
     */
    private List<List<String>> readAscFile(String filePath) {
        List<List<String>> rows = new ArrayList<>();
        File file = new File(filePath);
        if (!file.exists()) {
            System.out.println("⚠️  文件不存在:" + filePath);
            return rows;
        }

        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(file), StandardCharsets.UTF_8))) {
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // String.trim() does not strip U+FEFF, so drop the BOM explicitly.
                    if (line.startsWith(UTF8_BOM)) {
                        line = line.substring(UTF8_BOM.length());
                    }
                }
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                // Limit -1 keeps trailing empty columns.
                String[] fields = line.split(FIELD_DELIMITER_REGEX, -1);
                List<String> row = new ArrayList<>(fields.length);
                for (String field : fields) {
                    row.add(field.trim());
                }
                rows.add(row);
            }
        } catch (IOException e) {
            System.err.println("❌ 读取文件失败:" + filePath);
            e.printStackTrace();
        }
        return rows;
    }

    /**
     * Loads every lookup table from the .asc files found in {@code path}.
     *
     * <p>Any data left from a previous call is discarded first, so an instance can be reused for a
     * different directory without leaking stale entries into the next export.
     *
     * @param path directory containing the .asc files; a trailing separator is optional
     */
    public void loadAllMappings(String path) {
        clearAllMappings();

        // 1. Read the raw files.
        List<List<String>> lltData = readAscFile(resolveInDirectory(path, "llt.asc"));
        List<List<String>> ptData = readAscFile(resolveInDirectory(path, "pt.asc"));
        List<List<String>> hltData = readAscFile(resolveInDirectory(path, "hlt.asc"));
        List<List<String>> hlgtData = readAscFile(resolveInDirectory(path, "hlgt.asc"));
        List<List<String>> socData = readAscFile(resolveInDirectory(path, "soc.asc"));
        List<List<String>> mdhierData = readAscFile(resolveInDirectory(path, "mdhier.asc"));

        // 2. Names from the level-specific files.
        loadBasicNameMap(lltData, lltNameMap, NAME_FILE_CODE_COL, NAME_FILE_NAME_COL, "LLT");
        loadBasicNameMap(ptData, ptNameMap, NAME_FILE_CODE_COL, NAME_FILE_NAME_COL, "PT");
        loadBasicNameMap(hltData, hltNameMap, NAME_FILE_CODE_COL, NAME_FILE_NAME_COL, "HLT");
        loadBasicNameMap(hlgtData, hlgtNameMap, NAME_FILE_CODE_COL, NAME_FILE_NAME_COL, "HLGT");
        loadBasicNameMap(socData, socNameMap, NAME_FILE_CODE_COL, NAME_FILE_NAME_COL, "SOC");

        // 3. Hierarchy links and fallback names from mdhier.
        loadMdhierData(mdhierData);

        // 4. LLT -> PT links and the LLT "current" flag.
        loadLltRelatedData(lltData);

        // Summary so a run shows how many names each source contributed.
        System.out.println("\n=== 加载验证 ===");
        System.out.println("HLT名称(专属文件):" + hltNameMap.size() + " 条");
        System.out.println("HLT名称(mdhier兜底):" + hltNameMdhierMap.size() + " 条");
        System.out.println("HLGT名称(专属文件):" + hlgtNameMap.size() + " 条");
        System.out.println("HLGT名称(mdhier兜底):" + hlgtNameMdhierMap.size() + " 条");
    }

    /** Empties every lookup table. */
    private void clearAllMappings() {
        lltNameMap.clear();
        ptNameMap.clear();
        hltNameMap.clear();
        hlgtNameMap.clear();
        socNameMap.clear();
        ptNameMdhierMap.clear();
        hltNameMdhierMap.clear();
        hlgtNameMdhierMap.clear();
        socNameMdhierMap.clear();
        lltToPtMap.clear();
        ptToHltMap.clear();
        hltToHlgtMap.clear();
        hlgtToSocMap.clear();
        lltToCurrentMap.clear();
        ptToMainSocMap.clear();
    }

    /**
     * Joins a directory and a file name, inserting a separator only when one is missing.
     *
     * <p>A {@code null}, empty, or already separator-terminated directory is concatenated as-is,
     * which is exactly what the previous plain string concatenation produced.
     */
    private static String resolveInDirectory(String directory, String fileName) {
        if (directory == null || directory.isEmpty()
                || directory.endsWith("/") || directory.endsWith(File.separator)) {
            return directory + fileName;
        }
        return directory + File.separator + fileName;
    }

    /**
     * Fills {@code map} with code -> name pairs from a level-specific file and prints the number
     * of rows stored.
     *
     * @param data   parsed rows of the file
     * @param map    destination map
     * @param keyIdx column index of the code
     * @param valIdx column index of the name
     * @param type   label used in the progress message
     */
    private void loadBasicNameMap(List<List<String>> data, Map<String, String> map,
                                  int keyIdx, int valIdx, String type) {
        int count = 0;
        int requiredColumns = Math.max(keyIdx, valIdx) + 1;
        for (List<String> row : data) {
            if (row.size() < requiredColumns) {
                continue;
            }
            String key = row.get(keyIdx);
            String val = row.get(valIdx);
            if (!key.isEmpty() && !val.isEmpty()) {
                map.put(key, val);
                count++;
            }
        }
        System.out.println(type + " 名称加载:" + count + " 条");
    }

    /**
     * Loads hierarchy links, fallback names and the main-SOC flag from the mdhier rows.
     *
     * <p>A PT code can occur on several rows. The export writes a single row per LLT, so for the
     * PT-level entries (PT -> HLT link and main-SOC flag) a row whose flag equals
     * {@value #MAIN_SOC_YES} (case-insensitive) wins over rows without it; among rows of equal
     * standing the last one wins, as before. HLT -> HLGT and HLGT -> SOC links remain
     * last-row-wins.
     *
     * <p>NOTE(review): treating "Y" as the main-SOC marker is an assumption (the export already
     * defaults the flag to "N"); confirm against the file-format documentation.
     *
     * @param mdhierData parsed rows of {@code mdhier.asc}; rows with fewer than
     *                   {@value #MDHIER_MIN_COLUMNS} columns are ignored
     */
    private void loadMdhierData(List<List<String>> mdhierData) {
        int count = 0;
        for (List<String> row : mdhierData) {
            if (row.size() < MDHIER_MIN_COLUMNS) {
                continue;
            }

            String ptId = row.get(MDHIER_PT_CODE_COL);
            String hltId = row.get(MDHIER_HLT_CODE_COL);
            String hlgtId = row.get(MDHIER_HLGT_CODE_COL);
            String socId = row.get(MDHIER_SOC_CODE_COL);
            String mainSoc = row.get(MDHIER_MAIN_SOC_COL);

            // PT-level entries: never let a non-main row replace what a main-SOC row recorded.
            if (!ptId.isEmpty()) {
                boolean rowIsMain = MAIN_SOC_YES.equalsIgnoreCase(mainSoc);
                boolean mainAlreadyRecorded = MAIN_SOC_YES.equalsIgnoreCase(ptToMainSocMap.get(ptId));
                if (rowIsMain || !mainAlreadyRecorded) {
                    if (!hltId.isEmpty()) {
                        ptToHltMap.put(ptId, hltId);
                    }
                    if (!mainSoc.isEmpty()) {
                        ptToMainSocMap.put(ptId, mainSoc);
                    }
                }
            }

            // Upper-level links.
            putIfBothPresent(hltToHlgtMap, hltId, hlgtId);
            putIfBothPresent(hlgtToSocMap, hlgtId, socId);

            // Fallback names: the first non-empty name seen for a code is kept.
            putFirstName(ptNameMdhierMap, ptId, row.get(MDHIER_PT_NAME_COL));
            putFirstName(hltNameMdhierMap, hltId, row.get(MDHIER_HLT_NAME_COL));
            putFirstName(hlgtNameMdhierMap, hlgtId, row.get(MDHIER_HLGT_NAME_COL));
            putFirstName(socNameMdhierMap, socId, row.get(MDHIER_SOC_NAME_COL));

            count++;
        }
        System.out.println("mdhier加载:" + count + " 条(含名称兜底)");
    }

    /** Stores {@code key -> value} when neither is empty, replacing any earlier value. */
    private static void putIfBothPresent(Map<String, String> map, String key, String value) {
        if (!key.isEmpty() && !value.isEmpty()) {
            map.put(key, value);
        }
    }

    /** Stores {@code id -> name} when neither is empty and {@code id} has no name yet. */
    private static void putFirstName(Map<String, String> map, String id, String name) {
        if (!id.isEmpty() && !name.isEmpty() && !map.containsKey(id)) {
            map.put(id, name);
        }
    }

    /**
     * Loads the LLT -> PT link and the LLT "current" flag from the llt rows.
     *
     * @param lltData parsed rows of {@code llt.asc}; rows with fewer than
     *                {@value #LLT_MIN_COLUMNS} columns are ignored
     */
    private void loadLltRelatedData(List<List<String>> lltData) {
        int count = 0;
        for (List<String> row : lltData) {
            if (row.size() < LLT_MIN_COLUMNS) {
                continue;
            }

            String lltId = row.get(LLT_CODE_COL);
            putIfBothPresent(lltToPtMap, lltId, row.get(LLT_PT_CODE_COL));
            putIfBothPresent(lltToCurrentMap, lltId, row.get(LLT_CURRENT_COL));
            count++;
        }
        System.out.println("LLT数据加载:" + count + " 条(含现行状态)");
    }

    /**
     * Resolves the name for {@code id}, preferring the level-specific file over mdhier.
     *
     * @param basicMap  names from the level-specific file
     * @param mdhierMap fallback names from mdhier
     * @param id        code to look up; must not be {@code null}
     * @return the name, or an empty string when {@code id} is empty or unknown to both maps
     */
    private String getName(Map<String, String> basicMap, Map<String, String> mdhierMap, String id) {
        if (id.isEmpty()) {
            return "";
        }
        String name = basicMap.get(id);
        if (name != null && !name.isEmpty()) {
            return name;
        }
        name = mdhierMap.get(id);
        return name == null ? "" : name;
    }

    /**
     * Writes one CSV row per loaded LLT, in the order the LLTs appeared in {@code llt.asc}.
     *
     * <p>The file is UTF-8 with a leading byte-order mark. Missing hierarchy codes and names are
     * written as empty fields; the main-SOC and current flags default to {@code N}. I/O failures
     * are reported on stderr and not rethrown.
     *
     * @param csvPath       destination file; overwritten if it exists
     * @param meddraVersion value written into the dictionary-version column of every row
     */
    public void exportToCsv(String csvPath, String meddraVersion) {
        try (FileOutputStream fos = new FileOutputStream(csvPath);
             OutputStreamWriter osw = new OutputStreamWriter(fos, StandardCharsets.UTF_8);
             BufferedWriter bw = new BufferedWriter(osw)) {

            // Emit the BOM through the writer (not the raw stream beneath it) so the output order
            // never depends on when the buffer happens to be flushed.
            bw.write(UTF8_BOM);
            bw.write(CSV_HEADER);
            bw.newLine();

            int exportCount = 0;
            for (Map.Entry<String, String> llt : lltNameMap.entrySet()) {
                // LLT level
                String lltId = llt.getKey();
                String lltName = llt.getValue();
                String current = lltToCurrentMap.getOrDefault(lltId, "N");

                // PT level
                String ptId = lltToPtMap.getOrDefault(lltId, "");
                String ptName = getName(ptNameMap, ptNameMdhierMap, ptId);
                String mainSoc = ptToMainSocMap.getOrDefault(ptId, "N");

                // HLT level
                String hltId = ptToHltMap.getOrDefault(ptId, "");
                String hltName = getName(hltNameMap, hltNameMdhierMap, hltId);

                // HLGT level
                String hlgtId = hltToHlgtMap.getOrDefault(hltId, "");
                String hlgtName = getName(hlgtNameMap, hlgtNameMdhierMap, hlgtId);

                // SOC level
                String socId = hlgtToSocMap.getOrDefault(hlgtId, "");
                String socName = getName(socNameMap, socNameMdhierMap, socId);

                String row = String.join(",",
                        wrapCsvField(lltId),
                        wrapCsvField(lltName),
                        wrapCsvField(ptId),
                        wrapCsvField(ptName),
                        wrapCsvField(hltId),
                        wrapCsvField(hltName),
                        wrapCsvField(hlgtId),
                        wrapCsvField(hlgtName),
                        wrapCsvField(socId),
                        wrapCsvField(socName),
                        wrapCsvField(mainSoc),
                        wrapCsvField(current),
                        wrapCsvField(meddraVersion)
                );

                bw.write(row);
                bw.newLine();
                exportCount++;
            }

            System.out.println("\n✅ 导出完成!");
            System.out.println("📁 文件路径:" + csvPath);
            System.out.println("📊 导出数据量:" + exportCount + " 条");

        } catch (IOException e) {
            System.err.println("❌ 导出失败:" + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Prepares a value for use as one CSV field.
     *
     * <p>{@code null} and whitespace-only values become an empty field. Values containing a comma,
     * a double quote or a line break are enclosed in double quotes, with embedded quotes doubled.
     *
     * @param value raw field value, may be {@code null}
     * @return the field text ready to be joined with commas
     */
    private String wrapCsvField(String value) {
        if (isNullOrBlank(value)) {
            return "";
        }
        boolean needsQuoting = value.indexOf(',') >= 0
                || value.indexOf('"') >= 0
                || value.indexOf('\n') >= 0
                || value.indexOf('\r') >= 0;
        if (!needsQuoting) {
            return value;
        }
        return "\"" + value.replace("\"", "\"\"") + "\"";
    }

    /**
     * Returns {@code true} for {@code null}, empty, or whitespace-only strings.
     *
     * <p>JDK-only check so the class has no dependency beyond the standard library.
     * NOTE(review): this replaces the statically imported project {@code isBlank}; confirm that
     * helper has no additional semantics that matter here.
     */
    private static boolean isNullOrBlank(String value) {
        if (value == null) {
            return true;
        }
        for (int i = 0; i < value.length(); i++) {
            if (!Character.isWhitespace(value.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} = directory with the .asc files,
     *             {@code args[1]} = dictionary version (for example {@code 28.1}); the previous
     *             hard-coded values are used for whatever is omitted. The CSV is written into the
     *             same directory as {@code MedDRA_<version>.csv}.
     */
    public static void main(String[] args) {
        String asciiDir = args.length > 0
                ? args[0]
                : "/Users/sunda/Desktop/MedDRA28.1版本/MedDRA_28_1_ENglish/MedAscii/";
        String version = args.length > 1 ? args[1] : "28.1";

        MedDRADataParser parser = new MedDRADataParser();
        parser.loadAllMappings(asciiDir);
        parser.exportToCsv(resolveInDirectory(asciiDir, "MedDRA_" + version + ".csv"), version);
    }
}
// Last modified: 2026-03-17
// If you found this article useful, feel free to leave a tip.