Java實現Markdown圖片批量本地化處理工具

更新時間：2025年07月30日 09:44:13 作者：zk_xyb

在日常使用Markdown時，我們常通過遠程URL引用圖片。本文小編就為大家詳細介紹一下如何使用Java實現Markdown圖片的批量本地化處理。

一、工具介紹

在日常使用Markdown時，我們常通過遠程URL引用圖片，但這種方式存在依賴網絡、圖片易失效、離線無法查看等問題。MarkdownImageProcessor 正是為解決這些問題而生的Java工具，其核心功能是批量識別Markdown文件中的遠程圖片URL，自動下載圖片到本地目錄，并生成替換后的新Markdown文件（圖片路徑改為本地相對路徑），讓文檔徹底擺脫對遠程資源的依賴。

核心功能

多文件/目錄支持：可處理單個Markdown文件或目錄（遞歸查找所有.md/.markdown文件）；

遠程圖片下載：自動識別以 http:// 或 https:// 開頭的遠程圖片URL，下載至本地images目錄；

路徑自動替換：生成的新Markdown文件中，圖片路徑會替換為本地相對路徑（如images/xxx.jpg）；

安全處理機制：下載失敗時保留原始URL，避免文檔損壞；文件名自動去重（同名文件加序號），防止覆蓋；

清晰的進度反饋：實時輸出處理進度（成功/失敗的文件/圖片數量），生成處理總結。

使用流程

運行程序后，輸入Markdown文件路徑或目錄路徑（每行一個）；

輸入空行開始處理，程序會遞歸掃描目錄中的所有Markdown文件；

處理完成后，在原文件同目錄生成帶_processed后綴的新文件（如doc.md→doc_processed.md），圖片保存至同目錄的images文件夾。

二、代碼優化方案

原代碼功能完整，但在靈活性、健壯性和現代Java特性使用上有優化空間。以下是優化后的代碼及關鍵改進點：

優化后的代碼

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

/**
 * Command-line tool that localizes remote images referenced from Markdown files.
 *
 * <p>For every Markdown file given on the console (directly or through a directory that is
 * walked recursively) the tool finds image links whose target starts with {@code http://} or
 * {@code https://}, downloads each image into an image directory next to the Markdown file and
 * writes a copy of the document, named with a configurable suffix, in which the downloaded
 * images are referenced by a local relative path. Links whose download fails are left untouched.
 */
public class MarkdownImageProcessor {
    /**
     * Splits a link target into the URL (group 1) and an optional quoted title including its
     * leading whitespace (group 2), e.g. {@code http://host/a.png "caption"}.
     */
    private static final Pattern TARGET_WITH_TITLE =
            Pattern.compile("^(\\S+)(\\s+(?:\"[^\"]*\"|'[^']*'))$");

    // Configurable parameters (defaults are assigned in the constructor).
    /** Markdown image syntax: group 1 is the alt text, group 2 the link target. */
    private final String imageRegex;
    /** {@link #imageRegex} compiled once so it is not recompiled for every document. */
    private final Pattern imagePattern;
    /** Image directory, resolved against the directory of each Markdown file. */
    private String imageDir;
    /** Suffix inserted before the extension of the generated Markdown file. */
    private String processedSuffix;
    private int connectionTimeout; // connect timeout in milliseconds
    private int readTimeout; // read timeout in milliseconds
    private String userAgent;

    private static final Scanner scanner = new Scanner(System.in);

    /** Creates a processor with the default configuration; use the setters to customize it. */
    public MarkdownImageProcessor() {
        this.imageRegex = "!\\[(.*?)\\]\\((.*?)\\)";
        this.imagePattern = Pattern.compile(this.imageRegex);
        this.imageDir = "images";
        this.processedSuffix = "_processed";
        this.connectionTimeout = 5000; // 5 seconds
        this.readTimeout = 10000; // 10 seconds
        this.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";
    }

    /**
     * Entry point: collects paths from standard input, processes every Markdown file found and
     * prints a summary.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            MarkdownImageProcessor processor = new MarkdownImageProcessor();
            List<Path> filesToProcess = processor.collectFilesFromConsole();

            if (filesToProcess.isEmpty()) {
                System.out.println("沒有需要處理的文件。");
                return;
            }

            ProcessSummary summary = processor.processFiles(filesToProcess);
            processor.printSummary(summary);

        } catch (Exception e) {
            System.err.println("程序運行出錯: " + e.getMessage());
            e.printStackTrace();
        } finally {
            scanner.close();
        }
    }

    /**
     * Reads file or directory paths from the console, one per line, until an empty line or the
     * end of the input is reached.
     *
     * @return the Markdown files to process as normalized absolute paths, in input order and
     *         without duplicates
     */
    private List<Path> collectFilesFromConsole() {
        // LinkedHashSet keeps the input order while dropping a file that is reached twice,
        // e.g. once directly and once through its directory.
        Set<Path> files = new LinkedHashSet<>();
        System.out.println("Markdown圖片處理工具 - 批量模式");
        System.out.println("--------------------------------");
        System.out.println("請輸入Markdown文件或目錄路徑（每行一個，輸入空行開始處理）");
        System.out.println("支持:");
        System.out.println("  - 單個文件路徑");
        System.out.println("  - 目錄路徑（將遞歸處理所有.md文件）");
        System.out.println("--------------------------------");

        // Compared in normalized absolute form so "./docs" and "docs" count as the same entry.
        Set<Path> processedPaths = new HashSet<>();

        while (true) {
            System.out.print("輸入路徑: ");
            // End of input (e.g. piped stdin) is treated like the terminating empty line
            // instead of letting nextLine() throw NoSuchElementException.
            if (!scanner.hasNextLine()) {
                break;
            }
            String pathStr = scanner.nextLine().trim();

            if (pathStr.isEmpty()) {
                break;
            }

            Path path;
            Path key;
            try {
                path = Paths.get(pathStr);
                key = path.toAbsolutePath().normalize();
            } catch (InvalidPathException e) {
                // A string that is not a legal path cannot name an existing file.
                System.out.println("錯誤: 文件/目錄不存在 - " + pathStr);
                continue;
            }

            if (processedPaths.contains(key)) {
                System.out.println("警告: 路徑已添加 - " + pathStr);
                continue;
            }

            if (!Files.exists(path)) {
                System.out.println("錯誤: 文件/目錄不存在 - " + pathStr);
                continue;
            }

            processedPaths.add(key);

            if (Files.isDirectory(path)) {
                List<Path> dirFiles = getMdFilesRecursively(path);
                for (Path dirFile : dirFiles) {
                    files.add(dirFile.toAbsolutePath().normalize());
                }
                System.out.printf("已添加目錄: %s (%d個MD文件)%n", path, dirFiles.size());
            } else {
                if (isMarkdownFile(path)) {
                    files.add(key);
                    System.out.println("已添加文件: " + path);
                } else {
                    System.out.println("錯誤: 不是Markdown文件 - " + pathStr);
                }
            }
        }

        return new ArrayList<>(files);
    }

    /**
     * Recursively collects all Markdown files below a directory.
     *
     * @param directory directory to walk
     * @return the regular files accepted by {@link #isMarkdownFile(Path)}; when walking fails a
     *         warning is printed and the files found so far are returned
     */
    private List<Path> getMdFilesRecursively(Path directory) {
        List<Path> result = new ArrayList<>();
        // Files.walk holds directory handles open, so the stream must be closed.
        try (Stream<Path> stream = Files.walk(directory)) {
            stream.filter(Files::isRegularFile)
                  .filter(this::isMarkdownFile)
                  .forEach(result::add);
        } catch (IOException e) {
            System.err.printf("警告: 讀取目錄失敗 %s - %s%n", directory, e.getMessage());
        }
        return result;
    }

    /**
     * Tells whether a path names a Markdown file, judged by its extension only.
     *
     * @param path path to test
     * @return {@code true} when the file name ends with {@code .md} or {@code .markdown},
     *         ignoring case
     */
    private boolean isMarkdownFile(Path path) {
        Path name = path.getFileName();
        if (name == null) {
            return false; // a root path has no file name
        }
        String fileName = name.toString().toLowerCase(Locale.ROOT);
        return fileName.endsWith(".md") || fileName.endsWith(".markdown");
    }

    /**
     * Processes the given files one after another; a failure in one file is reported and
     * counted but does not stop the remaining files.
     *
     * @param files Markdown files to process
     * @return the number of files that succeeded and failed
     */
    private ProcessSummary processFiles(List<Path> files) {
        ProcessSummary summary = new ProcessSummary();
        int totalFiles = files.size();

        System.out.printf("%n開始處理 %d 個Markdown文件...%n%n", totalFiles);

        for (int i = 0; i < totalFiles; i++) {
            Path file = files.get(i);
            System.out.printf("處理文件 %d/%d: %s%n", i + 1, totalFiles, file.toAbsolutePath());

            try {
                processMarkdownFile(file);
                summary.successfulFiles++;
            } catch (Exception e) {
                System.err.printf("  處理失敗: %s%n", e.getMessage());
                summary.failedFiles++;
            }

            System.out.println();
        }

        return summary;
    }

    /**
     * Localizes the remote images of one Markdown file. When the file references at least one
     * remote image, a processed copy is written next to it; otherwise nothing is written.
     *
     * @param mdFile Markdown file to process
     * @throws IOException if the file cannot be read or the processed copy cannot be written
     */
    private void processMarkdownFile(Path mdFile) throws IOException {
        String content = readFileContent(mdFile);

        // A bare relative name such as "doc.md" has no parent, so resolve it against the
        // working directory before asking for the containing directory.
        Path imageDirPath = mdFile.toAbsolutePath().getParent().resolve(imageDir);

        // The image directory itself is created by downloadImage only when an image is
        // actually about to be saved, so documents without remote images leave no empty folder.
        ImageProcessingResult result = processImages(content, imageDirPath);

        if (result.totalImages > 0) {
            Path newMdFile = getProcessedFilePath(mdFile);
            writeFileContent(newMdFile, result.processedContent);

            System.out.printf("  已處理 %d 張圖片，生成新文件: %s%n",
                              result.totalImages, newMdFile.getFileName());
        } else {
            System.out.println("  未發現需要處理的遠程圖片URL");
        }
    }

    /**
     * Reads a UTF-8 text file line by line (Java 8 compatible).
     *
     * @param path file to read
     * @return the file content with every line terminated by a single {@code '\n'}
     * @throws IOException if the file cannot be read or is not valid UTF-8
     */
    private String readFileContent(Path path) throws IOException {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = Files.newBufferedReader(path, StandardCharsets.UTF_8)) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\n");
            }
        }
        return content.toString();
    }

    /**
     * Writes text to a file as UTF-8 (Java 8 compatible), replacing an existing file.
     *
     * @param path file to write
     * @param content text to write
     * @throws IOException if the file cannot be written
     */
    private void writeFileContent(Path path, String content) throws IOException {
        try (BufferedWriter writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) {
            writer.write(content);
        }
    }

    /**
     * Derives the output path by inserting the configured suffix before the file extension,
     * e.g. {@code doc.md} becomes {@code doc_processed.md} with the default suffix.
     *
     * @param originalPath path of the source Markdown file
     * @return a sibling of {@code originalPath} carrying the suffixed name
     */
    private Path getProcessedFilePath(Path originalPath) {
        String fileName = originalPath.getFileName().toString();
        int dotIndex = fileName.lastIndexOf('.');
        String baseName = (dotIndex > 0) ? fileName.substring(0, dotIndex) : fileName;
        String extension = (dotIndex > 0) ? fileName.substring(dotIndex) : "";
        String newFileName = baseName + processedSuffix + extension;
        // resolveSibling also works for a path without a parent such as "doc.md".
        return originalPath.resolveSibling(newFileName);
    }

    /**
     * Downloads every remote image referenced in the content and rewrites its link to the
     * local copy. Non-remote links and links whose download fails are kept exactly as written.
     *
     * @param content Markdown text
     * @param imageDirPath directory the images are saved to
     * @return the rewritten text together with the image counters
     * @throws IOException declared for callers; download errors are handled per image
     */
    private ImageProcessingResult processImages(String content, Path imageDirPath) throws IOException {
        Matcher matcher = imagePattern.matcher(content);
        // StringBuffer because Matcher.appendReplacement only accepts StringBuilder from Java 9.
        StringBuffer sb = new StringBuffer();
        int totalImages = 0;
        int failedImages = 0;

        while (matcher.find()) {
            String altText = matcher.group(1);
            String target = matcher.group(2).trim();

            // Separate an optional quoted title so it is not sent as part of the URL.
            String imageUrl = target;
            String titleSuffix = "";
            Matcher titled = TARGET_WITH_TITLE.matcher(target);
            if (titled.matches()) {
                imageUrl = titled.group(1);
                titleSuffix = titled.group(2);
            }

            if (imageUrl.startsWith("http://") || imageUrl.startsWith("https://")) {
                totalImages++;
                try {
                    String fileName = downloadImage(imageUrl, imageDirPath);
                    // Markdown links use '/' on every platform; Path.toString() would produce
                    // backslashes on Windows.
                    String localLink = Paths.get(imageDir).resolve(fileName).toString()
                            .replace(File.separatorChar, '/');
                    String replacement = "![" + altText + "](" + localLink + titleSuffix + ")";
                    matcher.appendReplacement(sb, Matcher.quoteReplacement(replacement));
                    System.out.printf("    ? 已下載: %s -> %s%n", imageUrl, localLink);
                } catch (Exception e) {
                    failedImages++;
                    System.err.printf("    ? 下載失敗 (%s): %s%n", e.getMessage(), imageUrl);
                    // Keep the original link so the document is not damaged.
                    matcher.appendReplacement(sb, Matcher.quoteReplacement(matcher.group(0)));
                }
            } else {
                // Links that are not http/https are left untouched.
                matcher.appendReplacement(sb, Matcher.quoteReplacement(matcher.group(0)));
            }
        }
        matcher.appendTail(sb);

        ImageProcessingResult result = new ImageProcessingResult();
        result.processedContent = sb.toString();
        result.totalImages = totalImages;
        result.successfulImages = totalImages - failedImages;
        result.failedImages = failedImages;

        return result;
    }

    /**
     * Downloads one image into the image directory, creating the directory if needed.
     *
     * @param imageUrl http/https URL of the image
     * @param imageDirPath directory the image is saved to
     * @return the name of the saved file inside {@code imageDirPath}
     * @throws IOException if the URL is malformed, the response status is not 200 or the
     *         transfer fails; a partially written file is removed
     */
    private String downloadImage(String imageUrl, Path imageDirPath) throws IOException {
        HttpURLConnection connection = createHttpConnection(imageUrl);
        try {
            int responseCode = connection.getResponseCode();
            if (responseCode != HttpURLConnection.HTTP_OK) {
                throw new IOException("HTTP請求失敗，狀態碼: " + responseCode);
            }

            // The extension comes from the Content-Type header, not from the URL.
            String extension = getExtensionFromContentType(connection.getContentType());
            String baseFileName = extractBaseFileName(imageUrl);

            Files.createDirectories(imageDirPath);
            String fileName = generateUniqueFileName(imageDirPath, baseFileName + "." + extension);
            Path target = imageDirPath.resolve(fileName);

            try (InputStream in = connection.getInputStream()) {
                // Without REPLACE_EXISTING this refuses to overwrite a file that appeared
                // after the unique name was chosen.
                Files.copy(in, target);
            } catch (FileAlreadyExistsException e) {
                throw e; // the file belongs to someone else; do not delete it
            } catch (IOException e) {
                try {
                    Files.deleteIfExists(target); // do not leave a truncated image behind
                } catch (IOException cleanupFailure) {
                    e.addSuppressed(cleanupFailure);
                }
                throw e;
            }

            return fileName;
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Opens a GET connection configured with the user agent and the timeouts of this processor.
     * No request is sent until the caller reads from the connection.
     *
     * @param urlStr URL to connect to
     * @return the configured connection
     * @throws IOException if the URL is malformed or the connection cannot be opened
     */
    private HttpURLConnection createHttpConnection(String urlStr) throws IOException {
        URL url = new URL(urlStr);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setRequestMethod("GET");
        connection.setRequestProperty("User-Agent", userAgent);
        connection.setConnectTimeout(connectionTimeout);
        connection.setReadTimeout(readTimeout);
        connection.setInstanceFollowRedirects(true);
        return connection;
    }

    /**
     * Maps a Content-Type header value to a file extension (Java 8 compatible switch).
     * Parameters such as {@code ; charset=UTF-8} and letter case are ignored.
     *
     * @param contentType header value, may be {@code null}
     * @return the extension without a dot; {@code "jpg"} for a missing or unknown type
     */
    private String getExtensionFromContentType(String contentType) {
        if (contentType == null) {
            return "jpg"; // default fallback
        }
        int paramStart = contentType.indexOf(';');
        String mediaType = (paramStart >= 0 ? contentType.substring(0, paramStart) : contentType)
                .trim()
                .toLowerCase(Locale.ROOT);
        switch (mediaType) {
            case "image/jpeg":
                return "jpg";
            case "image/png":
                return "png";
            case "image/gif":
                return "gif";
            case "image/bmp":
                return "bmp";
            case "image/webp":
                return "webp";
            case "image/svg+xml":
                return "svg";
            default:
                return "jpg"; // unknown types default to jpg
        }
    }

    /**
     * Derives a file name without extension from the last path segment of a URL.
     *
     * @param url image URL
     * @return the sanitized base name, or {@code "image"} when the URL has no usable last
     *         segment (for example when it ends with a slash)
     */
    private String extractBaseFileName(String url) {
        // Cut off the query string and the fragment.
        int end = url.length();
        int queryStart = url.indexOf('?');
        if (queryStart >= 0) {
            end = queryStart;
        }
        int fragmentStart = url.indexOf('#');
        if (fragmentStart >= 0 && fragmentStart < end) {
            end = fragmentStart;
        }
        String path = url.substring(0, end);

        String fileName = path.substring(path.lastIndexOf('/') + 1);
        // Drop the extension; the real one is appended from the Content-Type later.
        int dotIndex = fileName.lastIndexOf('.');
        if (dotIndex > 0) {
            fileName = fileName.substring(0, dotIndex);
        }
        // Replace characters that are illegal in file names on common file systems.
        String sanitized = fileName.replaceAll("[\\\\/:*?\"<>|]", "_");
        return sanitized.isEmpty() ? "image" : sanitized;
    }

    /**
     * Picks a file name that does not exist yet in the directory by appending {@code _1},
     * {@code _2}, ... before the extension when necessary.
     *
     * @param directory directory the file will be created in
     * @param originalName preferred file name
     * @return {@code originalName} or a numbered variant that is currently unused
     */
    private String generateUniqueFileName(Path directory, String originalName) {
        String baseName = originalName;
        String extension = "";
        int dotIndex = originalName.lastIndexOf('.');
        if (dotIndex > 0) {
            baseName = originalName.substring(0, dotIndex);
            extension = originalName.substring(dotIndex);
        }

        String fileName = originalName;
        int counter = 1;
        while (Files.exists(directory.resolve(fileName))) {
            fileName = baseName + "_" + counter + extension;
            counter++;
        }

        return fileName;
    }

    /**
     * Prints the number of successfully processed and failed files.
     *
     * @param summary counters collected by {@link #processFiles(List)}
     */
    private void printSummary(ProcessSummary summary) {
        System.out.println("--------------------------------");
        System.out.println("處理完成！");
        System.out.printf("成功處理文件: %d%n", summary.successfulFiles);
        System.out.printf("處理失敗文件: %d%n", summary.failedFiles);
        System.out.println("--------------------------------");
    }

    /**
     * Sets the image directory, resolved against the directory of each Markdown file.
     *
     * @param imageDir directory name or path
     */
    public void setImageDir(String imageDir) {
        this.imageDir = imageDir;
    }

    /**
     * Sets the suffix inserted before the extension of the generated Markdown file.
     *
     * @param processedSuffix suffix such as {@code "_processed"}
     */
    public void setProcessedSuffix(String processedSuffix) {
        this.processedSuffix = processedSuffix;
    }

    /**
     * Sets the connect timeout used for downloads.
     *
     * @param connectionTimeout timeout in milliseconds
     */
    public void setConnectionTimeout(int connectionTimeout) {
        this.connectionTimeout = connectionTimeout;
    }

    /**
     * Sets the read timeout used for downloads.
     *
     * @param readTimeout timeout in milliseconds
     */
    public void setReadTimeout(int readTimeout) {
        this.readTimeout = readTimeout;
    }

    /**
     * Sets the {@code User-Agent} header sent with every download request.
     *
     * @param userAgent header value
     */
    public void setUserAgent(String userAgent) {
        this.userAgent = userAgent;
    }

    /** Per-run counters of processed files. */
    static class ProcessSummary {
        int successfulFiles = 0;
        int failedFiles = 0;
    }

    /** Outcome of rewriting one document: the new text and the image counters. */
    static class ImageProcessingResult {
        String processedContent;
        int totalImages;
        int successfulImages;
        int failedImages;
    }
}