淺談 Java Zip 壓縮及其優化

更新時間：2025年09月23日 08:30:30 作者：sp42

本文主要介紹了 Java 中實現文件壓縮/解壓縮的工具類，文中通過示例代碼介紹的非常詳細，對大家的學習或者工作具有一定的參考學習價值，需要的朋友們下面隨著小編來一起學習學習吧

壓縮文件

Java 壓縮文件，就是輸入多個文件的參數，最終壓縮為一個 zip 文件。這個代碼比較簡單就不張貼了。

壓縮目錄

壓縮目錄的話顯然較複雜一點，其中一個思路自然是通過遞歸目錄實現的。網上找過幾個例子都有點小問題，還是谷歌找出來的靠譜。主要是增加了指定文件的功能，通過 Java8 的 Lambda 判斷是否加入 ZIP 壓縮，比較方便。函數表達式的簽名是 Function<File, Boolean>，參數是待加入的 File 對象，返回值 true 表示允許，反之不行。

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.function.Function;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

import com.ajaxjs.util.logger.LogHelper;

/**
 * ZIP compression / decompression helpers.
 * 
 * @author sp42
 *
 */
public class ZipHelper {
	private static final LogHelper LOGGER = LogHelper.getLog(ZipHelper.class);

	/**
	 * Extracts every entry of a ZIP archive into a directory.
	 * <p>
	 * The target directory is created when it does not exist yet. I/O failures are
	 * logged and abort the extraction; they are not rethrown.
	 * 
	 * @param save    directory the archive is extracted into; must not be an existing
	 *                regular file
	 * @param zipFile path of the archive to extract, e.g. C:/temp/foo.zip or
	 *                c:\\temp\\bar.zip
	 * @throws IllegalArgumentException if {@code save} exists but is not a directory
	 */
	public static void unzip(String save, String zipFile) {
		File folder = new File(save);

		// Only an existing non-directory is an error; a missing directory is created below.
		if (folder.exists() && !folder.isDirectory())
			throw new IllegalArgumentException("保存的路徑必須為目錄路徑");

		long start = System.currentTimeMillis();
		folder.mkdirs(); // no-op when the directory already exists

		try (ZipInputStream zis = new ZipInputStream(new FileInputStream(zipFile))) {
			String rootPath = folder.getCanonicalPath();
			ZipEntry ze;

			while ((ze = zis.getNextEntry()) != null) {
				File newFile = new File(folder, ze.getName());

				// Reject entries such as "../../x" that would be written outside the target directory.
				if (!isInside(rootPath, newFile.getCanonicalPath()))
					throw new IOException("ZIP entry escapes the target directory: " + ze.getName());

				LOGGER.info("file unzip : {0}", newFile.getAbsolutePath());

				// Archives may contain explicit directory entries; these carry no data.
				if (ze.isDirectory()) {
					newFile.mkdirs();
				} else {
					// presumably creates the parent folders of newFile -- TODO confirm against FileHelper
					FileHelper.initFolder(newFile);

					try (FileOutputStream fos = new FileOutputStream(newFile)) {
						// the last argument is passed as false exactly as before; its meaning is defined by IoHelper
						IoHelper.write(zis, fos, false);
					}
				}
			}
		} catch (IOException e) {
			LOGGER.warning(e);
			return; // do not report success after a failure
		}

		LOGGER.info("解壓縮完成，耗時：{0}ms，保存在{1}", System.currentTimeMillis() - start, save);
	}

	/**
	 * Tells whether a canonical path is the given root or lies below it.
	 * 
	 * @param rootPath      canonical path of the directory that must contain the candidate
	 * @param candidatePath canonical path to check
	 * @return true when the candidate is the root itself or a descendant of it
	 */
	private static boolean isInside(String rootPath, String candidatePath) {
		if (candidatePath.equals(rootPath))
			return true;

		// Compare against "root + separator" so that /a/bc is not treated as a child of /a/b.
		String prefix = rootPath.endsWith(File.separator) ? rootPath : rootPath + File.separator;

		return candidatePath.startsWith(prefix);
	}

	/**
	 * Compresses a file or a directory tree into a ZIP archive, including every
	 * non-hidden file.
	 * 
	 * @param toZip   directory or file to compress
	 * @param saveZip path of the ZIP file to create
	 */
	public static void zip(String toZip, String saveZip) {
		zip(toZip, saveZip, null);
	}

	/**
	 * Compresses a file or a directory tree into a ZIP archive, letting the caller
	 * decide per file whether it is included.
	 * <p>
	 * I/O failures while opening or closing the archive are logged, not rethrown.
	 * 
	 * @param toZip     directory or file to compress
	 * @param saveZip   path of the ZIP file to create
	 * @param everyFile filter invoked with each candidate File; returning true
	 *                  includes it, anything else skips it (a skipped directory is
	 *                  not descended into). May be null to include everything.
	 */
	public static void zip(String toZip, String saveZip, Function<File, Boolean> everyFile) {
		long start = System.currentTimeMillis();
		File fileToZip = new File(toZip);

		// presumably prepares the parent folder of the archive -- TODO confirm against FileHelper
		FileHelper.initFolder(saveZip);

		try (FileOutputStream fos = new FileOutputStream(saveZip); ZipOutputStream zipOut = new ZipOutputStream(fos)) {
			zip(fileToZip, fileToZip.getName(), zipOut, everyFile);
		} catch (IOException e) {
			LOGGER.warning(e);
			return; // do not report success after a failure
		}

		LOGGER.info("壓縮完成，耗時：{0}ms，保存在{1}", System.currentTimeMillis() - start, saveZip);
	}

	/**
	 * Recursive worker: adds one file, or one directory and its children, to the
	 * archive. Hidden files and files rejected by the filter are skipped. An
	 * IOException on one entry is logged and does not stop the remaining entries.
	 * 
	 * @param toZip     directory or file to add
	 * @param fileName  entry name inside the ZIP
	 * @param zipOut    ZIP stream being written
	 * @param everyFile inclusion filter, may be null
	 */
	private static void zip(File toZip, String fileName, ZipOutputStream zipOut, Function<File, Boolean> everyFile) {
		if (toZip.isHidden())
			return;

		// Boolean.TRUE.equals treats a null result as "skip" instead of failing on unboxing.
		if (everyFile != null && !Boolean.TRUE.equals(everyFile.apply(toZip)))
			return;

		try {
			if (toZip.isDirectory()) {
				// Directory entries are marked by a trailing slash and carry no data.
				String dirName = fileName.endsWith("/") ? fileName : fileName + "/";
				zipOut.putNextEntry(new ZipEntry(dirName));
				zipOut.closeEntry();

				File[] children = toZip.listFiles();

				// listFiles() returns null when the directory cannot be read.
				if (children == null)
					return;

				for (File childFile : children)
					zip(childFile, dirName + childFile.getName(), zipOut, everyFile);

				return;
			}

			zipOut.putNextEntry(new ZipEntry(fileName));

			try (FileInputStream in = new FileInputStream(toZip)) {
				// the last argument is passed as false exactly as before; its meaning is defined by IoHelper
				IoHelper.write(in, zipOut, false);
			}

			zipOut.closeEntry();
		} catch (IOException e) {
			LOGGER.warning(e);
		}
	}
}

目標大致是實現了，不過性能則比較差。接著我們看看如何去優化。

優化速度

開始拜讀了大神文章《Zip 壓縮大文件從30秒到近乎1秒的優化過程》，深入分析了 Java 文件壓縮的優化過程，從最初的無緩衝壓縮到使用緩衝區，再到利用 NIO 的 Channel 和內存映射文件技術，最終實現壓縮速度的顯著提升。

具體代碼如下。

/**
 * Buffered-stream step of the "ZIP a large file: from 30 seconds to about 1 second" walkthrough.
 * <p>
 * Rationale given by the walkthrough: an unbuffered FileInputStream goes to the
 * operating system for every single byte, so 30000 bytes cost 30000 native calls.
 * A buffered stream fetches a whole chunk into memory on the first read() and then
 * hands the bytes out one at a time from that buffer.
 * <p>
 * NOTE(review): the parameter names do not match how they are used here --
 * {@code toZip} is the file that gets written and {@code saveZip} is the input
 * prefix. Confirm against callers before renaming.
 *
 * @param toZip   path of the ZIP archive this method creates
 * @param saveZip path prefix of the inputs; the files {@code saveZip + i + ".jpg"}
 *                for i = 1..10 are read, and the same string is used as entry name
 */
public static void zipFileBuffer(String toZip, String saveZip) {
    File fileToZip = new File(toZip);

    try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(fileToZip.toPath()));
         BufferedOutputStream bout = new BufferedOutputStream(zipOut)) {

        for (int i = 1; i < 11; i++) {
            String imagePath = saveZip + i + ".jpg";

            try (BufferedInputStream bin = new BufferedInputStream(Files.newInputStream(Paths.get(imagePath)))) {
                zipOut.putNextEntry(new ZipEntry(imagePath));
                int temp;

                while ((temp = bin.read()) != -1) {
                    bout.write(temp);
                }

                // Flush once per entry, not once per byte: a per-byte flush makes the
                // output buffer useless. The flush is still required here because bout
                // sits on top of zipOut, so buffered bytes must reach the current entry
                // before the next putNextEntry() starts a new one.
                bout.flush();
            }
        }
    } catch (IOException e) {
        log.warn("zipFileBuffer", e);
    }
}

文章有網友評論附議：

“BufferedInputStream 在每次 write 後應該加入 flush（注：實際是 BufferedOutputStream）”
除了 flush() 還應馬上 close() 流
如果最快就用 STORED。注：zip 有幾種壓縮策略，就是調整其壓縮比的，STORED 是其中一種，就是不怎麼壓縮，所以快

看來還可以繼續地優化。於是我翻閱那位評論者的博客，果然還有介紹他怎麼優化的文章，可惜目前已經收復了……不過好在我當時已經 copy 了代碼：

 /**
   * Packs the given files into a ZIP archive using the STORED method, i.e. the
   * bytes are copied without compression, which is what makes this variant fast.
   * <p>
   * Each file is stored under its simple name. An IOException is logged and ends
   * the operation; it is not rethrown.
   *
   * @param fileContent files to add to the archive
   * @param saveZip     path of the ZIP file to create
   */
  public static void zipFile(File[] fileContent, String saveZip) {
      try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(Paths.get(saveZip)));
           BufferedOutputStream bout = new BufferedOutputStream(zipOut)) {
          byte[] data = new byte[8192]; // one copy buffer shared by all entries

          for (File fc : fileContent) {
              // try-with-resources closes bin; no manual close() is needed
              try (BufferedInputStream bin = new BufferedInputStream(Files.newInputStream(fc.toPath()))) {
                  ZipEntry entry = new ZipEntry(fc.getName());
                  // Core of the trick: STORED behaves like a plain copy. The size and
                  // CRC-32 are supplied up front, before the entry is opened.
                  entry.setMethod(ZipEntry.STORED);
                  entry.setSize(fc.length());
                  entry.setCrc(getFileCRCCode(fc));
                  zipOut.putNextEntry(entry);

                  int len;

                  while ((len = bin.read(data)) != -1) {
                      bout.write(data, 0, len);
                  }

                  // bout wraps zipOut, so drain it into the entry before closing that entry
                  bout.flush();
                  zipOut.closeEntry();
              }
          }
      } catch (IOException e) {
          log.warn("zipFile", e);
      }
  }
  
  /**
   * Computes the CRC-32 checksum of a file's contents.
   * <p>
   * The file is read through a CheckedInputStream, an input stream that maintains
   * a checksum of the data passing through it. The data is pulled in 8 KiB chunks
   * rather than one byte per call; the resulting checksum is the same.
   * <p>
   * If reading fails, the IOException is logged and the checksum of the bytes
   * read up to that point is returned.
   *
   * @param file file to checksum
   * @return CRC-32 value of the bytes that were read
   */
  public static long getFileCRCCode(File file) {
      CRC32 crc32 = new CRC32();

      try (CheckedInputStream checkedinputstream = new CheckedInputStream(Files.newInputStream(file.toPath()), crc32)) {
          byte[] buffer = new byte[8192];

          while (checkedinputstream.read(buffer) != -1) {
              // reading is enough: the stream updates crc32 as a side effect
          }
      } catch (IOException e) {
          log.warn("getFileCRCCode", e);
      }

      return crc32.getValue();
  }

然後該文還有網友評論可以優化（厲害了不過本人看不是太懂……）：

getFileCRCCode 裡面的 while 用 buff read 速度更快，那裡得到的 value 跟 read 是一樣的。實測 2G 視頻提升 40 秒

接著我交給 GPT 去優化，得出下面優化過後的函數。

/**
 * Writes the given files into a ZIP archive with a selectable storage method.
 * Each file is stored under its simple name. An IOException is logged and ends
 * the operation; it is not rethrown.
 *
 * @param fileContent files to add to the archive
 * @param saveZip     path of the ZIP file to create
 * @param useStore    true: store only (STORED, no compression); false: standard
 *                    compression (DEFLATED)
 */
public static void zipFile(File[] fileContent, String saveZip, boolean useStore) {
    // The file stream is buffered once and handed to the ZipOutputStream.
    try (BufferedOutputStream fileOut = new BufferedOutputStream(Files.newOutputStream(Paths.get(saveZip)));
         ZipOutputStream archive = new ZipOutputStream(fileOut)) {

        for (int i = 0; i < fileContent.length; i++) {
            File source = fileContent[i];

            try (BufferedInputStream in = new BufferedInputStream(Files.newInputStream(source.toPath()))) {
                ZipEntry zipEntry = new ZipEntry(source.getName());
                zipEntry.setMethod(useStore ? ZipEntry.STORED : ZipEntry.DEFLATED);

                // Size and CRC are only supplied for STORED; for DEFLATED the
                // ZipOutputStream handles them itself.
                if (useStore) {
                    zipEntry.setSize(source.length());
                    zipEntry.setCrc(getFileCRCCode(source));
                }

                archive.putNextEntry(zipEntry);

                byte[] chunk = new byte[8192];

                for (int count = in.read(chunk); count != -1; count = in.read(chunk)) {
                    archive.write(chunk, 0, count);
                }

                archive.closeEntry();
            }
        }
    } catch (IOException e) {
        log.warn("zipFile", e);
    }
}

主要優化點說明

只在底層文件流包裹一次 BufferedOutputStream，ZipOutputStream 直接用它，無需再包一層。
每個 entry 的數據直接寫入 zipOut，保證 putNextEntry/closeEntry 的正確配對。
bout.flush() 不再需要（zipOut 的 close/flush 會自動做）。

最終版本

由于這個(gè)優(yōu)化只是支持多個(gè) File 傳入，而不是傳入目錄的參數(shù)。因此我們讓 GPT 再提供一般完整的 API。

/**
 * Compresses a flat array of files into a ZIP archive. Each file is stored under
 * its simple name. An IOException is logged and ends the operation; it is not
 * rethrown.
 *
 * @param fileContent files to add to the archive
 * @param saveZip     path of the ZIP file to create
 * @param useStore    true: store only (STORED); false: standard compression (DEFLATED)
 */
public static void zipFile(File[] fileContent, String saveZip, boolean useStore) {
    try (BufferedOutputStream bos = new BufferedOutputStream(Files.newOutputStream(Paths.get(saveZip)));
         ZipOutputStream zipOut = new ZipOutputStream(bos)) {

        for (File fc : fileContent) {
            addFileToZip(fc, fc.getName(), zipOut, useStore);
        }
    } catch (IOException e) {
        // report through the logger, like getFileCRCCode, instead of printing to stderr
        log.warn("zipFile", e);
    }
}

/**
 * Recursively compresses a directory into a ZIP archive. Entry names are paths
 * relative to the directory, so the directory's own name is not part of them.
 * An IOException is logged and ends the operation; it is not rethrown.
 * <p>
 * NOTE(review): the archive is opened before the tree is walked and the walk
 * excludes nothing, so a {@code saveZip} located inside {@code sourceDir} may be
 * picked up as an input -- verify callers never do that.
 *
 * @param sourceDir directory to compress
 * @param saveZip   path of the ZIP file to create
 * @param useStore  true: store only (STORED); false: standard compression (DEFLATED)
 * @throws IllegalArgumentException if {@code sourceDir} does not exist or is not a directory
 */
public static void zipDirectory(String sourceDir, String saveZip, boolean useStore) {
    File dir = new File(sourceDir);

    if (!dir.exists() || !dir.isDirectory())
        throw new IllegalArgumentException("Source directory does not exist or is not a directory: " + sourceDir);

    try (BufferedOutputStream bos = new BufferedOutputStream(Files.newOutputStream(Paths.get(saveZip)));
         ZipOutputStream zipOut = new ZipOutputStream(bos)) {
        // The canonical path of the root is the prefix stripped from every entry name.
        String basePath = dir.getCanonicalPath();
        zipDirectoryRecursive(dir, basePath, zipOut, useStore);
    } catch (IOException e) {
        // report through the logger, like getFileCRCCode, instead of printing to stderr
        log.warn("zipDirectory", e);
    }
}

/**
 * Recursive step of the directory compression: adds {@code file} to the archive,
 * descending into it when it is a directory. Empty directories get an explicit
 * entry so that they survive in the archive.
 *
 * @param file     file or directory to add
 * @param basePath canonical path of the root directory being compressed; entry
 *                 names are made relative to it
 * @param zipOut   ZIP stream being written
 * @param useStore true: store only (STORED); false: standard compression (DEFLATED)
 * @throws IOException if resolving a path or writing an entry fails
 */
private static void zipDirectoryRecursive(File file, String basePath, ZipOutputStream zipOut, boolean useStore) throws IOException {
    // Resolve once; the canonical path is needed for both the comparison and the substring.
    String canonicalPath = file.getCanonicalPath();
    String relativePath;

    if (basePath.equals(canonicalPath)) {
        relativePath = StrUtil.EMPTY_STRING;
    } else {
        // Skip the separator after basePath, unless basePath already ends with one
        // (a filesystem or drive root), in which case there is nothing extra to skip.
        int prefixLength = basePath.endsWith(File.separator) ? basePath.length() : basePath.length() + 1;
        // NOTE(review): assumes canonicalPath starts with basePath; a symlink pointing
        // outside the tree would break that assumption -- confirm whether that can occur.
        relativePath = canonicalPath.substring(prefixLength).replace(File.separatorChar, '/');
    }

    if (file.isDirectory()) {
        File[] files = file.listFiles();

        if (files != null && files.length == 0 && !relativePath.isEmpty()) {
            // An empty directory still gets an entry; the trailing slash marks it as a directory.
            ZipEntry entry = new ZipEntry(relativePath + "/");
            zipOut.putNextEntry(entry);
            zipOut.closeEntry();
        } else if (files != null) {
            for (File child : files) {
                zipDirectoryRecursive(child, basePath, zipOut, useStore);
            }
        }
    } else {
        addFileToZip(file, relativePath, zipOut, useStore);
    }
}

/**
 * Adds a single file to the archive under the given entry name.
 *
 * @param file         file whose contents are written
 * @param zipEntryName entry name inside the ZIP
 * @param zipOut       ZIP stream being written
 * @param useStore     true: store only (STORED); false: standard compression (DEFLATED)
 * @throws IOException if the file cannot be read or the entry cannot be written
 */
private static void addFileToZip(File file, String zipEntryName, ZipOutputStream zipOut, boolean useStore) throws IOException {
    try (BufferedInputStream source = new BufferedInputStream(Files.newInputStream(file.toPath()))) {
        ZipEntry zipEntry = new ZipEntry(zipEntryName);
        zipEntry.setMethod(useStore ? ZipEntry.STORED : ZipEntry.DEFLATED);

        // Size and CRC are only supplied for STORED; for DEFLATED the
        // ZipOutputStream handles them itself.
        if (useStore) {
            zipEntry.setSize(file.length());
            zipEntry.setCrc(getFileCRCCode(file));
        }

        zipOut.putNextEntry(zipEntry);

        byte[] chunk = new byte[8192];

        for (int count = source.read(chunk); count != -1; count = source.read(chunk)) {
            zipOut.write(chunk, 0, count);
        }

        zipOut.closeEntry();
    }
}

/**
 * Computes the CRC-32 checksum of a file's contents.
 * <p>
 * The file is read through a CheckedInputStream, an input stream that maintains
 * a checksum of the data passing through it. The data is pulled in 8 KiB chunks
 * rather than one byte per call; the resulting checksum is the same.
 * <p>
 * If reading fails, the IOException is logged and the checksum of the bytes
 * read up to that point is returned.
 *
 * @param file file to checksum
 * @return CRC-32 value of the bytes that were read
 */
private static long getFileCRCCode(File file) {
    CRC32 crc32 = new CRC32();

    try (CheckedInputStream checkedinputstream = new CheckedInputStream(Files.newInputStream(file.toPath()), crc32)) {
        byte[] buffer = new byte[8192];

        while (checkedinputstream.read(buffer) != -1) {
            // reading is enough: the stream updates crc32 as a side effect
        }
    } catch (IOException e) {
        log.warn("getFileCRCCode", e);
    }

    return crc32.getValue();
}

自此我們的 zip 壓縮工具函數就完成了。

到此這篇關於淺談 Java Zip 壓縮及其優化的文章就介紹到這了，更多相關 Java Zip 壓縮內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章，希望大家以後多多支持腳本之家！

您可能感興趣的文章: