目录
- 前言
- 实现方法
- 使用
前言
前段时间一直使用到word文档转pdf或者pdf转word,寻思着用Java应该是可以实现的,于是花了点时间写了个文件转换工具
源码:weloe/FileConversion (github.com)
主要功能就是word和pdf的文件转换,如下
- pdf 转 word
- pdf 转 图片
- word 转 图片
- word 转 html
- word 转 pdf
实现方法
主要使用了 pdfbox(Apache PDFBox | A Java PDF Library)以及 spire.doc(Free Spire.Doc for Java | 100% 免费 Java Word 组件,e-iceblue.cn)两个工具包
pom.xml
<repositories>
    <repository>
        <id>com.e-iceblue</id>
        <url>http://repo.e-iceblue.cn/repository/maven-public/</url>
    </repository>
</repositories>
<properties>
    <!-- NOTE(review): the compiler level was lost in the published listing; 8 is assumed. -->
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>pdfbox</artifactId>
        <!-- 2.x line: the code below relies on PDDocument.load, which only exists in PDFBox 2.x. -->
        <version>2.0.4</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.13.2</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>e-iceblue</groupId>
        <artifactId>spire.doc.free</artifactId>
        <!-- NOTE(review): the major version digit was lost in the published listing; 3.9.0 is assumed. -->
        <version>3.9.0</version>
    </dependency>
</dependencies>
策略接口
/**
 * Strategy contract for a single file-format conversion.
 *
 * <p>Each implementation answers for one conversion method keyword and produces one
 * output file from one input file.
 */
public interface FileConversion {

    /**
     * Tells whether this strategy handles the given conversion method keyword.
     *
     * @param s conversion method keyword to test
     * @return {@code true} if this strategy should perform the conversion
     */
    boolean isSupport(String s);

    /**
     * Converts the source file and reports where the result was written.
     *
     * @param pathName       path of the file to convert
     * @param dirAndFileName output location (directory plus file name) used to build the result path
     * @return path of the generated file
     * @throws Exception if the conversion cannot be completed
     */
    String convert(String pathName, String dirAndFileName) throws Exception;
}
PDF转图片实现
public class PDFImage implements FileConversion{ | |
private String suffix = ".jpg"; | |
public static final int DEFAULT_DPI =; | |
public boolean isSupport(String s) { | |
return "pdfimage".equals(s); | |
} | |
public String convert(String pathName,String dirAndFileName) throws Exception { | |
String outPath = dirAndFileName + suffix; | |
if(Files.exists(Paths.get(outPath))){ | |
throw new RuntimeException(outPath+" 文件已存在"); | |
} | |
pdfmultiImage(pathName,outPath,DEFAULT_DPI); | |
return outPath; | |
} | |
/** | |
* pdf转图片 | |
* 多页PDF会每页转换为一张图片,下面会有多页组合成一页的方法 | |
* | |
* @param pdfFile pdf文件路径 | |
* @param outPath 图片输出路径 | |
* @param dpi 相当于图片的分辨率,值越大越清晰,但是转换时间变长 | |
*/ | |
public void pdfmultiImage(String pdfFile, String outPath, int dpi) { | |
if (dpi <=) { | |
// 如果没有设置DPI,默认设置为 | |
dpi = DEFAULT_DPI; | |
} | |
try (PDDocument pdf = PDDocument.load(new FileInputStream(pdfFile))) { | |
int actSize = pdf.getNumberOfPages(); | |
List<BufferedImage> picList = new ArrayList<>(); | |
for (int i =; i < actSize; i++) { | |
BufferedImage image = new PDFRenderer(pdf).renderImageWithDPI(i, dpi, ImageType.RGB); | |
picList.add(image); | |
} | |
// 组合图片 | |
ImageUtil.yPic(picList, outPath); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
} | |
} |
PDF转word实现
public class PDFWord implements FileConversion { | |
private String suffix = ".doc"; | |
public boolean isSupport(String s) { | |
return "pdfword".equals(s); | |
} | |
/** | |
* | |
* @param pathName | |
* @throws IOException | |
*/ | |
public String convert(String pathName,String dirAndFileName) throws Exception { | |
String outPath = dirAndFileName + suffix; | |
if(Files.exists(Paths.get(outPath))){ | |
throw new RuntimeException(outPath+" 文件已存在"); | |
} | |
pdfword(pathName, outPath); | |
return outPath; | |
} | |
private void pdfword(String pathName, String outPath) throws IOException { | |
PDDocument doc = PDDocument.load(new File(pathName)); | |
int pagenumber = doc.getNumberOfPages(); | |
// 创建文件 | |
createFile(Paths.get(outPath)); | |
FileOutputStream fos = new FileOutputStream(outPath); | |
Writer writer = new OutputStreamWriter(fos, "UTF-"); | |
PDFTextStripper stripper = new PDFTextStripper(); | |
stripper.setSortByPosition(true);//排序 | |
stripper.setStartPage();//设置转换的开始页 | |
stripper.setEndPage(pagenumber);//设置转换的结束页 | |
stripper.writeText(doc, writer); | |
writer.close(); | |
doc.close(); | |
} | |
} |
word转html
public class WordHTML implements FileConversion{ | |
private String suffix = ".html"; | |
public boolean isSupport(String s) { | |
return "wordhtml".equals(s); | |
} | |
public String convert(String pathName, String dirAndFileName) { | |
String outPath = dirAndFileName + suffix; | |
if(Files.exists(Paths.get(outPath))){ | |
throw new RuntimeException(outPath+" 文件已存在"); | |
} | |
Document doc = new Document(); | |
doc.loadFromFile(pathName); | |
doc.saveToFile(outPath, FileFormat.Html); | |
doc.dispose(); | |
return outPath; | |
} | |
} |
word转图片
public class WordImage implements FileConversion{ | |
private String suffix = ".jpg"; | |
public boolean isSupport(String s) { | |
return "wordimage".equals(s); | |
} | |
public String convert(String pathName, String dirAndFileName) throws Exception { | |
String outPath = dirAndFileName + suffix; | |
if(Files.exists(Paths.get(outPath))){ | |
throw new RuntimeException(outPath+" 文件已存在"); | |
} | |
Document doc = new Document(); | |
//加载文件 | |
doc.loadFromFile(pathName); | |
//上传文档页数,也是最后要生成的图片数 | |
Integer pageCount = doc.getPageCount(); | |
// 参数第一个和第三个都写死 第二个参数就是生成图片数 | |
BufferedImage[] image = doc.saveToImages(, pageCount, ImageType.Bitmap); | |
// 组合图片 | |
List<BufferedImage> imageList = Arrays.asList(image); | |
ImageUtil.yPic(imageList, outPath); | |
return outPath; | |
} | |
} |
word转pdf
public class WordPDF implements FileConversion{ | |
private String suffix = ".pdf"; | |
public boolean isSupport(String s) { | |
return "wordpdf".equals(s); | |
} | |
public String convert(String pathName, String dirAndFileName) throws Exception { | |
String outPath = dirAndFileName + suffix; | |
if(Files.exists(Paths.get(outPath))){ | |
throw new RuntimeException(outPath+" 文件已存在"); | |
} | |
//加载word | |
Document document = new Document(); | |
document.loadFromFile(pathName, FileFormat.Docx); | |
//保存结果文件 | |
document.saveToFile(outPath, FileFormat.PDF); | |
document.close(); | |
return outPath; | |
} | |
} |
使用
输入转换方法,文件路径,输出路径(输出路径如果输入'null'则为文件同目录下同名不同后缀文件)
转换方法可选项:
- pdf2word
- pdf2image
- word2html
- word2image
- word2pdf
例如输入:
pdf2word D:\test\testFile.pdf null
控制台输出:
转换方法: pdf2word 文件: D:\test\testFile.pdf
转换成功!文件路径: D:\test\testFile.doc