一、aspose-pdf

转:Java 代码实现pdf转word文件

代码实现主要依赖两个第三方jar包:一个是apache-poi,一个是aspose-pdf。apache-poi完全开源免费;aspose-pdf免费版生成的文件带有水印,且生成数量有限制。单纯用apache-poi实现pdf转word文件的话,实现非常复杂,且转换后的样式与原来样式保持一致的比例很低。所以,我先用aspose-pdf生成带水印的docx文件,再用apache-poi去除docx文件中由aspose-pdf生成的水印,最终得到一个无水印的word文件。

项目远程仓库
aspose-pdf 这个需要配置单独的仓库地址才能下载,不会配置的可以去官网直接下载jar引入项目代码中。

<repositories>
        <repository>
            <id>AsposeJavaAPI</id>
            <name>Aspose Java API</name>
            <url>https://repository.aspose.com/repo/</url>
        </repository>
    </repositories>

Maven项目pom文件依赖
        <!-- https://mvnrepository.com/artifact/com.aspose/aspose-pdf -->

<dependency>
            <groupId>com.aspose</groupId>
            <artifactId>aspose-pdf</artifactId>
            <version>21.8</version>
        </dependency>
        <!-- poi-ooxml是poi的升级版本-->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.1.2</version>
        </dependency>

 核心代码实现

import com.aspose.pdf.Document;
import com.aspose.pdf.SaveFormat;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;import java.io.*;
import java.util.List;public class PDFHelper3 {public static void main(String[] args) throws IOException {pdf2doc("test.pdf");}//移除文字水印public static boolean removeWatermark(File file) {try {XWPFDocument doc = new XWPFDocument(new FileInputStream(file));// 段落List<XWPFParagraph> paragraphs = doc.getParagraphs();for (XWPFParagraph paragraph : paragraphs) {String text=paragraph.getText();if("Evaluation Only. Created with Aspose.PDF. Copyright 2002-2021 Aspose Pty Ltd.".equals(text)){List<XWPFRun> runs = paragraph.getRuns();runs.forEach(e-> e.setText("",0));}}FileOutputStream outStream = new FileOutputStream(file);doc.write(outStream);outStream.close();} catch (IOException e) {e.printStackTrace();}return true;}//pdf转doc(目前最大支持21页)public static void pdf2doc(String pdfPath) {long old = System.currentTimeMillis();try {//新建一个pdf文档String wordPath=pdfPath.substring(0,pdfPath.lastIndexOf("."))+".docx";File file = new File(wordPath);FileOutputStream os = new FileOutputStream(file);//Address是将要被转化的word文档Document doc = new Document(pdfPath);//全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF, EPUB, XPS, SWF 相互转换doc.save(os, SaveFormat.DocX);os.close();//去除水印removeWatermark(new File(wordPath));//转化用时long now = System.currentTimeMillis();System.out.println("Pdf 转 Word 共耗时:" + ((now - old) / 1000.0) + "秒");} catch (Exception e) {System.out.println("Pdf 转 Word 失败...");e.printStackTrace();}}//转pptpublic static void pdf2ppt(String pdfPath) {long old = System.currentTimeMillis();try {//新建一个word文档String wordPath = pdfPath.substring(0, pdfPath.lastIndexOf(".")) + ".pptx";FileOutputStream os = new FileOutputStream(wordPath);//doc是将要被转化的word文档Document doc = new Document(pdfPath);//全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF, EPUB, XPS, SWF 相互转换doc.save(os, SaveFormat.Pptx);os.close();//转化用时long now = System.currentTimeMillis();System.out.println("Pdf 转 PPT 共耗时:" + ((now - old) / 1000.0) + "秒");} catch (Exception e) {System.out.println("Pdf 转 PPT 
失败...");e.printStackTrace();}}//转excelpublic static void pdf2excel(String pdfPath) {long old = System.currentTimeMillis();try {String wordPath = pdfPath.substring(0, pdfPath.lastIndexOf(".")) + ".xlsx";FileOutputStream os = new FileOutputStream(wordPath);Document doc = new Document(pdfPath);doc.save(os, SaveFormat.Excel);os.close();long now = System.currentTimeMillis();System.out.println("Pdf 转 EXCEL 共耗时:" + ((now - old) / 1000.0) + "秒");} catch (Exception e) {System.out.println("Pdf 转 EXCEL 失败...");e.printStackTrace();}}//转htmlpublic static void pdf2Html(String pdfPath) {long old = System.currentTimeMillis();try {String htmlPath = pdfPath.substring(0, pdfPath.lastIndexOf(".")) + ".html";Document doc = new Document(pdfPath);doc.save(htmlPath, SaveFormat.Html);long now = System.currentTimeMillis();System.out.println("Pdf 转 HTML 共耗时:" + ((now - old) / 1000.0) + "秒");} catch (Exception e) {System.out.println("Pdf 转 HTML 失败...");e.printStackTrace();}}//转图片public static void pdf2image(String pdfPath) {long old = System.currentTimeMillis();try {Resolution resolution = new Resolution(300);String dataDir = pdfPath.substring(0, pdfPath.lastIndexOf("."));File imageDir = new File(dataDir + "_images");if (!imageDir.exists()) {imageDir.mkdirs();}Document doc = new Document(pdfPath);PngDevice pngDevice = new PngDevice(resolution);for (int pageCount = 1; pageCount <= doc.getPages().size(); pageCount++) {OutputStream imageStream = new FileOutputStream(imageDir + "/" + pageCount + ".png");pngDevice.process(doc.getPages().get_Item(pageCount), imageStream);imageStream.close();}long now = System.currentTimeMillis();System.out.println("Pdf 转 PNG 共耗时:" + ((now - old) / 1000.0) + "秒");} catch (Exception e) {System.out.println("Pdf 转 PNG 失败...");e.printStackTrace();}}//转txtpublic static void pdf2txt(String pdfPath) {long old = System.currentTimeMillis();Document pdfDocument = new Document(pdfPath);TextAbsorber ta = new TextAbsorber();ta.visit(pdfDocument);String txtPath = 
pdfPath.substring(0, pdfPath.lastIndexOf(".")) + ".txt";try {BufferedWriter writer = new BufferedWriter(new FileWriter(txtPath));writer.write(ta.getText());writer.close();long now = System.currentTimeMillis();System.out.println("Pdf 转 TXT 共耗时:" + ((now - old) / 1000.0) + "秒");} catch (IOException e) {System.out.println("Pdf 转 TXT 失败...");e.printStackTrace();}}}

二、spire  Spire.Doc for Java 中文教程

pdf 转 word


import java.io.File;import com.spire.doc.Document;
import com.spire.pdf.FileFormat;
import com.spire.pdf.PdfDocument;
import com.spire.pdf.widget.PdfPageCollection;public class PdfToWord {public static void main(String[] args) {pdftoword("test.pdf");}public static String pdftoword(String srcPath) {boolean result = false;String baseDir = srcPath.substring(0, srcPath.length() - 4);String splitPath = baseDir + "_temp_split" + File.separator;String docPath = baseDir + "_temp_doc" + File.separator;String desPath = baseDir + ".docx";try {// 0、判断输入的是否是pdf文件//第一步:判断输入的是否合法boolean flag = isPDFFile(srcPath);if(flag){//第二步:在输入的路径下新建文件夹boolean flag1 = create(splitPath, docPath);if (flag1) {// 1、加载pdfPdfDocument pdf = new PdfDocument();pdf.loadFromFile(srcPath);PdfPageCollection num = pdf.getPages();// 2、如果pdf的页数小于11,那么直接进行转化if (num.getCount() <= 10) {pdf.saveToFile(desPath, com.spire.pdf.FileFormat.DOCX);}// 3、否则输入的页数比较多,就开始进行切分再转化else {// 第一步:将其进行切分,每页一张pdfpdf.split(splitPath + "test{0}.pdf", 0);// 第二步:将切分的pdf,一个一个进行转换File[] fs = getSplitFiles(splitPath);for (int i = 0; i < fs.length; i++) {PdfDocument sonpdf = new PdfDocument();sonpdf.loadFromFile(fs[i].getAbsolutePath());sonpdf.saveToFile(docPath + fs[i].getName().substring(0, fs[i].getName().length() - 4) + ".docx", FileFormat.DOCX);}//第三步:对转化的doc文档进行合并,合并成一个大的wordtry {result = merge(docPath, desPath);return desPath;} catch (Exception e) {e.printStackTrace();}}}}} catch (Exception e) {e.printStackTrace();} finally {//4、把刚刚缓存的split和doc删除if (result == true) {clearFiles(splitPath);clearFiles(docPath);}}return null;}private static boolean create(String splitPath, String docPath) {File f = new File(splitPath);File f1 = new File(docPath);if (!f.exists()) f.mkdirs();if (!f.exists()) f1.mkdirs();return true;}// 判断是否是pdf文件private static boolean isPDFFile(String srcPath2) {File file = new File(srcPath2);String filename = file.getName();if (filename.endsWith(".pdf")) {return true;}return false;}// 取得某一路径下所有的pdfprivate static File[] getSplitFiles(String path) {File f = new File(path);File[] fs = f.listFiles();if (fs == null) {return null;}return 
fs;}//删除文件和目录private static void clearFiles(String workspaceRootPath){File file = new File(workspaceRootPath);if(file.exists()){deleteFile(file);}}private static void deleteFile(File file){if(file.isDirectory()){File[] files = file.listFiles();for(int i=0; i<files.length; i++){deleteFile(files[i]);}}file.delete();}private static boolean merge(String docPath, String desPath) {File[] fs = getSplitFiles(docPath);Document document = new Document(docPath + "test0.docx");for (int i = 1; i < fs.length; i++) {document.insertTextFromFile(docPath + "test" + i + ".docx", com.spire.doc.FileFormat.Docx_2013);}//第四步:对合并的doc进行保存2document.saveToFile(desPath);return true;}}

word 转  pdf

public static void wordToPdf() {//实例化Document类的对象Document doc = new Document();//加载Worddoc.loadFromFile("test.docx");//保存为PDF格式doc.saveToFile("test.pdf", com.spire.doc.FileFormat.PDF);}

注意:Spire.Doc 免费版在 word 转 pdf 时只能转换前3页,运行时会输出如下提示:

Spire Doc.
Free version converting word documents to PDF files, you can only get the first 3 page of PDF file.
Upgrade to Commercial Edition of Spire.Doc <https://www.e-iceblue.com/Introduce/doc-for-java.html>.

三、Itextpdf

word 转 pdf

先word 转 html 再把 html 转 pdf

word 转 html

 /*** Word03 转为 HTML*/public static String wordToHtml03(String fileName) {if (!(checkFile(fileName, "doc") || checkFile(fileName, "docx"))) {return null;}HWPFDocument wordDoc = null;WordToHtmlConverter wthc = null;try {wordDoc = new HWPFDocument(new FileInputStream(fileName));wthc = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());} catch (IllegalArgumentException e) {return null;} catch (Exception e) {return null;}final String fn = fileName;//html引用图片位置wthc.setPicturesManager(new PicturesManager() {public String savePicture(byte[] bytes, PictureType pt, String str, float f, float f1) {return getImageUrl(fn) + str;}});wthc.processDocument(wordDoc);List<Picture> pics = wordDoc.getPicturesTable().getAllPictures();if (null != pics && pics.size() > 0) {fileExists(getImageSavePath(fileName));for (Picture pic : pics) {try {//生成图片位置pic.writeImageContent(new FileOutputStream(getImageSavePath(fileName) + pic.suggestFullFileName()));} catch (IOException e) {return null;}}}org.w3c.dom.Document htmlDocument = wthc.getDocument();ByteArrayOutputStream out = new ByteArrayOutputStream();DOMSource domSource = new DOMSource(htmlDocument);StreamResult streamResult = new StreamResult(out);try {TransformerFactory tf = TransformerFactory.newInstance();Transformer serializer = tf.newTransformer();serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");serializer.setOutputProperty(OutputKeys.INDENT, "yes");serializer.setOutputProperty(OutputKeys.METHOD, "html");serializer.transform(domSource, streamResult);} catch (TransformerException e) {return null;} finally {}String htmlStr = new String(out.toByteArray());return htmlStr;}/*** Word07 转为 HTML*/@SuppressWarnings("deprecation")public static String wordToHtml07(String fileName) {if (!checkFile(fileName, "docx") && !checkFile(fileName, "doc")) {return null;}//加载html页面时图片路径XHTMLOptions options = XHTMLOptions.create().URIResolver(new 
BasicURIResolver(getImageUrl(fileName)));//图片保存文件夹路径fileExists(getImageSavePath(fileName));options.setExtractor(new FileImageExtractor(new File(getImageSavePath(fileName))));ByteArrayOutputStream out = null;//读取文档内容XWPFDocument document = null;InputStream in = null;try {in = new FileInputStream(fileName);document = new XWPFDocument(in);out = new ByteArrayOutputStream();//out = new FileOutputStream(new File(outputFile));XHTMLConverter.getInstance().convert(document, out, options);String str = out.toString("utf-8");return str;} catch (NotOfficeXmlFileException e) {return null;} catch (Exception e) {return null;} finally {}}

html 转 pdf

转:java使用itext7实现html转pdf

<!-- itext7html转pdf  -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>html2pdf</artifactId>
    <version>3.0.2</version>
</dependency>
<!-- 中文字体支持 -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>font-asian</artifactId>
    <version>7.1.13</version>
</dependency>

import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.kernel.events.PdfDocumentEvent;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.layout.font.FontProvider;import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;public class HtmlToPdfUtils {/*** html转pdf** @param  inputStream 输入流* @param  waterMark 水印* @param fontPath 字体路径,ttc后缀的字体需要添加<b>,0<b/>* @param  outputStream 输出流* @date : 2021/1/15 14:07*/public static void convertToPdf(InputStream inputStream, String waterMark, String fontPath, OutputStream outputStream) throws IOException {PdfWriter pdfWriter = new PdfWriter(outputStream);PdfDocument pdfDocument = new PdfDocument(pdfWriter);//设置为A4大小pdfDocument.setDefaultPageSize(PageSize.A4);//添加水印pdfDocument.addEventHandler(PdfDocumentEvent.END_PAGE, new WaterMarkEventHandler(waterMark));//添加中文字体支持ConverterProperties properties = new ConverterProperties();FontProvider fontProvider = new FontProvider();PdfFont sysFont = PdfFontFactory.createFont("STSongStd-Light", "UniGB-UCS2-H", false);fontProvider.addFont(sysFont.getFontProgram(), "UniGB-UCS2-H");//添加自定义字体,例如微软雅黑if (StrUtil.isNotBlank(fontPath)){PdfFont microsoft = PdfFontFactory.createFont(fontPath, PdfEncodings.IDENTITY_H, false);fontProvider.addFont(microsoft.getFontProgram(), PdfEncodings.IDENTITY_H);}properties.setFontProvider(fontProvider);HtmlConverter.convertToPdf(inputStream, pdfDocument, properties);pdfWriter.close();pdfDocument.close();}public static void html2pdf(String htmlFile, String pdfFile) throws Exception {ConverterProperties converterProperties = new ConverterProperties();DefaultFontProvider dfp = new DefaultFontProvider();//添加字体库dfp.addDirectory("C:/Windows/Fonts");converterProperties.setFontProvider(dfp);try (InputStream in = new FileInputStream(new File(htmlFile)); OutputStream out = new FileOutputStream(new File(pdfFile))){HtmlConverter.convertToPdf(in, out, converterProperties);}catch (Exception e){e.printStackTrace();}}}

pdf 转 word

java使用poi、itextpdf将word、ppt转为pdf文件

Pdf 转 word 和 word 转 pdf 等相关推荐

  1. html 实现动态在线预览word、excel、pdf等文件

    word+excle+pdf表格在线浏览 通过iframe直接引用微软提供的方法 <iframe src='https://view.officeapps.live.com/op/view.as ...

  2. 编辑PDF文档,Word 2013可以是您的选择

    题外话:记得刚进公司的时候,几乎所有的培训文档都是PDF.标准文档,公司使用的软件是Adobe Acrobat 5.0(不知道多少钱,呵呵),软件当然是购买正版的:去年,公司购买了新版本的Adobe ...

  3. word转pdf图片模糊怎么办_迅捷PDF转换器如何将word转为长图?word转图片方法

    在日常工作中,我们基本都用word来编辑文档,编辑完成,如果将它保存为其他格式,那可能会出现跑版的情况.为了解决这一问题,我们可以用迅捷PDF转换器将word转长图,这样也方便在手机上阅读.只是很多人 ...

  4. php word excel转pdf文件怎么打开,php office文件(word/excel/ppt)转pdf文件,pptpdf

    php office文件(word/excel/ppt)转pdf文件,pptpdf 把代码放到了github上,点击进入 前阶段有个项目用到了线上预览功能, 关于预览office文件实现核心就是,把o ...

  5. Office word 2007不能另存为pdf格式的解决方法

    Office word 2007不能另存为pdf格式的解决方法 参考文章: (1)Office word 2007不能另存为pdf格式的解决方法 (2)https://www.cnblogs.com/ ...

  6. 转换Word文档为PDF文件

    1.使用 Office COM组件的Microsoft.Office.Interop.word.dll库 该方法需要在电脑上安装Office软件,并且需要Office支持转换为PDF格式,如果不支持, ...

  7. pdf 加深 扫描件_为什么PDF文档不能像Word文档一样随便编辑?如何免费将PDF转换为Word?...

    PDF文档是一种非常难编辑的文档,有时候我们需要编辑或者复制PDF文档里面的内容,把PDF文档转换为Word就是我们最佳的选择.为什么PDF文档这么难编辑?我们怎么免费把PDF文档转换为Word文档? ...

  8. wireshark网络分析就这么简单 pdf_才知道,PDF、Excel、Word互转这么简单?涨知识了...

    才知道,PDF.Excel.Word相互转换这么简单?看完涨知识了 今天又是一期干货分享,关于职场办公中经常遇见的文档格式互换问题,其中包含PDF.Excel.Word这3种常见的格式转换. 相信大家 ...

  9. Java 将 Word 文档转换为 PDF 的完美工具

    点击上方 好好学java ,选择 星标 公众号 重磅资讯.干货,第一时间送达 今日推荐:为什么魂斗罗只有 128 KB却可以实现那么长的剧情?个人原创+1博客:点击前往,查看更多 来源:https:/ ...

  10. 【python自动化办公02】word操作-word转pdf

    点击上方"AI搞事情"关注我们 清明 Python调用Win32com实现Office批量转PDF https://www.cnblogs.com/lolipop2019/p/14 ...

最新文章

  1. 给AI系统做“安全体检”,阿里安全提出自动化AI对抗平台CAA | AAAI 2021
  2. 构建dubbo分布式平台-maven模块规划和平台功能导图
  3. C语言重复加一个数的所有位数字的算法(附完整源码)
  4. tensorflow 标准数据读取 tfrecords
  5. 高级php面试题及部分答案
  6. ubuntu下安装拼音输入法ibus
  7. C++带参数控制台(实例)
  8. opengl png图片 qt_Qt资源文件的格式,并用CMake添加Qt资源文件
  9. 重磅!MobileNetV3 来了!
  10. vba 数组赋值_VBA数组与字典解决方案第31讲:VBA数组声明及赋值后的回填方法
  11. EAI企业应用集成场景及解决方案
  12. 二十四 Redis消息订阅事务持久化
  13. Mac上Jupyter之notebook快捷键
  14. windows 10安装python2.7、python3.6并配置环境变量
  15. Qt在VS2012中引用QtWidgets时报GLES2/gl2.h无法打开错误的解决办法
  16. 画手机原型图工具推荐
  17. PyQt+PyQtWebEngine+Spyder问题小结
  18. 基于python的更换照片底色工具开发
  19. 0欧姆电阻、磁珠及电感的作用
  20. python主函数调用格式_Python的模块与函数

热门文章

  1. 幼儿园案例经验迁移_幼儿生活经验“迁移”讲述的运用指导
  2. 《高等代数学》(姚慕生),例1.5.10
  3. 2018 rust卡石头教程_rust地上的石头怎么捡 | 手游网游页游攻略大全
  4. 谣言检测论文精读——1.IJCAI2016-Detecting Rumors from Microblogs with Recurrent Neural Networks
  5. 浅析Marshmallow在flask中的应用
  6. Android Studio 设置背景色、修改前进后退快捷键
  7. 细胞工程1、2-实验室组成与无菌操作
  8. 你真的了解活跃用户吗?
  9. C#连接服务器超时解决方法
  10. Ubuntu16.04下cuda9.0+cudnn7.0安装指南