微信或手机浏览器在线显示pdf文件

问题及解决

在之前的文章中提到了如何在手机上显示office文件，这里提一下pdf文件如何在手机上展示。问题发生的情况是：公司企业号会从OA系统定时获取信息，并解析内网文章中的图片、文件，展示在微信中。后台发现程序报错，为什么呢？因为文件的在线预览只处理了office文件，没有处理pdf文件，导致在安卓机上处理pdf文件时出现异常。现在使用pdf2htmlEX工具进行转换，流程如下：

（苹果设备无需特殊处理，直接通过文件流设置头部信息application/msword、application/msexcel、application/pdf 内置浏览器能够识别）

效果图

pdf2htmlEX工具转码出来的html很清晰，但如果直接放在手机端，会出现高度、宽度过大、样式异常的问题，需要对转码后的html进行格式化后再展示。后面的示例代码只做了简单的处理和展示，还需要针对实际情况进一步优化。

（其实可以更简单，转码后对html文件进行处理，使用nginx反向代理转码目录，详细页面通过iframe直接连接nginx代理后的url）

示例代码

-------------------从OA获取文件-----------------

/*** 从OA上抓取文件* @return*/public String getFileFromOa(){   HttpServletRequest req = ServletActionContext.getRequest();String userAgent=req.getHeader("User-Agent");//里面包含了设备类型//获得文件地址String fileUrl = ServletActionContext.getRequest().getParameter("fileUrl");fileUrl.replaceAll("%2B", "\\+");//转换加号String fileTypeTemp=fileUrl.substring(fileUrl.lastIndexOf(".")+1,fileUrl.length());System.out.println("-----------------   "+fileTypeTemp);//判断是否苹果手机、是否office文件if(-1!=userAgent.indexOf("iPhone")||-1!=fileTypeTemp.indexOf("txt")){//-----------------////此方法需要浏览器自己能够打开，ios可以但是微信andriod版内置浏览器不支持//-----------------////如果是苹果手机fileUrl.replaceAll("%20", "\\+");//转换加号String strURL = MessageUtil.oaUrl+fileUrl;String fileType=strURL.substring(strURL.lastIndexOf(".")+1,strURL.length());//获得图片的数据流try {URL oaUrl = new URL(strURL);HttpURLConnection httpConn = (HttpURLConnection) oaUrl.openConnection();InputStream in = httpConn.getInputStream();//获取输出流HttpServletResponse response = ServletActionContext.getResponse();req.setCharacterEncoding("UTF-8");response.setCharacterEncoding("UTF-8");String name=fileUrl.substring(fileUrl.lastIndexOf("/")+1, fileUrl.length());response.setHeader("Content-Disposition",  "attachment;filename=" +  new String( (name ).getBytes(),  "iso-8859-1"));if("doc".equals(fileType)||"docx".equals(fileType)){response.setContentType("application/msword");}else if("xls".equals(fileType)||"xlsx".equals(fileType)){response.setContentType("application/msexcel"); }else{response.setContentType("application/"+fileType);}OutputStream out = response.getOutputStream();//输出图片信息byte[] bytes = new byte[1024];  int cnt=0;  while ((cnt=in.read(bytes,0,bytes.length)) != -1) {  out.write(bytes, 0, cnt);  }  out.flush();out.close();in.close();} catch (MalformedURLException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}return null;}else{//如果非苹果手机，自己处理文档//获得OAuth2验证String code=req.getParameter("code");String state=req.getParameter("state");//根据code获得人员MessageUtil 
msgUtil=new MessageUtil();String userId=msgUtil.getUserIdByCode(code);//生成微信js授权String jsapi_ticket=msgUtil.getJsapiTicketFromWx();//签名String url = MessageUtil.webUrl+"/wx/oaNewsMobileAction.do?action=getFileFromOa&fileUrl="+fileUrl;System.out.println(url);Map<String, String> ret = MessageUtil.sign(jsapi_ticket, url);req.setAttribute("str1", ret.get("signature"));req.setAttribute("time", ret.get("timestamp"));req.setAttribute("nonceStr", ret.get("nonceStr"));fileUrl.replaceAll("%2B", "\\+");//转换加号String strURL = MessageUtil.oaUrl+fileUrl;//在本地存放OA文件，然后转换成html，再对文档中的图片路径进行修改，最后输出到页面try {URL oaUrl = new URL(strURL);HttpURLConnection httpConn = (HttpURLConnection) oaUrl.openConnection();InputStream in = httpConn.getInputStream();//获取输出流HttpServletResponse response = ServletActionContext.getResponse();req.setCharacterEncoding("UTF-8");response.setCharacterEncoding("UTF-8");String name=fileUrl.substring(fileUrl.lastIndexOf("/")+1, fileUrl.length());//首先判断本地是否存在String path=req.getRealPath("");path=path.substring(0, path.lastIndexOf("\\")+1);File htmlFile=new File(path +  "OaFileToHtml\\"+name+".html");if(!htmlFile.exists()){//判断文件夹是否存在，创建文件夹String oaFilePath=path + "OaFile";//存放OA文档的文件夹路径;File oaFiles=new File(oaFilePath);if(!oaFiles.exists()){//如果文件夹不存在创建文件夹oaFiles.mkdirs();}//将OA消息存入本地File oafile=new File(oaFiles+ File.separator +name);OutputStream out = new FileOutputStream(oafile);//输出图片信息byte[] bytes = new byte[1024];  int cnt=0;  while ((cnt=in.read(bytes,0,bytes.length)) != -1) {  out.write(bytes, 0, cnt);  }  out.flush();out.close();in.close();//转换成htmlString htmlFilePath =path + "OaFileToHtml";//OA文件转成html的位置if(-1!=fileTypeTemp.indexOf("pdf")){//如果是pdf文件String htmlcontext = Pdf2htmlEXUtil.pdf2html_oa("D:\\pdf2htmlEX-v1.0\\pdf2htmlEX.exe",oafile.getPath(),htmlFilePath,oafile.getName());req.setAttribute("htmlcontext", htmlcontext);}else{//如果是office文件String htmlcontext=ConvertFileToHtml.toHtmlString(oafile, htmlFilePath);req.setAttribute("htmlcontext", 
htmlcontext);}}else{//已经存在转换成功的文档StringBuffer htmlSb = new StringBuffer();try {BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(htmlFile),Charset.forName("gb2312")));while (br.ready()) {htmlSb.append(br.readLine());}br.close();// 删除临时文件//htmlFile.delete();} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}// HTML文件字符串String htmlStr = htmlSb.toString();//System.out.println("htmlStr=" + htmlStr);if(-1!=fileTypeTemp.indexOf("pdf")){//如果是pdf文件req.setAttribute("htmlcontext", Pdf2htmlEXUtil.clearFormat(htmlStr,""));}else{//如果是office文件// 返回经过清洁的html文本req.setAttribute("htmlcontext", ConvertFileToHtml.clearFormat(htmlStr, ""));}}} catch (MalformedURLException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}return "lookfile";}}

-------------------pdf2html将pdf转成html文件-----------------

package com.wx.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Converts PDF files to HTML by running the external pdf2htmlEX command-line tool and
 * post-processes the generated markup so it can be embedded in a mobile page.
 */
public class Pdf2htmlEXUtil {

    /**
     * Matches a {@code <body ...>} element that carries attributes, up to the last
     * {@code </body>}. A bare {@code <body>} is intentionally not matched, so such
     * documents keep their {@code <head>} (styles and fonts) untouched.
     */
    private static final Pattern BODY_PATTERN = Pattern.compile("<body .*</body>");

    /** pdf2htmlEX writes its HTML output as UTF-8. */
    private static final Charset HTML_CHARSET = Charset.forName("UTF-8");

    /**
     * Runs pdf2htmlEX (with {@code --zoom 1.4}) to convert a PDF file to HTML.
     *
     * @param exeFilePath  path of the pdf2htmlEX executable
     * @param pdfFile      absolute path of the PDF file
     * @param destDir      folder for the generated HTML; omitted from the command when null or blank
     * @param htmlFileName name of the generated HTML file; {@code .html} is appended
     *                     when the name does not already contain it
     * @return {@code true} when the tool ran and exited with status 0; {@code false} on
     *         invalid arguments, start-up failure or a non-zero exit status
     */
    public static boolean pdf2html(String exeFilePath, String pdfFile, String destDir, String htmlFileName) {
        if (isEmpty(exeFilePath) || isEmpty(pdfFile) || isEmpty(htmlFileName)) {
            System.out.println("传递的参数有误！");
            return false;
        }
        return runConverter(buildCommand(exeFilePath, pdfFile, destDir, htmlFileName, "1.4"));
    }

    /**
     * Same as {@link #pdf2html(String, String, String, String)} but runs the
     * {@code pdf2htmlEX} found on the PATH and does not pass {@code --zoom}.
     *
     * @param pdfFile      absolute path of the PDF file
     * @param destDir      folder for the generated HTML; omitted from the command when null or blank
     * @param htmlFileName name of the generated HTML file; {@code .html} is appended
     *                     when the name does not already contain it
     * @return {@code true} when the tool ran and exited with status 0, {@code false} otherwise
     */
    public static boolean pdf2html_linux(String pdfFile, String destDir, String htmlFileName) {
        if (isEmpty(pdfFile) || isEmpty(htmlFileName)) {
            System.out.println("传递的参数有误！");
            return false;
        }
        return runConverter(buildCommand("pdf2htmlEX", pdfFile, destDir, htmlFileName, null));
    }

    /**
     * Converts an OA PDF file and returns the generated HTML, cleaned by
     * {@link #clearFormat(String, String)}.
     *
     * <p>The file that is read back is the one the converter was told to write, i.e.
     * {@code htmlFileName} with {@code .html} appended when missing.</p>
     *
     * @param exeFilePath  path of the pdf2htmlEX executable
     * @param pdfFile      absolute path of the PDF file
     * @param destDir      folder for the generated HTML
     * @param htmlFileName name of the generated HTML file
     * @return the cleaned HTML, or an empty string when the conversion failed
     */
    public static String pdf2html_oa(String exeFilePath, String pdfFile, String destDir, String htmlFileName) {
        if (!pdf2html(exeFilePath, pdfFile, destDir, htmlFileName)) {
            return "";
        }
        String outputName = htmlOutputName(htmlFileName);
        // Without --dest-dir the tool writes relative to the working directory.
        File htmlFile = hasText(destDir) ? new File(destDir, outputName) : new File(outputName);
        return clearFormat(readJoinedLines(htmlFile), "");
    }

    /**
     * Prepares converted HTML for embedding in a mobile page.
     *
     * <p>When the markup contains a {@code <body ...>} element with attributes, only
     * that element is kept and its tags are turned into {@code <DIV>}. Every
     * {@code <img} then receives an inline style stretching it to the viewport.</p>
     *
     * @param htmlStr    HTML to clean; expected on a single line, as produced by joining
     *                   the file's lines
     * @param docImgPath currently unused
     * @return the cleaned HTML, or an empty string when {@code htmlStr} is null
     */
    public static String clearFormat(String htmlStr, String docImgPath) {
        if (htmlStr == null) {
            return "";
        }
        Matcher bodyMatcher = BODY_PATTERN.matcher(htmlStr);
        if (bodyMatcher.find()) {
            // Keep only the body and turn the BODY tags into DIV.
            htmlStr = bodyMatcher.group().replaceFirst("<body", "<DIV").replaceAll("</body>", "</DIV>");
        }
        return htmlStr.replaceAll("<img", "<img style='width:100vw;height:100vh' ");
    }

    public static void main(String[] args) {
        // Try the converter itself.
        pdf2html("D:\\pdf2htmlEX-v1.0\\pdf2htmlEX.exe", "G:\\20181024.pdf", "D:\\pdf2htmlEX-v1.0\\HTML", "my.html");
        // Try the OA conversion, which also reads the result back.
        pdf2html_oa("D:\\pdf2htmlEX-v1.0\\pdf2htmlEX.exe", "G:\\20181024.pdf", "D:\\pdf2htmlEX-v1.0\\HTML", "my.html");
    }

    /**
     * Builds the pdf2htmlEX argument list. Each value is its own argument, so paths
     * containing spaces need no manual quoting.
     */
    private static List<String> buildCommand(String executable, String pdfFile, String destDir,
            String htmlFileName, String zoom) {
        List<String> command = new ArrayList<String>();
        command.add(executable);
        if (hasText(destDir)) {
            command.add("--dest-dir");
            command.add(destDir);
        }
        // Use as few HTML elements for text as possible (tool default: 0).
        command.add("--optimize-text");
        command.add("1");
        if (zoom != null) {
            command.add("--zoom");
            command.add(zoom);
        }
        // Outline processing switched off (0 = false, 1 = true).
        command.add("--process-outline");
        command.add("0");
        // Format of the fonts embedded in the HTML (ttf, otf, woff, svg).
        command.add("--font-format");
        command.add("woff");
        command.add(pdfFile);
        if (hasText(htmlFileName)) {
            command.add(htmlOutputName(htmlFileName));
        }
        return command;
    }

    /** Starts the converter, echoes its output and reports whether it exited with status 0. */
    private static boolean runConverter(List<String> command) {
        StringBuilder printable = new StringBuilder();
        for (String argument : command) {
            printable.append(argument).append(" ");
        }
        System.out.println("Command：" + printable);

        Process process = null;
        try {
            ProcessBuilder builder = new ProcessBuilder(command);
            // Merge stderr into stdout so one reader can drain everything in this
            // thread; an undrained pipe could otherwise block the child process.
            builder.redirectErrorStream(true);
            process = builder.start();
            echoOutput(process);
            return process.waitFor() == 0;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            // Boundary: callers rely on a boolean result rather than an exception.
            e.printStackTrace();
        } finally {
            if (process != null) {
                process.destroy();
            }
        }
        return false;
    }

    /** Prints every line the process writes until its output stream ends. */
    private static void echoOutput(Process process) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println("STDOUT>" + line);
            }
        } finally {
            reader.close();
        }
    }

    /** Reads the file as UTF-8 and joins its lines without separators; returns what was read so far on error. */
    private static String readJoinedLines(File file) {
        StringBuilder joined = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), HTML_CHARSET));
            String line;
            while ((line = reader.readLine()) != null) {
                joined.append(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return joined.toString();
    }

    /** Returns the file name the converter is asked to write: {@code .html} is appended unless already contained. */
    private static String htmlOutputName(String htmlFileName) {
        return htmlFileName.indexOf(".html") == -1 ? htmlFileName + ".html" : htmlFileName;
    }

    /** Returns whether the value is null or the empty string. */
    private static boolean isEmpty(String value) {
        return value == null || "".equals(value);
    }

    /** Returns whether the value contains at least one non-whitespace character. */
    private static boolean hasText(String value) {
        return value != null && !"".equals(value.trim());
    }
}

-------------------转码线程，可以不用线程，也可以同步转换-----------------

package com.wx.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
/**
 * Thread that consumes an {@link InputStream} line by line, echoing every line to
 * standard output as {@code type>line} and, when a redirect stream was supplied,
 * also writing the line to that stream.
 *
 * @author muyunfei
 *
 * <p>Modification History:</p>
 * <p>Date       Author      Description</p>
 * <p>------------------------------------------------------------------</p>
 * <p>Oct 25, 2018           牟云飞                Created</p>
 */
public class StreamGobbler extends Thread {

    /** Stream to consume. */
    InputStream is;

    /** Label printed in front of every echoed line. */
    String type;

    /** Optional stream that receives a copy of every line; may be null. */
    OutputStream os;

    /**
     * Creates a gobbler that only echoes to standard output.
     *
     * @param is   stream to consume
     * @param type label printed in front of every line
     */
    public StreamGobbler(InputStream is, String type) {
        this(is, type, null);
    }

    /**
     * Creates a gobbler that additionally copies every line to {@code redirect}.
     *
     * @param is       stream to consume
     * @param type     label printed in front of every line
     * @param redirect stream receiving a copy of each line, or null for none; it is
     *                 closed when the gobbler finishes
     */
    StreamGobbler(InputStream is, String type, OutputStream redirect) {
        this.is = is;
        this.type = type;
        this.os = redirect;
    }

    /** Reads the stream until it ends, then flushes and closes the redirect and the reader. */
    @Override
    public void run() {
        PrintWriter copy = (os == null) ? null : new PrintWriter(os);
        BufferedReader lines = null;
        try {
            lines = new BufferedReader(new InputStreamReader(is));
            for (String text = lines.readLine(); text != null; text = lines.readLine()) {
                if (copy != null) {
                    copy.println(text);
                }
                System.out.println(type + ">" + text);
            }
            if (copy != null) {
                copy.flush();
            }
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            if (copy != null) {
                copy.close();
            }
            if (lines != null) {
                try {
                    // Closing the buffered reader also closes the wrapped stream reader.
                    lines.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}