RTF转HTML(<div>标签)格式的方法(java)
最近有一个将RTF格式的文件转换成HTML格式的需求,在网上搜索后发现相关资料比较少,能找到的一些资料年代也比较久远。经过一番摸索和测试,终于成功地将RTF转成了HTML(主要以div标签为主),并且解决了烦人的中文乱码问题。但是很遗憾,目前RTF文件里面的表格和图片还无法转换(暂时没有找到可行的方案)。
1、首先,我们需要先借助WebCAT里面的RTF2HTML这个类,WebCAT的下载地址为:http://webcat.sourceforge.net/或者https://download.csdn.net/download/Rookie_cc/12657315。(你也可以直接参考下面的代码,不用下载WebCAT)
2、具体代码如下:
RTF2HTML工具类:
package com.fish.fileparser.utils;import java.awt.Color;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.StringTokenizer;import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.DefaultStyledDocument;
import javax.swing.text.Document;
import javax.swing.text.Element;
import javax.swing.text.StyleConstants;
import javax.swing.text.rtf.RTFEditorKit;/*** Description of the Class**@author bmartins*@created 22 de Agosto de 2002*/
/**
 * Converts RTF text into simple HTML built from {@code <div>}, {@code <font>},
 * {@code <b>}/{@code <i>}/{@code <u>} and {@code <table>} tags.
 *
 * <p>The RTF is parsed with Swing's {@link RTFEditorKit} into a
 * {@link DefaultStyledDocument}; the element tree of that document is then
 * walked and the attributes of every element are turned into HTML tags by an
 * internal state machine.
 *
 * <p>Parsing is best effort: failures while reading or scanning the RTF are
 * swallowed and yield an (almost) empty HTML document instead of an exception.
 * Instances keep the size of the last converted input and are therefore not
 * safe for concurrent use.
 */
public class RTF2HTML {

    /** RTF control word that starts a table row; used to detect tables. */
    public static final String TABLE = "\\trowd";

    /** RTF destination that carries an embedded picture; used to detect pictures. */
    public static final String PICTURE = "\\*\\shppic";

    /**
     * Tracks which HTML tags are currently open and emits opening/closing tags
     * whenever the attributes of the scanned document element change.
     *
     * <p>Declared {@code static} because it never touches the enclosing
     * converter instance.
     */
    private static final class HTMLStateMachine {

        /** HTML {@code align} values indexed by the StyleConstants alignment code. */
        private static final String[] ALIGN_NAMES = {"left", "center", "right"};

        /**
         * Padding unit used for indents and tabs. An HTML entity is used because
         * plain spaces are collapsed both by the whitespace tokenizer in
         * {@code convertRTFToHTML(String)} and by HTML rendering.
         */
        private static final String PAD = "&nbsp;";

        /** When {@code false}, font family changes are ignored. */
        private boolean acceptFonts = true;

        private String fontName = "";
        private Color color;
        private int size = -1;
        private int alignment = -1;
        private boolean bold;
        private boolean italic;
        private boolean underline;
        private double firstLineIndent;
        private double oldLeftIndent;
        private double oldRightIndent;
        private double leftIndent;
        private double rightIndent;
        private boolean firstLine;

        /**
         * Compares the attributes of {@code element} with the current state and
         * appends whatever tags are needed to move to the new state.
         *
         * @param attributeset attributes of the element being visited
         * @param out          buffer receiving the generated HTML
         * @param element      element being visited
         */
        void updateState(AttributeSet attributeset, StringBuilder out, Element element) {
            if (element.getName().equalsIgnoreCase("paragraph")) {
                firstLine = true;
            }

            leftIndent = updateDouble(attributeset, leftIndent, StyleConstants.LeftIndent);
            rightIndent = updateDouble(attributeset, rightIndent, StyleConstants.RightIndent);
            boolean indentChanged =
                    leftIndent != oldLeftIndent || rightIndent != oldRightIndent;
            if (indentChanged) {
                closeIndentTable(out, oldLeftIndent, oldRightIndent);
            }

            bold = updateBoolean(attributeset, StyleConstants.Bold, "b", bold, out);
            italic = updateBoolean(attributeset, StyleConstants.Italic, "i", italic, out);
            underline =
                    updateBoolean(attributeset, StyleConstants.Underline, "u", underline, out);
            size = updateFontSize(attributeset, size, out);
            color = updateFontColor(attributeset, color, out);
            if (acceptFonts) {
                fontName = updateFontName(attributeset, fontName, out);
            }
            alignment = updateAlignment(attributeset, alignment, out);
            firstLineIndent =
                    updateDouble(attributeset, firstLineIndent, StyleConstants.FirstLineIndent);

            if (indentChanged) {
                openIndentTable(out, leftIndent, rightIndent);
                oldLeftIndent = leftIndent;
                oldRightIndent = rightIndent;
            }
        }

        /** Opens the layout table that simulates a left/right indent, if any indent is set. */
        private void openIndentTable(StringBuilder out, double left, double right) {
            if (left == 0.0D && right == 0.0D) {
                return;
            }
            closeSubsetTags(out);
            out.append("<table><tr>");
            String padding = getSpaceTab((int) (left / 4D));
            if (padding.length() > 0) {
                out.append("<td>").append(padding).append("</td>");
            }
            out.append("<td>");
        }

        /** Closes the layout table opened by {@link #openIndentTable}, if any indent was set. */
        private void closeIndentTable(StringBuilder out, double left, double right) {
            if (left == 0.0D && right == 0.0D) {
                return;
            }
            closeSubsetTags(out);
            out.append("</td>");
            String padding = getSpaceTab((int) (right / 4D));
            if (padding.length() > 0) {
                out.append("<td>").append(padding).append("</td>");
            }
            out.append("</tr></table>");
        }

        /**
         * Closes every tag that is still open; called once after the whole
         * document has been scanned.
         *
         * @param out buffer receiving the generated HTML
         */
        void closeTags(StringBuilder out) {
            closeSubsetTags(out);
            if (alignment > -1) {
                out.append("</div>");
            }
            alignment = -1;
            closeIndentTable(out, oldLeftIndent, oldRightIndent);
        }

        /** Closes all open character-level tags and resets the character state. */
        private void closeSubsetTags(StringBuilder out) {
            if (bold) {
                out.append("</b>");
            }
            if (italic) {
                out.append("</i>");
            }
            if (underline) {
                out.append("</u>");
            }
            if (color != null) {
                out.append("</font>");
            }
            if (fontName.length() > 0) {
                out.append("</font>");
            }
            if (size > -1) {
                out.append("</font>");
            }
            bold = false;
            italic = false;
            underline = false;
            color = null;
            fontName = "";
            size = -1;
        }

        /**
         * Emits a {@code <div align=...>} when the alignment changes.
         * Alignment code 3 (justified) is rendered as left aligned.
         *
         * @return the alignment now in effect
         */
        private int updateAlignment(AttributeSet attributeset, int current, StringBuilder out) {
            Object value = attributeset.getAttribute(StyleConstants.Alignment);
            if (value == null) {
                return current;
            }
            int requested = ((Number) value).intValue();
            if (requested == 3) {
                requested = 0;
            }
            if (requested == current || requested < 0 || requested > 2) {
                return current;
            }
            if (current > -1) {
                out.append("</div>");
            }
            out.append("<div align=\"").append(ALIGN_NAMES[requested]).append("\">");
            return requested;
        }

        /**
         * Emits a {@code <font color=...>} when the foreground colour changes.
         * Colours are compared by value, so two distinct but equal
         * {@link Color} instances do not produce redundant tags.
         *
         * @return the colour now in effect
         */
        private Color updateFontColor(AttributeSet attributeset, Color current, StringBuilder out) {
            Object value = attributeset.getAttribute(StyleConstants.Foreground);
            if (value == null) {
                return current;
            }
            Color requested = (Color) value;
            if (!requested.equals(current)) {
                if (current != null) {
                    out.append("</font>");
                }
                out.append("<font color=\"#").append(makeColorString(requested)).append("\">");
            }
            return requested;
        }

        /**
         * Emits a {@code <font face=...>} when the font family changes.
         *
         * @return the font family now in effect
         */
        private String updateFontName(AttributeSet attributeset, String current, StringBuilder out) {
            Object value = attributeset.getAttribute(StyleConstants.FontFamily);
            if (value == null) {
                return current;
            }
            String requested = (String) value;
            if (!requested.equals(current)) {
                if (!current.equals("")) {
                    out.append("</font>");
                }
                out.append("<font face=\"").append(requested).append("\">");
            }
            return requested;
        }

        /** Returns the numeric attribute {@code key}, or {@code current} when it is not set. */
        private double updateDouble(AttributeSet attributeset, double current, Object key) {
            Object value = attributeset.getAttribute(key);
            return value == null ? current : ((Number) value).floatValue();
        }

        /**
         * Emits a {@code <font size=...>} when the font size changes; the HTML
         * size is the attribute value divided by four.
         *
         * @return the font size now in effect
         */
        private int updateFontSize(AttributeSet attributeset, int current, StringBuilder out) {
            Object value = attributeset.getAttribute(StyleConstants.FontSize);
            if (value == null) {
                return current;
            }
            int requested = ((Number) value).intValue();
            if (requested != current) {
                if (current != -1) {
                    out.append("</font>");
                }
                out.append("<font size=\"").append(requested / 4).append("\">");
            }
            return requested;
        }

        /**
         * Opens or closes {@code tag} when the boolean attribute {@code key} flips.
         *
         * @return the value now in effect
         */
        private boolean updateBoolean(
                AttributeSet attributeset, Object key, String tag, boolean current, StringBuilder out) {
            Object value = attributeset.getAttribute(key);
            if (value == null) {
                return current;
            }
            boolean requested = ((Boolean) value).booleanValue();
            if (requested != current) {
                out.append(requested ? "<" : "</").append(tag).append(">");
            }
            return requested;
        }

        /** Formats the RGB part of {@code value} as six lower-case hex digits. */
        private String makeColorString(Color value) {
            return String.format("%06x", value.getRGB() & 0xffffff);
        }

        /**
         * Prepends the first-line indent to {@code text} if it is the first
         * content of a paragraph.
         *
         * @param text content about to be appended
         * @return {@code text}, possibly prefixed with padding
         */
        String performFirstLineIndent(String text) {
            if (!firstLine) {
                return text;
            }
            firstLine = false;
            if (firstLineIndent == 0.0D) {
                return text;
            }
            return getSpaceTab((int) (firstLineIndent / 4D)) + text;
        }

        /**
         * Builds a run of {@code count} padding units.
         *
         * @param count number of units; values below one give an empty string
         * @return the padding
         */
        String getSpaceTab(int count) {
            StringBuilder padding = new StringBuilder();
            for (int i = 0; i < count; i++) {
                padding.append(PAD);
            }
            return padding.toString();
        }
    }

    /** Length (in chars) of the RTF text passed to the last conversion. */
    private int sizeCount = 0;

    /** Creates a converter. */
    public RTF2HTML() {
    }

    /**
     * Returns the length of the RTF text handled by the most recent
     * {@code convertRTFToHTML} call.
     *
     * @return number of characters, or 0 if nothing was converted yet
     */
    public int originalSize() {
        return sizeCount;
    }

    /**
     * Converts RTF read from {@code input}. The reader is consumed but not
     * closed; that stays the caller's responsibility.
     *
     * @param input source of the RTF text
     * @return the generated HTML document
     * @exception IOException if reading fails
     */
    public String convertRTFToHTML(Reader input) throws IOException {
        return convertRTFToHTML(readAll(input));
    }

    /**
     * Converts RTF text to an HTML document.
     *
     * <p>The generated markup is split on whitespace and re-joined with single
     * spaces; every token that starts with {@code http://} or {@code https://}
     * is wrapped in a link.
     *
     * @param s4 the RTF text
     * @return the generated HTML document
     */
    public String convertRTFToHTML(String s4) {
        sizeCount = s4.length();
        String markup = convertRTFStringToHTML(s4, new HTMLStateMachine());

        // StringBuilder instead of repeated String concatenation: the token loop
        // would otherwise copy the whole result once per token.
        StringBuilder html = new StringBuilder("<html><body>");
        StringTokenizer tokens = new StringTokenizer(markup);
        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();
            if (token.startsWith("http://") || token.startsWith("https://")) {
                // The href is single-quoted, so a quote inside the URL must not end it.
                token = "<a href='" + token.replace("'", "&#39;") + "'>" + token + "</a>";
            }
            html.append(token).append(' ');
        }
        return html.append("</body></html>").toString();
    }

    /**
     * Converts RTF read from {@code input} using the platform default charset.
     * The stream is consumed but not closed.
     *
     * @param input source of the RTF bytes
     * @return the generated HTML document
     * @exception IOException if reading fails
     */
    public String convertRTFToHTML(InputStream input) throws IOException {
        return convertRTFToHTML(readAll(new InputStreamReader(input)));
    }

    /**
     * Converts an RTF file. Files that contain a table or a picture are not
     * supported: a message is printed and {@code null} is returned.
     *
     * @param input the RTF file
     * @return the generated HTML document, or {@code null} if the file contains
     *         a table or a picture
     * @exception IOException if the file cannot be read
     */
    public String convertRTFToHTML(File input) throws IOException {
        String rtfStr;
        try (Reader reader = new FileReader(input)) {
            rtfStr = readAll(reader);
        }
        if (rtfStr.contains(TABLE)) {
            System.out.println("文件:" + input.getName() + "中包含表格,暂时无法转换!");
            return null;
        }
        if (rtfStr.contains(PICTURE)) {
            System.out.println("文件:" + input.getName() + "中包含图片,暂时无法转换!");
            return null;
        }
        return convertRTFToHTML(rtfStr);
    }

    /**
     * Downloads an RTF document over HTTP and converts it.
     *
     * @param input address of the RTF document
     * @return the generated HTML document
     * @exception IOException if the download fails
     */
    public String convertRTFToHTML(URL input) throws IOException {
        HttpURLConnection conn =
                connect(input, "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98");
        try (Reader reader = new InputStreamReader(conn.getInputStream())) {
            return convertRTFToHTML(readAll(reader));
        }
    }

    /**
     * Converts RTF text to the bare HTML fragment, without the surrounding
     * {@code <html><body>} wrapper and without link detection.
     *
     * @param s the RTF text
     * @return the generated HTML fragment
     */
    public String convertRTFStringToHTML(String s) {
        return convertRTFStringToHTML(s, new HTMLStateMachine());
    }

    /** Parses {@code rtf} and renders it with the given state machine. */
    private String convertRTFStringToHTML(String rtf, HTMLStateMachine htmlstatemachine) {
        DefaultStyledDocument parsed = new DefaultStyledDocument();
        readString(rtf, parsed, new RTFEditorKit());
        return scanDocument(parsed, htmlstatemachine);
    }

    /**
     * Feeds {@code rtf} to the editor kit. Best effort: if parsing fails the
     * document keeps whatever was inserted before the failure.
     */
    private void readString(String rtf, Document document, RTFEditorKit rtfeditorkit) {
        try {
            rtfeditorkit.read(new ByteArrayInputStream(rtf.getBytes()), document, 0);
        } catch (Exception ignored) {
            // Deliberately ignored: malformed RTF yields a partial document.
        }
    }

    /**
     * Renders the whole document. Best effort: any failure while scanning
     * results in an empty string.
     */
    private String scanDocument(Document document, HTMLStateMachine htmlstatemachine) {
        try {
            StringBuilder html = new StringBuilder();
            recurseElements(document.getDefaultRootElement(), document, html, htmlstatemachine);
            htmlstatemachine.closeTags(html);
            return html.toString();
        } catch (Exception ignored) {
            // Deliberately ignored: the partial output is discarded.
            return "";
        }
    }

    /** Visits every descendant of {@code element} in document order. */
    private void recurseElements(
            Element element,
            Document document,
            StringBuilder html,
            HTMLStateMachine htmlstatemachine) {
        for (int i = 0; i < element.getElementCount(); i++) {
            Element child = element.getElement(i);
            scanAttributes(child, document, html, htmlstatemachine);
            recurseElements(child, document, html, htmlstatemachine);
        }
    }

    /**
     * Updates the tag state for {@code element} and, for content elements,
     * appends the element's text. The text is HTML-escaped first, tabs become
     * eight padding units and line breaks become {@code <br />}.
     */
    private void scanAttributes(
            Element element,
            Document document,
            StringBuilder html,
            HTMLStateMachine htmlstatemachine) {
        try {
            int start = element.getStartOffset();
            String text = document.getText(start, element.getEndOffset() - start);
            htmlstatemachine.updateState(element.getAttributes(), html, element);
            if (element.getName().equalsIgnoreCase("content")) {
                text = escapeHtml(text);
                text = text.replace("\t", htmlstatemachine.getSpaceTab(8));
                text = text.replace("\n", "<br />\n");
                html.append(htmlstatemachine.performFirstLineIndent(text));
            }
        } catch (BadLocationException ignored) {
            // Deliberately ignored: an element with invalid offsets is skipped.
        }
    }

    /**
     * Converts an RTF file and returns the HTML as a stream of bytes in the
     * platform default charset. Unlike {@link #convertRTFToHTML(File)} this
     * does not reject tables or pictures.
     *
     * @param in the RTF file
     * @return the generated HTML document
     * @exception Exception if the file cannot be read
     */
    public InputStream parse(File in) throws Exception {
        try (InputStream stream = new FileInputStream(in)) {
            return parse(stream);
        }
    }

    /**
     * Downloads an RTF document over HTTP, converts it and returns the HTML as
     * a stream of bytes in the platform default charset.
     *
     * @param in address of the RTF document
     * @return the generated HTML document
     * @exception Exception if the download fails
     */
    public InputStream parse(URL in) throws Exception {
        HttpURLConnection conn = connect(in, "www.tumba.pt");
        try (InputStream stream = conn.getInputStream()) {
            return parse(stream);
        }
    }

    /**
     * Converts RTF read from {@code in} and returns the HTML as a stream of
     * bytes in the platform default charset. The input stream is consumed but
     * not closed.
     *
     * @param in source of the RTF bytes
     * @return the generated HTML document
     * @exception Exception if reading fails
     */
    public InputStream parse(InputStream in) throws Exception {
        String html = convertRTFToHTML(readAll(new InputStreamReader(in)));
        return new ByteArrayInputStream(html.getBytes());
    }

    /** Reads {@code source} to the end and returns everything as one string. */
    private static String readAll(Reader source) throws IOException {
        StringBuilder text = new StringBuilder();
        char[] chunk = new char[8192];
        int count;
        while ((count = source.read(chunk)) != -1) {
            text.append(chunk, 0, count);
        }
        return text.toString();
    }

    /** Opens an HTTP connection that follows redirects and sends the given user agent. */
    private static HttpURLConnection connect(URL address, String userAgent) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) address.openConnection();
        conn.setAllowUserInteraction(false);
        conn.setRequestProperty("User-agent", userAgent);
        conn.setInstanceFollowRedirects(true);
        conn.connect();
        return conn;
    }

    /** Replaces the characters that would be read as markup with HTML entities. */
    private static String escapeHtml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }
}
测试:
package com.fish.fileparser.utils;import org.apache.commons.lang.StringUtils;import java.io.*;
import java.util.ArrayList;
import java.util.List;public class Test {public static void main(String[] args) {convertRTFToHTML("F:\\rtf", "F:\\html");}/*** RTF转HTML* @param sourceFile 源文件夹(保存需要转换的rft文件)* @param targetFile 目标文件夹(保存转换后的html文件)*/public static void convertRTFToHTML(String sourceFile, String targetFile) {List<String> fileUrls = getFiles(sourceFile);for (String fileUrl : fileUrls) {try {//调用RTF2HTMLString rlt = new RTF2HTML().convertRTFToHTML(new File(fileUrl));if (StringUtils.isBlank(rlt)) {break;}//解决中文乱码问题String context = new String(rlt.getBytes("ISO8859_1"), "GBK");FileWriter fw;String[] fileSplit = fileUrl.split("\\\\");//拼接html文件名称String targetFileName = targetFile + "\\\\" + fileSplit[fileSplit.length - 1].split("\\.")[0] + ".html";File f = new File(targetFileName);try {if(!f.exists()){f.createNewFile();}fw = new FileWriter(f);BufferedWriter out = new BufferedWriter(fw);//将转换好的文件写入out.write(context, 0, context.length()-1);out.close();} catch (IOException e) {e.printStackTrace();}FileWriter w = new FileWriter(targetFile);w.write(context);w.close();} catch (Exception e) {e.printStackTrace();}}}/*** 获取某个目录下所有直接下级文件,不包括目录下的子目录的下的文件*/public static List<String> getFiles(String path) {List<String> files = new ArrayList<>();File file = new File(path);File[] tempList = file.listFiles();for (int i = 0; i < tempList.length; i++) {if (tempList[i].isFile()) {files.add(tempList[i].toString());}}return files;}
}
至此,rtf转html就完成了。
RTF转HTML(<div>标签)格式的方法(java)相关推荐
- html中div不在火狐居中,Firefox嵌套CSS中div标签居中问题解决方法
本文和大家重点讨论一下Firefox嵌套CSS中div标签的居中问题的解决方法,主要包括使用line-height垂直居中,清除容器浮动,不让链接折行,始终让Firefox显示滚动条等内容. Fire ...
- html语言中div的用法,HTML中div 标签的使用方法
HTML中 标签的使用方法 发布时间:2020-11-06 14:25:42 来源:亿速云 阅读:133 作者:小新 这篇文章主要介绍了HTML中 标签的使用方法,具有一定借鉴价值,需要的朋友可以参考 ...
- 如何用div装html,div标签如何使用的
div标签的使用方法:1.div标签可以通过设置class或id来获取样式:2.div标签也可以直接通过内联式的方法获取样式. 本文操作环境:Windows7系统.Dell G3电脑.HTML5&am ...
- html语言div怎么使用,什么是div标签?HTML中div标签怎么使用?
HTML中的div标签是实现网页的重要基础,是学习HTML知识必不可少的内容,本篇文章就来为大家介绍关于HTML中div标签的使用方法. 什么是div标签? div标签表示一组必要的结构.目的是将夹在 ...
- div和div之间画横线_HTML在两个div标签中间画一条竖线的方法
HTML在两个div标签中间画一条竖线的方法 发布时间:2020-09-14 11:34:51 来源:亿速云 阅读:170 作者:小新 这篇文章主要介绍HTML在两个div标签中间画一条竖线的方法,文 ...
- python获取div标签的id_Python 获取div标签中的文字实例
预备知识点 compile 函数 compile 函数用于编译正则表达式,生成一个正则表达式( Pattern )对象,供 match() 和 search() 这两个函数使用. 语法格式为: re. ...
- HTML5清除2个div标签的空白,DIV标签里面IMG图片下方留有空白怎么办
我们很多个人博客网站都会广告位投放一些图片广告,在网页设计中,图片是不可缺少的素材,但是在 div 标签里面放入 img 图片的话,有时候会在图片的下方出现一行空间的区域,如果单纯的图片不醒目或是图片 ...
- html p代码的效果,html元素 p 标签的使用方法及作用
html元素 标签是英文" paragraph"的缩写,是用来定义一个段落文本内容的,有关该标签的使用方法及作用详解如下: 标签的定义 标签是用来定义一段段落文本内容的: 标签定义 ...
- html中td内容不换行显示,html小技巧之td,div标签里内容不换行
html小技巧之td,div标签里内容不换行 在一些页面开发中,除自己操作外,引起换行的情况一般有: Ex一.td标签里内容长度过长引起换行: Ex二.div标签(或其他标记)里内容有文本和图片引起换 ...
- PDF转换word格式的方法总结
PDF转换word格式的方法总结 作者: 善用佳软 日期: 2009-12-09 分类: 1 文本办公, PDF 标签: PDF <全面接触PDF:最好用的PDF软件汇总>中主要介绍了pd ...
最新文章
- Retrofit 网络请求参数注解@Path @Field @Query 等使用
- iOS - Flutter混合开发
- C 标准库 - ctype.h之isalnum使用
- UA MATH564 概率论 Dirichlet分布
- window部署DM8的DEM系统
- [PAT乙级]1039 到底买不买
- UE4 HTC VIVE 多人联机
- Django后端编辑图片提取主要颜色API
- 有史以来最详细 安装部署Kubernetes Dashboard (补充解决官方出现的一些RBAC CERT等问题)
- oracle 导入导出指定表
- Python 模块学习
- 德鲁伊 oltp oltp_内存中OLTP –招待看门狗的三个关键点–检查点文件
- 跟工作选择障碍同学聊一聊现实的问题……
- Atitit 函数调用的原理与本质attilax总结 stdcall cdecl区别
- 硅谷课堂第十二课-公众号点播课程和直播管理模块
- tp6 集成swoole
- Linux挂载新硬盘与格式化数据盘和查看磁盘格式
- Linux 重定向和追加(、 指令)
- 【安全】Shellshock漏洞
- 作为网络推广从业人员每天的工作内容和日程