1、开始、关闭libreoffice服务;

开始之前同步字体文件时间,是因为创建soffice服务时,服务会检查所需加载的文件的时间,如果其认为时间不符,则其可能会重新加载,耗时较长,因此需事先统一时间。

使用时如果需要多次调用,最好每次调用均开启后关闭,否则libreoffice会创建一个缓存文档并越用越大,处理时间会增加。

class OfficeProcess(object):
    """Start and stop a LibreOffice (soffice) process that accepts UNO
    connections on localhost:2002."""

    def __init__(self):
        # Handle of the soffice launcher; stays 0 until start_office() runs.
        self.p = 0
        # Give every font file the same mtime. soffice checks the timestamps
        # of the files it loads and may reload them (slow) when they look
        # inconsistent, so they are unified before the service is started.
        self._touch = subprocess.Popen(
            'find /usr/share/fonts | xargs touch -m -t 201801010000.00', shell=True)

    def start_office(self):
        """Launch soffice and block until its UNO socket accepts a connection.

        Retries once per second, forever, until the connection succeeds.
        """
        # Make sure the timestamp sync has finished before soffice starts
        # reading the font files (also reaps the helper process).
        self._touch.wait()
        self.p = subprocess.Popen(
            'soffice --pidfile=sof.pid --invisible --accept="socket,host=localhost,port=2002;urp;"',
            shell=True)
        while True:
            try:
                local_context = uno.getComponentContext()
                resolver = local_context.getServiceManager().createInstanceWithContext(
                    'com.sun.star.bridge.UnoUrlResolver', local_context)
                resolver.resolve(
                    'uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext')
                return
            except Exception:
                # Not a bare except: KeyboardInterrupt/SystemExit must still
                # be able to break out of the wait loop.
                print(ts(), "wait for connecting soffice...")
                time.sleep(1)

    def stop_office(self):
        """Send SIGTERM to the pid recorded in sof.pid and wait for the launcher.

        Stopping is best-effort: an unreadable pid or a failed kill is
        reported and otherwise ignored.
        """
        with open("sof.pid", "rb") as f:
            try:
                os.kill(int(f.read()), signal.SIGTERM)
            except (OSError, ValueError) as exc:
                print(ts(), "could not stop soffice:", exc)
                return
        # self.p is still 0 when start_office() was never called.
        if self.p:
            self.p.wait()

2、init service manager

# Connect to the soffice listener and cache the remote handles on self.
local_context = uno.getComponentContext()
service_manager = local_context.getServiceManager()
resolver = service_manager.createInstanceWithContext(
    'com.sun.star.bridge.UnoUrlResolver', local_context)
# Component context resolved through the socket on localhost:2002.
self.ctx = resolver.resolve(
    'uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext')
self.smgr = self.ctx.ServiceManager
# Desktop service: used elsewhere to load and create documents.
self.desktop = self.smgr.createInstanceWithContext('com.sun.star.frame.Desktop', self.ctx)

3、从二进制数据中读取doc文档

def ImportFromMemory(self, data):
    """Load a Writer document from in-memory bytes.

    Resets ``self.doc`` to an empty result. On success ``self.document`` and
    ``self.text`` are set; on any load failure ``self.text`` is set to None.

    :param data: raw document bytes, wrapped in a ``uno.ByteSequence``.
    """
    istream = self.smgr.createInstanceWithContext(
        'com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(data), ))

    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream

    self.doc = {'doc': []}
    try:
        self.document = self.desktop.loadComponentFromURL(
            'private:stream/swriter', '_blank', 0, (pv, ))
        self.text = self.document.getText()
    except Exception:
        # Load failures are signalled through self.text rather than raised;
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        self.text = None

4、读取doc文档中的数据

def ExportToJson(self):
    """Parse ``self.text`` into ``self.doc`` and return it as a JSON string.

    If parsing fails for any reason the result is replaced by an empty
    document, ``{'doc': [], 'length': 0}``.
    """
    try:
        length = self.__ParseText(self.text, self.__Callback(self.doc['doc']))
        self.doc['length'] = length
    except Exception:
        self.doc = {'doc': [], 'length': 0}
    return json.dumps(self.doc)


@staticmethod
def __Callback(alist):
    """Return a one-argument function that appends its argument to *alist*."""
    def Append(sth):
        alist.append(sth)
    return Append

def __ParseText(self, text, func):
    """Walk the top-level elements of *text* and parse paragraphs and tables.

    :param text: object providing ``createEnumeration()``.
    :param func: callback that receives each parsed element.
    :return: sum of the lengths reported by the element parsers.
    """
    total = 0
    text_it = text.createEnumeration()
    while text_it.hasMoreElements():
        element = text_it.nextElement()
        if element.supportsService('com.sun.star.text.Paragraph'):
            total += self.__ParseParagraph(element, func)
        elif element.supportsService('com.sun.star.text.TextTable'):
            total += self.__ParseTable(element, func)
        # Any other element kind is ignored.
    return total

def __ParseParagraph(self, paragraph, func):
    """Parse one paragraph into ``{'paragraph': [...], 'length': n}``.

    The dict is handed to *func*; the accumulated length is returned.
    """
    p = {'paragraph': []}
    total = 0
    paragraph_it = paragraph.createEnumeration()
    while paragraph_it.hasMoreElements():
        portion = paragraph_it.nextElement()
        kind = portion.TextPortionType
        if kind == 'Text' or kind == 'TextField':
            total += self.__ParsePortionText(portion, self.__Callback(p['paragraph']))
        elif kind == 'SoftPageBreak':
            pass
        else:
            total += self.__ParseTextContent(portion, self.__Callback(p['paragraph']))
    # NOTE(review): the source had lost its indentation; this check is assumed
    # to run once per paragraph, after the portion loop - confirm.
    if hasattr(paragraph, 'createContentEnumeration'):
        total += self.__ParseTextContent(paragraph, self.__Callback(p['paragraph']))
    p['length'] = total
    func(p)
    return total


def __ParseTextContent(self, textcontent, func):
    """Parse the text contents (graphics, frames, group shapes) of *textcontent*.

    Embedded objects and unknown content kinds are skipped.
    """
    total = 0
    content_it = textcontent.createContentEnumeration('com.sun.star.text.TextContent')
    while content_it.hasMoreElements():
        element = content_it.nextElement()
        if element.supportsService('com.sun.star.text.TextGraphicObject'):
            total += self.__ParsePortionGraphic(element, func)
        elif element.supportsService('com.sun.star.text.TextEmbeddedObject'):
            pass
        elif element.supportsService('com.sun.star.text.TextFrame'):
            total += self.__ParseFrame(element, func)
        elif element.supportsService('com.sun.star.drawing.GroupShape'):
            total += self.__ParseGroup(element, func)
    return total


def __ParseFrame(self, frame, func):
    """Parse the text of *frame* into ``{'frame': [...], 'length': n}``."""
    f = {'frame': []}
    total = self.__ParseText(frame.getText(), self.__Callback(f['frame']))
    f['length'] = total
    func(f)
    return total


def __ParseGroup(self, group, func):
    """Parse every member of a group shape that supports drawing.Text."""
    total = 0
    for i in range(group.getCount()):
        it = group.getByIndex(i)
        if it.supportsService('com.sun.star.drawing.Text'):
            total += self.__ParseFrame(it, func)
    return total


def __ParsePortionText(self, portion_text, func):
    """Emit ``{'portion': text, 'length': len(text)}`` and return the length."""
    func({'portion': portion_text.String, 'length': len(portion_text.String)})
    return len(portion_text.String)


def __ParsePortionGraphic(self, portion_graphic, func):
    """Export a graphic as base64 PNG together with its size and crop values.

    Images contribute 0 to the text length, so this always returns 0.
    """
    gp = self.smgr.createInstanceWithContext('com.sun.star.graphic.GraphicProvider', self.ctx)
    stream = self.smgr.createInstanceWithContext('com.sun.star.io.TempFile', self.ctx)

    pv1 = PropertyValue()
    pv1.Name = 'OutputStream'
    pv1.Value = stream
    pv2 = PropertyValue()
    pv2.Name = 'MimeType'
    pv2.Value = 'image/png'
    gp.storeGraphic(portion_graphic.Graphic, (pv1, pv2))

    # Rewind the temp file and read back everything that was written.
    stream.getOutputStream().flush()
    stream.seek(0)
    size = stream.getInputStream().available()
    b = uno.ByteSequence(b'')
    stream.seek(0)
    size, b = stream.getInputStream().readBytes(b, size)

    crop = portion_graphic.GraphicCrop
    img = {'image': base64.b64encode(b.value).decode('ascii')}
    img['height'] = portion_graphic.Height
    img['width'] = portion_graphic.Width
    img['actualheight'] = portion_graphic.ActualSize.Height
    img['actualwidth'] = portion_graphic.ActualSize.Width
    img['croptop'] = crop.Top
    img['cropbottom'] = crop.Bottom
    img['cropleft'] = crop.Left
    img['cropright'] = crop.Right
    img['length'] = 0
    func(img)
    return 0


def __ParseTable(self, table, func):
    """Parse a text table into a ``{row: {col: cell}}`` dict and emit it.

    A table whose cell spans do not add up to rows * columns, or that raises
    while being parsed, is discarded and contributes length 0.
    """
    total = 0
    try:
        matrix = self.__GetTableMatrix(table)
        seps = self.__GetTableSeparators(table)
        t = {}
        count = 0
        for ri in matrix:
            t[ri] = {}
            for ci in matrix[ri]:
                # Copy the cell metadata without the live UNO cell object.
                t[ri][ci] = dict(matrix[ri][ci])
                del t[ri][ci]['cell']
                t[ri][ci]['content'] = []
                total += self.__ParseText(matrix[ri][ci]['cell'],
                                          self.__Callback(t[ri][ci]['content']))
                count += t[ri][ci]['rowspan'] * t[ri][ci]['colspan']
        if count != len(t) * len(seps):
            raise ValueError('count of cells error')
        # NOTE(review): self.table_id is not initialised anywhere in this
        # snippet; if it is missing, the AttributeError lands in the handler
        # below and the table is dropped - confirm it is set elsewhere.
        func({'table': t, 'row': len(t), 'column': len(seps), 'length': total,
              'tableid': self.table_id})
        self.table_id += 1
    except Exception:
        total = 0
        print('discard wrong table')
    return total


@staticmethod
def __GetTableSeparators(table):
    """Return the sorted, de-duplicated column separator positions of *table*.

    The list also contains ``TableColumnRelativeSum``. A position exactly one
    unit below its successor is merged into that successor.
    """
    rows = table.getRows()
    result = [table.TableColumnRelativeSum]
    for ri in range(rows.getCount()):
        result += [s.Position for s in rows.getByIndex(ri).TableColumnSeparators]
    result = sorted(set(result))
    for i in range(len(result) - 1):
        if result[i] + 1 == result[i + 1]:
            result[i] += 1
    return sorted(set(result))


@staticmethod
def __NameToRC(name):
    """Convert a cell name such as ``'B3'`` to zero-based ``(row, column)``.

    Upper-case letters map to 0-25 and lower-case letters to 26-51.
    """
    # NOTE(review): multi-letter column names are accumulated as plain
    # base-52 digits; confirm this matches LibreOffice's naming for tables
    # wider than 52 columns.
    r = int(re.sub('[A-Za-z]', '', name)) - 1
    cstr = re.sub('[0-9]', '', name)
    c = 0
    for ch in cstr:
        if 'A' <= ch <= 'Z':
            c = c * 52 + ord(ch) - ord('A')
        else:
            c = c * 52 + 26 + ord(ch) - ord('a')
    return r, c


@staticmethod
def __GetTableMatrix(table):
    """Build ``{row: {col: {'cell', 'rowspan', 'name', 'colspan'}}}`` for *table*.

    ``colspan`` is the number of table-wide separator slots between the
    cell's left and right edges.
    """
    result = {}
    for name in table.getCellNames():
        ri, ci = WordToJson.__NameToRC(name)
        cell = table.getCellByName(name)
        result.setdefault(ri, {})[ci] = {'cell': cell, 'rowspan': cell.RowSpan, 'name': name}

    seps = WordToJson.__GetTableSeparators(table)

    def slot(position):
        # __GetTableSeparators may have merged a position into position + 1.
        return seps.index(position) if position in seps else seps.index(position + 1)

    for ri in result:
        sep = [s.Position for s in table.getRows().getByIndex(ri).TableColumnSeparators]
        sep = sorted(set(sep + [table.TableColumnRelativeSum]))
        for ci in result[ri]:
            right = slot(sep[ci])
            left = -1 if ci == 0 else slot(sep[ci - 1])
            result[ri][ci]['colspan'] = right - left
    return result

5、写doc文档

# Create an empty Writer document to write into, with a cursor on its text.
self.doco = self.desktop.loadComponentFromURL('private:factory/swriter', '_blank', 0, ())
self.texto = self.doco.getText()
self.cursoro = self.texto.createTextCursor()
# Bottom margin applied through the cursor; presumably in 1/100 mm - confirm.
self.cursoro.ParaBottomMargin = 500

def __WriteText(self, text, texto, cursoro):
    """Write each item of *text* (paragraph, image or table) at *cursoro*."""
    for it in text:
        if 'paragraph' in it:
            self.__WriteParagraph(it, texto, cursoro)
        elif 'image' in it:
            self.__WritePortionGraphic(it, texto, cursoro)
        elif 'table' in it:
            self.__WriteTable(it, texto, cursoro)


def __WriteParagraph(self, paragraph, texto, cursoro):
    """Insert a non-empty paragraph followed by a paragraph break.

    When the item has a ``'result'`` list, each entry's ``'trans_sen'`` is
    inserted; otherwise ``paragraph['paragraph']`` is inserted as is.
    """
    if paragraph['length'] > 0:
        if 'result' in paragraph:
            for it in paragraph['result']:
                texto.insertString(cursoro, it['trans_sen'], False)
        else:
            texto.insertString(cursoro, paragraph['paragraph'], False)
        # NOTE(review): the source had lost its indentation; the break is
        # assumed to belong to the non-empty branch - confirm.
        texto.insertControlCharacter(cursoro, ControlCharacter.PARAGRAPH_BREAK, False)


def __WritePortionGraphic(self, portion_graphic, texto, cursoro):
    """Insert a base64 PNG image with its stored size and crop values.

    Missing ``actualheight``/``actualwidth`` fall back to ``height``/``width``;
    missing crop values default to 0.
    """
    png = base64.b64decode(portion_graphic['image'])
    gp = self.smgr.createInstanceWithContext('com.sun.star.graphic.GraphicProvider', self.ctx)
    istream = self.smgr.createInstanceWithContext(
        'com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(png), ))
    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream

    actualsize = uno.createUnoStruct('com.sun.star.awt.Size')
    actualsize.Height = (portion_graphic['actualheight']
                         if 'actualheight' in portion_graphic else portion_graphic['height'])
    actualsize.Width = (portion_graphic['actualwidth']
                        if 'actualwidth' in portion_graphic else portion_graphic['width'])

    graphiccrop = uno.createUnoStruct('com.sun.star.text.GraphicCrop')
    graphiccrop.Top = portion_graphic['croptop'] if 'croptop' in portion_graphic else 0
    graphiccrop.Bottom = portion_graphic['cropbottom'] if 'cropbottom' in portion_graphic else 0
    graphiccrop.Left = portion_graphic['cropleft'] if 'cropleft' in portion_graphic else 0
    graphiccrop.Right = portion_graphic['cropright'] if 'cropright' in portion_graphic else 0

    image = self.doco.createInstance('com.sun.star.text.TextGraphicObject')
    image.Surround = NONE
    image.Graphic = gp.queryGraphic((pv, ))
    image.Height = portion_graphic['height']
    image.Width = portion_graphic['width']
    image.setPropertyValue('ActualSize', actualsize)
    image.setPropertyValue('GraphicCrop', graphiccrop)

    texto.insertTextContent(cursoro, image, False)
    texto.insertControlCharacter(cursoro, ControlCharacter.PARAGRAPH_BREAK, False)


def __WriteTable(self, table, texto, cursoro):
    """Insert a table, merge spanned cells and write every cell's content."""
    tableo = self.doco.createInstance('com.sun.star.text.TextTable')
    tableo.initialize(table['row'], table['column'])
    texto.insertTextContent(cursoro, tableo, False)

    tcursoro = tableo.createCursorByCellName("A1")
    # Probe: if extending the selection one row down leaves the range name
    # at 'A1', no merging is attempted for this table.
    hitbug = False
    if table['row'] > 1:
        tcursoro.goDown(1, True)
        hitbug = tcursoro.getRangeName() == 'A1'

    # Row and column keys are strings here; iterate them in numeric order.
    for ri in sorted(int(r) for r in table['table']):
        rs = table['table'][str(ri)]
        for ci in sorted(int(c) for c in rs):
            cell = rs[str(ci)]
            if not hitbug and (cell['rowspan'] > 1 or cell['colspan'] > 1):
                tcursoro.gotoCellByName(cell['name'], False)
                if cell['rowspan'] > 1:
                    tcursoro.goDown(cell['rowspan'] - 1, True)
                if cell['colspan'] > 1:
                    tcursoro.goRight(cell['colspan'] - 1, True)
                tcursoro.mergeRange()
            ctexto = tableo.getCellByName(cell['name'])
            if ctexto is None:
                continue
            ccursoro = ctexto.createTextCursor()
            ccursoro.CharWeight = FontWeight.NORMAL
            ccursoro.CharWeightAsian = FontWeight.NORMAL
            ccursoro.ParaAdjust = LEFT
            self.__WriteText(cell['content'], ctexto, ccursoro)

6、生成二进制的doc文档数据

        # Store the output document into a Pipe using the 'MS Word 2007 XML'
        # filter, then read everything the pipe holds into datao.
        streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
        self.doco.storeToURL(
            'private:stream',
            (PropertyValue('FilterName', 0, 'MS Word 2007 XML', 0),
             PropertyValue('OutputStream', 0, streamo, 0)))
        streamo.flush()
        _, datao = streamo.readBytes(None, streamo.available())

7、从doc文档数据生成pdf的二进制数据

        # Store the output document into a Pipe using the 'writer_pdf_Export'
        # filter, then read everything the pipe holds into datap.
        streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
        self.doco.storeToURL(
            'private:stream',
            (PropertyValue('FilterName', 0, 'writer_pdf_Export', 0),
             PropertyValue('OutputStream', 0, streamo, 0)))
        streamo.flush()
        _, datap = streamo.readBytes(None, streamo.available())

8、读取excel二进制数据

  def ImportFromMemory(self, data):
      """Load a Calc document from in-memory bytes.

      Resets ``self.doc`` to an empty result. On success ``self.document``
      and ``self.sheets`` are set; on any load failure ``self.sheets`` is
      set to None.

      :param data: raw document bytes, wrapped in a ``uno.ByteSequence``.
      """
      istream = self.smgr.createInstanceWithContext(
          'com.sun.star.io.SequenceInputStream', self.ctx)
      istream.initialize((uno.ByteSequence(data), ))

      pv = PropertyValue()
      pv.Name = 'InputStream'
      pv.Value = istream

      self.doc = {'doc': []}
      try:
          print("before loadComponentFromURL")
          self.document = self.desktop.loadComponentFromURL(
              'private:stream/scalc', '_blank', 0, (pv, ))
          self.sheets = self.document.getSheets()
          print("ImportFromMemory done")
      except Exception:
          # Failure is signalled through self.sheets rather than raised;
          # KeyboardInterrupt/SystemExit are no longer swallowed.
          print("ImportFromMemory failed")
          self.sheets = None

9、读取excel的文本数据

    def ExportToJson(self):
        """Parse ``self.sheets`` into ``self.doc`` and return it as a JSON string.

        If parsing fails for any reason the result is replaced by an empty
        document, ``{'doc': [], 'length': 0}``.
        """
        try:
            length = self.__ParseText(self.sheets, self.__Callback(self.doc['doc']))
            self.doc['length'] = length
        except Exception:
            self.doc = {'doc': [], 'length': 0}
        return json.dumps(self.doc)

    def __ParseText(self, sheets, func):
        """Parse every spreadsheet in *sheets*; return the total text length."""
        total = 0
        sheets_it = sheets.createEnumeration()
        while sheets_it.hasMoreElements():
            element = sheets_it.nextElement()
            if element.supportsService('com.sun.star.sheet.Spreadsheet'):
                total += self.__ParseSpreadsheet(element, func)
        return total

    def __ParseSpreadsheet(self, spreadsheet, func):
        """Collect the TEXT cells among the visible cells of *spreadsheet*.

        Emits ``{'spreadsheet': [...], 'length': n}`` through *func*. EMPTY,
        VALUE and FORMULA cells are only printed, not exported.
        """
        total = 0
        p = {'spreadsheet': []}
        visible_cells_it = spreadsheet.queryVisibleCells().getCells().createEnumeration()
        while visible_cells_it.hasMoreElements():
            cell = visible_cells_it.nextElement()
            # Named cell_type so the builtin ``type`` is not shadowed.
            cell_type = cell.getType()
            if cell_type == self.EMPTY:
                print("cell.type==empty")
            elif cell_type == self.VALUE:
                print("cell.type==VALUE", "value=", cell.getValue(), cell.getCellAddress ())
            elif cell_type == self.TEXT:
                print("cell.type==TEXT","content=", cell.getString().encode("UTF-8"), cell.getCellAddress ())
                total += self.__ParseCellText(spreadsheet, cell, self.__Callback(p['spreadsheet']))
                print("__ParseCellText=", p)
            elif cell_type == self.FORMULA:
                print("cell.type==FORMULA", "formula=", cell.getValue())
        p['length'] = total
        func(p)
        return total

    def __ParseCellText(self, sheet, cell, func):
        """Emit one text cell with its column, row and sheet name.

        If the address or sheet name cannot be read, ``x`` and ``y`` are -1
        and ``sheetname`` is None. Returns the length of the cell string.
        """
        try:
            x = cell.getCellAddress().Column
            y = cell.getCellAddress().Row
            sheetname = sheet.getName()
        except Exception:
            x = -1
            y = -1
            sheetname = None
        func({'celltext': cell.getString(), 'x': x, 'y': y, 'sheetname': sheetname,
              'length': len(cell.getString())})
        return len(cell.getString())

     # CellContentType enum members, compared against cell.getType().
     self.EMPTY = uno.Enum("com.sun.star.table.CellContentType", "EMPTY")
     self.TEXT = uno.Enum("com.sun.star.table.CellContentType", "TEXT")
     self.FORMULA = uno.Enum("com.sun.star.table.CellContentType", "FORMULA")
     self.VALUE = uno.Enum("com.sun.star.table.CellContentType", "VALUE")

10、替换excel的文本信息

    def ImportFromJson(self, data):
        """Decode *data* (a JSON string) and write its ``'doc'`` list back.

        Invalid JSON raises from ``json.loads``. Failures while writing are
        reported on stdout instead of being silently discarded; they are
        still not raised.
        """
        doc = json.loads(data)
        try:
            self.__WriteText(doc['doc'])
        except Exception as exc:
            print("ImportFromJson failed:", exc)

    def __WriteText(self, text):
        """Write every item that has 'paragraph' and 'sheetname' into its sheet.

        The sheet is looked up by name and reused while consecutive items
        target the same sheet; items whose sheet cannot be found are skipped.
        """
        print("__WriteText begin:", text)
        sheet = None
        for it in text:
            if 'paragraph' in it and 'sheetname' in it:
                if sheet is None or sheet.getName() != it['sheetname']:
                    try:
                        sheet = self.sheets.getByName(it['sheetname'])
                        print("getsheet:", it['sheetname'], "=", sheet.getName())
                    except Exception:
                        sheet = None
                        continue
                self.__WriteParagraph(it, sheet)

    def __WriteParagraph(self, paragraph, sheet):
        """Replace the cell at (x, y) with each ``'trans_sen'`` of 'result'.

        Does nothing for empty items or when the cell cannot be resolved.
        """
        print("__WriteParagraph")
        if paragraph['length'] > 0:
            try:
                x = paragraph['x']
                y = paragraph['y']
                print("getcell:", x, y)
                cell = sheet.getCellByPosition(x, y)
                print("getcell done")
            except Exception:
                return
            if 'result' in paragraph:
                for it in paragraph['result']:
                    print("cell=", cell.getString())
                    cell.setString(it['trans_sen'])
                    print("cell,", cell.getString(), ",done")

11、生成excel文档二进制数据

      # Store the spreadsheet into a Pipe using the 'Calc MS Excel 2007 XML'
      # filter, then read everything the pipe holds into datao.
      streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
      self.document.storeToURL(
          'private:stream',
          (PropertyValue('FilterName', 0, 'Calc MS Excel 2007 XML', 0),
           PropertyValue('OutputStream', 0, streamo, 0)))
      streamo.flush()
      _, datao = streamo.readBytes(None, streamo.available())

12、生成excel的pdf文档

        # Store the spreadsheet into a Pipe using the 'calc_pdf_Export'
        # filter, then read everything the pipe holds into datap.
        streamo = self.smgr.createInstanceWithContext('com.sun.star.io.Pipe', self.ctx)
        self.document.storeToURL(
            'private:stream',
            (PropertyValue('FilterName', 0, 'calc_pdf_Export', 0),
             PropertyValue('OutputStream', 0, streamo, 0)))
        streamo.flush()
        _, datap = streamo.readBytes(None, streamo.available())

转载于:https://www.cnblogs.com/zl1991/p/10615881.html

libreoffice python 操作word及excel文档相关推荐

  1. libreoffice python_libreoffice python 操作word及excel文档的方法

    1.开始.关闭libreoffice服务: 开始之前同步字体文件时间,是因为创建soffice服务时,服务会检查所需加载的文件的时间,如果其认为时间不符,则其可能会重新加载,耗时较长,因此需事先统一时 ...

  2. PyPDF2--如何使用python操作你的PDF文档

    PyPDF2–如何使用python操作你的PDF文档 前言 大家好!最近想操作一下PDF文档,总是收费,于是浅尝辄止地了解了一下python当中的PyPDF2这个库.借助本篇博客总结了一下个人所学到的 ...

  3. word、excel文档内容更新技术方案

    需求背景 惯例先说下背景. 生产.研发业务上往往使用大量word和excel文档来作为资料载体,如操作规程.控制手册.卡片--,这些文档会反复使用到一些设备.工艺等参数数据.参数属性主要是名称.编码. ...

  4. mac如何用python打开excel,Mac——利用Python读取与写入Excel文档

    Mac--利用Python读取与写入Excel文档 目的:按照自定义的格式写入或读取Excel文档,如标红加粗等 Python代码: import xlwt import pandas as pd d ...

  5. python处理word或者pdf文件_利用python程序生成word和PDF文档的方法

    一.程序导出word文档的方法 将web/html内容导出为world文档,再java中有很多解决方案,比如使用Jacob.Apache POI.Java2Word.iText等各种方式,以及使用fr ...

  6. word插入excel文档显示图标的方法

    描述:word插入excel文档显示图标的方法 步骤: 菜单栏->插入->对象 由文件创建->浏览文件夹 勾选显示为图标->确定 文件就被插入word了

  7. 计算机无法建立word文档,(电脑中右键不能新建word和excel文档怎么办)为何电脑无法新建excle...

    电脑中右键不能新建word和excel文档怎么办 开始,找到运行命,输入regedit,打开注册表. 在左侧找到hkey_classes_root目录,并展开. 首先,我们利用ctrl f 快捷键,查 ...

  8. Word及Excel文档的Python脚本处理

    第一部分. Openpyxl部分: 1. 使用安装openpyxl的正确的指令是: A. pip install openpyxl B. install openpyxl C. setup openp ...

  9. c#获取txt,word,excel文档内容方法

    获取txt文档的内容  1 public string ResumeTxt(string path)  2 {  3    string str = string.Empty;  4         ...

最新文章

  1. Linux 创建子进程执行任务
  2. php自定义扩展函数,Laravel框架中扩展函数、扩展自定义类的方法
  3. http默认超时时间_Bof 是一个HTTP客户端,旨在尽可能方便用户使用
  4. CRI-O将如何把Kubernetes推上容器生态系统的中心位置
  5. DiscuzNT改造-远程内容自动采集-DNT2.5(定时采集、源码下载)
  6. weka: FCBFSearch
  7. 【NOI1995】石子合并
  8. java model 中文乱码,java传值乱码解决方法
  9. 制作.sens数据集跑通bundlefusion
  10. 父子组建传值_浅谈Vue父子组件和非父子组件传值问题
  11. 乐高创意机器人moc_LEGO乐高MOC作品欣赏:超有爱机器人偶E-MOTE
  12. 让sublime编译php、js
  13. Folder and jar
  14. php kindeditor,在PHP使用kindeditor
  15. python使用win32*模块模拟人工操作——城通网盘下载器(一)
  16. c语言温度转换作业帮,C++摄氏度和华氏度互相转化
  17. Linux 下的Bluetooth 架构
  18. 小程序与H5,APP有什么不同-小程序支付开发1
  19. 项目场景:jetson nano conda Illegal instruction (core dumped)
  20. 集成公告|Anima协议上线Moonbeam

热门文章

  1. 微信皮肤css,微信小程序实现皮肤功能(夜间模式)_婳祎_前端开发者
  2. StereoPannerNode
  3. 图解TCPIP-MIME
  4. opencv 轮廓层次结构
  5. requests与bs4编码
  6. Pandas 求余运算
  7. three.js和php,详解three.js本地运行的方法
  8. linux系统下部署go语言环境
  9. 2020年海南大学计算机调剂,2020年海南大学招收调剂生
  10. php注册树模式,PHP设计模式之注册树模式