引入jar包

<dependencies>
    <dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>3.15</version></dependency>
    <dependency><groupId>com.syncthemall</groupId><artifactId>boilerpipe</artifactId><version>1.2.1</version></dependency>
</dependencies>

关键代码工具类


import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;import org.apache.poi.xssf.model.SharedStringsTable;
import org.springframework.stereotype.Component;
import org.xml.sax.helpers.DefaultHandler;import java.io.InputStream;import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;@Component
public class ExcelUtil extends DefaultHandler {private SharedStringsTable sst;private String lastContents;private boolean nextIsString;private int sheetIndex = -1;//存储某一行的数据private List<String> rowlist = new ArrayList<String>();private int curRow = 0;private int curCol = 0;private String col = "";//存储某个sheet的数据private List<List<String>> list = new ArrayList<>();//存储整个excel的数据private List<List<List<String>>> data;public static void main(String[] args) throws Exception {new ExcelUtil().process("C:\\Users\\zhang\\Desktop\\1-50蔡晨浩等人案件要素 - 删空白行和列(1).xlsx");}/*** 读取第一个工作簿的入口方法** @param path*/public void readOneSheet(String path) throws Exception {OPCPackage pkg = OPCPackage.open(path);XSSFReader r = new XSSFReader(pkg);SharedStringsTable sst = r.getSharedStringsTable();XMLReader parser = fetchSheetParser(sst);InputStream sheet = r.getSheet("rId1");InputSource sheetSource = new InputSource(sheet);parser.parse(sheetSource);sheet.close();}/*** 读取所有工作簿的入口方法** @param path* @throws Exception*/public List<List<List<String>>> process(String path) throws Exception {data = new ArrayList<>();OPCPackage pkg = OPCPackage.open(path);XSSFReader r = new XSSFReader(pkg);SharedStringsTable sst = r.getSharedStringsTable();XMLReader parser = fetchSheetParser(sst);Iterator<InputStream> sheets = r.getSheetsData();while (sheets.hasNext()) {
//            System.out.println("换了个sheet=========================================================");curRow = 0;sheetIndex++;InputStream sheet = sheets.next();InputSource sheetSource = new InputSource(sheet);parser.parse(sheetSource);data.add(list);list = new ArrayList<>();sheet.close();}return data;}/*** 该方法自动被调用,每读一行调用一次,在方法中写自己的业务逻辑即可** @param sheetIndex 工作簿序号* @param curRow     处理到第几行* @param rowList    当前数据行的数据集合*/public void optRow(int sheetIndex, int curRow, List<String> rowList) {list.add(rowList);String temp = "";for (String str : rowList) {temp += str + "_";}
//        System.out.println("最终值:"+temp);   }public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");this.sst = sst;parser.setContentHandler(this);return parser;}public void startElement(String uri, String localName, String name,Attributes attributes) throws SAXException {// c => 单元格  if (name.equals("c")) {col = attributes.getValue("r");// 如果下一个元素是 SST 的索引,则将nextIsString标记为true  String cellType = attributes.getValue("t");if (cellType != null && cellType.equals("s")) {nextIsString = true;} else {nextIsString = false;}}// 置空   lastContents = "";}public void endElement(String uri, String localName, String name)throws SAXException {// 根据SST的索引值的到单元格的真正要存储的字符串  // 这时characters()方法可能会被调用多次  if (nextIsString) {try {int idx = Integer.parseInt(lastContents);lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();nextIsString = false;
//                System.out.println("lastContents值为:"+lastContents);} catch (Exception e) {e.printStackTrace();}}// v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引 // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符  if (name.equals("v")) {String value = lastContents.trim();rowlist.add(curCol, value);curCol++;} else {// 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法  if (name.equals("row")) {optRow(sheetIndex, curRow, rowlist);outMap(list, curRow);rowlist = new ArrayList<>();curRow++;curCol = 0;}}}public void characters(char[] ch, int start, int length)throws SAXException {// 得到单元格内容的值  lastContents += new String(ch, start, length);}/*** 测试输出** @param list* @param row*/private void outMap(List<List<String>> list, int row) {List<String> strings = list.get(row);for (String string : strings) {System.out.println(string);}
//        System.out.println("一行数据结束");}}  

参考文章：《POI 事件模式解析xlsx》（决战屁屁虾的博客，CSDN）

后面发现以上代码无法识别出空白的单元格，所以又查阅了多方资料，在官方 API 文档中找到一个可以解决该问题的 demo，但 POI 的版本需要在 4.1.0 以上才可以。

/* ====================================================================Licensed to the Apache Software Foundation (ASF) under one or morecontributor license agreements.  See the NOTICE file distributed withthis work for additional information regarding copyright ownership.The ASF licenses this file to You under the Apache License, Version 2.0(the "License"); you may not use this file except in compliance withthe License.  You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.
==================================================================== */import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;import javax.xml.parsers.ParserConfigurationException;import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.XMLHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.Styles;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;/*** A rudimentary XLSX -&gt; CSV processor modeled on the* POI sample program XLS2CSVmra from the package* org.apache.poi.hssf.eventusermodel.examples.* As with the HSSF version, this tries to spot missing*  rows and cells, and output empty entries for them.* <p>* Data sheets are read using a SAX parser to keep the* memory footprint relatively small, so this should be* able to read enormous workbooks.  The styles table and* the shared-string table must be kept in memory.  The* standard POI styles table class is used, but a custom* (read-only) class is used for the shared string table* because the standard POI SharedStringsTable grows very* quickly with the number of unique strings.* <p>* For a more advanced implementation of SAX event parsing* of XLSX files, see {@link XSSFEventBasedExcelExtractor}* and {@link XSSFSheetXMLHandler}. Note that for many cases,* it may be possible to simply use those with a custom* {@link SheetContentsHandler} and no SAX code needed of* your own!*/
@SuppressWarnings({"java:S106","java:S4823","java:S1192"})
public class XLSX2CSV {//存储某个sheet的数据private List<List<String>> list = new ArrayList<>();//存储整个excel的数据private List<List<List<String>>> data = new ArrayList<>();//存储某一行的数据private List<String> rowlist = new ArrayList<String>();/*** Uses the XSSF Event SAX helpers to do most of the work*  of parsing the Sheet XML, and outputs the contents*  as a (basic) CSV.*/private class SheetToCSV implements SheetContentsHandler {private boolean firstCellOfRow;private int currentRow = -1;private int currentCol = -1;private void outputMissingRows(int number) {for (int i=0; i<number; i++) {for (int j=0; j<minColumns; j++) {output.append(',');}output.append('\n');}}@Overridepublic void startRow(int rowNum) {// If there were gaps, output the missing rowsoutputMissingRows(rowNum-currentRow-1);// Prepare for this rowfirstCellOfRow = true;currentRow = rowNum;currentCol = -1;}@Overridepublic void endRow(int rowNum) {list.add(rowlist);// Ensure the minimum number of columnsfor (int i=currentCol; i<minColumns; i++) {output.append(',');}output.append('\n');}@Overridepublic void cell(String cellReference, String formattedValue,XSSFComment comment) {if (firstCellOfRow) {rowlist = new ArrayList<>();firstCellOfRow = false;} else {output.append(',');output.println();}// gracefully handle missing CellRef here in a similar way as XSSFCell doesif(cellReference == null) {cellReference = new CellAddress(currentRow, currentCol).formatAsString();}// Did we miss any cells?int thisCol = (new CellReference(cellReference)).getCol();int missedCols = thisCol - currentCol - 1;for (int i=0; i<missedCols; i++) {output.append(',');rowlist.add("");}output.println();// no need to append anything if we do not have a valueif (formattedValue == null) {return;}currentCol = thisCol;// Number or string?try {//noinspection ResultOfMethodCallIgnoredDouble.parseDouble(formattedValue);rowlist.add(formattedValue);output.append(formattedValue);} catch (Exception e) {// let's remove quotes if they are already thereif 
(formattedValue.startsWith("\"") && formattedValue.endsWith("\"")) {formattedValue = formattedValue.substring(1, formattedValue.length()-1);}output.append('"');rowlist.add(formattedValue.replace("\"", "\"\""));// encode double-quote with two double-quotes to produce a valid CSV formatoutput.append(formattedValue.replace("\"", "\"\""));output.append('"');}}@Overridepublic void headerFooter(String s, boolean b, String s1) {}}///private final OPCPackage xlsxPackage;/*** Number of columns to read starting with leftmost*/private final int minColumns;/*** Destination for data*/private final PrintStream output;/*** Creates a new XLSX -&gt; CSV converter** @param pkg        The XLSX package to process* @param output     The PrintStream to output the CSV to* @param minColumns The minimum number of columns to output, or -1 for no minimum*/public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns) {this.xlsxPackage = pkg;this.output = output;this.minColumns = minColumns;}/*** Parses and shows the content of one sheet* using the specified styles and shared-strings tables.** @param styles The table of styles that may be referenced by cells in the sheet* @param strings The table of strings that may be referenced by cells in the sheet* @param sheetInputStream The stream to read the sheet-data from.* @exception java.io.IOException An IO exception from the parser,*            possibly from a byte stream or character stream*            supplied by the application.* @throws SAXException if parsing the XML data fails.*/public void processSheet(Styles styles,SharedStrings strings,SheetContentsHandler sheetHandler,InputStream sheetInputStream) throws IOException, SAXException {DataFormatter formatter = new DataFormatter();InputSource sheetSource = new InputSource(sheetInputStream);try {XMLReader sheetParser = XMLHelper.newXMLReader();ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, 
false);sheetParser.setContentHandler(handler);sheetParser.parse(sheetSource);} catch(ParserConfigurationException e) {throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());}}/*** Initiates the processing of the XLS workbook file to CSV.** @throws IOException If reading the data from the package fails.* @throws SAXException if parsing the XML data fails.*/public List process() throws IOException, OpenXML4JException, SAXException {ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);StylesTable styles = xssfReader.getStylesTable();XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();int index = 0;while (iter.hasNext()) {try (InputStream stream = iter.next()) {String sheetName = iter.getSheetName();this.output.println();this.output.println(sheetName + " [index=" + index + "]:");processSheet(styles, strings, new SheetToCSV(), stream);data.add(list);list = new ArrayList<>();}++index;}return data;}public static void main(String[] args) throws Exception {File xlsxFile = new File("C:\\Users\\zhang\\Desktop\\1-50蔡晨浩等人案件要素 - 删空白行和列(1).xlsx");if (!xlsxFile.exists()) {System.err.println("Not found or not a file: " + xlsxFile.getPath());return;}int minColumns = -1;if (args.length >= 2)minColumns = Integer.parseInt(args[1]);// The package open is instantaneous, as it should be.try (OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ)) {XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);xlsx2csv.process();}}
}

poi事件模式读取excel相关推荐

  1. POI事件模式读取Excel 2003文件

    一.概述 1. Excel 2003文件(即后缀为xls)是二进制文件,存储结构为复合文档,POI读取xls文件有两种方式 用户模式(usermodel):一次性将xls文件读入到内存,创建dom结构 ...

  2. 解决POI事件驱动模式读取不到Java代码创建的Excel表格数据问题

    场景 使用POI官网上的事件驱动模式的示例方法,读取单sheet单次创建的Excel表格文件(.xlsx),Microsoft Excel和WPS Excel创建的表格文件可以正常读取数据,但是jav ...

  3. POI驱动模式读取Excel2007

    项目需要进行导入优化的时候,因为之前用poi旧版本读取excel时效率比较慢,后来了解的poi的驱动模式后,准备用来改造导入方法.在大批量数据面前效率提升比较明显(几百几千行数据时效率提供微弱) . ...

  4. 使用Apache下poi创建和读取excel文件

    一:使用apache下poi创建excel文档 1 @Test 2 /* 3 * 使用Apache poi创建excel文件 4 */ 5 public void testCreateExcel() ...

  5. 【Apache POI】Java 读取Excel文件

    Excel内容如下: 封装Excel行列字段的Bean: public class Employee {private String name;private String gender;privat ...

  6. 使用POI写入和读取Excel文件中的数据

    一,POI中提供的常用方法 Workbook类提供的方法 createSheet("员工信息表") 创建某张表getSheetAt(0) 根据工作簿索引获取工作表getSheetN ...

  7. 下载poi包+java读取excel

    1.下载POI 打开网站http://poi.apache.org/download.html,选择版本下载. 2.解压完成后 ,把解压出来的所有的xx.jar都放在新的文件夹,然后右键你的eclip ...

  8. Java POI:如何读取Excel单元格值而不是计算公式

    我正在使用Apache POI API从Excel文件中获取值. 除了含有公式的单元格外,一切都很好.实际上,cell.getStringCellValue()返回单元格中使用的公式而不是单元格的值. ...

  9. POI设置和读取excel分组信息,多级分组设置

    一.设置分组信息 1.一级分组信息设置 sheet.groupRow(1, 2); sheet.groupColumn(1, 2); 2.多级分组信息设置 poi中提供的方法是没有级别这个参数,通过查 ...

  10. poi 不自动计算 设置单元格公式_Java POI:如何读取Excel单元格的值而不是计算公式?...

    上面的建议对我来说不起作用cell.getRawValue()返回与AUT上的excel单元格相同的公式,所以写下面的函数,它工作: public void readFormula() throws ...

最新文章

  1. 在线作图|两分钟在线做中性群落模型分析
  2. Spring Security源码分析十三:Spring Security 基于表达式的权限控制
  3. 【每日提高之声明式事物】spring声明式事务 同一类内方法调用事务失效
  4. 查询本机公网ip地址的方法
  5. 计算机 课题学科代码,学科分类与代码表课题.doc
  6. 从数学入手,3招打破机器学习的边界
  7. 状态管理之cookie使用及其限制、session会话
  8. Deformable CNNs论文笔记
  9. inventor2019有无CAE_Inventor2019最新下载_Inventor2019正式版 - 软件帝
  10. vc6 设置静态文本框透明_微信还能这么玩?半透明的微信背景主题用起来!
  11. Android开发笔记(九十三)装饰模式
  12. [NIO]dawn之Task具体解释
  13. vc 星号密码查看方法
  14. 拍牌神器是怎样炼成的(一)--- 键鼠模拟之WinAPI
  15. html ckplayer.swf,让KindEditor支持MP4视频(使用ckplayer播放器)
  16. 深度学习_目标检测_SPP(Spatial Pyramid Pooling)详解
  17. 国家的超级计算机用处,超级计算机是什么,有什么用处?
  18. 致远SPM解决方案之费用管理
  19. Python语言实现龙女仆康娜酱字符飞出
  20. 空间里相片批量导入u盘_多台电脑同步文件还在用 U 盘牵线?GoodSync 帮你一键搞定...

热门文章

  1. 博客群建软件-Google会认为哪些网站是作弊网站?
  2. landesk 卸载_LANDesk软件分发在项目中的深入探索(续2)—客户端已安装应用程序的远程卸载...
  3. 微信公众平台网页授权两次重定向跳转问题
  4. 深圳再当特区,引爆AI万亿市场;阿里·TTS语音合成模型;美团·AI数据库异常监测系统;面向后端开发者的CS课程;前沿论文 | ShowMeAI资讯日报
  5. android自定义通知栏_推送图片
  6. laravel实现短信验证码功能
  7. git 记住账号密码、忽略部分文件、合并分支、将远程分支拉取到本地
  8. 软件工程:PG.SE.PL.PM角色定义
  9. Creator仿超级玛丽小游戏源码分享
  10. [XJTU计算机网络安全与管理]——第八讲密钥管理和分发