Archive | February, 2015

Extract Text from docx, pptx, xlsx using Apache POI 3.9

25 Feb

Java Tutorial and Code Solution

import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import javax.xml.parsers.ParserConfigurationException; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; import org.apache.poi.xssf.extractor.XSSFExcelExtractor; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.xmlbeans.XmlException; import org.xml.sax.SAXException; /** * Groups methods that print the text content of Office Open XML packages using Apache POI extractors. * NOTE(review): only the .docx and .pptx methods are fully visible in this excerpt; the rest of the class is truncated. */ public class FileParser{ /** * Extracts the text of a .docx package with {@link XWPFWordExtractor} and prints it to standard output. * Nothing is returned; the extracted text is only written via System.out. * NOTE(review): the extractor is not closed here - confirm whether the caller is expected to close the package. * * @param docx the OPC package passed to the XWPFWordExtractor constructor (presumably an opened .docx file - confirm against caller) * @throws FileNotFoundException * @throws IOException * @throws XmlException * @throws InvalidFormatException * @throws OpenXML4JException * @throws ParserConfigurationException * @throws SAXException */ public void DocFileContentParser(OPCPackage docx) throws FileNotFoundException, IOException, XmlException, InvalidFormatException, OpenXML4JException, ParserConfigurationException, SAXException { XWPFWordExtractor xw = new XWPFWordExtractor(docx); System.out.println(xw.getText()); } /** * Extracts the text of a .pptx package with {@link XSLFPowerPointExtractor} and prints it to standard output. * Nothing is returned; the extracted text is only written via System.out. * NOTE(review): the extractor is not closed here - confirm whether the caller is expected to close the package. * * @param pptx the OPC package passed to the XSLFPowerPointExtractor constructor (presumably an opened .pptx file - confirm against caller) * @throws FileNotFoundException * @throws IOException * @throws InvalidFormatException * @throws XmlException * @throws OpenXML4JException */ public void ppFileContentParser(OPCPackage pptx) throws FileNotFoundException, IOException, InvalidFormatException, XmlException, OpenXML4JException { XSLFPowerPointExtractor xw = new XSLFPowerPointExtractor(pptx); System.out.println(xw.getText()); } /** * This method parses xlsx files * * @param xlsx * @throws FileNotFoundException * @throws IOException * @throws InvalidFormatException *…

View original post 118 more words