public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor
Modifier and Type | Class and Description |
---|---|
protected static class |
XSSFExcelExtractorDecorator.HeaderFooterFromString |
protected static class |
XSSFExcelExtractorDecorator.SheetTextAsHTML
Turns formatted sheet events into HTML
|
protected static class |
XSSFExcelExtractorDecorator.XSSFSheetInterestingPartsCapturer
Captures information on interesting tags, whilst
delegating the main work to the formatting handler
|
Modifier and Type | Field and Description |
---|---|
protected Map<String,String> |
drawingHyperlinks |
protected org.apache.poi.ss.usermodel.DataFormatter |
formatter |
protected static org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper |
hfHelper
Allows access to headers/footers from raw xml strings
|
protected Metadata |
metadata |
protected ParseContext |
parseContext |
protected List<org.apache.poi.openxml4j.opc.PackagePart> |
sheetParts |
Fields inherited from class AbstractOOXMLExtractor: config, EMBEDDED_RELATIONSHIPS, extractor
Constructor and Description |
---|
XSSFExcelExtractorDecorator(ParseContext context,
org.apache.poi.ooxml.extractor.POIXMLTextExtractor extractor,
Locale locale) |
Modifier and Type | Method and Description |
---|---|
protected void |
addDrawingHyperLinks(org.apache.poi.openxml4j.opc.PackagePart sheetPart) |
protected void |
buildXHTML(XHTMLContentHandler xhtml)
Populates the
XHTMLContentHandler object received as parameter. |
protected void |
configureExtractor(org.apache.poi.ooxml.extractor.POIXMLTextExtractor extractor,
Locale locale) |
protected void |
extractHeaderFooter(String hf,
XHTMLContentHandler xhtml) |
protected void |
extractHyperLinks(org.apache.poi.openxml4j.opc.PackagePart sheetPart,
XHTMLContentHandler xhtml) |
protected List<org.apache.poi.openxml4j.opc.PackagePart> |
getMainDocumentParts()
In Excel files, sheets have things embedded in them,
as well as sheet drawings, which hold the images
|
void |
getXHTML(ContentHandler handler,
Metadata metadata,
ParseContext context)
Parses the document into a sequence of XHTML SAX events sent to the
given content handler.
|
protected void |
processShapes(List<org.apache.poi.xssf.usermodel.XSSFShape> shapes,
XHTMLContentHandler xhtml) |
void |
processSheet(org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler sheetContentsExtractor,
org.apache.poi.xssf.model.CommentsTable comments,
org.apache.poi.xssf.model.StylesTable styles,
org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable strings,
InputStream sheetInputStream) |
Methods inherited from class AbstractOOXMLExtractor: getDocument, getJustFileName, getMetadataExtractor, handleEmbeddedFile, loadLinkedRelationships
protected static org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper hfHelper
protected final org.apache.poi.ss.usermodel.DataFormatter formatter
protected final List<org.apache.poi.openxml4j.opc.PackagePart> sheetParts
protected Metadata metadata
protected ParseContext parseContext
public XSSFExcelExtractorDecorator(ParseContext context, org.apache.poi.ooxml.extractor.POIXMLTextExtractor extractor, Locale locale)
protected void configureExtractor(org.apache.poi.ooxml.extractor.POIXMLTextExtractor extractor, Locale locale)
public void getXHTML(ContentHandler handler, Metadata metadata, ParseContext context) throws SAXException, org.apache.xmlbeans.XmlException, IOException, TikaException
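Description copied from interface: OOXMLExtractor
Parses the document into a sequence of XHTML SAX events sent to the given content handler.
Specified by: getXHTML in interface OOXMLExtractor
Overrides: getXHTML in class AbstractOOXMLExtractor
Throws:
SAXException
org.apache.xmlbeans.XmlException
IOException
TikaException
See Also: OOXMLExtractor.getXHTML(ContentHandler, Metadata, ParseContext)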
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, org.apache.xmlbeans.XmlException, IOException
Description copied from class: AbstractOOXMLExtractor
Populates the XHTMLContentHandler object received as parameter.
Specified by: buildXHTML in class AbstractOOXMLExtractor
Throws:
SAXException
org.apache.xmlbeans.XmlException
IOException
See Also: XSSFExcelExtractor.getText()
protected void addDrawingHyperLinks(org.apache.poi.openxml4j.opc.PackagePart sheetPart)
protected void extractHyperLinks(org.apache.poi.openxml4j.opc.PackagePart sheetPart, XHTMLContentHandler xhtml) throws SAXException
SAXException
protected void extractHeaderFooter(String hf, XHTMLContentHandler xhtml) throws SAXException
SAXException
protected void processShapes(List<org.apache.poi.xssf.usermodel.XSSFShape> shapes, XHTMLContentHandler xhtml) throws SAXException
SAXException
public void processSheet(org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler sheetContentsExtractor, org.apache.poi.xssf.model.CommentsTable comments, org.apache.poi.xssf.model.StylesTable styles, org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws IOException, SAXException
IOException
SAXException
protected List<org.apache.poi.openxml4j.opc.PackagePart> getMainDocumentParts() throws TikaException
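In Excel files, sheets have things embedded in them, and sheet drawings which have the images.
Specified by: getMainDocumentParts in class AbstractOOXMLExtractor
Throws:
TikaException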
Copyright © 2007–2020 The Apache Software Foundation. All rights reserved.