public class WordExtractor extends Object
Modifier and Type | Class and Description |
---|---|
static class |
WordExtractor.TagAndStyle |
Modifier and Type | Field and Description |
---|---|
protected ParseContext |
context |
protected OfficeParserConfig |
officeParserConfig |
protected Metadata |
parentMetadata |
Constructor and Description |
---|
WordExtractor(ParseContext context,
Metadata metadata) |
Modifier and Type | Method and Description |
---|---|
static WordExtractor.TagAndStyle |
buildParagraphTagAndStyle(String styleName,
boolean isTable)
Given a style name, return what tag should be used, and
what style should be applied to it.
|
protected Detector |
getDetector() |
protected MimeTypes |
getMimeTypes()
Deprecated.
|
protected String |
getPassword()
Returns the password to be used for this file, or null
if no password or the default password should be used.
|
protected TikaConfig |
getTikaConfig() |
protected void |
handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
String resourceName,
XHTMLContentHandler xhtml)
Handle an office document that's embedded at the POIFS level
|
protected void |
handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
XHTMLContentHandler xhtml)
Handle an office document that's embedded at the POIFS level
|
protected void |
handleEmbeddedResource(TikaInputStream resource,
Metadata embeddedMetadata,
String filename,
String relationshipID,
org.apache.poi.hpsf.ClassID storageClassID,
String mediaType,
XHTMLContentHandler xhtml,
boolean outputHtml) |
protected void |
handleEmbeddedResource(TikaInputStream resource,
String filename,
String relationshipID,
org.apache.poi.hpsf.ClassID storageClassID,
String mediaType,
XHTMLContentHandler xhtml,
boolean outputHtml) |
protected void |
handleEmbeddedResource(TikaInputStream resource,
String filename,
String relationshipID,
String mediaType,
XHTMLContentHandler xhtml,
boolean outputHtml) |
protected void |
parse(org.apache.poi.poifs.filesystem.DirectoryNode root,
XHTMLContentHandler xhtml) |
protected void |
parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
XHTMLContentHandler xhtml) |
protected void |
parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root,
XHTMLContentHandler xhtml) |
protected void |
parseWord6(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
XHTMLContentHandler xhtml) |
protected final Metadata parentMetadata
protected final OfficeParserConfig officeParserConfig
protected final ParseContext context
public WordExtractor(ParseContext context, Metadata metadata)
public static WordExtractor.TagAndStyle buildParagraphTagAndStyle(String styleName, boolean isTable)
protected void parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
IOException
SAXException
TikaException
protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
IOException
SAXException
TikaException
protected void parseWord6(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
IOException
SAXException
TikaException
protected void parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
IOException
SAXException
TikaException
protected TikaConfig getTikaConfig()
protected Detector getDetector()
protected MimeTypes getMimeTypes()
Deprecated. Use embeddedDocumentUtil instead.
protected String getPassword()
protected void handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
IOException
SAXException
TikaException
protected void handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
IOException
SAXException
TikaException
protected void handleEmbeddedResource(TikaInputStream resource, Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
IOException
SAXException
TikaException
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
IOException
SAXException
TikaException
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
IOException
SAXException
TikaException
Copyright © 2007–2022 The Apache Software Foundation. All rights reserved.