public class WordExtractor extends Object
| Modifier and Type | Class and Description |
|---|---|
| static class | WordExtractor.TagAndStyle |
| Constructor and Description |
|---|
| WordExtractor(ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
| static WordExtractor.TagAndStyle | buildParagraphTagAndStyle(String styleName, boolean isTable)<br>Given a style name, return what tag should be used, and what style should be applied to it. |
| protected Detector | getDetector() |
| protected MimeTypes | getMimeTypes() |
| protected String | getPassword()<br>Returns the password to be used for this file, or null if no password, or the default password, should be used. |
| protected TikaConfig | getTikaConfig() |
| protected void | handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, XHTMLContentHandler xhtml)<br>Handle an office document that's embedded at the POIFS level. |
| protected void | handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) |
| protected void | parse(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml) |
| protected void | parse(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml) |
| protected void | parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml) |
| protected void | parseWord6(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml) |
public WordExtractor(ParseContext context)
public static WordExtractor.TagAndStyle buildParagraphTagAndStyle(String styleName, boolean isTable)
protected void parse(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
Throws: IOException, SAXException, TikaException
protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
Throws: IOException, SAXException, TikaException
protected void parseWord6(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
Throws: IOException, SAXException, TikaException
protected void parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
Throws: IOException, SAXException, TikaException
protected TikaConfig getTikaConfig()
protected Detector getDetector()
protected MimeTypes getMimeTypes()
protected String getPassword()
protected void handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
Throws: IOException, SAXException, TikaException
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
Throws: IOException, SAXException, TikaException
Copyright © 2007-2015 The Apache Software Foundation. All Rights Reserved.