public class WordExtractor extends Object
| Modifier and Type | Class and Description | 
|---|---|
| static class  | WordExtractor.TagAndStyle | 
| Modifier and Type | Field and Description | 
|---|---|
| protected ParseContext | context | 
| protected OfficeParserConfig | officeParserConfig | 
| protected Metadata | parentMetadata | 
| Constructor and Description | 
|---|
| WordExtractor(ParseContext context,
             Metadata metadata) | 
| Modifier and Type | Method and Description | 
|---|---|
| static WordExtractor.TagAndStyle | buildParagraphTagAndStyle(String styleName, boolean isTable) — Given a style name, return what tag should be used, and what style should be applied to it. |
| protected Detector | getDetector() | 
| protected MimeTypes | getMimeTypes() — Deprecated. |
| protected String | getPassword() — Returns the password to be used for this file, or null if no / default password should be used. |
| protected TikaConfig | getTikaConfig() | 
| protected void | handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, XHTMLContentHandler xhtml) — Handle an office document that's embedded at the POIFS level. |
| protected void | handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, XHTMLContentHandler xhtml) — Handle an office document that's embedded at the POIFS level. |
| protected void | handleEmbeddedResource(TikaInputStream resource,
                      Metadata embeddedMetadata,
                      String filename,
                      String relationshipID,
                      org.apache.poi.hpsf.ClassID storageClassID,
                      String mediaType,
                      XHTMLContentHandler xhtml,
                      boolean outputHtml) | 
| protected void | handleEmbeddedResource(TikaInputStream resource,
                      String filename,
                      String relationshipID,
                      org.apache.poi.hpsf.ClassID storageClassID,
                      String mediaType,
                      XHTMLContentHandler xhtml,
                      boolean outputHtml) | 
| protected void | handleEmbeddedResource(TikaInputStream resource,
                      String filename,
                      String relationshipID,
                      String mediaType,
                      XHTMLContentHandler xhtml,
                      boolean outputHtml) | 
| protected void | parse(org.apache.poi.poifs.filesystem.DirectoryNode root,
     XHTMLContentHandler xhtml) | 
| protected void | parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
     XHTMLContentHandler xhtml) | 
| protected void | parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root,
          XHTMLContentHandler xhtml) | 
| protected void | parseWord6(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
          XHTMLContentHandler xhtml) | 
protected final Metadata parentMetadata
protected final OfficeParserConfig officeParserConfig
protected final ParseContext context
public WordExtractor(ParseContext context, Metadata metadata)
public static WordExtractor.TagAndStyle buildParagraphTagAndStyle(String styleName, boolean isTable)
protected void parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
                     XHTMLContentHandler xhtml)
              throws IOException,
                     SAXException,
                     TikaException
Throws: IOException, SAXException, TikaException
protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root,
                     XHTMLContentHandler xhtml)
              throws IOException,
                     SAXException,
                     TikaException
Throws: IOException, SAXException, TikaException
protected void parseWord6(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
                          XHTMLContentHandler xhtml)
                   throws IOException,
                          SAXException,
                          TikaException
Throws: IOException, SAXException, TikaException
protected void parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root,
                          XHTMLContentHandler xhtml)
                   throws IOException,
                          SAXException
Throws: IOException, SAXException
protected TikaConfig getTikaConfig()
protected Detector getDetector()
protected MimeTypes getMimeTypes()
Deprecated. Use embeddedDocumentUtil.
protected String getPassword()
protected void handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
Throws: IOException, SAXException, TikaException
protected void handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
Throws: IOException, SAXException, TikaException
protected void handleEmbeddedResource(TikaInputStream resource, Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
Throws: IOException, SAXException, TikaException
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
                                       XHTMLContentHandler xhtml)
                                throws IOException,
                                       SAXException,
                                       TikaException
Throws: IOException, SAXException, TikaException
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
                                       String resourceName,
                                       XHTMLContentHandler xhtml)
                                throws IOException,
                                       SAXException,
                                       TikaException
Throws: IOException, SAXException, TikaException
Copyright © 2007–2021 The Apache Software Foundation. All rights reserved.