Package org.apache.tika.parser.microsoft
Class WordExtractor
- java.lang.Object
- 
- org.apache.tika.parser.microsoft.WordExtractor
 
- 
 public class WordExtractor extends Object 
- 
- 
Nested Class Summary — Nested Classes (Modifier and Type | Class | Description): static class WordExtractor.TagAndStyle
 - 
Field Summary — Fields (Modifier and Type | Field | Description): protected ParseContext context; protected OfficeParserConfig officeParserConfig; protected Metadata parentMetadata
 - 
Constructor Summary — Constructors (Constructor | Description): WordExtractor(ParseContext context, Metadata metadata)
 - 
Method Summary — All Methods | Static Methods | Instance Methods | Concrete Methods (Modifier and Type | Method | Description):
- static WordExtractor.TagAndStyle buildParagraphTagAndStyle(String styleName, boolean isTable) — Given a style name, return what tag should be used, and what style should be applied to it.
- protected Detector getDetector()
- protected String getPassword() — Returns the password to be used for this file, or null if no / default password should be used
- protected TikaConfig getTikaConfig()
- protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, XHTMLContentHandler xhtml, boolean outputHtml) — Handle an office document that's embedded at the POIFS level
- protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, XHTMLContentHandler xhtml, boolean outputHtml) — Handle an office document that's embedded at the POIFS level
- protected void handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml)
- protected void handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml)
- protected void handleEmbeddedResource(TikaInputStream resource, Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml)
- protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml)
- protected void parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, XHTMLContentHandler xhtml)
- protected void parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml)
- protected void parseWord6(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, XHTMLContentHandler xhtml)
- static String tryToGetMsgTitle(org.apache.poi.poifs.filesystem.DirectoryEntry node, String defaultVal)
 
- 
- 
- 
Field Detail
 - 
parentMetadata — protected final Metadata parentMetadata
 - 
officeParserConfig — protected final OfficeParserConfig officeParserConfig
 - 
context — protected final ParseContext context
 
- 
 - 
Constructor Detail
 - 
WordExtractor — public WordExtractor(ParseContext context, Metadata metadata)
 
- 
 - 
Method Detail
 - 
buildParagraphTagAndStyle — public static WordExtractor.TagAndStyle buildParagraphTagAndStyle(String styleName, boolean isTable)
Given a style name, return what tag should be used, and what style should be applied to it.
 - 
parse — protected void parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
Throws:
- IOException
- SAXException
- TikaException
 
 - 
parse — protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
Throws:
- IOException
- SAXException
- TikaException
 
 - 
parseWord6 — protected void parseWord6(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException
Throws:
- IOException
- SAXException
- TikaException
 
 - 
parseWord6 — protected void parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException
Throws:
- IOException
- SAXException
 
 - 
getTikaConfig — protected TikaConfig getTikaConfig()
 - 
getDetector — protected Detector getDetector()
 - 
getPassword — protected String getPassword()
Returns the password to be used for this file, or null if no / default password should be used
 - 
handleEmbeddedResource — protected void handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
Throws:
- IOException
- SAXException
- TikaException
 
 - 
handleEmbeddedResource — protected void handleEmbeddedResource(TikaInputStream resource, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
Throws:
- IOException
- SAXException
- TikaException
 
 - 
handleEmbeddedResource — protected void handleEmbeddedResource(TikaInputStream resource, Metadata embeddedMetadata, String filename, String relationshipID, org.apache.poi.hpsf.ClassID storageClassID, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
Throws:
- IOException
- SAXException
- TikaException
 
 - 
handleEmbeddedOfficeDoc — protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
Handle an office document that's embedded at the POIFS level
Throws:
- IOException
- SAXException
- TikaException
 
 - 
handleEmbeddedOfficeDoc — protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, String resourceName, XHTMLContentHandler xhtml, boolean outputHtml) throws IOException, SAXException, TikaException
Handle an office document that's embedded at the POIFS level
Throws:
- IOException
- SAXException
- TikaException
 
 
- 
 
-