org.apache.tika.parser.microsoft
Class WordExtractor
java.lang.Object
org.apache.tika.parser.microsoft.WordExtractor
public class WordExtractor
- extends Object
Method Summary |
static WordExtractor.TagAndStyle |
buildParagraphTagAndStyle(String styleName,
boolean isTable)
Given a style name, returns the tag that should be used and
the style that should be applied to it. |
protected void |
handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
XHTMLContentHandler xhtml)
Handles an Office document that is embedded at the POIFS level. |
protected void |
handleEmbeddedResource(TikaInputStream resource,
String filename,
String mediaType,
XHTMLContentHandler xhtml,
boolean outputHtml)
|
protected void |
parse(org.apache.poi.poifs.filesystem.DirectoryNode root,
XHTMLContentHandler xhtml)
|
protected void |
parse(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem,
XHTMLContentHandler xhtml)
|
protected void |
parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root,
XHTMLContentHandler xhtml)
|
protected void |
parseWord6(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem,
XHTMLContentHandler xhtml)
|
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
WordExtractor
public WordExtractor(ParseContext context)
parse
protected void parse(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem,
XHTMLContentHandler xhtml)
throws IOException,
SAXException,
TikaException
- Throws:
IOException
SAXException
TikaException
parse
protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root,
XHTMLContentHandler xhtml)
throws IOException,
SAXException,
TikaException
- Throws:
IOException
SAXException
TikaException
parseWord6
protected void parseWord6(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem,
XHTMLContentHandler xhtml)
throws IOException,
SAXException,
TikaException
- Throws:
IOException
SAXException
TikaException
parseWord6
protected void parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root,
XHTMLContentHandler xhtml)
throws IOException,
SAXException,
TikaException
- Throws:
IOException
SAXException
TikaException
buildParagraphTagAndStyle
public static WordExtractor.TagAndStyle buildParagraphTagAndStyle(String styleName,
boolean isTable)
- Given a style name, returns the tag that should be used and
the style that should be applied to it.
handleEmbeddedResource
protected void handleEmbeddedResource(TikaInputStream resource,
String filename,
String mediaType,
XHTMLContentHandler xhtml,
boolean outputHtml)
throws IOException,
SAXException,
TikaException
- Throws:
IOException
SAXException
TikaException
handleEmbeddedOfficeDoc
protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
XHTMLContentHandler xhtml)
throws IOException,
SAXException,
TikaException
- Handles an Office document that is embedded at the POIFS level.
- Throws:
IOException
SAXException
TikaException
Copyright © 2007-2011 The Apache Software Foundation. All Rights Reserved.