org.apache.tika.parser.microsoft
Class WordExtractor
java.lang.Object
org.apache.tika.parser.microsoft.WordExtractor
public class WordExtractor
- extends java.lang.Object
Method Summary |
static WordExtractor.TagAndStyle |
buildParagraphTagAndStyle(java.lang.String styleName,
boolean isTable)
Given a style name, returns the tag that should be used and
the style that should be applied to it. |
protected void |
copy(org.apache.poi.poifs.filesystem.DirectoryEntry sourceDir,
org.apache.poi.poifs.filesystem.DirectoryEntry destDir)
|
protected void |
handleEmbeddedResource(TikaInputStream resource,
java.lang.String filename,
java.lang.String mediaType,
XHTMLContentHandler xhtml,
boolean outputHtml)
|
protected void |
handleEmbededOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
XHTMLContentHandler xhtml)
Handles an office document that is embedded at the POIFS level. |
protected void |
parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
XHTMLContentHandler xhtml)
|
protected void |
parseWord6(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
XHTMLContentHandler xhtml)
|
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
WordExtractor
public WordExtractor(ParseContext context)
parse
protected void parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
XHTMLContentHandler xhtml)
throws java.io.IOException,
org.xml.sax.SAXException,
TikaException
- Throws:
java.io.IOException
org.xml.sax.SAXException
TikaException
parseWord6
protected void parseWord6(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
XHTMLContentHandler xhtml)
throws java.io.IOException,
org.xml.sax.SAXException,
TikaException
- Throws:
java.io.IOException
org.xml.sax.SAXException
TikaException
buildParagraphTagAndStyle
public static WordExtractor.TagAndStyle buildParagraphTagAndStyle(java.lang.String styleName,
boolean isTable)
- Given a style name, returns the tag that should be used and
the style that should be applied to it.
handleEmbeddedResource
protected void handleEmbeddedResource(TikaInputStream resource,
java.lang.String filename,
java.lang.String mediaType,
XHTMLContentHandler xhtml,
boolean outputHtml)
throws java.io.IOException,
org.xml.sax.SAXException,
TikaException
- Throws:
java.io.IOException
org.xml.sax.SAXException
TikaException
handleEmbededOfficeDoc
protected void handleEmbededOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
XHTMLContentHandler xhtml)
throws java.io.IOException,
org.xml.sax.SAXException,
TikaException
- Handles an office document that is embedded at the POIFS level.
- Throws:
java.io.IOException
org.xml.sax.SAXException
TikaException
copy
protected void copy(org.apache.poi.poifs.filesystem.DirectoryEntry sourceDir,
org.apache.poi.poifs.filesystem.DirectoryEntry destDir)
throws java.io.IOException
- Throws:
java.io.IOException
Copyright © 2007-2011 The Apache Software Foundation. All Rights Reserved.