org.apache.tika.parser.microsoft
Class WordExtractor

java.lang.Object
  extended by org.apache.tika.parser.microsoft.WordExtractor

public class WordExtractor
extends Object


Nested Class Summary
static class WordExtractor.TagAndStyle
           
 
Constructor Summary
WordExtractor(ParseContext context)
           
 
Method Summary
static WordExtractor.TagAndStyle buildParagraphTagAndStyle(String styleName, boolean isTable)
          Given a style name, returns the tag that should be used and the style that should be applied to that tag.
protected  void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, XHTMLContentHandler xhtml)
          Handles an office document that is embedded at the POIFS level.
protected  void handleEmbeddedResource(TikaInputStream resource, String filename, String mediaType, XHTMLContentHandler xhtml, boolean outputHtml)
           
protected  void parse(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml)
           
protected  void parse(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml)
           
protected  void parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root, XHTMLContentHandler xhtml)
           
protected  void parseWord6(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

WordExtractor

public WordExtractor(ParseContext context)

Method Detail

parse

protected void parse(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem,
                     XHTMLContentHandler xhtml)
              throws IOException,
                     SAXException,
                     TikaException
Throws:
IOException
SAXException
TikaException

parse

protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root,
                     XHTMLContentHandler xhtml)
              throws IOException,
                     SAXException,
                     TikaException
Throws:
IOException
SAXException
TikaException

parseWord6

protected void parseWord6(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem,
                          XHTMLContentHandler xhtml)
                   throws IOException,
                          SAXException,
                          TikaException
Throws:
IOException
SAXException
TikaException

parseWord6

protected void parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root,
                          XHTMLContentHandler xhtml)
                   throws IOException,
                          SAXException,
                          TikaException
Throws:
IOException
SAXException
TikaException

buildParagraphTagAndStyle

public static WordExtractor.TagAndStyle buildParagraphTagAndStyle(String styleName,
                                                                  boolean isTable)
Given a style name, returns the tag that should be used and the style that should be applied to that tag.


handleEmbeddedResource

protected void handleEmbeddedResource(TikaInputStream resource,
                                      String filename,
                                      String mediaType,
                                      XHTMLContentHandler xhtml,
                                      boolean outputHtml)
                               throws IOException,
                                      SAXException,
                                      TikaException
Throws:
IOException
SAXException
TikaException

handleEmbeddedOfficeDoc

protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
                                       XHTMLContentHandler xhtml)
                                throws IOException,
                                       SAXException,
                                       TikaException
Handles an office document that is embedded at the POIFS level.

Throws:
IOException
SAXException
TikaException


Copyright © 2007-2011 The Apache Software Foundation. All Rights Reserved.