org.apache.tika.parser.microsoft
Class WordExtractor

java.lang.Object
  extended by org.apache.tika.parser.microsoft.WordExtractor

public class WordExtractor
extends java.lang.Object


Nested Class Summary
static class WordExtractor.TagAndStyle
           
 
Constructor Summary
WordExtractor(ParseContext context)
           
 
Method Summary
static WordExtractor.TagAndStyle buildParagraphTagAndStyle(java.lang.String styleName, boolean isTable)
          Given a style name, return what tag should be used, and what style should be applied to it.
protected  void copy(org.apache.poi.poifs.filesystem.DirectoryEntry sourceDir, org.apache.poi.poifs.filesystem.DirectoryEntry destDir)
           
protected  void handleEmbeddedResource(TikaInputStream resource, java.lang.String filename, java.lang.String mediaType, XHTMLContentHandler xhtml, boolean outputHtml)
           
protected  void handleEmbededOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, XHTMLContentHandler xhtml)
          Handles an office document that is embedded at the POIFS level.
protected  void parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, XHTMLContentHandler xhtml)
           
protected  void parseWord6(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, XHTMLContentHandler xhtml)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

WordExtractor

public WordExtractor(ParseContext context)
Method Detail

parse

protected void parse(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
                     XHTMLContentHandler xhtml)
              throws java.io.IOException,
                     org.xml.sax.SAXException,
                     TikaException
Throws:
java.io.IOException
org.xml.sax.SAXException
TikaException

parseWord6

protected void parseWord6(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
                          XHTMLContentHandler xhtml)
                   throws java.io.IOException,
                          org.xml.sax.SAXException,
                          TikaException
Throws:
java.io.IOException
org.xml.sax.SAXException
TikaException

buildParagraphTagAndStyle

public static WordExtractor.TagAndStyle buildParagraphTagAndStyle(java.lang.String styleName,
                                                                  boolean isTable)
Given a style name, return what tag should be used, and what style should be applied to it.


handleEmbeddedResource

protected void handleEmbeddedResource(TikaInputStream resource,
                                      java.lang.String filename,
                                      java.lang.String mediaType,
                                      XHTMLContentHandler xhtml,
                                      boolean outputHtml)
                               throws java.io.IOException,
                                      org.xml.sax.SAXException,
                                      TikaException
Throws:
java.io.IOException
org.xml.sax.SAXException
TikaException

handleEmbededOfficeDoc

protected void handleEmbededOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
                                      XHTMLContentHandler xhtml)
                               throws java.io.IOException,
                                      org.xml.sax.SAXException,
                                      TikaException
Handles an office document that is embedded at the POIFS level.

Throws:
java.io.IOException
org.xml.sax.SAXException
TikaException

copy

protected void copy(org.apache.poi.poifs.filesystem.DirectoryEntry sourceDir,
                    org.apache.poi.poifs.filesystem.DirectoryEntry destDir)
             throws java.io.IOException
Throws:
java.io.IOException


Copyright © 2007-2010 The Apache Software Foundation. All Rights Reserved.