org.apache.tika.parser.microsoft
Class CustomOutlookExtractor

java.lang.Object
  extended by org.apache.tika.parser.microsoft.CustomOutlookExtractor

public class CustomOutlookExtractor
extends java.lang.Object

Outlook message parser. Compared with Tika's default OutlookExtractor, this class: (1) fixes a bug where attachments were detected twice; (2) adds the content of each attached file to the output (in the XHTMLContentHandler).


Constructor Summary
CustomOutlookExtractor(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem, org.apache.tika.parser.ParseContext context)
           
 
Method Summary
protected  void copy(org.apache.poi.poifs.filesystem.DirectoryEntry arg0, org.apache.poi.poifs.filesystem.DirectoryEntry arg1)
           
protected  void handleEmbeddedResource(org.apache.tika.io.TikaInputStream arg0, java.lang.String arg1, java.lang.String arg2, org.apache.tika.sax.XHTMLContentHandler arg3, boolean arg4)
           
protected  void handleEmbededOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry arg0, org.apache.tika.sax.XHTMLContentHandler arg1)
           
 void parse(org.apache.tika.sax.XHTMLContentHandler xhtml, org.apache.tika.metadata.Metadata metadata)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

CustomOutlookExtractor

public CustomOutlookExtractor(org.apache.poi.poifs.filesystem.POIFSFileSystem filesystem,
                              org.apache.tika.parser.ParseContext context)
                       throws org.apache.tika.exception.TikaException
Throws:
org.apache.tika.exception.TikaException
Method Detail

parse

public void parse(org.apache.tika.sax.XHTMLContentHandler xhtml,
                  org.apache.tika.metadata.Metadata metadata)
           throws org.apache.tika.exception.TikaException,
                  org.xml.sax.SAXException,
                  java.io.IOException
Throws:
org.apache.tika.exception.TikaException
org.xml.sax.SAXException
java.io.IOException

handleEmbeddedResource

protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream arg0,
                                      java.lang.String arg1,
                                      java.lang.String arg2,
                                      org.apache.tika.sax.XHTMLContentHandler arg3,
                                      boolean arg4)
                               throws java.io.IOException,
                                      org.xml.sax.SAXException,
                                      org.apache.tika.exception.TikaException
Throws:
java.io.IOException
org.xml.sax.SAXException
org.apache.tika.exception.TikaException

handleEmbededOfficeDoc

protected void handleEmbededOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry arg0,
                                      org.apache.tika.sax.XHTMLContentHandler arg1)
                               throws java.io.IOException,
                                      org.xml.sax.SAXException,
                                      org.apache.tika.exception.TikaException
Throws:
java.io.IOException
org.xml.sax.SAXException
org.apache.tika.exception.TikaException

copy

protected void copy(org.apache.poi.poifs.filesystem.DirectoryEntry arg0,
                    org.apache.poi.poifs.filesystem.DirectoryEntry arg1)
             throws java.io.IOException
Throws:
java.io.IOException


Copyright © 2004-2011. All Rights Reserved.