/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.microsoft;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.hsmf.parsers.POIFSChunkParser;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.SAXException;

/**
 * Outlook Message Parser.
 * <p>
 * Reads the string chunks of an Outlook message through POI's
 * {@link POIFSChunkParser}, copies sender, subject and conversation topic
 * into the Tika {@link Metadata}, and writes the message headers, the plain
 * text body and the extracted text of every attachment as XHTML.
 */
class CustomOutlookExtractor {

    /** String chunk identifiers reported by the chunk parser for this message. */
    private final Chunks chunks;

    /** POI chunk parser bound to the message's file system. */
    private final POIFSChunkParser parser;

    /**
     * Creates an extractor for the given Outlook message.
     *
     * @param filesystem POIFS file system containing the message
     * @throws TikaException if the message chunks cannot be read
     */
    public CustomOutlookExtractor(POIFSFileSystem filesystem) throws TikaException {
        try {
            this.parser = new POIFSChunkParser(filesystem);
            this.chunks = this.parser.identifyChunks();
        } catch (IOException e) {
            throw new TikaException("Failed to parse Outlook chunks", e);
        }
    }

    /**
     * Extracts the message into the given handler and metadata.
     * <p>
     * Sets author, title and subject metadata, then emits the subject as a
     * heading, the From/To/Cc/Bcc headers as a definition list, the text
     * body, and finally one {@code div} per attachment.
     *
     * @param xhtml    handler receiving the XHTML output
     * @param metadata metadata object to populate
     * @throws TikaException if an attachment cannot be parsed
     * @throws SAXException  if the handler rejects an event
     */
    public void parse(XHTMLContentHandler xhtml, Metadata metadata)
            throws TikaException, SAXException {
        String subject = getChunk(this.chunks.subjectChunk);
        String from = getChunk(this.chunks.displayFromChunk);

        metadata.set(MSOffice.AUTHOR, from);
        metadata.set(DublinCore.TITLE, subject);
        metadata.set(DublinCore.SUBJECT, getChunk(this.chunks.conversationTopic));

        xhtml.element("h1", subject);

        xhtml.startElement("dl");
        header(xhtml, "From", from);
        header(xhtml, "To", getChunk(this.chunks.displayToChunk));
        header(xhtml, "Cc", getChunk(this.chunks.displayCCChunk));
        header(xhtml, "Bcc", getChunk(this.chunks.displayBCCChunk));
        xhtml.endElement("dl");

        xhtml.element("div", getChunk(this.chunks.textBodyChunk));

        parseAttachments(xhtml);
    }

    /**
     * Parses every attachment with an {@link AutoDetectParser} and appends
     * its extracted text to the output as a {@code div} element.
     *
     * @param xhtml handler receiving the XHTML output
     * @throws TikaException if an attachment cannot be read or parsed
     * @throws SAXException  if a handler rejects an event
     */
    private void parseAttachments(XHTMLContentHandler xhtml)
            throws TikaException, SAXException {
        // Suppression is limited to this one assignment from the chunk parser.
        @SuppressWarnings("unchecked")
        Map<String, ByteArrayInputStream> attachments = this.parser.getAttachmentList();
        if (attachments == null) {
            return;
        }

        // Named distinctly so it cannot be confused with the chunk parser field.
        Parser attachmentParser = new AutoDetectParser();

        for (Map.Entry<String, ByteArrayInputStream> attachment : attachments.entrySet()) {
            ByteArrayInputStream fileContent = attachment.getValue();
            if (fileContent == null) {
                // Nothing to parse for this entry.
                continue;
            }

            Metadata attachmentMetadata = new Metadata();
            String fileName = attachment.getKey();
            if (fileName != null) {
                // Pass the file name as a hint so type detection can use it.
                attachmentMetadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
            }

            BodyContentHandler attachmentHandler = new BodyContentHandler();
            try {
                attachmentParser.parse(fileContent, attachmentHandler, attachmentMetadata);
            } catch (IOException e) {
                throw new TikaException("Failed to parse Outlook attachment", e);
            }

            xhtml.element("div", attachmentHandler.toString());
        }
    }

    /**
     * Writes one header as a {@code dt}/{@code dd} pair. Nothing is written
     * when the value is {@code null} or empty.
     *
     * @param xhtml handler receiving the XHTML output
     * @param key   header label
     * @param value header value
     * @throws SAXException if the handler rejects an event
     */
    private void header(XHTMLContentHandler xhtml, String key, String value)
            throws SAXException {
        if (value != null && value.length() > 0) {
            xhtml.element("dt", key);
            xhtml.element("dd", value);
        }
    }

    /**
     * Returns the content of the identified string chunk in the
     * current document. Returns the empty string if the identified
     * chunk does not exist in the current document or has no content.
     *
     * @param chunk string chunk identifier
     * @return content of the identified chunk, or the empty string
     */
    private String getChunk(StringChunk chunk) {
        try {
            Object node = this.parser.getDocumentNode(chunk);
            return node == null ? "" : node.toString();
        } catch (ChunkNotFoundException e) {
            // A missing chunk is expected for optional fields.
            return "";
        }
    }

}
