001/*
002 * ModeShape (http://www.modeshape.org)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *       http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.modeshape.sequencer.odf;
017
018import java.io.ByteArrayInputStream;
019import java.io.IOException;
020import java.io.InputStream;
021import java.util.Calendar;
022import java.util.List;
023import java.util.stream.Collectors;
024import javax.jcr.NamespaceRegistry;
025import javax.jcr.Node;
026import javax.jcr.Property;
027import javax.jcr.RepositoryException;
028import javax.jcr.Value;
029import javax.jcr.ValueFactory;
030import org.modeshape.common.util.CheckArg;
031import org.modeshape.common.util.StringUtil;
032import org.modeshape.jcr.api.Binary;
033import org.modeshape.jcr.api.JcrConstants;
034import org.modeshape.jcr.api.nodetype.NodeTypeManager;
035import org.modeshape.jcr.api.sequencer.Sequencer;
036
037/**
038 * A sequencer that processes the binary content of an OpenDocument file, extracts the metadata for the document, and then writes that
039 * metadata to the repository.
040 * <p>
041 * This sequencer produces data that corresponds to the following structure:
042 * <ul>
043 * <li><strong>odf:metadata</strong> node of type <code>odf:metadata</code>
044 * <ul>
045 * <li><strong>jcr:mimeType</strong> - optional string property for the mime type of the document</li>
046 * <li><strong>odf:creationDate</strong> - optional date property specifying the creation date</li>
047 * <li><strong>odf:creator</strong> - optional string property for the document's creator</li>
048 * <li><strong>odf:description</strong> - optional string property for the document's description</li>
049 * <li><strong>odf:editingCycles</strong> - optional long property for the number of editing cycles</li>
050 * <li><strong>odf:editingTime</strong> - optional long property for the total editing time</li>
051 * <li><strong>odf:generator</strong> - optional string for the generator of the document</li>
052 * <li><strong>odf:initialCreator</strong> - optional string for the initial creator of the document</li>
053 * <li><strong>odf:keywords</strong> - optional multi-valued string property for the keywords of the document</li>
054 * <li><strong>odf:language</strong> - optional string property for the language of the document</li>
055 * <li><strong>odf:modificationDate</strong> - optional date property for specifying the last modification date</li>
056 * <li><strong>odf:printedBy</strong> - optional string property for the printedBy field</li>
057 * <li><strong>odf:printDate</strong> - optional date property specifying the last print date</li>
058 * <li><strong>odf:subject</strong> - optional string property for the document's subject</li>
059 * <li><strong>odf:title</strong> - optional string property for the document's title</li>
060 * <li><strong>odf:pages</strong> - optional long property specifying number of pages (documents and presentations)</li>
061 * <li><strong>odf:sheets</strong> - optional long property specifying number of sheets (spreadsheets)</li>
062 * </ul>
063 * </li>
064 * </ul>
065 * </p>
066 * 
067 * @since 5.1
068 */
069public class OdfMetadataSequencer extends Sequencer {
070
071    @Override
072    public void initialize( NamespaceRegistry registry,
073                            NodeTypeManager nodeTypeManager ) throws RepositoryException, IOException {
074        super.registerNodeTypes("odf.cnd", nodeTypeManager, true);
075        registerDefaultMimeTypes(OdfMetadata.MIME_TYPE_STRINGS);
076    }
077
078    @Override
079    public boolean execute( Property inputProperty,
080                            Node outputNode,
081                            Context context ) throws Exception {
082        Binary binaryValue = (Binary) inputProperty.getBinary();
083        CheckArg.isNotNull(binaryValue, "binary");
084        String mimeType = binaryValue.getMimeType();
085
086        Node sequencedNode = getMetadataNode(outputNode);
087        setPropertyIfMetadataPresent(sequencedNode, JcrConstants.JCR_MIME_TYPE, mimeType);
088        return processBasicMetadata(sequencedNode, binaryValue);
089    }
090
091    private boolean processBasicMetadata( Node sequencedNode,
092                                          Binary binaryValue ) {
093        OdfMetadata metadata = null;
094        try (InputStream stream = binaryValue.getStream()) {
095            metadata = new OdfMetadata(stream);
096            if (metadata.check()) {
097                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.PAGES, metadata.getPages());
098                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.SHEETS, metadata.getSheets());
099                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.CREATION_DATE, metadata.getCreationDate());
100                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.CREATOR, metadata.getCreator());
101                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.DESCRIPTION, metadata.getDescription());
102                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.EDITING_CYCLES, metadata.getEditingCycles());
103                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.EDITING_TIME, metadata.getEditingTime());
104                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.GENERATOR, metadata.getGenerator());
105                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.INITIAL_CREATOR, metadata.getInitialCreator());
106                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.KEYWORDS, metadata.getKeywords());
107                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.LANGUAGE, metadata.getLanguage());
108                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.MODIFICATION_DATE, metadata.getModificationDate());
109                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.PRINTED_BY, metadata.getPrintedBy());
110                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.PRINT_DATE, metadata.getPrintDate());
111                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.SUBJECT, metadata.getSubject());
112                setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.TITLE, metadata.getTitle());
113
114                return true;
115            }
116        } catch (Exception e) {
117            getLogger().error(e, "Couldn't process stream.");
118        }
119        return false;
120    }
121
122    private Node getMetadataNode( Node outputNode ) throws RepositoryException {
123        if (outputNode.isNew()) {
124            outputNode.setPrimaryType(OdfMetadataLexicon.METADATA_NODE);
125            return outputNode;
126        }
127        return outputNode.addNode(OdfMetadataLexicon.METADATA_NODE, OdfMetadataLexicon.METADATA_NODE);
128    }
129
130    private void setPropertyIfMetadataPresent( Node node,
131                                               String propertyName,
132                                               Object value ) throws RepositoryException {
133        if (value != null) {
134            if (value instanceof String && !StringUtil.isBlank((String) value)) {
135                node.setProperty(propertyName, (String) value);
136            } else if (value instanceof Boolean) {
137                node.setProperty(propertyName, (Boolean) value);
138            } else if (value instanceof Long) {
139                node.setProperty(propertyName, (Long) value);
140            } else if (value instanceof Integer) {
141                node.setProperty(propertyName, new Long((Integer) value));
142            } else if (value instanceof Calendar) {
143                node.setProperty(propertyName, (Calendar) value); 
144            } else if (value instanceof byte[]) {
145                InputStream is = new ByteArrayInputStream((byte []) value);
146                javax.jcr.Binary binaryProperty = node.getSession().getValueFactory().createBinary(is);
147                node.setProperty(propertyName, binaryProperty);
148            } else if (value instanceof List) {
149                ValueFactory vf = node.getSession().getValueFactory();
150                List<Value> values = ((List<?>) value).stream().filter(val -> val instanceof String)
151                                                      .map(val -> vf.createValue((String) val)).collect(Collectors.toList());
152                if (!values.isEmpty()) {
153                    node.setProperty(propertyName, values.toArray(new Value[values.size()]));
154                }
155            } else {
156                getLogger().warn("The value of the property {0} has unknown type and couldn't be saved.", propertyName);
157            }
158        }
159    }
160}