001/* 002 * ModeShape (http://www.modeshape.org) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.modeshape.sequencer.odf; 017 018import java.io.ByteArrayInputStream; 019import java.io.IOException; 020import java.io.InputStream; 021import java.util.Calendar; 022import java.util.List; 023import java.util.stream.Collectors; 024import javax.jcr.NamespaceRegistry; 025import javax.jcr.Node; 026import javax.jcr.Property; 027import javax.jcr.RepositoryException; 028import javax.jcr.Value; 029import javax.jcr.ValueFactory; 030import org.modeshape.common.util.CheckArg; 031import org.modeshape.common.util.StringUtil; 032import org.modeshape.jcr.api.Binary; 033import org.modeshape.jcr.api.JcrConstants; 034import org.modeshape.jcr.api.nodetype.NodeTypeManager; 035import org.modeshape.jcr.api.sequencer.Sequencer; 036 037/** 038 * A sequencer that processes the binary content of an OpenDocument file, extracts the metadata for the document, and then writes that 039 * metadata to the repository. 040 * <p> 041 * This sequencer produces data that corresponds to the following structure: 042 * <ul> 043 * <li><strong>odf:metadata</strong> node of type <code>odf:metadata</code> 044 * <ul> 045 * <li><strong>jcr:mimeType</strong> - optional string property for the mime type of the document</li> 046 * <li><strong>odf:creationDate</strong> - optional date property specifying the creation date</li> 047 * <li><strong>odf:creator</strong> - optional string property for the document's creator</li> 048 * <li><strong>odf:description</strong> - optional string property for the document's description</li> 049 * <li><strong>odf:editingCycles</strong> - optional long property for the number of editing cycles</li> 050 * <li><strong>odf:editingTime</strong> - optional long property for the total editing time</li> 051 * <li><strong>odf:generator</strong> - optional string for the generator of the document</li> 052 * <li><strong>odf:initialCreator</strong> - optional string for the initial creator of the document</li> 053 * <li><strong>odf:keywords</strong> - optional multi-valued string property for the keywords of the document</li> 054 * <li><strong>odf:language</strong> - optional string property for the language of the document</li> 055 * <li><strong>odf:modificationDate</strong> - optional date property for specifying the last modification date</li> 056 * <li><strong>odf:printedBy</strong> - optional string property for the printedBy field</li> 057 * <li><strong>odf:printDate</strong> - optional date property specifying the last print date</li> 058 * <li><strong>odf:subject</strong> - optional string property for the document's subject</li> 059 * <li><strong>odf:title</strong> - optional string property for the document's title</li> 060 * <li><strong>odf:pages</strong> - optional long property specifying number of pages (documents and presentations)</li> 061 * <li><strong>odf:sheets</strong> - optional long property specifying number of sheets (spreadsheets)</li> 062 * </ul> 063 * </li> 064 * </ul> 065 * </p> 066 * 067 * @since 5.1 068 */ 069public class OdfMetadataSequencer extends Sequencer { 070 071 @Override 072 public void initialize( NamespaceRegistry registry, 073 NodeTypeManager nodeTypeManager ) throws RepositoryException, IOException { 074 super.registerNodeTypes("odf.cnd", nodeTypeManager, true); 075 registerDefaultMimeTypes(OdfMetadata.MIME_TYPE_STRINGS); 076 } 077 078 @Override 079 public boolean execute( Property inputProperty, 080 Node outputNode, 081 Context context ) throws Exception { 082 Binary binaryValue = (Binary) inputProperty.getBinary(); 083 CheckArg.isNotNull(binaryValue, "binary"); 084 String mimeType = binaryValue.getMimeType(); 085 086 Node sequencedNode = getMetadataNode(outputNode); 087 setPropertyIfMetadataPresent(sequencedNode, JcrConstants.JCR_MIME_TYPE, mimeType); 088 return processBasicMetadata(sequencedNode, binaryValue); 089 } 090 091 private boolean processBasicMetadata( Node sequencedNode, 092 Binary binaryValue ) { 093 OdfMetadata metadata = null; 094 try (InputStream stream = binaryValue.getStream()) { 095 metadata = new OdfMetadata(stream); 096 if (metadata.check()) { 097 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.PAGES, metadata.getPages()); 098 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.SHEETS, metadata.getSheets()); 099 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.CREATION_DATE, metadata.getCreationDate()); 100 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.CREATOR, metadata.getCreator()); 101 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.DESCRIPTION, metadata.getDescription()); 102 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.EDITING_CYCLES, metadata.getEditingCycles()); 103 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.EDITING_TIME, metadata.getEditingTime()); 104 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.GENERATOR, metadata.getGenerator()); 105 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.INITIAL_CREATOR, metadata.getInitialCreator()); 106 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.KEYWORDS, metadata.getKeywords()); 107 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.LANGUAGE, metadata.getLanguage()); 108 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.MODIFICATION_DATE, metadata.getModificationDate()); 109 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.PRINTED_BY, metadata.getPrintedBy()); 110 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.PRINT_DATE, metadata.getPrintDate()); 111 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.SUBJECT, metadata.getSubject()); 112 setPropertyIfMetadataPresent(sequencedNode, OdfMetadataLexicon.TITLE, metadata.getTitle()); 113 114 return true; 115 } 116 } catch (Exception e) { 117 getLogger().error(e, "Couldn't process stream."); 118 } 119 return false; 120 } 121 122 private Node getMetadataNode( Node outputNode ) throws RepositoryException { 123 if (outputNode.isNew()) { 124 outputNode.setPrimaryType(OdfMetadataLexicon.METADATA_NODE); 125 return outputNode; 126 } 127 return outputNode.addNode(OdfMetadataLexicon.METADATA_NODE, OdfMetadataLexicon.METADATA_NODE); 128 } 129 130 private void setPropertyIfMetadataPresent( Node node, 131 String propertyName, 132 Object value ) throws RepositoryException { 133 if (value != null) { 134 if (value instanceof String && !StringUtil.isBlank((String) value)) { 135 node.setProperty(propertyName, (String) value); 136 } else if (value instanceof Boolean) { 137 node.setProperty(propertyName, (Boolean) value); 138 } else if (value instanceof Long) { 139 node.setProperty(propertyName, (Long) value); 140 } else if (value instanceof Integer) { 141 node.setProperty(propertyName, new Long((Integer) value)); 142 } else if (value instanceof Calendar) { 143 node.setProperty(propertyName, (Calendar) value); 144 } else if (value instanceof byte[]) { 145 InputStream is = new ByteArrayInputStream((byte []) value); 146 javax.jcr.Binary binaryProperty = node.getSession().getValueFactory().createBinary(is); 147 node.setProperty(propertyName, binaryProperty); 148 } else if (value instanceof List) { 149 ValueFactory vf = node.getSession().getValueFactory(); 150 List<Value> values = ((List<?>) value).stream().filter(val -> val instanceof String) 151 .map(val -> vf.createValue((String) val)).collect(Collectors.toList()); 152 if (!values.isEmpty()) { 153 node.setProperty(propertyName, values.toArray(new Value[values.size()])); 154 } 155 } else { 156 getLogger().warn("The value of the property {0} has unknown type and couldn't be saved.", propertyName); 157 } 158 } 159 } 160}