001/*
002 * ModeShape (http://www.modeshape.org)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *       http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.modeshape.sequencer.pdf;
017
018import java.io.InputStream;
019import java.util.ArrayList;
020import java.util.Calendar;
021import java.util.List;
022
023import org.apache.pdfbox.pdmodel.PDDocument;
024import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
025import org.apache.pdfbox.pdmodel.common.PDMetadata;
026import org.apache.xmpbox.XMPMetadata;
027import org.apache.xmpbox.schema.XMPBasicSchema;
028import org.apache.xmpbox.xml.DomXmpParser;
029
030/**
031 * Utility for extracting XMP metadata from PDF files.
032 * 
033 * @since 5.1
034 */
035public class PdfXmpMetadata {
036
037    private String baseURL;
038    private Calendar createDate;
039    private String creatorTool;
040    private List<String> identifier = new ArrayList<>();
041    private Calendar metadataDate;
042    private Calendar modifyDate;
043    private String nickname;
044    private Integer rating;
045    private String label;
046
047    private InputStream in;
048
049    public PdfXmpMetadata( InputStream inputStream ) {
050        this.in = inputStream;
051    }
052
053    public boolean check() throws Exception {
054        try (PDDocument document = PDDocument.load(in)) {
055            Boolean encrypted = document.isEncrypted();
056
057            if (encrypted) {
058                return false;
059            }
060
061            PDDocumentCatalog catalog = document.getDocumentCatalog();
062            PDMetadata metadata = catalog.getMetadata();
063            if (metadata == null) {
064                return false;
065            }
066
067            DomXmpParser xmpParser = new DomXmpParser();
068            try (InputStream is = metadata.createInputStream()) {
069                XMPMetadata xmp = xmpParser.parse(is);
070                XMPBasicSchema basicSchema = xmp.getXMPBasicSchema();
071                if (basicSchema != null) {
072                    baseURL = basicSchema.getBaseURL();
073                    createDate = basicSchema.getCreateDate();
074                    creatorTool = basicSchema.getCreatorTool();
075                    if (basicSchema.getIdentifiers() != null) {
076                        identifier.addAll(basicSchema.getIdentifiers());
077                    }
078                    metadataDate = basicSchema.getMetadataDate();
079                    modifyDate = basicSchema.getModifyDate();
080                    nickname = basicSchema.getNickname();
081                    rating = basicSchema.getRating();
082                    label = basicSchema.getLabel();
083                    return true;
084                }
085                return false;
086            }
087            
088        }
089    }
090
091
092    public String getBaseURL() {
093        return baseURL;
094    }
095
096    public Calendar getCreateDate() {
097        return createDate;
098    }
099
100    public String getCreatorTool() {
101        return creatorTool;
102    }
103
104    public List<String> getIdentifier() {
105        return identifier;
106    }
107
108    public Calendar getMetadataDate() {
109        return metadataDate;
110    }
111
112    public Calendar getModifyDate() {
113        return modifyDate;
114    }
115
116    public String getNickname() {
117        return nickname;
118    }
119
120    public Integer getRating() {
121        return rating;
122    }
123
124    public String getLabel() {
125        return label;
126    }
127
128}