001/*
002 * Copyright 2015 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.fcrepo.migration.foxml;
017
018import org.apache.commons.codec.binary.Base64OutputStream;
019import org.fcrepo.migration.ContentDigest;
020import org.fcrepo.migration.DatastreamInfo;
021import org.fcrepo.migration.DatastreamVersion;
022import org.fcrepo.migration.DefaultContentDigest;
023import org.fcrepo.migration.DefaultObjectInfo;
024import org.fcrepo.migration.FedoraObjectProcessor;
025import org.fcrepo.migration.ObjectInfo;
026import org.fcrepo.migration.ObjectProperties;
027import org.fcrepo.migration.ObjectReference;
028import org.fcrepo.migration.StreamingFedoraObjectHandler;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032import javax.xml.bind.JAXBContext;
033import javax.xml.bind.JAXBElement;
034import javax.xml.bind.JAXBException;
035import javax.xml.bind.Unmarshaller;
036import javax.xml.stream.XMLEventReader;
037import javax.xml.stream.XMLEventWriter;
038import javax.xml.stream.XMLInputFactory;
039import javax.xml.stream.XMLOutputFactory;
040import javax.xml.stream.XMLStreamConstants;
041import javax.xml.stream.XMLStreamException;
042import javax.xml.stream.XMLStreamReader;
043import javax.xml.stream.events.XMLEvent;
044import java.io.ByteArrayOutputStream;
045import java.io.File;
046import java.io.FileOutputStream;
047import java.io.IOException;
048import java.io.InputStream;
049import java.io.UnsupportedEncodingException;
050import java.net.MalformedURLException;
051import java.net.URL;
052import java.util.ArrayList;
053import java.util.Arrays;
054import java.util.HashMap;
055import java.util.HashSet;
056import java.util.List;
057import java.util.Map;
058import java.util.Set;
059
060/**
 * A FedoraObjectProcessor implementation that uses the StAX API to process
 * a FOXML XML InputStream.
063 * @author mdurbin
064 */
065public class FoxmlInputStreamFedoraObjectProcessor implements FedoraObjectProcessor {
066
    /** Class-wide SLF4J logger. */
    private static final Logger LOG = LoggerFactory.getLogger(FoxmlInputStreamFedoraObjectProcessor.class);

    /** Namespace URI that FOXML elements are matched against while parsing. */
    private static final String FOXML_NS = "info:fedora/fedora-system:def/foxml#";

    /** Handed to URLCachedContent for datastreams whose contentLocation is not an INTERNAL_ID. */
    private URLFetcher fetcher;

    /** Substituted for the "local.fedora.server" placeholder found in contentLocation REF values. */
    private String localFedoraServer;

    /** Resolves contentLocation references whose TYPE is "INTERNAL_ID". */
    private InternalIDResolver idResolver;

    /** The FOXML input supplied to the constructor; closed by close(). */
    private InputStream stream;

    /** StAX cursor over the input; created in the constructor and closed by close(). */
    private XMLStreamReader reader;

    /** Temporary files created while decoding binaryContent; removed by cleanUpTempFiles(). */
    private List<File> tempFiles;

    /** Set in the constructor when the root element's VERSION attribute is absent or not "1.1". */
    boolean isFedora2 = false;

    /**
     * The basic object information read from the XML stream at construction
     * time by processing the root XML element and its attributes.
     */
    private ObjectInfo objectInfo;
090
091    /**
092     * foxml input stream fedora object processor.
093     * @param is the input stream
094     * @param fetcher the fetcher
095     * @param resolver the resolver
096     * @param localFedoraServer the host and port (formatted like "localhost:8080") of the fedora 3 server
097     *                          from which the content exposed by the "is" parameter comes.
098     * @throws XMLStreamException xml stream exception
099     */
100    public FoxmlInputStreamFedoraObjectProcessor(final InputStream is, final URLFetcher fetcher,
101                                                 final InternalIDResolver resolver, final String localFedoraServer)
102            throws XMLStreamException {
103        this.fetcher = fetcher;
104        this.idResolver = resolver;
105        this.localFedoraServer = localFedoraServer;
106        final XMLInputFactory factory = XMLInputFactory.newFactory();
107        stream = is;
108        reader = factory.createXMLStreamReader(is);
109        reader.nextTag();
110        final Map<String, String> attributes = getAttributes(reader, "PID", "VERSION", "FEDORA_URI", "schemaLocation");
111        if (attributes.get("VERSION") == null || !attributes.get("VERSION").equals("1.1")) {
112            isFedora2 = true;
113        }
114        objectInfo = new DefaultObjectInfo(attributes.get("PID"), attributes.get("FEDORA_URI"));
115        while (reader.next() == XMLStreamConstants.CHARACTERS) {
116        }
117
118        tempFiles = new ArrayList<File>();
119    }
120
121    @Override
122    public ObjectInfo getObjectInfo() {
123        return objectInfo;
124    }
125
126    @Override
127    public void processObject(final StreamingFedoraObjectHandler handler) {
128        handler.beginObject(objectInfo);
129        Foxml11DatastreamInfo dsInfo = null;
130        try {
131            handler.processObjectProperties(readProperties());
132            while (reader.hasNext()) {
133                if (reader.isCharacters()) {
134                    if (!reader.isWhiteSpace()) {
135                        throw new RuntimeException("Unexpected character data! \"" + reader.getText() + "\"");
136                    } else {
137                        // skip whitespace...
138                    }
139                } else if (reader.isStartElement()) {
140                    if (reader.getLocalName().equals("datastream")
141                            && reader.getNamespaceURI().equals(FOXML_NS)) {
142                        dsInfo = new Foxml11DatastreamInfo(objectInfo, reader);
143                    } else if (reader.getLocalName().equals("datastreamVersion")) {
144                        final DatastreamVersion v = new Foxml11DatastreamVersion(dsInfo, reader);
145                        handler.processDatastreamVersion(v);
146                    } else if (reader.getLocalName().equals("disseminator") && isFedora2) {
147                        readUntilClosed("disseminator", FOXML_NS);
148                        handler.processDisseminator();
149                    } else {
150                        throw new RuntimeException("Unexpected element! \"" + reader.getLocalName() + "\"!");
151                    }
152                } else if (reader.isEndElement() && (dsInfo != null && reader.getLocalName().equals("datastream"))) {
153                    dsInfo = null;
154                } else if (reader.isEndElement() && reader.getLocalName().equals("digitalObject")) {
155                    // end of document....
156                    handler.completeObject(objectInfo);
157                    cleanUpTempFiles();
158                } else {
159                    throw new RuntimeException("Unexpected xml structure! \"" + reader.getEventType() + "\" at line "
160                            + reader.getLocation().getLineNumber() + ", column "
161                            + reader.getLocation().getColumnNumber()
162                            + "!" + (reader.isCharacters() ? "  \"" + reader.getText() + "\"" : ""));
163                }
164                reader.next();
165            }
166
167        } catch (Exception e) {
168            handler.abortObject(objectInfo);
169            if (e instanceof RuntimeException) {
170                throw (RuntimeException) e;
171            }
172            throw new RuntimeException(e);
173        } finally {
174            cleanUpTempFiles();
175            close();
176        }
177    }
178
179    /**
180     * Close resources associated to the processor
181     */
182    public void close() {
183        try {
184            reader.close();
185        } catch (final XMLStreamException e) {
186            LOG.warn("Failed to close reader cleanly", e);
187        }
188        try {
189            stream.close();
190        } catch (IOException e) {
191            LOG.warn("Failed to close file cleanly", e);
192        }
193    }
194
195    private void cleanUpTempFiles() {
196        for (final File f : this.tempFiles) {
197            if (f.exists()) {
198                f.delete();
199            }
200        }
201    }
202
203    private ObjectProperties readProperties() throws JAXBException, XMLStreamException {
204        final JAXBContext jc = JAXBContext.newInstance(FoxmlObjectProperties.class);
205        final Unmarshaller unmarshaller = jc.createUnmarshaller();
206        final JAXBElement<FoxmlObjectProperties> p = unmarshaller.unmarshal(reader, FoxmlObjectProperties.class);
207        final FoxmlObjectProperties properties = p.getValue();
208        if (isFedora2) {
209            // Fedora 2 uses the rdf:type property with a literal value to differentiate between
210            // objects, behavior mechanism objects and behavior definition objects.  That literal
211            // cannot be retained as an rdf type in fedora4, nor can we use the generic mapping
212            // to map it, so we convert it to a dcterms:type right here.
213            for (FoxmlObjectProperty prop : properties.properties) {
214                if (prop.getName().equals("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) {
215                    prop.name = "http://purl.org/dc/terms/type";
216                }
217            }
218        }
219        return properties;
220    }
221
222    private void readUntilClosed(final String name, final String namespace) throws XMLStreamException {
223        while (reader.hasNext()) {
224            if (reader.isEndElement() && reader.getLocalName().equals(name)
225                    && reader.getNamespaceURI().equals(namespace)) {
226                return;
227            } else {
228                // skip all other stuff....
229            }
230            reader.next();
231        }
232    }
233
234    private class Foxml11DatastreamInfo implements DatastreamInfo {
235
236        private String id;
237
238        private String controlGroup;
239
240        private String fedoraUri;
241
242        private String state;
243
244        private boolean versionable;
245
246        private ObjectInfo objectInfo;
247
248        public Foxml11DatastreamInfo(final ObjectInfo objectInfo, final XMLStreamReader reader) {
249            this.objectInfo = objectInfo;
250            final Map<String, String> attributes
251            = getAttributes(reader, "ID", "CONTROL_GROUP", "FEDORA_URI", "STATE", "VERSIONABLE");
252            id = attributes.get("ID");
253            controlGroup = attributes.get("CONTROL_GROUP");
254            fedoraUri = attributes.get("FEDORA_URI");
255            state = attributes.get("STATE");
256            versionable = Boolean.valueOf(attributes.get("VERSIONABLE"));
257        }
258
259        @Override
260        public ObjectInfo getObjectInfo() {
261            return objectInfo;
262        }
263
264        @Override
265        public String getDatastreamId() {
266            return id;
267        }
268
269        @Override
270        public String getControlGroup() {
271            return controlGroup;
272        }
273
274        @Override
275        public String getFedoraURI() {
276            return fedoraUri;
277        }
278
279        @Override
280        public String getState() {
281            return state;
282        }
283
284        @Override
285        public boolean getVersionable() {
286            return versionable;
287        }
288    }
289
290    public class Foxml11DatastreamVersion implements DatastreamVersion {
291
292        private DatastreamInfo dsInfo;
293
294        private String id;
295        private String label;
296        private String created;
297        private String mimeType;
298        private String altIds;
299        private String formatUri;
300        private long size;
301        private ContentDigest contentDigest;
302        private CachedContent dsContent;
303
304        /**
305         * foxml datastream version.
306         * @param dsInfo the datastream information
307         * @param reader the reader
308         * @throws XMLStreamException xml stream exception
309         */
310        public Foxml11DatastreamVersion(final DatastreamInfo dsInfo,
311                final XMLStreamReader reader) throws XMLStreamException {
312            this.dsInfo = dsInfo;
313            final Map<String, String> dsAttributes = getAttributes(reader, "ID", "LABEL",
314                    "CREATED", "MIMETYPE", "ALT_IDS", "FORMAT_URI", "SIZE");
315            id = dsAttributes.get("ID");
316            label = dsAttributes.get("LABEL");
317            created = dsAttributes.get("CREATED");
318            mimeType = dsAttributes.get("MIMETYPE");
319            altIds = dsAttributes.get("ALT_IDS");
320            formatUri = dsAttributes.get("FORMAT_URI");
321            size = dsAttributes.containsKey("SIZE") ? Long.parseLong(dsAttributes.get("SIZE")) : -1;
322            reader.next();
323
324            while (reader.hasNext()) {
325                if (reader.isCharacters()) {
326                    if (!reader.isWhiteSpace()) {
327                        throw new RuntimeException("Unexpected character data! \"" + reader.getText() + "\"");
328                    } else {
329                        // skip whitespace...
330                    }
331                } else if (reader.isStartElement()) {
332                    final String localName = reader.getLocalName();
333                    if (localName.equals("contentDigest")) {
334                        final Map<String, String> attributes = getAttributes(reader, "TYPE", "DIGEST");
335                        this.contentDigest = new DefaultContentDigest(attributes.get("TYPE"), attributes.get("DIGEST"));
336                    } else if (localName.equals("xmlContent")) {
337                        // this XML fragment may not be valid out of context
338                        // context, so write it out as a complete XML
339                        // file...
340                        reader.next();
341                        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
342                        final XMLEventReader eventReader = XMLInputFactory.newFactory().createXMLEventReader(reader);
343                        final XMLEventWriter eventWriter = XMLOutputFactory.newFactory().createXMLEventWriter(baos);
344                        while (eventReader.hasNext()) {
345                            final XMLEvent event = eventReader.nextEvent();
346                            if (event.isEndElement()
347                                    && event.asEndElement().getName().getLocalPart().equals("xmlContent")
348                                    && event.asEndElement().getName().getNamespaceURI().equals(FOXML_NS)) {
349                                eventWriter.close();
350                                break;
351                            } else {
352                                eventWriter.add(event);
353                            }
354                        }
355                        try {
356                            dsContent = new MemoryCachedContent(new String(baos.toByteArray(), "UTF-8"));
357                        } catch (final UnsupportedEncodingException e) {
358                            throw new RuntimeException(e);
359                        }
360                    } else if (localName.equals("contentLocation")) {
361                        final Map<String, String> attributes = getAttributes(reader, "REF", "TYPE");
362                        if (attributes.get("TYPE").equals("INTERNAL_ID")) {
363                            dsContent = idResolver.resolveInternalID(attributes.get("REF"));
364                        } else {
365                            try {
366                                String ref = attributes.get("REF");
367                                if (ref.contains("local.fedora.server")) {
368                                    ref = ref.replace("local.fedora.server", localFedoraServer);
369                                }
370                                dsContent = new URLCachedContent(new URL(ref), fetcher);
371                            } catch (final MalformedURLException e) {
372                                throw new RuntimeException(e);
373                            }
374                        }
375                    } else if (localName.equals("binaryContent")) {
376                        try {
377                            final File f = File.createTempFile("decoded", "file");
378                            tempFiles.add(f);
379                            final Base64OutputStream out = new Base64OutputStream(new FileOutputStream(f), false);
380                            while (reader.next() == XMLStreamConstants.CHARACTERS) {
381                                out.write(reader.getText().getBytes("UTF-8"));
382                            }
383                            out.flush();
384                            out.close();
385                            dsContent = new FileCachedContent(f);
386                        } catch (final IOException e) {
387                            throw new RuntimeException(e);
388                        }
389                        readUntilClosed("binaryContent", FOXML_NS);
390                    } else {
391                        throw new RuntimeException("Unexpected element! \"" + reader.getLocalName() + "\"!");
392                    }
393                } else if (reader.isEndElement()) {
394                    if (reader.getLocalName().equals("datastreamVersion")) {
395                        return;
396                    }
397                } else {
398                    throw new RuntimeException("Unexpected xml structure! \"" + reader.getEventType() + "\" at line "
399                            + reader.getLocation().getLineNumber() + ", column "
400                            + reader.getLocation().getColumnNumber()
401                            + "!" + (reader.isCharacters() ? "  \"" + reader.getText() + "\"" : ""));
402                }
403                reader.next();
404            }
405
406        }
407
408        @Override
409        public DatastreamInfo getDatastreamInfo() {
410            return dsInfo;
411        }
412
413        @Override
414        public String getVersionId() {
415            return id;
416        }
417
418        @Override
419        public String getMimeType() {
420            return mimeType;
421        }
422
423        @Override
424        public String getLabel() {
425            return label;
426        }
427
428        @Override
429        public String getCreated() {
430            return created;
431        }
432
433        @Override
434        public String getAltIds() {
435            return altIds;
436        }
437
438        @Override
439        public String getFormatUri() {
440            return formatUri;
441        }
442
443        @Override
444        public long getSize() {
445            return size;
446        }
447
448        @Override
449        public ContentDigest getContentDigest() {
450            return contentDigest;
451        }
452
453        @Override
454        public InputStream getContent() throws IOException {
455            return dsContent.getInputStream();
456        }
457
458        @Override
459        public String getExternalOrRedirectURL() {
460            if (dsContent instanceof URLCachedContent) {
461                return ((URLCachedContent) dsContent).getURL().toString();
462            } else {
463                throw new IllegalStateException();
464            }
465        }
466
467        @Override
468        public boolean isFirstVersionIn(final ObjectReference obj) {
469            final List<DatastreamVersion> datastreams =
470                    obj.getDatastreamVersions(getDatastreamInfo().getDatastreamId());
471            return datastreams.indexOf(this) == 0;
472        }
473
474        @Override
475        public boolean isLastVersionIn(final ObjectReference obj) {
476            final List<DatastreamVersion> datastreams =
477                    obj.getDatastreamVersions(getDatastreamInfo().getDatastreamId());
478            return datastreams.indexOf(this) == datastreams.size() - 1;
479        }
480    }
481
482    private static Map<String, String> getAttributes(final XMLStreamReader r,
483            final String ... allowedNames) {
484        final HashMap<String, String> result = new HashMap<String, String>();
485        final Set<String> allowed = new HashSet<String>(Arrays.asList(allowedNames));
486        for (int i = 0; i < r.getAttributeCount(); i ++) {
487            final String localName = r.getAttributeLocalName(i);
488            final String value = r.getAttributeValue(i);
489            if (allowed.contains(localName)) {
490                result.put(localName, value);
491            } else {
492                System.err.println("Unexpected attribute: " + localName + " = \"" + value + "\"");
493            }
494        }
495        return result;
496
497    }
498
499}