001/*
002 * Copyright 2015 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.fcrepo.migration.idmappers;
017
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.fcrepo.migration.Fedora4Client;
import org.fcrepo.migration.MigrationIDMapper;
import org.slf4j.Logger;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

import static org.slf4j.LoggerFactory.getLogger;
041
042/**
043 * An IDMapper implementation that maps identifiers by simply creating new
044 * objects in the Fedora 4 instance and thus delegating to whatever pid minter
045 * is configured for the repository.  Unlike other ID mappers, just getting an
046 * ID through mapObjectPath creates an object in the repository!
047 *
048 * Meanwhile, it maintains a persistent copy of the mapping in a Lucene Index
049 * on the filesystem.
050 *
051 * @author Mike Durbin
052 */
053public class OpaqueIDMapper implements MigrationIDMapper {
054
055    private static final Logger ID_LOGGER = getLogger("id-mapper");
056    private static final Logger LOGGER = getLogger(OpaqueIDMapper.class);
057
058    private Fedora4Client f4Client;
059
060    /**
061     * A lucene SearcherManager over an index maintained by this class.
062     * For object created as part of this migration operation a document
063     * exists in this index that contains a "pid" field and a "path" field.
064     * The pid field is the original fedora 3 pid, the path field is the
065     * path within the destination repository for that pid.
066     */
067    private SearcherManager searcherManager;
068
069    /**
070     * A lucene IndexWriter for the index exposed by 'searcher'.
071     */
072    private IndexWriter writer;
073
074    /**
075     * A constructor.
076     * @param cachedIDIndexDir the directory (or null) where the index of generated pids should be maintained
077     * @param f4Client a Fedora 4 client to mediate interactions with the repository
078     * @throws IOException IO exception creating temp and index files/directories
079     */
080    public OpaqueIDMapper(final File cachedIDIndexDir, final Fedora4Client f4Client) throws IOException {
081        this.f4Client = f4Client;
082        final File indexDir;
083        if (cachedIDIndexDir == null) {
084            final File temp = File.createTempFile("tempfile", "basedir");
085            temp.delete();
086            temp.mkdir();
087            indexDir = new File(temp, "index");
088            LOGGER.info("No generated ID index directory specified.  Creating temporary index at \""
089                    + indexDir.getAbsolutePath() + "\".");
090            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
091                @Override
092                public void run() {
093                    try {
094                        searcherManager.close();
095                        writer.close();
096                        writer.getDirectory().close();
097                        LOGGER.info("Deleting generated ID index directory at \"" + indexDir.getAbsolutePath()
098                                + "\"...");
099                        FileUtils.deleteDirectory(indexDir);
100                    } catch (IOException e) {
101                        LOGGER.error("Unable to delete generated ID index directory at \"" + indexDir.getAbsolutePath()
102                                + "\"!", e);
103                        e.printStackTrace();
104                    }
105                }
106            }));
107        } else {
108            indexDir = cachedIDIndexDir;
109        }
110
111        final Directory dir = FSDirectory.open(indexDir.toPath());
112        if (indexDir.exists()) {
113            LOGGER.warn("Index exists at \"" + indexDir.getPath() + "\" and will be used.  "
114                    + "To clear index, simply delete this directory and re-run the application.");
115        }
116        final Analyzer analyzer = new StandardAnalyzer();
117        final IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
118        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
119        writer = new IndexWriter(dir, iwc);
120        writer.commit();
121
122        searcherManager = new SearcherManager(writer, false, false, null);
123    }
124
125    @Override
126    public String mapObjectPath(final String pid) {
127        final String cachedPath = getCachedObjectPath(pid);
128        if (cachedPath != null) {
129            return cachedPath;
130        }
131        final String path = f4Client.createPlaceholder(null);
132        this.cacheObjectPath(pid, path);
133        return path;
134
135    }
136
137    @Override
138    public String mapDatastreamPath(final String pid, final String dsid) {
139        final String cachedObjectPath = getCachedObjectPath(pid);
140        if (cachedObjectPath != null) {
141            return cachedObjectPath + "/" + dsid;
142        }
143        final String path = f4Client.createPlaceholder(null);
144        this.cacheObjectPath(pid, path);
145        return path + "/" + dsid;
146    }
147
148    @Override
149    public String getBaseURL() {
150        return f4Client.getRepositoryUrl();
151    }
152
153    private String getCachedObjectPath(final String pid) {
154        try {
155            final IndexSearcher s = searcherManager.acquire();
156            try {
157                final TopDocs result = s.search(new TermQuery(new Term("pid", pid)), 2);
158                LOGGER.trace("Found " + result.totalHits + " hit(s) for pid=" + pid);
159                if (result.totalHits == 1) {
160                    return s.doc(result.scoreDocs[0].doc).get("path");
161                } else if (result.totalHits < 1) {
162                    return null;
163                } else {
164                    throw new IllegalStateException(result.totalHits
165                            + " paths registered for the pid \"" + pid + "\".  ("
166                            + s.doc(result.scoreDocs[0].doc).get("path") + ", "
167                            + s.doc(result.scoreDocs[1].doc).get("path") + "...)");
168                }
169            } finally {
170                searcherManager.release(s);
171            }
172        } catch (IOException e) {
173            throw new RuntimeException(e);
174        }
175    }
176
177    private void cacheObjectPath(final String pid, final String path) {
178        ID_LOGGER.info(pid + " --> " + f4Client.getRepositoryUrl() + path);
179        try {
180            final Document doc = new Document();
181            doc.add(new StringField("path", path, Field.Store.YES));
182            doc.add(new StringField("pid", pid, Field.Store.YES));
183            LOGGER.trace("Added \"" + pid + "\" --> \"" + path + "\" to ID cache.");
184            writer.addDocument(doc);
185            writer.commit();
186            searcherManager.maybeRefreshBlocking();
187        } catch (IOException ex) {
188            throw new RuntimeException(ex);
189        }
190    }
191
192}