/*
 * Copyright 2015 DuraSpace, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fcrepo.migration.idmappers;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.fcrepo.migration.Fedora4Client;
import org.fcrepo.migration.MigrationIDMapper;
import org.slf4j.Logger;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

import static org.slf4j.LoggerFactory.getLogger;

/**
 * An IDMapper implementation that maps identifiers by simply creating new
 * objects in the Fedora 4 instance and thus delegating to whatever pid minter
 * is configured for the repository.  Unlike other ID mappers, just getting an
 * ID through mapObjectPath (or mapDatastreamPath) creates an object in the
 * repository!
 *
 * Meanwhile, it maintains a persistent copy of the mapping in a Lucene Index
 * on the filesystem.
 *
 * Note that the "look up, then create and record" sequence is not
 * synchronized by this class, so callers sharing one instance between threads
 * need to serialize calls for the same pid themselves.
 *
 * @author Mike Durbin
 */
public class OpaqueIDMapper implements MigrationIDMapper {

    private static final Logger ID_LOGGER = getLogger("id-mapper");
    private static final Logger LOGGER = getLogger(OpaqueIDMapper.class);

    /**
     * Name of the index field holding the original fedora 3 pid.
     */
    private static final String PID_FIELD = "pid";

    /**
     * Name of the index field holding the path within the destination repository.
     */
    private static final String PATH_FIELD = "path";

    private final Fedora4Client f4Client;

    /**
     * A lucene SearcherManager over an index maintained by this class.
     * For each object created as part of this migration operation a document
     * exists in this index that contains a "pid" field and a "path" field.
     * The pid field is the original fedora 3 pid, the path field is the
     * path within the destination repository for that pid.
     */
    private final SearcherManager searcherManager;

    /**
     * A lucene IndexWriter for the index exposed by 'searcherManager'.
     */
    private final IndexWriter writer;

    /**
     * A constructor.
     *
     * When no index directory is given, a temporary one is created and a JVM
     * shutdown hook is registered that closes the index and removes the
     * temporary directory again.
     *
     * @param cachedIDIndexDir the directory (or null) where the index of generated pids should be maintained
     * @param f4Client a Fedora 4 client to mediate interactions with the repository
     * @throws IOException IO exception creating temp and index files/directories
     */
    public OpaqueIDMapper(final File cachedIDIndexDir, final Fedora4Client f4Client) throws IOException {
        this.f4Client = f4Client;

        // tempBaseDir is non-null only when this instance owns a throw-away index.
        final File tempBaseDir;
        final File indexDir;
        if (cachedIDIndexDir == null) {
            // Created in one step, rather than createTempFile + delete + mkdir,
            // which leaves a window between the delete and the mkdir.
            tempBaseDir = Files.createTempDirectory("tempfile").toFile();
            indexDir = new File(tempBaseDir, "index");
            LOGGER.info("No generated ID index directory specified. Creating temporary index at \"{}\".",
                    indexDir.getAbsolutePath());
        } else {
            tempBaseDir = null;
            indexDir = cachedIDIndexDir;
        }

        // Check before opening: FSDirectory creates a missing directory when it
        // is opened, so a check made afterwards would always report an existing index.
        if (indexDir.exists()) {
            LOGGER.warn("Index exists at \"{}\" and will be used. "
                    + "To clear index, simply delete this directory and re-run the application.",
                    indexDir.getPath());
        }

        final Directory dir = FSDirectory.open(indexDir.toPath());
        final Analyzer analyzer = new StandardAnalyzer();
        final IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);
        // Commit immediately so that a brand-new (empty) index is searchable.
        writer.commit();

        searcherManager = new SearcherManager(writer, false, false, null);

        // Registered only once everything it touches has been initialized, so
        // the hook never runs against half-constructed state.
        if (tempBaseDir != null) {
            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
                @Override
                public void run() {
                    discardTemporaryIndex(dir, tempBaseDir, indexDir);
                }
            }));
        }
    }

    /**
     * Gets the repository path for an object, creating a placeholder in the
     * repository (and recording the mapping) if the pid has not been seen yet.
     *
     * @param pid the original fedora 3 pid
     * @return the path recorded for that pid
     */
    @Override
    public String mapObjectPath(final String pid) {
        return lookupOrCreateObjectPath(pid);
    }

    /**
     * Gets the repository path for a datastream: the path of its object
     * (created on demand, exactly as in mapObjectPath) followed by "/" and
     * the datastream id.
     *
     * @param pid the original fedora 3 pid of the containing object
     * @param dsid the datastream id
     * @return the object path, a slash, and the datastream id
     */
    @Override
    public String mapDatastreamPath(final String pid, final String dsid) {
        return lookupOrCreateObjectPath(pid) + "/" + dsid;
    }

    /**
     * @return the repository URL reported by the Fedora 4 client
     */
    @Override
    public String getBaseURL() {
        return f4Client.getRepositoryUrl();
    }

    /**
     * Returns the cached path for the pid or, when there is none, asks the
     * Fedora 4 client for a new placeholder and caches the resulting path.
     */
    private String lookupOrCreateObjectPath(final String pid) {
        final String cachedPath = getCachedObjectPath(pid);
        if (cachedPath != null) {
            return cachedPath;
        }
        // NOTE(review): null presumably lets the repository mint the path --
        // confirm against Fedora4Client.createPlaceholder.
        final String path = f4Client.createPlaceholder(null);
        cacheObjectPath(pid, path);
        return path;
    }

    /**
     * Looks up the path recorded in the index for the given pid.
     *
     * @param pid the original fedora 3 pid
     * @return the recorded path, or null if none is recorded
     * @throws IllegalStateException if more than one path is recorded for the pid
     * @throws RuntimeException wrapping any IOException raised by the index
     */
    private String getCachedObjectPath(final String pid) {
        try {
            final IndexSearcher searcher = searcherManager.acquire();
            try {
                // Two hits are enough to tell "unique" from "duplicated" and
                // to quote both offending paths in the error message.
                final TopDocs result = searcher.search(new TermQuery(new Term(PID_FIELD, pid)), 2);
                LOGGER.trace("Found {} hit(s) for pid={}", result.totalHits, pid);
                if (result.totalHits < 1) {
                    return null;
                }
                final String firstPath = searcher.doc(result.scoreDocs[0].doc).get(PATH_FIELD);
                if (result.totalHits == 1) {
                    return firstPath;
                }
                throw new IllegalStateException(result.totalHits
                        + " paths registered for the pid \"" + pid + "\". ("
                        + firstPath + ", "
                        + searcher.doc(result.scoreDocs[1].doc).get(PATH_FIELD) + "...)");
            } finally {
                searcherManager.release(searcher);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Records a pid-to-path mapping in the index, commits it, and refreshes
     * the searcher so that the next lookup sees it.
     *
     * @param pid the original fedora 3 pid
     * @param path the path within the destination repository
     * @throws RuntimeException wrapping any IOException raised by the index
     */
    private void cacheObjectPath(final String pid, final String path) {
        ID_LOGGER.info("{} --> {}{}", pid, f4Client.getRepositoryUrl(), path);
        try {
            final Document doc = new Document();
            doc.add(new StringField(PATH_FIELD, path, Field.Store.YES));
            doc.add(new StringField(PID_FIELD, pid, Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
            searcherManager.maybeRefreshBlocking();
            // Logged only after the mapping really is in the index.
            LOGGER.trace("Added \"{}\" --> \"{}\" to ID cache.", pid, path);
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Shutdown-hook body for a temporary index: closes the searcher manager,
     * the writer and the directory, then deletes the temporary base directory
     * (which contains the index directory).  Each step is attempted even if
     * an earlier one failed.
     */
    private void discardTemporaryIndex(final Directory dir, final File tempBaseDir, final File indexDir) {
        final String indexPath = indexDir.getAbsolutePath();
        closeForShutdown(searcherManager, indexPath);
        closeForShutdown(writer, indexPath);
        closeForShutdown(dir, indexPath);
        LOGGER.info("Deleting generated ID index directory at \"{}\"...", indexPath);
        try {
            FileUtils.deleteDirectory(tempBaseDir);
        } catch (IOException e) {
            LOGGER.error("Unable to delete generated ID index directory at \"{}\"!", indexPath, e);
        }
    }

    /**
     * Closes one resource during shutdown, logging (rather than propagating)
     * any failure so that the remaining cleanup still runs.
     */
    private static void closeForShutdown(final Closeable resource, final String indexPath) {
        try {
            resource.close();
        } catch (IOException | RuntimeException e) {
            LOGGER.error("Unable to close {} for generated ID index at \"{}\"!",
                    resource.getClass().getSimpleName(), indexPath, e);
        }
    }

}