/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree.
 *
 */
package org.fcrepo.migration.foxml;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

import static org.slf4j.LoggerFactory.getLogger;

/**
 * An InternalIDResolver implementation that generates an index of
 * datastream ids (filenames) to file paths for the contents of a
 * datastream directory. The directory is expected to contain just
 * other directories and/or FOXML files. The FOXML files are expected
 * to have a filename that is reversibly mapped from a fedora internal
 * id for that datastream version.
 * <p>
 * Instances hold an open lucene reader and directory; call {@link #close()}
 * when the resolver is no longer needed.
 *
 * @author mdurbin
 */
public abstract class DirectoryScanningIDResolver implements InternalIDResolver {

    private static final Logger LOGGER = getLogger(DirectoryScanningIDResolver.class);

    /** Name of the stored index field holding the internal id. */
    private static final String ID_FIELD = "id";

    /** Name of the stored index field holding the path of the file for that id. */
    private static final String PATH_FIELD = "path";

    /**
     * A lucene IndexSearcher over an index maintained by this class.
     * For every file found in the datastream directory a document exists
     * in this index that contains an "id" field and a "path" field. The
     * id field is the internal id, the path field is the full path to the
     * file containing that datastream content.
     */
    private final IndexSearcher searcher;
    private final IndexReader reader;
    private final FSDirectory fsDirectory;

    /**
     * directory scanning ID resolver
     * <p>
     * Note: when a new index has to be built, {@link #getInternalIdForFile(File)} is invoked
     * from within this constructor, i.e. before any subclass constructor body has run.
     * Implementations of that method must therefore not depend on subclass instance state.
     *
     * @param cachedIndexDir the index directory.  If it exists, the old cache will be used, if it doesn't a new
     *                       cache will be built at that location.  If it is null, a new cache will be built in
     *                       the temp file space that will be deleted upon application shutdown.
     * @param dsRoot the datastream root
     * @throws IOException IO exception creating temp and index files/directories
     */
    public DirectoryScanningIDResolver(final File cachedIndexDir, final File dsRoot) throws IOException {
        final File indexDir;
        if (cachedIndexDir == null) {
            // Files.createTempDirectory creates the directory atomically, unlike the
            // createTempFile/delete/mkdir sequence which is racy and ignores failures.
            final File tempBase = Files.createTempDirectory("tempfile").toFile();
            indexDir = new File(tempBase, "index");
            LOGGER.info("No index directory specified.  Creating temporary index at \"{}\".",
                    indexDir.getAbsolutePath());
            // An anonymous class (not a lambda) is required here: the hook reads the blank final
            // field "reader", which is not assigned yet at this point in the constructor, and the
            // compiler's definite-assignment check rejects such a read inside a lambda body.
            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
                @Override
                public void run() {
                    try {
                        // reader is still null if construction failed before the index was opened
                        if (reader != null) {
                            reader.close();
                        }
                        LOGGER.info("Deleting index directory at \"{}\"...", indexDir.getAbsolutePath());
                        // Remove the temporary base directory (which contains the index) so that
                        // no empty directory is left behind in the temp file space.
                        FileUtils.deleteDirectory(tempBase);
                    } catch (final IOException e) {
                        LOGGER.error("Unable to delete index directory at \"" + indexDir.getAbsolutePath() + "\"!", e);
                    }
                }
            }));
        } else {
            indexDir = cachedIndexDir;
        }

        if (containsFiles(indexDir)) {
            LOGGER.warn("Index exists at \"{}\" and will be used. "
                    + "To clear index, simply delete this directory and re-run the application.",
                    indexDir.getPath());
        } else {
            buildIndex(indexDir, dsRoot);
        }

        final FSDirectory directory = FSDirectory.open(indexDir.toPath());
        final IndexReader indexReader;
        try {
            indexReader = DirectoryReader.open(directory);
        } catch (final IOException | RuntimeException e) {
            // Do not leak the directory handle when the index cannot be opened.
            try {
                directory.close();
            } catch (final IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        fsDirectory = directory;
        reader = indexReader;
        searcher = new IndexSearcher(indexReader);
    }

    /**
     * Closes the index reader and the underlying index directory. The directory is closed
     * even if closing the reader fails.
     *
     * @throws IOException if the reader or the directory cannot be closed
     */
    public void close() throws IOException {
        try {
            reader.close();
        } finally {
            fsDirectory.close();
        }
    }

    /**
     * Looks up the file indexed under the given internal id.
     *
     * @param id the internal id to resolve
     * @return the content of the single file indexed under that id
     * @throws IllegalStateException if more than one file is indexed under the id
     * @throws RuntimeException if no file is indexed under the id, or if reading the index fails
     */
    @Override
    public CachedContent resolveInternalID(final String id) {
        try {
            // Two hits are enough to tell "unique" from "ambiguous" and to report two examples.
            final TopDocs result = searcher.search(new TermQuery(new Term(ID_FIELD, id)), 2);
            if (result.totalHits == 1) {
                return new FileCachedContent(new File(storedPath(result.scoreDocs[0].doc)));
            } else if (result.totalHits < 1) {
                throw new RuntimeException("Unable to resolve internal ID \"" + id + "\"!");
            } else {
                throw new IllegalStateException(result.totalHits + " files matched the internal id \"" + id + "\". ("
                        + storedPath(result.scoreDocs[0].doc) + ", "
                        + storedPath(result.scoreDocs[1].doc) + "...)");
            }
        } catch (final IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the stored "path" value of an index document.
     *
     * @param docId the lucene document number
     * @return the stored path for that document
     * @throws IOException if the document cannot be read from the index
     */
    private String storedPath(final int docId) throws IOException {
        return searcher.doc(docId).get(PATH_FIELD);
    }

    /**
     * Tells whether the given location is a directory that already has entries in it.
     * {@link File#list()} returns null for anything that is not a listable directory,
     * which is treated here as "nothing there" instead of failing with a NullPointerException.
     *
     * @param dir the location to inspect
     * @return true if dir is a listable directory containing at least one entry
     */
    private static boolean containsFiles(final File dir) {
        final String[] entries = dir.list();
        return entries != null && entries.length > 0;
    }

    /**
     * Creates a fresh index at indexDir covering every file found below dsRoot.
     *
     * @param indexDir the location at which the index is written
     * @param dsRoot the datastream root to scan
     * @throws IOException if the index cannot be written or the datastream directory cannot be read
     */
    private void buildIndex(final File indexDir, final File dsRoot) throws IOException {
        final Analyzer analyzer = new StandardAnalyzer();
        final IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        try (final Directory dir = FSDirectory.open(indexDir.toPath());
             final IndexWriter writer = new IndexWriter(dir, iwc)) {

            LOGGER.info("Building an index of all the datastreams in \"{}\"...", dsRoot.getPath());
            indexDatastreams(writer, dsRoot);

            writer.commit();
        }
    }

    /**
     * Recursively adds one index document (id and path) for every non-directory file at or below f.
     *
     * @param writer the index writer receiving the documents
     * @param f a file to index, or a directory to descend into
     * @throws IOException if a directory cannot be listed or a document cannot be written
     */
    private void indexDatastreams(final IndexWriter writer, final File f) throws IOException {
        if (f.isDirectory()) {
            final File[] children = f.listFiles();
            if (children == null) {
                // listFiles() returns null on an I/O error; fail clearly rather than with an NPE
                throw new IOException("Unable to list the contents of directory \"" + f.getPath() + "\"!");
            }
            for (final File child : children) {
                indexDatastreams(writer, child);
            }
        } else {
            final String internalId = getInternalIdForFile(f);
            final Document doc = new Document();
            doc.add(new StringField(PATH_FIELD, f.getPath(), Field.Store.YES));
            doc.add(new StringField(ID_FIELD, internalId, Field.Store.YES));
            LOGGER.trace("Added \"{}\" for: {}", internalId, f.getPath());
            writer.addDocument(doc);
        }
    }

    /**
     * Determines the internal id for the given file.
     * <p>
     * This method may be called while the superclass constructor is still running.
     *
     * @param f file to check for
     * @return string containing internal id for the file
     */
    protected abstract String getInternalIdForFile(File f);

}