/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.util.MD5FileUtils;

import com.google.common.base.Preconditions;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * The NNStorageRetentionManager is responsible for inspecting the storage
 * directories of the NN and enforcing a retention policy on checkpoints
 * and edit logs.
 * 
 * It delegates the actual removal of files to a StoragePurger
 * implementation, which might delete the files or instead copy them to
 * a filer or HDFS for later analysis.
 */
public class NNStorageRetentionManager {
  
  /** Minimum number of fsimage checkpoints to keep (always &gt;= 1). */
  private final int numCheckpointsToRetain;
  /**
   * Extra "cushion" of transactions older than the oldest retained image
   * to keep around, which is handy for HA where a remote node may not
   * have as many new images.
   */
  private final long numExtraEditsToRetain;
  /** Cap on the number of extra edit log segments retained for the cushion. */
  private final int maxExtraEditsSegmentsToRetain;
  private static final Log LOG = LogFactory.getLog(
      NNStorageRetentionManager.class);
  private final NNStorage storage;
  private final StoragePurger purger;
  private final LogsPurgeable purgeableLogs;
  
  /**
   * @param conf configuration supplying the retention policy knobs
   * @param storage the NN storage whose directories are inspected
   * @param purgeableLogs source of edit log segments eligible for purging
   * @param purger strategy that actually disposes of old files
   */
  public NNStorageRetentionManager(
      Configuration conf,
      NNStorage storage,
      LogsPurgeable purgeableLogs,
      StoragePurger purger) {
    this.numCheckpointsToRetain = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT);
    this.numExtraEditsToRetain = conf.getLong(
        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY,
        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT);
    this.maxExtraEditsSegmentsToRetain = conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_KEY,
        DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_DEFAULT);
    Preconditions.checkArgument(numCheckpointsToRetain > 0,
        "Must retain at least one checkpoint");
    Preconditions.checkArgument(numExtraEditsToRetain >= 0,
        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY +
        " must not be negative");
    
    this.storage = storage;
    this.purgeableLogs = purgeableLogs;
    this.purger = purger;
  }
  
  /** Convenience constructor that deletes purged files from disk. */
  public NNStorageRetentionManager(Configuration conf, NNStorage storage,
      LogsPurgeable purgeableLogs) {
    this(conf, storage, purgeableLogs, new DeletionStoragePurger());
  }

  /**
   * Purge all checkpoint images of the given type, regardless of txid.
   *
   * @param nnf the type of image file to purge
   * @throws IOException if storage inspection fails
   */
  void purgeCheckpoints(NameNodeFile nnf) throws IOException {
    purgeCheckpointsAfter(nnf, -1);
  }

  /**
   * @deprecated misspelled name retained for compatibility with existing
   * callers; use {@link #purgeCheckpointsAfter(NameNodeFile, long)} instead.
   */
  @Deprecated
  void purgeCheckpoinsAfter(NameNodeFile nnf, long fromTxId)
      throws IOException {
    purgeCheckpointsAfter(nnf, fromTxId);
  }

  /**
   * Purge checkpoint images of the given type whose checkpoint txid is
   * strictly greater than {@code fromTxId}.
   *
   * @param nnf the type of image file to purge
   * @param fromTxId images with checkpoint txid above this value are purged;
   *                 pass -1 to purge all images of this type
   * @throws IOException if storage inspection fails
   */
  void purgeCheckpointsAfter(NameNodeFile nnf, long fromTxId)
      throws IOException {
    FSImageTransactionalStorageInspector inspector =
        new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
    storage.inspectStorageDirs(inspector);
    for (FSImageFile image : inspector.getFoundImages()) {
      if (image.getCheckpointTxId() > fromTxId) {
        purger.purgeImage(image);
      }
    }
  }

  /**
   * Apply the retention policy: purge old checkpoint images of the given
   * type, and (except for rollback images) purge edit log segments that are
   * no longer needed to restore from the oldest retained image.
   *
   * @param nnf the type of image file being retained
   * @throws IOException if storage inspection or log selection fails
   */
  void purgeOldStorage(NameNodeFile nnf) throws IOException {
    FSImageTransactionalStorageInspector inspector =
        new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
    storage.inspectStorageDirs(inspector);

    long minImageTxId = getImageTxIdToRetain(inspector);
    purgeCheckpointsOlderThan(inspector, minImageTxId);
    
    if (nnf == NameNodeFile.IMAGE_ROLLBACK) {
      // do not purge edits for IMAGE_ROLLBACK.
      return;
    }

    // If fsimage_N is the image we want to keep, then we need to keep
    // all txns > N. We can remove anything < N+1, since fsimage_N
    // reflects the state up to and including N. However, we also
    // provide a "cushion" of older txns that we keep, which is
    // handy for HA, where a remote node may not have as many
    // new images.
    //
    // First, determine the target number of extra transactions to retain based
    // on the configured amount.
    long minimumRequiredTxId = minImageTxId + 1;
    long purgeLogsFrom = Math.max(0, minimumRequiredTxId - numExtraEditsToRetain);
    
    List<EditLogInputStream> editLogs = new ArrayList<EditLogInputStream>();
    purgeableLogs.selectInputStreams(editLogs, purgeLogsFrom, false);
    Collections.sort(editLogs, new Comparator<EditLogInputStream>() {
      @Override
      public int compare(EditLogInputStream a, EditLogInputStream b) {
        return ComparisonChain.start()
            .compare(a.getFirstTxId(), b.getFirstTxId())
            .compare(a.getLastTxId(), b.getLastTxId())
            .result();
      }
    });

    // Remove from consideration any edit logs that are in fact required.
    while (editLogs.size() > 0 &&
        editLogs.get(editLogs.size() - 1).getFirstTxId() >= minimumRequiredTxId) {
      editLogs.remove(editLogs.size() - 1);
    }
    
    // Next, adjust the number of transactions to retain if doing so would mean
    // keeping too many segments around.
    while (editLogs.size() > maxExtraEditsSegmentsToRetain) {
      purgeLogsFrom = editLogs.get(0).getLastTxId() + 1;
      editLogs.remove(0);
    }
    
    // Finally, ensure that we're not trying to purge any transactions that we
    // actually need.
    if (purgeLogsFrom > minimumRequiredTxId) {
      throw new AssertionError("Should not purge more edits than required to "
          + "restore: " + purgeLogsFrom + " should be <= "
          + minimumRequiredTxId);
    }
    
    purgeableLogs.purgeLogsOlderThan(purgeLogsFrom);
  }
  
  /**
   * Purge all found images whose checkpoint txid is strictly below
   * {@code minTxId}.
   */
  private void purgeCheckpointsOlderThan(
      FSImageTransactionalStorageInspector inspector,
      long minTxId) {
    for (FSImageFile image : inspector.getFoundImages()) {
      if (image.getCheckpointTxId() < minTxId) {
        purger.purgeImage(image);
      }
    }
  }

  /**
   * @param inspector inspector that has already inspected all storage dirs
   * @return the transaction ID corresponding to the oldest checkpoint
   * that should be retained, or 0 if no images were found.
   */
  private long getImageTxIdToRetain(FSImageTransactionalStorageInspector inspector) {
      
    List<FSImageFile> images = inspector.getFoundImages();
    // A TreeSet deduplicates images with the same txid found in
    // multiple storage directories.
    TreeSet<Long> imageTxIds = Sets.newTreeSet();
    for (FSImageFile image : images) {
      imageTxIds.add(image.getCheckpointTxId());
    }
    
    List<Long> imageTxIdsList = Lists.newArrayList(imageTxIds);
    if (imageTxIdsList.isEmpty()) {
      return 0;
    }
    
    // Reverse to descending order, then keep the newest numCheckpointsToRetain.
    Collections.reverse(imageTxIdsList);
    int toRetain = Math.min(numCheckpointsToRetain, imageTxIdsList.size());
    long minTxId = imageTxIdsList.get(toRetain - 1);
    LOG.info("Going to retain " + toRetain + " images with txid >= " +
        minTxId);
    return minTxId;
  }
  
  /**
   * Interface responsible for disposing of old checkpoints and edit logs.
   */
  static interface StoragePurger {
    void purgeLog(EditLogFile log);
    void purgeImage(FSImageFile image);
  }
  
  /** Default purger implementation: deletes the files from local disk. */
  static class DeletionStoragePurger implements StoragePurger {
    @Override
    public void purgeLog(EditLogFile log) {
      LOG.info("Purging old edit log " + log);
      deleteOrWarn(log.getFile());
    }

    @Override
    public void purgeImage(FSImageFile image) {
      LOG.info("Purging old image " + image);
      deleteOrWarn(image.getFile());
      // Also remove the sidecar MD5 digest file for the image, if any.
      deleteOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
    }

    private static void deleteOrWarn(File file) {
      if (!file.delete()) {
        // It's OK if we fail to delete something -- we'll catch it
        // next time we swing through this directory.
        LOG.warn("Could not delete " + file);
      }
    }
  }

  /**
   * Delete old OIV fsimages. Since the target dir is not a full blown
   * storage directory, we simply list and keep the latest ones. For the
   * same reason, no storage inspector is used.
   *
   * @param dir directory containing legacy OIV image files
   * @param txid unused; kept for interface compatibility with callers
   */
  void purgeOldLegacyOIVImages(String dir, long txid) {
    File oivImageDir = new File(dir);
    final String oivImagePrefix = NameNodeFile.IMAGE_LEGACY_OIV.getName();
    String filesInStorage[];
    
    // Get the listing
    filesInStorage = oivImageDir.list(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        return name.matches(oivImagePrefix + "_(\\d+)");
      }
    });

    // File.list() returns null if the directory does not exist or an I/O
    // error occurs; bail out rather than NPE below.
    if (filesInStorage == null) {
      LOG.warn("Unable to list files in directory " + oivImageDir);
      return;
    }

    // Check whether there is any work to do.
    if (filesInStorage.length <= numCheckpointsToRetain) {
      return;
    }

    // Create a sorted list of txids from the file names.
    TreeSet<Long> sortedTxIds = new TreeSet<Long>();
    for (String fName : filesInStorage) {
      // Extract the transaction id from the file name.
      long fTxId;
      try {
        fTxId = Long.parseLong(fName.substring(oivImagePrefix.length() + 1));
      } catch (NumberFormatException nfe) {
        // This should not happen since we have already filtered it.
        // Log and continue.
        LOG.warn("Invalid file name. Skipping " + fName);
        continue;
      }
      sortedTxIds.add(Long.valueOf(fTxId));
    }

    // Delete the oldest files until only numCheckpointsToRetain remain.
    int numFilesToDelete = sortedTxIds.size() - numCheckpointsToRetain;
    Iterator<Long> iter = sortedTxIds.iterator();
    while (numFilesToDelete > 0 && iter.hasNext()) {
      long txIdVal = iter.next().longValue();
      String fileName = NNStorage.getLegacyOIVImageFileName(txIdVal);
      LOG.info("Deleting " + fileName);
      File fileToDelete = new File(oivImageDir, fileName);
      if (!fileToDelete.delete()) {
        // deletion failed.
        LOG.warn("Failed to delete image file: " + fileToDelete);
      }
      numFilesToDelete--;
    }
  }
}