001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import java.io.File;
021import java.io.FilenameFilter;
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.Collections;
025import java.util.Comparator;
026import java.util.EnumSet;
027import java.util.Iterator;
028import java.util.List;
029import java.util.TreeSet;
030
031import org.apache.commons.logging.Log;
032import org.apache.commons.logging.LogFactory;
033import org.apache.hadoop.conf.Configuration;
034import org.apache.hadoop.hdfs.DFSConfigKeys;
035import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
036import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
037import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
038import org.apache.hadoop.hdfs.util.MD5FileUtils;
039
040import com.google.common.base.Preconditions;
041import com.google.common.collect.ComparisonChain;
042import com.google.common.collect.Lists;
043import com.google.common.collect.Sets;
044
045/**
046 * The NNStorageRetentionManager is responsible for inspecting the storage
047 * directories of the NN and enforcing a retention policy on checkpoints
048 * and edit logs.
049 * 
050 * It delegates the actual removal of files to a StoragePurger
051 * implementation, which might delete the files or instead copy them to
052 * a filer or HDFS for later analysis.
053 */
054public class NNStorageRetentionManager {
055  
056  private final int numCheckpointsToRetain;
057  private final long numExtraEditsToRetain;
058  private final int maxExtraEditsSegmentsToRetain;
059  private static final Log LOG = LogFactory.getLog(
060      NNStorageRetentionManager.class);
061  private final NNStorage storage;
062  private final StoragePurger purger;
063  private final LogsPurgeable purgeableLogs;
064  
065  public NNStorageRetentionManager(
066      Configuration conf,
067      NNStorage storage,
068      LogsPurgeable purgeableLogs,
069      StoragePurger purger) {
070    this.numCheckpointsToRetain = conf.getInt(
071        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
072        DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT);
073    this.numExtraEditsToRetain = conf.getLong(
074        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY,
075        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT);
076    this.maxExtraEditsSegmentsToRetain = conf.getInt(
077        DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_KEY,
078        DFSConfigKeys.DFS_NAMENODE_MAX_EXTRA_EDITS_SEGMENTS_RETAINED_DEFAULT);
079    Preconditions.checkArgument(numCheckpointsToRetain > 0,
080        "Must retain at least one checkpoint");
081    Preconditions.checkArgument(numExtraEditsToRetain >= 0,
082        DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY +
083        " must not be negative");
084    
085    this.storage = storage;
086    this.purgeableLogs = purgeableLogs;
087    this.purger = purger;
088  }
089  
090  public NNStorageRetentionManager(Configuration conf, NNStorage storage,
091      LogsPurgeable purgeableLogs) {
092    this(conf, storage, purgeableLogs, new DeletionStoragePurger());
093  }
094
095  void purgeCheckpoints(NameNodeFile nnf) throws IOException {
096    purgeCheckpoinsAfter(nnf, -1);
097  }
098
099  void purgeCheckpoinsAfter(NameNodeFile nnf, long fromTxId)
100      throws IOException {
101    FSImageTransactionalStorageInspector inspector =
102        new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
103    storage.inspectStorageDirs(inspector);
104    for (FSImageFile image : inspector.getFoundImages()) {
105      if (image.getCheckpointTxId() > fromTxId) {
106        purger.purgeImage(image);
107      }
108    }
109  }
110
111  void purgeOldStorage(NameNodeFile nnf) throws IOException {
112    FSImageTransactionalStorageInspector inspector =
113        new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
114    storage.inspectStorageDirs(inspector);
115
116    long minImageTxId = getImageTxIdToRetain(inspector);
117    purgeCheckpointsOlderThan(inspector, minImageTxId);
118    
119    if (nnf == NameNodeFile.IMAGE_ROLLBACK) {
120      // do not purge edits for IMAGE_ROLLBACK.
121      return;
122    }
123
124    // If fsimage_N is the image we want to keep, then we need to keep
125    // all txns > N. We can remove anything < N+1, since fsimage_N
126    // reflects the state up to and including N. However, we also
127    // provide a "cushion" of older txns that we keep, which is
128    // handy for HA, where a remote node may not have as many
129    // new images.
130    //
131    // First, determine the target number of extra transactions to retain based
132    // on the configured amount.
133    long minimumRequiredTxId = minImageTxId + 1;
134    long purgeLogsFrom = Math.max(0, minimumRequiredTxId - numExtraEditsToRetain);
135    
136    ArrayList<EditLogInputStream> editLogs = new ArrayList<EditLogInputStream>();
137    purgeableLogs.selectInputStreams(editLogs, purgeLogsFrom, false);
138    Collections.sort(editLogs, new Comparator<EditLogInputStream>() {
139      @Override
140      public int compare(EditLogInputStream a, EditLogInputStream b) {
141        return ComparisonChain.start()
142            .compare(a.getFirstTxId(), b.getFirstTxId())
143            .compare(a.getLastTxId(), b.getLastTxId())
144            .result();
145      }
146    });
147
148    // Remove from consideration any edit logs that are in fact required.
149    while (editLogs.size() > 0 &&
150        editLogs.get(editLogs.size() - 1).getFirstTxId() >= minimumRequiredTxId) {
151      editLogs.remove(editLogs.size() - 1);
152    }
153    
154    // Next, adjust the number of transactions to retain if doing so would mean
155    // keeping too many segments around.
156    while (editLogs.size() > maxExtraEditsSegmentsToRetain) {
157      purgeLogsFrom = editLogs.get(0).getLastTxId() + 1;
158      editLogs.remove(0);
159    }
160    
161    // Finally, ensure that we're not trying to purge any transactions that we
162    // actually need.
163    if (purgeLogsFrom > minimumRequiredTxId) {
164      throw new AssertionError("Should not purge more edits than required to "
165          + "restore: " + purgeLogsFrom + " should be <= "
166          + minimumRequiredTxId);
167    }
168    
169    purgeableLogs.purgeLogsOlderThan(purgeLogsFrom);
170  }
171  
172  private void purgeCheckpointsOlderThan(
173      FSImageTransactionalStorageInspector inspector,
174      long minTxId) {
175    for (FSImageFile image : inspector.getFoundImages()) {
176      if (image.getCheckpointTxId() < minTxId) {
177        purger.purgeImage(image);
178      }
179    }
180  }
181
182  /**
183   * @param inspector inspector that has already inspected all storage dirs
184   * @return the transaction ID corresponding to the oldest checkpoint
185   * that should be retained. 
186   */
187  private long getImageTxIdToRetain(FSImageTransactionalStorageInspector inspector) {
188      
189    List<FSImageFile> images = inspector.getFoundImages();
190    TreeSet<Long> imageTxIds = Sets.newTreeSet();
191    for (FSImageFile image : images) {
192      imageTxIds.add(image.getCheckpointTxId());
193    }
194    
195    List<Long> imageTxIdsList = Lists.newArrayList(imageTxIds);
196    if (imageTxIdsList.isEmpty()) {
197      return 0;
198    }
199    
200    Collections.reverse(imageTxIdsList);
201    int toRetain = Math.min(numCheckpointsToRetain, imageTxIdsList.size());    
202    long minTxId = imageTxIdsList.get(toRetain - 1);
203    LOG.info("Going to retain " + toRetain + " images with txid >= " +
204        minTxId);
205    return minTxId;
206  }
207  
208  /**
209   * Interface responsible for disposing of old checkpoints and edit logs.
210   */
211  static interface StoragePurger {
212    void purgeLog(EditLogFile log);
213    void purgeImage(FSImageFile image);
214  }
215  
216  static class DeletionStoragePurger implements StoragePurger {
217    @Override
218    public void purgeLog(EditLogFile log) {
219      LOG.info("Purging old edit log " + log);
220      deleteOrWarn(log.getFile());
221    }
222
223    @Override
224    public void purgeImage(FSImageFile image) {
225      LOG.info("Purging old image " + image);
226      deleteOrWarn(image.getFile());
227      deleteOrWarn(MD5FileUtils.getDigestFileForFile(image.getFile()));
228    }
229
230    private static void deleteOrWarn(File file) {
231      if (!file.delete()) {
232        // It's OK if we fail to delete something -- we'll catch it
233        // next time we swing through this directory.
234        LOG.warn("Could not delete " + file);
235      }      
236    }
237  }
238
239  /**
240   * Delete old OIV fsimages. Since the target dir is not a full blown
241   * storage directory, we simply list and keep the latest ones. For the
242   * same reason, no storage inspector is used.
243   */
244  void purgeOldLegacyOIVImages(String dir, long txid) {
245    File oivImageDir = new File(dir);
246    final String oivImagePrefix = NameNodeFile.IMAGE_LEGACY_OIV.getName();
247    String filesInStorage[];
248
249    // Get the listing
250    filesInStorage = oivImageDir.list(new FilenameFilter() {
251      @Override
252      public boolean accept(File dir, String name) {
253        return name.matches(oivImagePrefix + "_(\\d+)");
254      }
255    });
256
257    // Check whether there is any work to do.
258    if (filesInStorage.length <= numCheckpointsToRetain) {
259      return;
260    }
261
262    // Create a sorted list of txids from the file names.
263    TreeSet<Long> sortedTxIds = new TreeSet<Long>();
264    for (String fName : filesInStorage) {
265      // Extract the transaction id from the file name.
266      long fTxId;
267      try {
268        fTxId = Long.parseLong(fName.substring(oivImagePrefix.length() + 1));
269      } catch (NumberFormatException nfe) {
270        // This should not happen since we have already filtered it.
271        // Log and continue.
272        LOG.warn("Invalid file name. Skipping " + fName);
273        continue;
274      }
275      sortedTxIds.add(Long.valueOf(fTxId));
276    }
277
278    int numFilesToDelete = sortedTxIds.size() - numCheckpointsToRetain;
279    Iterator<Long> iter = sortedTxIds.iterator();
280    while (numFilesToDelete > 0 && iter.hasNext()) {
281      long txIdVal = iter.next().longValue();
282      String fileName = NNStorage.getLegacyOIVImageFileName(txIdVal);
283      LOG.info("Deleting " + fileName);
284      File fileToDelete = new File(oivImageDir, fileName);
285      if (!fileToDelete.delete()) {
286        // deletion failed.
287        LOG.warn("Failed to delete image file: " + fileToDelete);
288      }
289      numFilesToDelete--;
290    }
291  }
292}