001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.namenode;
019
020import java.io.Closeable;
021import java.io.File;
022import java.io.IOException;
023import java.io.RandomAccessFile;
024import java.net.URI;
025import java.net.UnknownHostException;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.EnumSet;
029import java.util.HashMap;
030import java.util.Iterator;
031import java.util.List;
032import java.util.Properties;
033import java.util.UUID;
034import java.util.concurrent.CopyOnWriteArrayList;
035
036import org.apache.hadoop.classification.InterfaceAudience;
037import org.apache.hadoop.conf.Configuration;
038import org.apache.hadoop.fs.FileUtil;
039import org.apache.hadoop.hdfs.DFSUtil;
040import org.apache.hadoop.hdfs.protocol.HdfsConstants;
041import org.apache.hadoop.hdfs.protocol.LayoutVersion;
042import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
043import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
044import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
045import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
046import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
047import org.apache.hadoop.hdfs.server.common.Storage;
048import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
049import org.apache.hadoop.hdfs.server.common.Util;
050import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
051import org.apache.hadoop.hdfs.util.PersistentLongFile;
052import org.apache.hadoop.io.IOUtils;
053import org.apache.hadoop.net.DNS;
054import org.apache.hadoop.util.Time;
055
056import com.google.common.annotations.VisibleForTesting;
057import com.google.common.base.Preconditions;
058import com.google.common.collect.Lists;
059
060/**
061 * NNStorage is responsible for management of the StorageDirectories used by
062 * the NameNode.
063 */
064@InterfaceAudience.Private
065public class NNStorage extends Storage implements Closeable,
066    StorageErrorReporter {
  /**
   * Key of the legacy image digest property; it is looked up in the
   * properties handed to setDeprecatedPropertiesForUpgrade().
   */
  static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
  /** URI scheme name "file" (presumably for local storage URIs -- confirm at call sites). */
  static final String LOCAL_URI_SCHEME = "file";
069
070  //
071  // The filenames used for storing the images
072  //
073  public enum NameNodeFile {
074    IMAGE     ("fsimage"),
075    TIME      ("fstime"), // from "old" pre-HDFS-1073 format
076    SEEN_TXID ("seen_txid"),
077    EDITS     ("edits"),
078    IMAGE_NEW ("fsimage.ckpt"),
079    IMAGE_ROLLBACK("fsimage_rollback"),
080    EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
081    EDITS_INPROGRESS ("edits_inprogress"),
082    EDITS_TMP ("edits_tmp"),
083    IMAGE_LEGACY_OIV ("fsimage_legacy_oiv");  // For pre-PB format
084
085    private String fileName = null;
086    private NameNodeFile(String name) { this.fileName = name; }
087    @VisibleForTesting
088    public String getName() { return fileName; }
089  }
090
091  /**
092   * Implementation of StorageDirType specific to namenode storage
093   * A Storage directory could be of type IMAGE which stores only fsimage,
094   * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which
095   * stores both fsimage and edits.
096   */
097  @VisibleForTesting
098  public static enum NameNodeDirType implements StorageDirType {
099    UNDEFINED,
100    IMAGE,
101    EDITS,
102    IMAGE_AND_EDITS;
103
104    @Override
105    public StorageDirType getStorageDirType() {
106      return this;
107    }
108
109    @Override
110    public boolean isOfType(StorageDirType type) {
111      if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS))
112        return true;
113      return this == type;
114    }
115  }
116
117  protected String blockpoolID = ""; // id of the block pool
118  
119  /**
120   * flag that controls if we try to restore failed storages
121   */
122  private boolean restoreFailedStorage = false;
123  private final Object restorationLock = new Object();
124  private boolean disablePreUpgradableLayoutCheck = false;
125
126
127  /**
128   * TxId of the last transaction that was included in the most
129   * recent fsimage file. This does not include any transactions
130   * that have since been written to the edit log.
131   */
132  protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
133  
134  /**
135   * Time of the last checkpoint, in milliseconds since the epoch.
136   */
137  private long mostRecentCheckpointTime = 0;
138
139  /**
140   * list of failed (and thus removed) storages
141   */
142  final protected List<StorageDirectory> removedStorageDirs
143    = new CopyOnWriteArrayList<StorageDirectory>();
144
145  /**
146   * Properties from old layout versions that may be needed
147   * during upgrade only.
148   */
149  private HashMap<String, String> deprecatedProperties;
150
151  /**
152   * Construct the NNStorage.
153   * @param conf Namenode configuration.
154   * @param imageDirs Directories the image can be stored in.
155   * @param editsDirs Directories the editlog can be stored in.
156   * @throws IOException if any directories are inaccessible.
157   */
158  public NNStorage(Configuration conf, 
159                   Collection<URI> imageDirs, Collection<URI> editsDirs) 
160      throws IOException {
161    super(NodeType.NAME_NODE);
162
163    storageDirs = new CopyOnWriteArrayList<StorageDirectory>();
164    
165    // this may modify the editsDirs, so copy before passing in
166    setStorageDirectories(imageDirs, 
167                          Lists.newArrayList(editsDirs),
168                          FSNamesystem.getSharedEditsDirs(conf));
169  }
170
171  @Override // Storage
172  public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
173    if (disablePreUpgradableLayoutCheck) {
174      return false;
175    }
176
177    File oldImageDir = new File(sd.getRoot(), "image");
178    if (!oldImageDir.exists()) {
179      return false;
180    }
181    // check the layout version inside the image file
182    File oldF = new File(oldImageDir, "fsimage");
183    RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
184    try {
185      oldFile.seek(0);
186      int oldVersion = oldFile.readInt();
187      oldFile.close();
188      oldFile = null;
189      if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
190        return false;
191    } finally {
192      IOUtils.cleanup(LOG, oldFile);
193    }
194    return true;
195  }
196
197  @Override // Closeable
198  public void close() throws IOException {
199    unlockAll();
200    storageDirs.clear();
201  }
202
203  /**
204   * Set flag whether an attempt should be made to restore failed storage
205   * directories at the next available oppurtuinity.
206   *
207   * @param val Whether restoration attempt should be made.
208   */
209  void setRestoreFailedStorage(boolean val) {
210    LOG.warn("set restore failed storage to " + val);
211    restoreFailedStorage=val;
212  }
213
214  /**
215   * @return Whether failed storage directories are to be restored.
216   */
217  boolean getRestoreFailedStorage() {
218    return restoreFailedStorage;
219  }
220
221  /**
222   * See if any of removed storages is "writable" again, and can be returned
223   * into service.
224   */
225  void attemptRestoreRemovedStorage() {
226    // if directory is "alive" - copy the images there...
227    if(!restoreFailedStorage || removedStorageDirs.size() == 0)
228      return; //nothing to restore
229
230    /* We don't want more than one thread trying to restore at a time */
231    synchronized (this.restorationLock) {
232      LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+
233               "storarge. removedStorages size = " + removedStorageDirs.size());
234      for(Iterator<StorageDirectory> it
235            = this.removedStorageDirs.iterator(); it.hasNext();) {
236        StorageDirectory sd = it.next();
237        File root = sd.getRoot();
238        LOG.info("currently disabled dir " + root.getAbsolutePath() +
239                 "; type="+sd.getStorageDirType() 
240                 + ";canwrite="+FileUtil.canWrite(root));
241        if(root.exists() && FileUtil.canWrite(root)) {
242          LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
243          this.addStorageDir(sd); // restore
244          this.removedStorageDirs.remove(sd);
245        }
246      }
247    }
248  }
249
250  /**
251   * @return A list of storage directories which are in the errored state.
252   */
253  List<StorageDirectory> getRemovedStorageDirs() {
254    return this.removedStorageDirs;
255  }
256  
257  /**
258   * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)}
259   */
260  @VisibleForTesting
261  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
262                                          Collection<URI> fsEditsDirs)
263      throws IOException {
264    setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>());
265  }
266
267  /**
268   * Set the storage directories which will be used. This should only ever be
269   * called from inside NNStorage. However, it needs to remain package private
270   * for testing, as StorageDirectories need to be reinitialised after using
271   * Mockito.spy() on this class, as Mockito doesn't work well with inner
272   * classes, such as StorageDirectory in this case.
273   *
274   * Synchronized due to initialization of storageDirs and removedStorageDirs.
275   *
276   * @param fsNameDirs Locations to store images.
277   * @param fsEditsDirs Locations to store edit logs.
278   * @throws IOException
279   */
280  @VisibleForTesting
281  synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
282                                          Collection<URI> fsEditsDirs,
283                                          Collection<URI> sharedEditsDirs)
284      throws IOException {
285    this.storageDirs.clear();
286    this.removedStorageDirs.clear();
287
288   // Add all name dirs with appropriate NameNodeDirType
289    for (URI dirName : fsNameDirs) {
290      checkSchemeConsistency(dirName);
291      boolean isAlsoEdits = false;
292      for (URI editsDirName : fsEditsDirs) {
293        if (editsDirName.compareTo(dirName) == 0) {
294          isAlsoEdits = true;
295          fsEditsDirs.remove(editsDirName);
296          break;
297        }
298      }
299      NameNodeDirType dirType = (isAlsoEdits) ?
300                          NameNodeDirType.IMAGE_AND_EDITS :
301                          NameNodeDirType.IMAGE;
302      // Add to the list of storage directories, only if the
303      // URI is of type file://
304      if(dirName.getScheme().compareTo("file") == 0) {
305        this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
306            dirType,
307            sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
308      }
309    }
310
311    // Add edits dirs if they are different from name dirs
312    for (URI dirName : fsEditsDirs) {
313      checkSchemeConsistency(dirName);
314      // Add to the list of storage directories, only if the
315      // URI is of type file://
316      if(dirName.getScheme().compareTo("file") == 0)
317        this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
318                    NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
319    }
320  }
321
322  /**
323   * Return the storage directory corresponding to the passed URI
324   * @param uri URI of a storage directory
325   * @return The matching storage directory or null if none found
326   */
327  StorageDirectory getStorageDirectory(URI uri) {
328    try {
329      uri = Util.fileAsURI(new File(uri));
330      Iterator<StorageDirectory> it = dirIterator();
331      for (; it.hasNext(); ) {
332        StorageDirectory sd = it.next();
333        if (Util.fileAsURI(sd.getRoot()).equals(uri)) {
334          return sd;
335        }
336      }
337    } catch (IOException ioe) {
338      LOG.warn("Error converting file to URI", ioe);
339    }
340    return null;
341  }
342
343  /**
344   * Checks the consistency of a URI, in particular if the scheme
345   * is specified 
346   * @param u URI whose consistency is being checked.
347   */
348  private static void checkSchemeConsistency(URI u) throws IOException {
349    String scheme = u.getScheme();
350    // the URI should have a proper scheme
351    if(scheme == null) {
352      throw new IOException("Undefined scheme for " + u);
353    }
354  }
355
356  /**
357   * Retrieve current directories of type IMAGE
358   * @return Collection of URI representing image directories
359   * @throws IOException in case of URI processing error
360   */
361  Collection<URI> getImageDirectories() throws IOException {
362    return getDirectories(NameNodeDirType.IMAGE);
363  }
364
365  /**
366   * Retrieve current directories of type EDITS
367   * @return Collection of URI representing edits directories
368   * @throws IOException in case of URI processing error
369   */
370  Collection<URI> getEditsDirectories() throws IOException {
371    return getDirectories(NameNodeDirType.EDITS);
372  }
373
374  /**
375   * Return number of storage directories of the given type.
376   * @param dirType directory type
377   * @return number of storage directories of type dirType
378   */
379  int getNumStorageDirs(NameNodeDirType dirType) {
380    if(dirType == null)
381      return getNumStorageDirs();
382    Iterator<StorageDirectory> it = dirIterator(dirType);
383    int numDirs = 0;
384    for(; it.hasNext(); it.next())
385      numDirs++;
386    return numDirs;
387  }
388
389  /**
390   * Return the list of locations being used for a specific purpose.
391   * i.e. Image or edit log storage.
392   *
393   * @param dirType Purpose of locations requested.
394   * @throws IOException
395   */
396  Collection<URI> getDirectories(NameNodeDirType dirType)
397      throws IOException {
398    ArrayList<URI> list = new ArrayList<URI>();
399    Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() :
400                                    dirIterator(dirType);
401    for ( ;it.hasNext(); ) {
402      StorageDirectory sd = it.next();
403      try {
404        list.add(Util.fileAsURI(sd.getRoot()));
405      } catch (IOException e) {
406        throw new IOException("Exception while processing " +
407            "StorageDirectory " + sd.getRoot(), e);
408      }
409    }
410    return list;
411  }
412  
413  /**
414   * Determine the last transaction ID noted in this storage directory.
415   * This txid is stored in a special seen_txid file since it might not
416   * correspond to the latest image or edit log. For example, an image-only
417   * directory will have this txid incremented when edits logs roll, even
418   * though the edits logs are in a different directory.
419   *
420   * @param sd StorageDirectory to check
421   * @return If file exists and can be read, last recorded txid. If not, 0L.
422   * @throws IOException On errors processing file pointed to by sd
423   */
424  static long readTransactionIdFile(StorageDirectory sd) throws IOException {
425    File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
426    return PersistentLongFile.readFile(txidFile, 0);
427  }
428  
429  /**
430   * Write last checkpoint time into a separate file.
431   * @param sd storage directory
432   * @throws IOException
433   */
434  void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
435    Preconditions.checkArgument(txid >= 0, "bad txid: " + txid);
436    
437    File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
438    PersistentLongFile.writeFile(txIdFile, txid);
439  }
440
441  /**
442   * Set the transaction ID and time of the last checkpoint
443   * 
444   * @param txid transaction id of the last checkpoint
445   * @param time time of the last checkpoint, in millis since the epoch
446   */
447  void setMostRecentCheckpointInfo(long txid, long time) {
448    this.mostRecentCheckpointTxId = txid;
449    this.mostRecentCheckpointTime = time;
450  }
451
452  /**
453   * @return the transaction ID of the last checkpoint.
454   */
455  public long getMostRecentCheckpointTxId() {
456    return mostRecentCheckpointTxId;
457  }
458  
459  /**
460   * @return the time of the most recent checkpoint in millis since the epoch.
461   */
462  long getMostRecentCheckpointTime() {
463    return mostRecentCheckpointTime;
464  }
465
466  /**
467   * Write a small file in all available storage directories that
468   * indicates that the namespace has reached some given transaction ID.
469   * 
470   * This is used when the image is loaded to avoid accidental rollbacks
471   * in the case where an edit log is fully deleted but there is no
472   * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
473   * @param txid the txid that has been reached
474   */
475  public void writeTransactionIdFileToStorage(long txid) {
476    writeTransactionIdFileToStorage(txid, null);
477  }
478
479  /**
480   * Write a small file in all available storage directories that
481   * indicates that the namespace has reached some given transaction ID.
482   *
483   * This is used when the image is loaded to avoid accidental rollbacks
484   * in the case where an edit log is fully deleted but there is no
485   * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
486   * @param txid the txid that has been reached
487   * @param type the type of directory
488   */
489  public void writeTransactionIdFileToStorage(long txid,
490      NameNodeDirType type) {
491    // Write txid marker in all storage directories
492    for (Iterator<StorageDirectory> it = dirIterator(type); it.hasNext();) {
493      StorageDirectory sd = it.next();
494      try {
495        writeTransactionIdFile(sd, txid);
496      } catch(IOException e) {
497        // Close any edits stream associated with this dir and remove directory
498        LOG.warn("writeTransactionIdToStorage failed on " + sd,
499            e);
500        reportErrorsOnDirectory(sd);
501      }
502    }
503  }
504
505  /**
506   * Return the name of the image file that is uploaded by periodic
507   * checkpointing
508   *
509   * @return List of filenames to save checkpoints to.
510   */
511  public File[] getFsImageNameCheckpoint(long txid) {
512    ArrayList<File> list = new ArrayList<File>();
513    for (Iterator<StorageDirectory> it =
514                 dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
515      list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
516    }
517    return list.toArray(new File[list.size()]);
518  }
519
520  /**
521   * @return The first image file with the given txid and image type.
522   */
523  public File getFsImageName(long txid, NameNodeFile nnf) {
524    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
525        it.hasNext();) {
526      StorageDirectory sd = it.next();
527      File fsImage = getStorageFile(sd, nnf, txid);
528      if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
529        return fsImage;
530      }
531    }
532    return null;
533  }
534
535  /**
536   * @return The first image file whose txid is the same with the given txid and
537   * image type is one of the given types.
538   */
539  public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
540    for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
541        it.hasNext();) {
542      StorageDirectory sd = it.next();
543      for (NameNodeFile nnf : nnfs) {
544        File fsImage = getStorageFile(sd, nnf, txid);
545        if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
546          return fsImage;
547        }
548      }
549    }
550    return null;
551  }
552
553  public File getFsImageName(long txid) {
554    return getFsImageName(txid, NameNodeFile.IMAGE);
555  }
556
557  public File getHighestFsImageName() {
558    return getFsImageName(getMostRecentCheckpointTxId());
559  }
560
561  /** Create new dfs name directory.  Caution: this destroys all files
562   * in this filesystem. */
563  private void format(StorageDirectory sd) throws IOException {
564    sd.clearDirectory(); // create currrent dir
565    writeProperties(sd);
566    writeTransactionIdFile(sd, 0);
567
568    LOG.info("Storage directory " + sd.getRoot()
569             + " has been successfully formatted.");
570  }
571
572  /**
573   * Format all available storage directories.
574   */
575  public void format(NamespaceInfo nsInfo) throws IOException {
576    Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 ||
577        nsInfo.getLayoutVersion() == HdfsConstants.NAMENODE_LAYOUT_VERSION,
578        "Bad layout version: %s", nsInfo.getLayoutVersion());
579    
580    this.setStorageInfo(nsInfo);
581    this.blockpoolID = nsInfo.getBlockPoolID();
582    for (Iterator<StorageDirectory> it =
583                           dirIterator(); it.hasNext();) {
584      StorageDirectory sd = it.next();
585      format(sd);
586    }
587  }
588  
589  public static NamespaceInfo newNamespaceInfo()
590      throws UnknownHostException {
591    return new NamespaceInfo(newNamespaceID(), newClusterID(),
592        newBlockPoolID(), 0L);
593  }
594  
595  public void format() throws IOException {
596    this.layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION;
597    for (Iterator<StorageDirectory> it =
598                           dirIterator(); it.hasNext();) {
599      StorageDirectory sd = it.next();
600      format(sd);
601    }
602  }
603
604  /**
605   * Generate new namespaceID.
606   *
607   * namespaceID is a persistent attribute of the namespace.
608   * It is generated when the namenode is formatted and remains the same
609   * during the life cycle of the namenode.
610   * When a datanodes register they receive it as the registrationID,
611   * which is checked every time the datanode is communicating with the
612   * namenode. Datanodes that do not 'know' the namespaceID are rejected.
613   *
614   * @return new namespaceID
615   */
616  private static int newNamespaceID() {
617    int newID = 0;
618    while(newID == 0)
619      newID = DFSUtil.getRandom().nextInt(0x7FFFFFFF);  // use 31 bits only
620    return newID;
621  }
622
623  @Override // Storage
624  protected void setFieldsFromProperties(
625      Properties props, StorageDirectory sd) throws IOException {
626    super.setFieldsFromProperties(props, sd);
627    if (layoutVersion == 0) {
628      throw new IOException("NameNode directory "
629                            + sd.getRoot() + " is not formatted.");
630    }
631
632    // Set Block pool ID in version with federation support
633    if (NameNodeLayoutVersion.supports(
634        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
635      String sbpid = props.getProperty("blockpoolID");
636      setBlockPoolID(sd.getRoot(), sbpid);
637    }
638    setDeprecatedPropertiesForUpgrade(props);
639  }
640
641  void readProperties(StorageDirectory sd, StartupOption startupOption)
642      throws IOException {
643    Properties props = readPropertiesFile(sd.getVersionFile());
644    if (HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK.matches
645        (startupOption)) {
646      int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion"));
647      if (lv > getServiceLayoutVersion()) {
648        // we should not use a newer version for rollingUpgrade rollback
649        throw new IncorrectVersionException(getServiceLayoutVersion(), lv,
650            "storage directory " + sd.getRoot().getAbsolutePath());
651      }
652      props.setProperty("layoutVersion",
653          Integer.toString(HdfsConstants.NAMENODE_LAYOUT_VERSION));
654    }
655    setFieldsFromProperties(props, sd);
656  }
657
658  /**
659   * Pull any properties out of the VERSION file that are from older
660   * versions of HDFS and only necessary during upgrade.
661   */
662  private void setDeprecatedPropertiesForUpgrade(Properties props) {
663    deprecatedProperties = new HashMap<String, String>();
664    String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY);
665    if (md5 != null) {
666      deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5);
667    }
668  }
669  
670  /**
671   * Return a property that was stored in an earlier version of HDFS.
672   * 
673   * This should only be used during upgrades.
674   */
675  String getDeprecatedProperty(String prop) {
676    assert getLayoutVersion() > HdfsConstants.NAMENODE_LAYOUT_VERSION :
677      "getDeprecatedProperty should only be done when loading " +
678      "storage from past versions during upgrade.";
679    return deprecatedProperties.get(prop);
680  }
681
682  /**
683   * Write version file into the storage directory.
684   *
685   * The version file should always be written last.
686   * Missing or corrupted version file indicates that
687   * the checkpoint is not valid.
688   *
689   * @param sd storage directory
690   * @throws IOException
691   */
692  @Override // Storage
693  protected void setPropertiesFromFields(Properties props,
694                           StorageDirectory sd
695                           ) throws IOException {
696    super.setPropertiesFromFields(props, sd);
697    // Set blockpoolID in version with federation support
698    if (NameNodeLayoutVersion.supports(
699        LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
700      props.setProperty("blockpoolID", blockpoolID);
701    }
702  }
703  
704  static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
705    return new File(sd.getCurrentDir(),
706                    String.format("%s_%019d", type.getName(), imageTxId));
707  }
708  
709  /**
710   * Get a storage file for one of the files that doesn't need a txid associated
711   * (e.g version, seen_txid)
712   */
713  static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
714    return new File(sd.getCurrentDir(), type.getName());
715  }
716
717  @VisibleForTesting
718  public static String getCheckpointImageFileName(long txid) {
719    return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid);
720  }
721
722  @VisibleForTesting
723  public static String getImageFileName(long txid) {
724    return getNameNodeFileName(NameNodeFile.IMAGE, txid);
725  }
726
727  @VisibleForTesting
728  public static String getRollbackImageFileName(long txid) {
729    return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
730  }
731
732  public static String getLegacyOIVImageFileName(long txid) {
733    return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid);
734  }
735
736  private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
737    return String.format("%s_%019d", nnf.getName(), txid);
738  }
739
740  @VisibleForTesting
741  public static String getInProgressEditsFileName(long startTxId) {
742    return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId);
743  }
744  
745  static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
746    return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
747  }
748  
749  static File getFinalizedEditsFile(StorageDirectory sd,
750      long startTxId, long endTxId) {
751    return new File(sd.getCurrentDir(),
752        getFinalizedEditsFileName(startTxId, endTxId));
753  }
754
755  static File getTemporaryEditsFile(StorageDirectory sd,
756      long startTxId, long endTxId, long timestamp) {
757    return new File(sd.getCurrentDir(),
758        getTemporaryEditsFileName(startTxId, endTxId, timestamp));
759  }
760
761  static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
762    return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid));
763  }
764
765  @VisibleForTesting
766  public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
767    return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
768                         startTxId, endTxId);
769  }
770
771  public static String getTemporaryEditsFileName(long startTxId, long endTxId,
772      long timestamp) {
773    return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
774                         startTxId, endTxId, timestamp);
775  }
776  
777  /**
778   * Return the first readable finalized edits file for the given txid.
779   */
780  File findFinalizedEditsFile(long startTxId, long endTxId)
781  throws IOException {
782    File ret = findFile(NameNodeDirType.EDITS,
783        getFinalizedEditsFileName(startTxId, endTxId));
784    if (ret == null) {
785      throw new IOException(
786          "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
787    }
788    return ret;
789  }
790    
791  /**
792   * Return the first readable image file for the given txid and image type, or
793   * null if no such image can be found
794   */
795  File findImageFile(NameNodeFile nnf, long txid) {
796    return findFile(NameNodeDirType.IMAGE,
797        getNameNodeFileName(nnf, txid));
798  }
799
800  /**
801   * Return the first readable storage file of the given name
802   * across any of the 'current' directories in SDs of the
803   * given type, or null if no such file exists.
804   */
805  private File findFile(NameNodeDirType dirType, String name) {
806    for (StorageDirectory sd : dirIterable(dirType)) {
807      File candidate = new File(sd.getCurrentDir(), name);
808      if (FileUtil.canRead(sd.getCurrentDir()) &&
809          candidate.exists()) {
810        return candidate;
811      }
812    }
813    return null;
814  }
815
816  /**
817   * Disable the check for pre-upgradable layouts. Needed for BackupImage.
818   * @param val Whether to disable the preupgradeable layout check.
819   */
820  void setDisablePreUpgradableLayoutCheck(boolean val) {
821    disablePreUpgradableLayoutCheck = val;
822  }
823
824  /**
825   * Marks a list of directories as having experienced an error.
826   *
827   * @param sds A list of storage directories to mark as errored.
828   */
829  void reportErrorsOnDirectories(List<StorageDirectory> sds) {
830    for (StorageDirectory sd : sds) {
831      reportErrorsOnDirectory(sd);
832    }
833  }
834
835  /**
836   * Reports that a directory has experienced an error.
837   * Notifies listeners that the directory is no longer
838   * available.
839   *
840   * @param sd A storage directory to mark as errored.
841   */
842  private void reportErrorsOnDirectory(StorageDirectory sd) {
843    LOG.error("Error reported on storage directory " + sd);
844
845    String lsd = listStorageDirectories();
846    LOG.debug("current list of storage dirs:" + lsd);
847
848    LOG.warn("About to remove corresponding storage: "
849             + sd.getRoot().getAbsolutePath());
850    try {
851      sd.unlock();
852    } catch (Exception e) {
853      LOG.warn("Unable to unlock bad storage directory: "
854               +  sd.getRoot().getPath(), e);
855    }
856
857    if (this.storageDirs.remove(sd)) {
858      this.removedStorageDirs.add(sd);
859    }
860    
861    lsd = listStorageDirectories();
862    LOG.debug("at the end current list of storage dirs:" + lsd);
863  }
864  
865  /** 
866   * Processes the startup options for the clusterid and blockpoolid 
867   * for the upgrade. 
868   * @param startOpt Startup options 
869   * @param layoutVersion Layout version for the upgrade 
870   * @throws IOException
871   */
872  void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion)
873      throws IOException {
874    if (startOpt == StartupOption.UPGRADE || startOpt == StartupOption.UPGRADEONLY) {
875      // If upgrade from a release that does not support federation,
876      // if clusterId is provided in the startupOptions use it.
877      // Else generate a new cluster ID      
878      if (!NameNodeLayoutVersion.supports(
879          LayoutVersion.Feature.FEDERATION, layoutVersion)) {
880        if (startOpt.getClusterId() == null) {
881          startOpt.setClusterId(newClusterID());
882        }
883        setClusterID(startOpt.getClusterId());
884        setBlockPoolID(newBlockPoolID());
885      } else {
886        // Upgrade from one version of federation to another supported
887        // version of federation doesn't require clusterID.
888        // Warn the user if the current clusterid didn't match with the input
889        // clusterid.
890        if (startOpt.getClusterId() != null
891            && !startOpt.getClusterId().equals(getClusterID())) {
892          LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID()
893              + ", Ignoring given clusterid: " + startOpt.getClusterId());
894        }
895      }
896      LOG.info("Using clusterid: " + getClusterID());
897    }
898  }
899  
900  /**
901   * Report that an IOE has occurred on some file which may
902   * or may not be within one of the NN image storage directories.
903   */
904  @Override
905  public void reportErrorOnFile(File f) {
906    // We use getAbsolutePath here instead of getCanonicalPath since we know
907    // that there is some IO problem on that drive.
908    // getCanonicalPath may need to call stat() or readlink() and it's likely
909    // those calls would fail due to the same underlying IO problem.
910    String absPath = f.getAbsolutePath();
911    for (StorageDirectory sd : storageDirs) {
912      String dirPath = sd.getRoot().getAbsolutePath();
913      if (!dirPath.endsWith(File.separator)) {
914        dirPath += File.separator;
915      }
916      if (absPath.startsWith(dirPath)) {
917        reportErrorsOnDirectory(sd);
918        return;
919      }
920    }
921    
922  }
923  
924  /**
925   * Generate new clusterID.
926   * 
927   * clusterID is a persistent attribute of the cluster.
928   * It is generated when the cluster is created and remains the same
929   * during the life cycle of the cluster.  When a new name node is formated, if 
930   * this is a new cluster, a new clusterID is geneated and stored.  Subsequent 
931   * name node must be given the same ClusterID during its format to be in the 
932   * same cluster.
933   * When a datanode register it receive the clusterID and stick with it.
934   * If at any point, name node or data node tries to join another cluster, it 
935   * will be rejected.
936   * 
937   * @return new clusterID
938   */ 
939  public static String newClusterID() {
940    return "CID-" + UUID.randomUUID().toString();
941  }
942
943  void setClusterID(String cid) {
944    clusterID = cid;
945  }
946
947  /**
948   * try to find current cluster id in the VERSION files
949   * returns first cluster id found in any VERSION file
950   * null in case none found
951   * @return clusterId or null in case no cluster id found
952   */
953  public String determineClusterId() {
954    String cid = null;
955    Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE);
956    while(sdit.hasNext()) {
957      StorageDirectory sd = sdit.next();
958      try {
959        Properties props = readPropertiesFile(sd.getVersionFile());
960        cid = props.getProperty("clusterID");
961        LOG.info("current cluster id for sd="+sd.getCurrentDir() + 
962            ";lv=" + layoutVersion + ";cid=" + cid);
963        
964        if(cid != null && !cid.equals(""))
965          return cid;
966      } catch (Exception e) {
967        LOG.warn("this sd not available: " + e.getLocalizedMessage());
968      } //ignore
969    }
970    LOG.warn("couldn't find any VERSION file containing valid ClusterId");
971    return null;
972  }
973
974  /**
975   * Generate new blockpoolID.
976   * 
977   * @return new blockpoolID
978   */ 
979  static String newBlockPoolID() throws UnknownHostException{
980    String ip = "unknownIP";
981    try {
982      ip = DNS.getDefaultIP("default");
983    } catch (UnknownHostException e) {
984      LOG.warn("Could not find ip address of \"default\" inteface.");
985      throw e;
986    }
987    
988    int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE);
989    String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now();
990    return bpid;
991  }
992
993  /** Validate and set block pool ID */
994  public void setBlockPoolID(String bpid) {
995    blockpoolID = bpid;
996  }
997
998  /** Validate and set block pool ID */
999  private void setBlockPoolID(File storage, String bpid)
1000      throws InconsistentFSStateException {
1001    if (bpid == null || bpid.equals("")) {
1002      throw new InconsistentFSStateException(storage, "file "
1003          + Storage.STORAGE_FILE_VERSION + " has no block pool Id.");
1004    }
1005    
1006    if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) {
1007      throw new InconsistentFSStateException(storage,
1008          "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID);
1009    }
1010    setBlockPoolID(bpid);
1011  }
1012  
1013  public String getBlockPoolID() {
1014    return blockpoolID;
1015  }
1016
1017  /**
1018   * Iterate over all current storage directories, inspecting them
1019   * with the given inspector.
1020   */
1021  void inspectStorageDirs(FSImageStorageInspector inspector)
1022      throws IOException {
1023
1024    // Process each of the storage directories to find the pair of
1025    // newest image file and edit file
1026    for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
1027      StorageDirectory sd = it.next();
1028      inspector.inspectDirectory(sd);
1029    }
1030  }
1031
1032  /**
1033   * Iterate over all of the storage dirs, reading their contents to determine
1034   * their layout versions. Returns an FSImageStorageInspector which has
1035   * inspected each directory.
1036   * 
1037   * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
1038   * @throws IOException if no valid storage dirs are found or no valid layout version
1039   */
1040  FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes,
1041      StartupOption startupOption) throws IOException {
1042    Integer layoutVersion = null;
1043    boolean multipleLV = false;
1044    StringBuilder layoutVersions = new StringBuilder();
1045
1046    // First determine what range of layout versions we're going to inspect
1047    for (Iterator<StorageDirectory> it = dirIterator(false);
1048         it.hasNext();) {
1049      StorageDirectory sd = it.next();
1050      if (!sd.getVersionFile().exists()) {
1051        FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
1052        continue;
1053      }
1054      readProperties(sd, startupOption); // sets layoutVersion
1055      int lv = getLayoutVersion();
1056      if (layoutVersion == null) {
1057        layoutVersion = Integer.valueOf(lv);
1058      } else if (!layoutVersion.equals(lv)) {
1059        multipleLV = true;
1060      }
1061      layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") ");
1062    }
1063    
1064    if (layoutVersion == null) {
1065      throw new IOException("No storage directories contained VERSION information");
1066    }
1067    if (multipleLV) {            
1068      throw new IOException(
1069          "Storage directories contain multiple layout versions: "
1070              + layoutVersions);
1071    }
1072    // If the storage directories are with the new layout version
1073    // (ie edits_<txnid>) then use the new inspector, which will ignore
1074    // the old format dirs.
1075    FSImageStorageInspector inspector;
1076    if (NameNodeLayoutVersion.supports(
1077        LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
1078      inspector = new FSImageTransactionalStorageInspector(fileTypes);
1079    } else {
1080      inspector = new FSImagePreTransactionalStorageInspector();
1081    }
1082    
1083    inspectStorageDirs(inspector);
1084    return inspector;
1085  }
1086
1087  public NamespaceInfo getNamespaceInfo() {
1088    return new NamespaceInfo(
1089        getNamespaceID(),
1090        getClusterID(),
1091        getBlockPoolID(),
1092        getCTime());
1093  }
1094}