001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.common;
019
020import java.io.File;
021import java.io.FileOutputStream;
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.io.RandomAccessFile;
025import java.lang.management.ManagementFactory;
026import java.nio.channels.FileLock;
027import java.nio.channels.OverlappingFileLockException;
028import java.util.ArrayList;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Properties;
032
033import org.apache.commons.logging.Log;
034import org.apache.commons.logging.LogFactory;
035import org.apache.hadoop.classification.InterfaceAudience;
036import org.apache.hadoop.fs.FileUtil;
037import org.apache.hadoop.fs.Path;
038import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
039import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
040import org.apache.hadoop.io.nativeio.NativeIO;
041import org.apache.hadoop.io.nativeio.NativeIOException;
042import org.apache.hadoop.util.ToolRunner;
043import org.apache.hadoop.util.VersionInfo;
044
045import com.google.common.base.Charsets;
046import com.google.common.base.Preconditions;
047
048
049
050/**
051 * Storage information file.
052 * <p>
053 * Local storage information is stored in a separate file VERSION.
054 * It contains type of the node, 
055 * the storage layout version, the namespace id, and 
056 * the fs state creation time.
057 * <p>
058 * Local storage can reside in multiple directories. 
059 * Each directory should contain the same VERSION file as the others.
060 * During startup Hadoop servers (name-node and data-nodes) read their local 
061 * storage information from them.
062 * <p>
 * The servers hold a lock on each storage directory while they run, so that
 * no other node can start up sharing the same storage.
 * The locks are released when the servers stop (normally or abnormally).
066 * 
067 */
068@InterfaceAudience.Private
069public abstract class Storage extends StorageInfo {
070  public static final Log LOG = LogFactory.getLog(Storage.class.getName());
071
072  // last layout version that did not support upgrades
073  public static final int LAST_PRE_UPGRADE_LAYOUT_VERSION = -3;
074  
075  // this corresponds to Hadoop-0.18
076  public static final int LAST_UPGRADABLE_LAYOUT_VERSION = -16;
077  protected static final String LAST_UPGRADABLE_HADOOP_VERSION = "Hadoop-0.18";
078  
079  /** Layout versions of 0.20.203 release */
080  public static final int[] LAYOUT_VERSIONS_203 = {-19, -31};
081
082  public    static final String STORAGE_FILE_LOCK     = "in_use.lock";
083  public    static final String STORAGE_DIR_CURRENT   = "current";
084  public    static final String STORAGE_DIR_PREVIOUS  = "previous";
085  public    static final String STORAGE_TMP_REMOVED   = "removed.tmp";
086  public    static final String STORAGE_TMP_PREVIOUS  = "previous.tmp";
087  public    static final String STORAGE_TMP_FINALIZED = "finalized.tmp";
088  public    static final String STORAGE_TMP_LAST_CKPT = "lastcheckpoint.tmp";
089  public    static final String STORAGE_PREVIOUS_CKPT = "previous.checkpoint";
090  
091  /**
092   * The blocksBeingWritten directory which was used in some 1.x and earlier
093   * releases.
094   */
095  public static final String STORAGE_1_BBW = "blocksBeingWritten";
096  
097  public enum StorageState {
098    NON_EXISTENT,
099    NOT_FORMATTED,
100    COMPLETE_UPGRADE,
101    RECOVER_UPGRADE,
102    COMPLETE_FINALIZE,
103    COMPLETE_ROLLBACK,
104    RECOVER_ROLLBACK,
105    COMPLETE_CHECKPOINT,
106    RECOVER_CHECKPOINT,
107    NORMAL;
108  }
109  
110  /**
111   * An interface to denote storage directory type
112   * Implementations can define a type for storage directory by implementing
113   * this interface.
114   */
115  @InterfaceAudience.Private
116  public interface StorageDirType {
117    public StorageDirType getStorageDirType();
118    public boolean isOfType(StorageDirType type);
119  }
120  
121  protected List<StorageDirectory> storageDirs = new ArrayList<StorageDirectory>();
122  
123  private class DirIterator implements Iterator<StorageDirectory> {
124    final StorageDirType dirType;
125    final boolean includeShared;
126    int prevIndex; // for remove()
127    int nextIndex; // for next()
128    
129    DirIterator(StorageDirType dirType, boolean includeShared) {
130      this.dirType = dirType;
131      this.nextIndex = 0;
132      this.prevIndex = 0;
133      this.includeShared = includeShared;
134    }
135    
136    @Override
137    public boolean hasNext() {
138      if (storageDirs.isEmpty() || nextIndex >= storageDirs.size())
139        return false;
140      if (dirType != null || !includeShared) {
141        while (nextIndex < storageDirs.size()) {
142          if (shouldReturnNextDir())
143            break;
144          nextIndex++;
145        }
146        if (nextIndex >= storageDirs.size())
147         return false;
148      }
149      return true;
150    }
151    
152    @Override
153    public StorageDirectory next() {
154      StorageDirectory sd = getStorageDir(nextIndex);
155      prevIndex = nextIndex;
156      nextIndex++;
157      if (dirType != null || !includeShared) {
158        while (nextIndex < storageDirs.size()) {
159          if (shouldReturnNextDir())
160            break;
161          nextIndex++;
162        }
163      }
164      return sd;
165    }
166    
167    @Override
168    public void remove() {
169      nextIndex = prevIndex; // restore previous state
170      storageDirs.remove(prevIndex); // remove last returned element
171      hasNext(); // reset nextIndex to correct place
172    }
173    
174    private boolean shouldReturnNextDir() {
175      StorageDirectory sd = getStorageDir(nextIndex);
176      return (dirType == null || sd.getStorageDirType().isOfType(dirType)) &&
177          (includeShared || !sd.isShared());
178    }
179  }
180  
181  /**
182   * @return A list of the given File in every available storage directory,
183   * regardless of whether it might exist.
184   */
185  public List<File> getFiles(StorageDirType dirType, String fileName) {
186    ArrayList<File> list = new ArrayList<File>();
187    Iterator<StorageDirectory> it =
188      (dirType == null) ? dirIterator() : dirIterator(dirType);
189    for ( ;it.hasNext(); ) {
190      list.add(new File(it.next().getCurrentDir(), fileName));
191    }
192    return list;
193  }
194
195
196  /**
197   * Return default iterator
198   * This iterator returns all entries in storageDirs
199   */
200  public Iterator<StorageDirectory> dirIterator() {
201    return dirIterator(null);
202  }
203  
204  /**
205   * Return iterator based on Storage Directory Type
206   * This iterator selects entries in storageDirs of type dirType and returns
207   * them via the Iterator
208   */
209  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) {
210    return dirIterator(dirType, true);
211  }
212  
213  /**
214   * Return all entries in storageDirs, potentially excluding shared dirs.
215   * @param includeShared whether or not to include shared dirs.
216   * @return an iterator over the configured storage dirs.
217   */
218  public Iterator<StorageDirectory> dirIterator(boolean includeShared) {
219    return dirIterator(null, includeShared);
220  }
221  
222  /**
223   * @param dirType all entries will be of this type of dir
224   * @param includeShared true to include any shared directories,
225   *        false otherwise
226   * @return an iterator over the configured storage dirs.
227   */
228  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType,
229      boolean includeShared) {
230    return new DirIterator(dirType, includeShared);
231  }
232  
233  public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) {
234    return new Iterable<StorageDirectory>() {
235      @Override
236      public Iterator<StorageDirectory> iterator() {
237        return dirIterator(dirType);
238      }
239    };
240  }
241  
242  
243  /**
244   * generate storage list (debug line)
245   */
246  public String listStorageDirectories() {
247    StringBuilder buf = new StringBuilder();
248    for (StorageDirectory sd : storageDirs) {
249      buf.append(sd.getRoot() + "(" + sd.getStorageDirType() + ");");
250    }
251    return buf.toString();
252  }
253  
254  /**
255   * One of the storage directories.
256   */
257  @InterfaceAudience.Private
258  public static class StorageDirectory implements FormatConfirmable {
259    final File root;              // root directory
260    // whether or not this dir is shared between two separate NNs for HA, or
261    // between multiple block pools in the case of federation.
262    final boolean isShared;
263    final StorageDirType dirType; // storage dir type
264    FileLock lock;                // storage lock
265
266    private String storageUuid = null;      // Storage directory identifier.
267    
268    public StorageDirectory(File dir) {
269      // default dirType is null
270      this(dir, null, false);
271    }
272    
273    public StorageDirectory(File dir, StorageDirType dirType) {
274      this(dir, dirType, false);
275    }
276    
277    public void setStorageUuid(String storageUuid) {
278      this.storageUuid = storageUuid;
279    }
280
281    public String getStorageUuid() {
282      return storageUuid;
283    }
284
285    /**
286     * Constructor
287     * @param dir directory corresponding to the storage
288     * @param dirType storage directory type
289     * @param isShared whether or not this dir is shared between two NNs. true
290     *          disables locking on the storage directory, false enables locking
291     */
292    public StorageDirectory(File dir, StorageDirType dirType, boolean isShared) {
293      this.root = dir;
294      this.lock = null;
295      this.dirType = dirType;
296      this.isShared = isShared;
297    }
298    
299    /**
300     * Get root directory of this storage
301     */
302    public File getRoot() {
303      return root;
304    }
305
306    /**
307     * Get storage directory type
308     */
309    public StorageDirType getStorageDirType() {
310      return dirType;
311    }    
312
313    public void read(File from, Storage storage) throws IOException {
314      Properties props = readPropertiesFile(from);
315      storage.setFieldsFromProperties(props, this);
316    }
317
318    /**
319     * Clear and re-create storage directory.
320     * <p>
321     * Removes contents of the current directory and creates an empty directory.
322     * 
323     * This does not fully format storage directory. 
324     * It cannot write the version file since it should be written last after  
325     * all other storage type dependent files are written.
326     * Derived storage is responsible for setting specific storage values and
327     * writing the version file to disk.
328     * 
329     * @throws IOException
330     */
331    public void clearDirectory() throws IOException {
332      File curDir = this.getCurrentDir();
333      if (curDir.exists())
334        if (!(FileUtil.fullyDelete(curDir)))
335          throw new IOException("Cannot remove current directory: " + curDir);
336      if (!curDir.mkdirs())
337        throw new IOException("Cannot create directory " + curDir);
338    }
339
340    /**
341     * Directory {@code current} contains latest files defining
342     * the file system meta-data.
343     * 
344     * @return the directory path
345     */
346    public File getCurrentDir() {
347      return new File(root, STORAGE_DIR_CURRENT);
348    }
349
350    /**
351     * File {@code VERSION} contains the following fields:
352     * <ol>
353     * <li>node type</li>
354     * <li>layout version</li>
355     * <li>namespaceID</li>
356     * <li>fs state creation time</li>
357     * <li>other fields specific for this node type</li>
358     * </ol>
359     * The version file is always written last during storage directory updates.
360     * The existence of the version file indicates that all other files have
361     * been successfully written in the storage directory, the storage is valid
362     * and does not need to be recovered.
363     * 
364     * @return the version file path
365     */
366    public File getVersionFile() {
367      return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION);
368    }
369
370    /**
371     * File {@code VERSION} from the {@code previous} directory.
372     * 
373     * @return the previous version file path
374     */
375    public File getPreviousVersionFile() {
376      return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION);
377    }
378
379    /**
380     * Directory {@code previous} contains the previous file system state,
381     * which the system can be rolled back to.
382     * 
383     * @return the directory path
384     */
385    public File getPreviousDir() {
386      return new File(root, STORAGE_DIR_PREVIOUS);
387    }
388
389    /**
390     * {@code previous.tmp} is a transient directory, which holds
391     * current file system state while the new state is saved into the new
392     * {@code current} during upgrade.
393     * If the saving succeeds {@code previous.tmp} will be moved to
394     * {@code previous}, otherwise it will be renamed back to 
395     * {@code current} by the recovery procedure during startup.
396     * 
397     * @return the directory path
398     */
399    public File getPreviousTmp() {
400      return new File(root, STORAGE_TMP_PREVIOUS);
401    }
402
403    /**
404     * {@code removed.tmp} is a transient directory, which holds
405     * current file system state while the previous state is moved into
406     * {@code current} during rollback.
407     * If the moving succeeds {@code removed.tmp} will be removed,
408     * otherwise it will be renamed back to 
409     * {@code current} by the recovery procedure during startup.
410     * 
411     * @return the directory path
412     */
413    public File getRemovedTmp() {
414      return new File(root, STORAGE_TMP_REMOVED);
415    }
416
417    /**
418     * {@code finalized.tmp} is a transient directory, which holds
419     * the {@code previous} file system state while it is being removed
420     * in response to the finalize request.
421     * Finalize operation will remove {@code finalized.tmp} when completed,
422     * otherwise the removal will resume upon the system startup.
423     * 
424     * @return the directory path
425     */
426    public File getFinalizedTmp() {
427      return new File(root, STORAGE_TMP_FINALIZED);
428    }
429
430    /**
431     * {@code lastcheckpoint.tmp} is a transient directory, which holds
432     * current file system state while the new state is saved into the new
433     * {@code current} during regular namespace updates.
434     * If the saving succeeds {@code lastcheckpoint.tmp} will be moved to
435     * {@code previous.checkpoint}, otherwise it will be renamed back to 
436     * {@code current} by the recovery procedure during startup.
437     * 
438     * @return the directory path
439     */
440    public File getLastCheckpointTmp() {
441      return new File(root, STORAGE_TMP_LAST_CKPT);
442    }
443
444    /**
445     * {@code previous.checkpoint} is a directory, which holds the previous
446     * (before the last save) state of the storage directory.
447     * The directory is created as a reference only, it does not play role
448     * in state recovery procedures, and is recycled automatically, 
449     * but it may be useful for manual recovery of a stale state of the system.
450     * 
451     * @return the directory path
452     */
453    public File getPreviousCheckpoint() {
454      return new File(root, STORAGE_PREVIOUS_CKPT);
455    }
456
457    /**
458     * Check consistency of the storage directory
459     * 
460     * @param startOpt a startup option.
461     *  
462     * @return state {@link StorageState} of the storage directory 
463     * @throws InconsistentFSStateException if directory state is not 
464     * consistent and cannot be recovered.
465     * @throws IOException
466     */
467    public StorageState analyzeStorage(StartupOption startOpt, Storage storage)
468        throws IOException {
469      assert root != null : "root is null";
470      boolean hadMkdirs = false;
471      String rootPath = root.getCanonicalPath();
472      try { // check that storage exists
473        if (!root.exists()) {
474          // storage directory does not exist
475          if (startOpt != StartupOption.FORMAT &&
476              startOpt != StartupOption.HOTSWAP) {
477            LOG.warn("Storage directory " + rootPath + " does not exist");
478            return StorageState.NON_EXISTENT;
479          }
480          LOG.info(rootPath + " does not exist. Creating ...");
481          if (!root.mkdirs())
482            throw new IOException("Cannot create directory " + rootPath);
483          hadMkdirs = true;
484        }
485        // or is inaccessible
486        if (!root.isDirectory()) {
487          LOG.warn(rootPath + "is not a directory");
488          return StorageState.NON_EXISTENT;
489        }
490        if (!FileUtil.canWrite(root)) {
491          LOG.warn("Cannot access storage directory " + rootPath);
492          return StorageState.NON_EXISTENT;
493        }
494      } catch(SecurityException ex) {
495        LOG.warn("Cannot access storage directory " + rootPath, ex);
496        return StorageState.NON_EXISTENT;
497      }
498
499      this.lock(); // lock storage if it exists
500
501      // If startOpt is HOTSWAP, it returns NOT_FORMATTED for empty directory,
502      // while it also checks the layout version.
503      if (startOpt == HdfsServerConstants.StartupOption.FORMAT ||
504          (startOpt == StartupOption.HOTSWAP && hadMkdirs))
505        return StorageState.NOT_FORMATTED;
506
507      if (startOpt != HdfsServerConstants.StartupOption.IMPORT) {
508        storage.checkOldLayoutStorage(this);
509      }
510
511      // check whether current directory is valid
512      File versionFile = getVersionFile();
513      boolean hasCurrent = versionFile.exists();
514
515      // check which directories exist
516      boolean hasPrevious = getPreviousDir().exists();
517      boolean hasPreviousTmp = getPreviousTmp().exists();
518      boolean hasRemovedTmp = getRemovedTmp().exists();
519      boolean hasFinalizedTmp = getFinalizedTmp().exists();
520      boolean hasCheckpointTmp = getLastCheckpointTmp().exists();
521
522      if (!(hasPreviousTmp || hasRemovedTmp
523          || hasFinalizedTmp || hasCheckpointTmp)) {
524        // no temp dirs - no recovery
525        if (hasCurrent)
526          return StorageState.NORMAL;
527        if (hasPrevious)
528          throw new InconsistentFSStateException(root,
529                              "version file in current directory is missing.");
530        return StorageState.NOT_FORMATTED;
531      }
532
533      if ((hasPreviousTmp?1:0) + (hasRemovedTmp?1:0)
534          + (hasFinalizedTmp?1:0) + (hasCheckpointTmp?1:0) > 1)
535        // more than one temp dirs
536        throw new InconsistentFSStateException(root,
537                                               "too many temporary directories.");
538
539      // # of temp dirs == 1 should either recover or complete a transition
540      if (hasCheckpointTmp) {
541        return hasCurrent ? StorageState.COMPLETE_CHECKPOINT
542                          : StorageState.RECOVER_CHECKPOINT;
543      }
544
545      if (hasFinalizedTmp) {
546        if (hasPrevious)
547          throw new InconsistentFSStateException(root,
548                                                 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_FINALIZED
549                                                 + "cannot exist together.");
550        return StorageState.COMPLETE_FINALIZE;
551      }
552
553      if (hasPreviousTmp) {
554        if (hasPrevious)
555          throw new InconsistentFSStateException(root,
556                                                 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_PREVIOUS
557                                                 + " cannot exist together.");
558        if (hasCurrent)
559          return StorageState.COMPLETE_UPGRADE;
560        return StorageState.RECOVER_UPGRADE;
561      }
562      
563      assert hasRemovedTmp : "hasRemovedTmp must be true";
564      if (!(hasCurrent ^ hasPrevious))
565        throw new InconsistentFSStateException(root,
566                                               "one and only one directory " + STORAGE_DIR_CURRENT 
567                                               + " or " + STORAGE_DIR_PREVIOUS 
568                                               + " must be present when " + STORAGE_TMP_REMOVED
569                                               + " exists.");
570      if (hasCurrent)
571        return StorageState.COMPLETE_ROLLBACK;
572      return StorageState.RECOVER_ROLLBACK;
573    }
574
575    /**
576     * Complete or recover storage state from previously failed transition.
577     * 
578     * @param curState specifies what/how the state should be recovered
579     * @throws IOException
580     */
581    public void doRecover(StorageState curState) throws IOException {
582      File curDir = getCurrentDir();
583      String rootPath = root.getCanonicalPath();
584      switch(curState) {
585      case COMPLETE_UPGRADE:  // mv previous.tmp -> previous
586        LOG.info("Completing previous upgrade for storage directory " 
587                 + rootPath);
588        rename(getPreviousTmp(), getPreviousDir());
589        return;
590      case RECOVER_UPGRADE:   // mv previous.tmp -> current
591        LOG.info("Recovering storage directory " + rootPath
592                 + " from previous upgrade");
593        if (curDir.exists())
594          deleteDir(curDir);
595        rename(getPreviousTmp(), curDir);
596        return;
597      case COMPLETE_ROLLBACK: // rm removed.tmp
598        LOG.info("Completing previous rollback for storage directory "
599                 + rootPath);
600        deleteDir(getRemovedTmp());
601        return;
602      case RECOVER_ROLLBACK:  // mv removed.tmp -> current
603        LOG.info("Recovering storage directory " + rootPath
604                 + " from previous rollback");
605        rename(getRemovedTmp(), curDir);
606        return;
607      case COMPLETE_FINALIZE: // rm finalized.tmp
608        LOG.info("Completing previous finalize for storage directory "
609                 + rootPath);
610        deleteDir(getFinalizedTmp());
611        return;
612      case COMPLETE_CHECKPOINT: // mv lastcheckpoint.tmp -> previous.checkpoint
613        LOG.info("Completing previous checkpoint for storage directory " 
614                 + rootPath);
615        File prevCkptDir = getPreviousCheckpoint();
616        if (prevCkptDir.exists())
617          deleteDir(prevCkptDir);
618        rename(getLastCheckpointTmp(), prevCkptDir);
619        return;
620      case RECOVER_CHECKPOINT:  // mv lastcheckpoint.tmp -> current
621        LOG.info("Recovering storage directory " + rootPath
622                 + " from failed checkpoint");
623        if (curDir.exists())
624          deleteDir(curDir);
625        rename(getLastCheckpointTmp(), curDir);
626        return;
627      default:
628        throw new IOException("Unexpected FS state: " + curState
629            + " for storage directory: " + rootPath);
630      }
631    }
632    
633    /**
634     * @return true if the storage directory should prompt the user prior
635     * to formatting (i.e if the directory appears to contain some data)
636     * @throws IOException if the SD cannot be accessed due to an IO error
637     */
638    @Override
639    public boolean hasSomeData() throws IOException {
640      // Its alright for a dir not to exist, or to exist (properly accessible)
641      // and be completely empty.
642      if (!root.exists()) return false;
643      
644      if (!root.isDirectory()) {
645        // a file where you expect a directory should not cause silent
646        // formatting
647        return true;
648      }
649      
650      if (FileUtil.listFiles(root).length == 0) {
651        // Empty dir can format without prompt.
652        return false;
653      }
654      
655      return true;
656    }
657    
658    public boolean isShared() {
659      return isShared;
660    }
661
662
663    /**
664     * Lock storage to provide exclusive access.
665     * 
666     * <p> Locking is not supported by all file systems.
667     * E.g., NFS does not consistently support exclusive locks.
668     * 
669     * <p> If locking is supported we guarantee exclusive access to the
670     * storage directory. Otherwise, no guarantee is given.
671     * 
672     * @throws IOException if locking fails
673     */
674    public void lock() throws IOException {
675      if (isShared()) {
676        LOG.info("Locking is disabled for " + this.root);
677        return;
678      }
679      FileLock newLock = tryLock();
680      if (newLock == null) {
681        String msg = "Cannot lock storage " + this.root 
682          + ". The directory is already locked";
683        LOG.info(msg);
684        throw new IOException(msg);
685      }
686      // Don't overwrite lock until success - this way if we accidentally
687      // call lock twice, the internal state won't be cleared by the second
688      // (failed) lock attempt
689      lock = newLock;
690    }
691
692    /**
693     * Attempts to acquire an exclusive lock on the storage.
694     * 
695     * @return A lock object representing the newly-acquired lock or
696     * <code>null</code> if storage is already locked.
697     * @throws IOException if locking fails.
698     */
699    @SuppressWarnings("resource")
700    FileLock tryLock() throws IOException {
701      boolean deletionHookAdded = false;
702      File lockF = new File(root, STORAGE_FILE_LOCK);
703      if (!lockF.exists()) {
704        lockF.deleteOnExit();
705        deletionHookAdded = true;
706      }
707      RandomAccessFile file = new RandomAccessFile(lockF, "rws");
708      String jvmName = ManagementFactory.getRuntimeMXBean().getName();
709      FileLock res = null;
710      try {
711        res = file.getChannel().tryLock();
712        if (null == res) {
713          throw new OverlappingFileLockException();
714        }
715        file.write(jvmName.getBytes(Charsets.UTF_8));
716        LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName);
717      } catch(OverlappingFileLockException oe) {
718        // Cannot read from the locked file on Windows.
719        String lockingJvmName = Path.WINDOWS ? "" : (" " + file.readLine());
720        LOG.error("It appears that another node " + lockingJvmName
721            + " has already locked the storage directory: " + root, oe);
722        file.close();
723        return null;
724      } catch(IOException e) {
725        LOG.error("Failed to acquire lock on " + lockF
726            + ". If this storage directory is mounted via NFS, " 
727            + "ensure that the appropriate nfs lock services are running.", e);
728        file.close();
729        throw e;
730      }
731      if (!deletionHookAdded) {
732        // If the file existed prior to our startup, we didn't
733        // call deleteOnExit above. But since we successfully locked
734        // the dir, we can take care of cleaning it up.
735        lockF.deleteOnExit();
736      }
737      return res;
738    }
739
740    /**
741     * Unlock storage.
742     * 
743     * @throws IOException
744     */
745    public void unlock() throws IOException {
746      if (this.lock == null)
747        return;
748      this.lock.release();
749      lock.channel().close();
750      lock = null;
751    }
752    
753    @Override
754    public String toString() {
755      return "Storage Directory " + this.root;
756    }
757
758    /**
759     * Check whether underlying file system supports file locking.
760     * 
761     * @return <code>true</code> if exclusive locks are supported or
762     *         <code>false</code> otherwise.
763     * @throws IOException
764     * @see StorageDirectory#lock()
765     */
766    public boolean isLockSupported() throws IOException {
767      FileLock firstLock = null;
768      FileLock secondLock = null;
769      try {
770        firstLock = lock;
771        if(firstLock == null) {
772          firstLock = tryLock();
773          if(firstLock == null)
774            return true;
775        }
776        secondLock = tryLock();
777        if(secondLock == null)
778          return true;
779      } finally {
780        if(firstLock != null && firstLock != lock) {
781          firstLock.release();
782          firstLock.channel().close();
783        }
784        if(secondLock != null) {
785          secondLock.release();
786          secondLock.channel().close();
787        }
788      }
789      return false;
790    }
791  }
792
793  /**
794   * Create empty storage info of the specified type
795   */
796  protected Storage(NodeType type) {
797    super(type);
798  }
799  
800  protected Storage(StorageInfo storageInfo) {
801    super(storageInfo);
802  }
803  
804  public int getNumStorageDirs() {
805    return storageDirs.size();
806  }
807  
808  public StorageDirectory getStorageDir(int idx) {
809    return storageDirs.get(idx);
810  }
811  
812  /**
813   * @return the storage directory, with the precondition that this storage
814   * has exactly one storage directory
815   */
816  public StorageDirectory getSingularStorageDir() {
817    Preconditions.checkState(storageDirs.size() == 1);
818    return storageDirs.get(0);
819  }
820  
821  protected void addStorageDir(StorageDirectory sd) {
822    storageDirs.add(sd);
823  }
824
825  /**
826   * Returns true if the storage directory on the given directory is already
827   * loaded.
828   * @param root the root directory of a {@link StorageDirectory}
829   * @throws IOException if failed to get canonical path.
830   */
831  protected boolean containsStorageDir(File root) throws IOException {
832    for (StorageDirectory sd : storageDirs) {
833      if (sd.getRoot().getCanonicalPath().equals(root.getCanonicalPath())) {
834        return true;
835      }
836    }
837    return false;
838  }
839
840  /**
841   * Return true if the layout of the given storage directory is from a version
842   * of Hadoop prior to the introduction of the "current" and "previous"
843   * directories which allow upgrade and rollback.
844   */
845  public abstract boolean isPreUpgradableLayout(StorageDirectory sd)
846  throws IOException;
847
848  /**
849   * Check if the given storage directory comes from a version of Hadoop
850   * prior to when the directory layout changed (ie 0.13). If this is
851   * the case, this method throws an IOException.
852   */
853  private void checkOldLayoutStorage(StorageDirectory sd) throws IOException {
854    if (isPreUpgradableLayout(sd)) {
855      checkVersionUpgradable(0);
856    }
857  }
858
859  /**
860   * Checks if the upgrade from {@code oldVersion} is supported.
861   * @param oldVersion the version of the metadata to check with the current
862   *                   version
863   * @throws IOException if upgrade is not supported
864   */
865  public static void checkVersionUpgradable(int oldVersion) 
866                                     throws IOException {
867    if (oldVersion > LAST_UPGRADABLE_LAYOUT_VERSION) {
868      String msg = "*********** Upgrade is not supported from this " +
869                   " older version " + oldVersion + 
870                   " of storage to the current version." + 
871                   " Please upgrade to " + LAST_UPGRADABLE_HADOOP_VERSION +
872                   " or a later version and then upgrade to current" +
873                   " version. Old layout version is " + 
874                   (oldVersion == 0 ? "'too old'" : (""+oldVersion)) +
875                   " and latest layout version this software version can" +
876                   " upgrade from is " + LAST_UPGRADABLE_LAYOUT_VERSION +
877                   ". ************";
878      LOG.error(msg);
879      throw new IOException(msg); 
880    }
881    
882  }
883  
884  /**
885   * Iterate over each of the {@link FormatConfirmable} objects,
886   * potentially checking with the user whether it should be formatted.
887   * 
888   * If running in interactive mode, will prompt the user for each
889   * directory to allow them to format anyway. Otherwise, returns
890   * false, unless 'force' is specified.
891   * 
892   * @param force format regardless of whether dirs exist
893   * @param interactive prompt the user when a dir exists
894   * @return true if formatting should proceed
895   * @throws IOException if some storage cannot be accessed
896   */
897  public static boolean confirmFormat(
898      Iterable<? extends FormatConfirmable> items,
899      boolean force, boolean interactive) throws IOException {
900    for (FormatConfirmable item : items) {
901      if (!item.hasSomeData())
902        continue;
903      if (force) { // Don't confirm, always format.
904        System.err.println(
905            "Data exists in " + item + ". Formatting anyway.");
906        continue;
907      }
908      if (!interactive) { // Don't ask - always don't format
909        System.err.println(
910            "Running in non-interactive mode, and data appears to exist in " +
911            item + ". Not formatting.");
912        return false;
913      }
914      if (!ToolRunner.confirmPrompt("Re-format filesystem in " + item + " ?")) {
915        System.err.println("Format aborted in " + item);
916        return false;
917      }
918    }
919    
920    return true;
921  }
922  
923  /**
924   * Interface for classes which need to have the user confirm their
925   * formatting during NameNode -format and other similar operations.
926   * 
927   * This is currently a storage directory or journal manager.
928   */
929  @InterfaceAudience.Private
930  public interface FormatConfirmable {
931    /**
932     * @return true if the storage seems to have some valid data in it,
933     * and the user should be required to confirm the format. Otherwise,
934     * false.
935     * @throws IOException if the storage cannot be accessed at all.
936     */
937    public boolean hasSomeData() throws IOException;
938    
939    /**
940     * @return a string representation of the formattable item, suitable
941     * for display to the user inside a prompt
942     */
943    public String toString();
944  }
945  
  /**
   * Set common storage fields into the given properties object.
   * Should be overridden if additional fields need to be set.
   * <p>
   * Always written: layoutVersion, storageType, namespaceID and cTime.
   * clusterID is written only when the layout supports federation.
   * 
   * @param props the Properties object to write into
   * @param sd the storage directory being written; not used by this
   *           implementation
   * @throws IOException declared by the signature; presumably for the
   *         benefit of overriding implementations - confirm
   */
  protected void setPropertiesFromFields(Properties props, 
                                         StorageDirectory sd)
      throws IOException {
    props.setProperty("layoutVersion", String.valueOf(layoutVersion));
    props.setProperty("storageType", storageType.toString());
    props.setProperty("namespaceID", String.valueOf(namespaceID));
    // Set clusterID in version with federation support
    if (versionSupportsFederation(getServiceLayoutFeatureMap())) {
      props.setProperty("clusterID", clusterID);
    }
    props.setProperty("cTime", String.valueOf(cTime));
  }
964
  /**
   * Write properties to the VERSION file in the given storage directory.
   * <p>
   * The target file is obtained from {@code sd.getVersionFile()} and the
   * work is delegated to {@link #writeProperties(File, StorageDirectory)}.
   *
   * @param sd the storage directory whose version file is written
   * @throws IOException if the delegated write fails
   */
  public void writeProperties(StorageDirectory sd) throws IOException {
    writeProperties(sd.getVersionFile(), sd);
  }
971  
  /**
   * Collect this storage's fields into a fresh {@link Properties} object via
   * {@link #setPropertiesFromFields(Properties, StorageDirectory)} and write
   * them to the given file.
   *
   * @param to the file to write the properties into
   * @param sd the storage directory passed on to the field collection and to
   *           the static writer
   * @throws IOException if collecting the fields or writing the file fails
   */
  public void writeProperties(File to, StorageDirectory sd) throws IOException {
    Properties props = new Properties();
    setPropertiesFromFields(props, sd);
    writeProperties(to, sd, props);
  }
977
978  public static void writeProperties(File to, StorageDirectory sd,
979      Properties props) throws IOException {
980    RandomAccessFile file = new RandomAccessFile(to, "rws");
981    FileOutputStream out = null;
982    try {
983      file.seek(0);
984      out = new FileOutputStream(file.getFD());
985      /*
986       * If server is interrupted before this line, 
987       * the version file will remain unchanged.
988       */
989      props.store(out, null);
990      /*
991       * Now the new fields are flushed to the head of the file, but file 
992       * length can still be larger then required and therefore the file can 
993       * contain whole or corrupted fields from its old contents in the end.
994       * If server is interrupted here and restarted later these extra fields
995       * either should not effect server behavior or should be handled
996       * by the server correctly.
997       */
998      file.setLength(out.getChannel().position());
999    } finally {
1000      if (out != null) {
1001        out.close();
1002      }
1003      file.close();
1004    }
1005  }
1006
1007  public static void rename(File from, File to) throws IOException {
1008    try {
1009      NativeIO.renameTo(from, to);
1010    } catch (NativeIOException e) {
1011      throw new IOException("Failed to rename " + from.getCanonicalPath()
1012        + " to " + to.getCanonicalPath() + " due to failure in native rename. "
1013        + e.toString());
1014    }
1015  }
1016
1017  /**
1018   * Copies a file (usually large) to a new location using native unbuffered IO.
1019   * <p>
1020   * This method copies the contents of the specified source file
1021   * to the specified destination file using OS specific unbuffered IO.
1022   * The goal is to avoid churning the file system buffer cache when copying
1023   * large files.
1024   *
1025   * We can't use FileUtils#copyFile from apache-commons-io because it
1026   * is a buffered IO based on FileChannel#transferFrom, which uses MmapByteBuffer
1027   * internally.
1028   *
1029   * The directory holding the destination file is created if it does not exist.
1030   * If the destination file exists, then this method will delete it first.
1031   * <p>
1032   * <strong>Note:</strong> Setting <code>preserveFileDate</code> to
1033   * {@code true} tries to preserve the file's last modified
1034   * date/times using {@link File#setLastModified(long)}, however it is
1035   * not guaranteed that the operation will succeed.
1036   * If the modification operation fails, no indication is provided.
1037   *
1038   * @param srcFile  an existing file to copy, must not be {@code null}
1039   * @param destFile  the new file, must not be {@code null}
1040   * @param preserveFileDate  true if the file date of the copy
1041   *  should be the same as the original
1042   *
1043   * @throws NullPointerException if source or destination is {@code null}
1044   * @throws IOException if source or destination is invalid
1045   * @throws IOException if an IO error occurs during copying
1046   */
1047  public static void nativeCopyFileUnbuffered(File srcFile, File destFile,
1048      boolean preserveFileDate) throws IOException {
1049    if (srcFile == null) {
1050      throw new NullPointerException("Source must not be null");
1051    }
1052    if (destFile == null) {
1053      throw new NullPointerException("Destination must not be null");
1054    }
1055    if (srcFile.exists() == false) {
1056      throw new FileNotFoundException("Source '" + srcFile + "' does not exist");
1057    }
1058    if (srcFile.isDirectory()) {
1059      throw new IOException("Source '" + srcFile + "' exists but is a directory");
1060    }
1061    if (srcFile.getCanonicalPath().equals(destFile.getCanonicalPath())) {
1062      throw new IOException("Source '" + srcFile + "' and destination '" +
1063          destFile + "' are the same");
1064    }
1065    File parentFile = destFile.getParentFile();
1066    if (parentFile != null) {
1067      if (!parentFile.mkdirs() && !parentFile.isDirectory()) {
1068        throw new IOException("Destination '" + parentFile
1069            + "' directory cannot be created");
1070      }
1071    }
1072    if (destFile.exists()) {
1073      if (FileUtil.canWrite(destFile) == false) {
1074        throw new IOException("Destination '" + destFile
1075            + "' exists but is read-only");
1076      } else {
1077        if (destFile.delete() == false) {
1078          throw new IOException("Destination '" + destFile
1079              + "' exists but cannot be deleted");
1080        }
1081      }
1082    }
1083    try {
1084      NativeIO.copyFileUnbuffered(srcFile, destFile);
1085    } catch (NativeIOException e) {
1086      throw new IOException("Failed to copy " + srcFile.getCanonicalPath()
1087          + " to " + destFile.getCanonicalPath()
1088          + " due to failure in NativeIO#copyFileUnbuffered(). "
1089          + e.toString());
1090    }
1091    if (srcFile.length() != destFile.length()) {
1092      throw new IOException("Failed to copy full contents from '" + srcFile
1093          + "' to '" + destFile + "'");
1094    }
1095    if (preserveFileDate) {
1096      if (destFile.setLastModified(srcFile.lastModified()) == false) {
1097        if (LOG.isDebugEnabled()) {
1098          LOG.debug("Failed to preserve last modified date from'" + srcFile
1099            + "' to '" + destFile + "'");
1100        }
1101      }
1102    }
1103  }
1104
1105  /**
1106   * Recursively delete all the content of the directory first and then 
1107   * the directory itself from the local filesystem.
1108   * @param dir The directory to delete
1109   * @throws IOException
1110   */
1111  public static void deleteDir(File dir) throws IOException {
1112    if (!FileUtil.fullyDelete(dir))
1113      throw new IOException("Failed to delete " + dir.getCanonicalPath());
1114  }
1115  
1116  /**
1117   * Write all data storage files.
1118   * @throws IOException
1119   */
1120  public void writeAll() throws IOException {
1121    this.layoutVersion = getServiceLayoutVersion();
1122    for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1123      writeProperties(it.next());
1124    }
1125  }
1126
1127  /**
1128   * Unlock all storage directories.
1129   * @throws IOException
1130   */
1131  public void unlockAll() throws IOException {
1132    for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1133      it.next().unlock();
1134    }
1135  }
1136
  /**
   * @return the build version, which is the value reported by
   * {@code VersionInfo.getRevision()}
   */
  public static String getBuildVersion() {
    return VersionInfo.getRevision();
  }
1140
1141  public static String getRegistrationID(StorageInfo storage) {
1142    return "NS-" + Integer.toString(storage.getNamespaceID())
1143      + "-" + storage.getClusterID()
1144      + "-" + Long.toString(storage.getCTime());
1145  }
1146  
1147  public static boolean is203LayoutVersion(int layoutVersion) {
1148    for (int lv203 : LAYOUT_VERSIONS_203) {
1149      if (lv203 == layoutVersion) {
1150        return true;
1151      }
1152    }
1153    return false;
1154  }
1155}