001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import static org.apache.hadoop.util.Time.monotonicNow; 021 022import java.io.DataInput; 023import java.io.DataInputStream; 024import java.io.DataOutputStream; 025import java.io.File; 026import java.io.FileInputStream; 027import java.io.FileNotFoundException; 028import java.io.FileOutputStream; 029import java.io.IOException; 030import java.security.DigestInputStream; 031import java.security.DigestOutputStream; 032import java.security.MessageDigest; 033import java.util.ArrayList; 034import java.util.Arrays; 035import java.util.Collection; 036import java.util.HashMap; 037import java.util.List; 038import java.util.Map; 039import java.util.TreeMap; 040 041import org.apache.commons.logging.Log; 042import org.apache.hadoop.classification.InterfaceAudience; 043import org.apache.hadoop.classification.InterfaceStability; 044import org.apache.hadoop.conf.Configuration; 045import org.apache.hadoop.fs.FileSystem; 046import org.apache.hadoop.fs.Path; 047import org.apache.hadoop.fs.PathIsNotDirectoryException; 048import org.apache.hadoop.fs.UnresolvedLinkException; 049import 
org.apache.hadoop.fs.permission.PermissionStatus; 050import org.apache.hadoop.hdfs.DFSUtil; 051import org.apache.hadoop.hdfs.protocol.HdfsConstants; 052import org.apache.hadoop.hdfs.protocol.LayoutFlags; 053import org.apache.hadoop.hdfs.protocol.LayoutVersion; 054import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; 055import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous; 056import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguousUnderConstruction; 057import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; 058import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 059import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException; 060import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature; 061import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList; 062import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; 063import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat; 064import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap; 065import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; 066import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 067import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; 068import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; 069import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; 070import org.apache.hadoop.hdfs.util.ReadOnlyList; 071import org.apache.hadoop.io.IOUtils; 072import org.apache.hadoop.io.MD5Hash; 073import org.apache.hadoop.io.Text; 074import org.apache.hadoop.util.StringUtils; 075 076import com.google.common.annotations.VisibleForTesting; 077import com.google.common.base.Preconditions; 078 079/** 080 * Contains inner classes for reading or writing the on-disk format for 081 * FSImages. 
082 * 083 * In particular, the format of the FSImage looks like: 084 * <pre> 085 * FSImage { 086 * layoutVersion: int, namespaceID: int, numberItemsInFSDirectoryTree: long, 087 * namesystemGenerationStampV1: long, namesystemGenerationStampV2: long, 088 * generationStampAtBlockIdSwitch:long, lastAllocatedBlockId: 089 * long transactionID: long, snapshotCounter: int, numberOfSnapshots: int, 090 * numOfSnapshottableDirs: int, 091 * {FSDirectoryTree, FilesUnderConstruction, SecretManagerState} (can be compressed) 092 * } 093 * 094 * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported) { 095 * INodeInfo of root, numberOfChildren of root: int 096 * [list of INodeInfo of root's children], 097 * [list of INodeDirectoryInfo of root's directory children] 098 * } 099 * 100 * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} not supported){ 101 * [list of INodeInfo of INodes in topological order] 102 * } 103 * 104 * INodeInfo { 105 * { 106 * localName: short + byte[] 107 * } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported 108 * or 109 * { 110 * fullPath: byte[] 111 * } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is not supported 112 * replicationFactor: short, modificationTime: long, 113 * accessTime: long, preferredBlockSize: long, 114 * numberOfBlocks: int (-1 for INodeDirectory, -2 for INodeSymLink), 115 * { 116 * nsQuota: long, dsQuota: long, 117 * { 118 * isINodeSnapshottable: byte, 119 * isINodeWithSnapshot: byte (if isINodeSnapshottable is false) 120 * } (when {@link Feature#SNAPSHOT} is supported), 121 * fsPermission: short, PermissionStatus 122 * } for INodeDirectory 123 * or 124 * { 125 * symlinkString, fsPermission: short, PermissionStatus 126 * } for INodeSymlink 127 * or 128 * { 129 * [list of BlockInfo] 130 * [list of FileDiff] 131 * { 132 * isINodeFileUnderConstructionSnapshot: byte, 133 * {clientName: short + byte[], clientMachine: short + byte[]} (when 134 * isINodeFileUnderConstructionSnapshot is true), 135 * 
 *     } (when {@link Feature#SNAPSHOT} is supported and writing snapshotINode),
 *     fsPermission: short, PermissionStatus
 *   } for INodeFile
 * }
 *
 * INodeDirectoryInfo {
 *   fullPath of the directory: short + byte[],
 *   numberOfChildren: int, [list of INodeInfo of children INode],
 *   {
 *     numberOfSnapshots: int,
 *     [list of Snapshot] (when NumberOfSnapshots is positive),
 *     numberOfDirectoryDiffs: int,
 *     [list of DirectoryDiff] (NumberOfDirectoryDiffs is positive),
 *     number of children that are directories,
 *     [list of INodeDirectoryInfo of the directory children] (includes
 *     snapshot copies of deleted sub-directories)
 *   } (when {@link Feature#SNAPSHOT} is supported),
 * }
 *
 * Snapshot {
 *   snapshotID: int, root of Snapshot: INodeDirectoryInfo (its local name is
 *   the name of the snapshot)
 * }
 *
 * DirectoryDiff {
 *   full path of the root of the associated Snapshot: short + byte[],
 *   childrenSize: int,
 *   isSnapshotRoot: byte,
 *   snapshotINodeIsNotNull: byte (when isSnapshotRoot is false),
 *   snapshotINode: INodeDirectory (when SnapshotINodeIsNotNull is true), Diff
 * }
 *
 * Diff {
 *   createdListSize: int, [Local name of INode in created list],
 *   deletedListSize: int, [INode in deleted list: INodeInfo]
 * }
 *
 * FileDiff {
 *   full path of the root of the associated Snapshot: short + byte[],
 *   fileSize: long,
 *   snapshotINodeIsNotNull: byte,
 *   snapshotINode: INodeFile (when SnapshotINodeIsNotNull is true), Diff
 * }
 * </pre>
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class FSImageFormat {
  private static final Log LOG = FSImage.LOG;

  // Static-only class: only nested loader/saver types are instantiated.
  private FSImageFormat() {}

  /**
   * Common interface of the two image loaders (legacy writable-format
   * {@link Loader} and the protobuf-based loader), exposing the MD5 digest
   * and last transaction id of the image that was loaded.
   */
  interface AbstractLoader {
    MD5Hash getLoadedImageMd5();
    long getLoadedImageTxId();
  }

  /**
   * Chooses the concrete loader at load time by sniffing the file's magic
   * header: files starting with {@link FSImageUtil#MAGIC_HEADER} are loaded
   * with the protobuf loader, anything else with the legacy {@link Loader}.
   */
  static class LoaderDelegator implements AbstractLoader {
    // Concrete loader; non-null only after load() has run.
    private AbstractLoader impl;
    private final Configuration conf;
    private final FSNamesystem fsn;

    LoaderDelegator(Configuration conf, FSNamesystem fsn) {
      this.conf = conf;
      this.fsn = fsn;
    }

    @Override
    public MD5Hash getLoadedImageMd5() {
      return impl.getLoadedImageMd5();
    }

    @Override
    public long getLoadedImageTxId() {
      return impl.getLoadedImageTxId();
    }

    /**
     * Load the given image file, selecting the loader from the file's
     * magic header. May be called at most once per delegator.
     *
     * @param file fsimage file on local disk
     * @param requireSameLayoutVersion passed through to the protobuf loader
     * @throws IOException on read error or malformed image
     */
    public void load(File file, boolean requireSameLayoutVersion)
        throws IOException {
      Preconditions.checkState(impl == null, "Image already loaded!");

      FileInputStream is = null;
      try {
        // Peek at the first bytes to decide which on-disk format this is.
        is = new FileInputStream(file);
        byte[] magic = new byte[FSImageUtil.MAGIC_HEADER.length];
        IOUtils.readFully(is, magic, 0, magic.length);
        if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) {
          FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader(
              conf, fsn, requireSameLayoutVersion);
          impl = loader;
          loader.load(file);
        } else {
          Loader loader = new Loader(conf, fsn);
          impl = loader;
          loader.load(file);
        }
      } finally {
        IOUtils.cleanup(LOG, is);
      }
    }
  }

  /**
   * Construct a loader class to load the image. It chooses the loader based on
   * the layout version.
   */
  public static LoaderDelegator newLoader(Configuration conf, FSNamesystem fsn) {
    return new LoaderDelegator(conf, fsn);
  }

  /**
   * A one-shot class responsible for loading an image. The load() function
   * should be called once, after which the getter methods may be used to retrieve
   * information about the image that was loaded, if loading was successful.
   */
  public static class Loader implements AbstractLoader {
    private final Configuration conf;
    /** which namesystem this loader is working for */
    private final FSNamesystem namesystem;

    /** Set to true once a file has been loaded using this loader.
     */
    private boolean loaded = false;

    /** The transaction ID of the last edit represented by the loaded file */
    private long imgTxId;
    /** The MD5 sum of the loaded file */
    private MD5Hash imgDigest;

    // Snapshot id -> Snapshot, populated during load when the layout
    // version supports snapshots; otherwise stays null.
    private Map<Integer, Snapshot> snapshotMap = null;
    // Tracks INodeReference subtrees so shared subtrees are processed once.
    private final ReferenceMap referenceMap = new ReferenceMap();

    Loader(Configuration conf, FSNamesystem namesystem) {
      this.conf = conf;
      this.namesystem = namesystem;
    }

    /**
     * Return the MD5 checksum of the image that has been loaded.
     * @throws IllegalStateException if load() has not yet been called.
     */
    @Override
    public MD5Hash getLoadedImageMd5() {
      checkLoaded();
      return imgDigest;
    }

    /**
     * Return the transaction id of the last edit covered by the loaded image.
     * @throws IllegalStateException if load() has not yet been called.
     */
    @Override
    public long getLoadedImageTxId() {
      checkLoaded();
      return imgTxId;
    }

    /**
     * Throw IllegalStateException if load() has not yet been called.
     */
    private void checkLoaded() {
      if (!loaded) {
        throw new IllegalStateException("Image not yet loaded!");
      }
    }

    /**
     * Throw IllegalStateException if load() has already been called.
     */
    private void checkNotLoaded() {
      if (loaded) {
        throw new IllegalStateException("Image already loaded!");
      }
    }

    /**
     * Load a non-protobuf-format fsimage file: parse the fixed header
     * fields (layout version, namespace id, generation stamps, txid,
     * inode id), then the inode tree, files under construction, secret
     * manager and cache manager state. The read order below mirrors the
     * on-disk layout exactly and must not be reordered.
     *
     * @param curFile fsimage file to load
     * @throws IOException on read error or inconsistent image contents
     */
    public void load(File curFile) throws IOException {
      checkNotLoaded();
      assert curFile != null : "curFile is null";

      StartupProgress prog = NameNode.getStartupProgress();
      Step step = new Step(StepType.INODES);
      prog.beginStep(Phase.LOADING_FSIMAGE, step);
      long startTime = monotonicNow();

      //
      // Load in bits
      //
      // Every byte read also flows through the digester so the image MD5
      // can be verified after loading.
      MessageDigest digester = MD5Hash.getDigester();
      DigestInputStream fin = new DigestInputStream(
          new FileInputStream(curFile), digester);

      DataInputStream in = new DataInputStream(fin);
      try {
        // read image version: first appeared in version -1
        int imgVersion = in.readInt();
        if (getLayoutVersion() != imgVersion) {
          throw new InconsistentFSStateException(curFile,
              "imgVersion " + imgVersion +
              " expected to be " + getLayoutVersion());
        }
        boolean supportSnapshot = NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.SNAPSHOT, imgVersion);
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.ADD_LAYOUT_FLAGS, imgVersion)) {
          LayoutFlags.read(in);
        }

        // read namespaceID: first appeared in version -2
        in.readInt();

        long numFiles = in.readLong();

        // read in the last generation stamp for legacy blocks.
        long genstamp = in.readLong();
        namesystem.getBlockIdManager().setGenerationStampV1(genstamp);

        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.SEQUENTIAL_BLOCK_ID, imgVersion)) {
          // read the starting generation stamp for sequential block IDs
          genstamp = in.readLong();
          namesystem.getBlockIdManager().setGenerationStampV2(genstamp);

          // read the last generation stamp for blocks created after
          // the switch to sequential block IDs.
          long stampAtIdSwitch = in.readLong();
          namesystem.getBlockIdManager().setGenerationStampV1Limit(stampAtIdSwitch);

          // read the max sequential block ID.
          long maxSequentialBlockId = in.readLong();
          namesystem.getBlockIdManager().setLastAllocatedBlockId(maxSequentialBlockId);
        } else {

          long startingGenStamp = namesystem.getBlockIdManager()
              .upgradeGenerationStampToV2();
          // This is an upgrade.
          LOG.info("Upgrading to sequential block IDs. Generation stamp " +
                   "for new blocks set to " + startingGenStamp);
        }

        // read the transaction ID of the last edit represented by
        // this image
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.STORED_TXIDS, imgVersion)) {
          imgTxId = in.readLong();
        } else {
          imgTxId = 0;
        }

        // read the last allocated inode id in the fsimage
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.ADD_INODE_ID, imgVersion)) {
          long lastInodeId = in.readLong();
          namesystem.dir.resetLastInodeId(lastInodeId);
          if (LOG.isDebugEnabled()) {
            LOG.debug("load last allocated InodeId from fsimage:" + lastInodeId);
          }
        } else {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Old layout version doesn't have inode id."
                + " Will assign new id for each inode.");
          }
        }

        if (supportSnapshot) {
          snapshotMap = namesystem.getSnapshotManager().read(in, this);
        }

        // read compression related info
        FSImageCompression compression;
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.FSIMAGE_COMPRESSION, imgVersion)) {
          compression = FSImageCompression.readCompressionHeader(conf, in);
        } else {
          compression = FSImageCompression.createNoopCompression();
        }
        // Everything after the header may be compressed; re-wrap the stream.
        in = compression.unwrapInputStream(fin);

        LOG.info("Loading image file " + curFile + " using " + compression);

        // load all inodes
        LOG.info("Number of files = " + numFiles);
        prog.setTotal(Phase.LOADING_FSIMAGE, step, numFiles);
        Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, imgVersion)) {
          if (supportSnapshot) {
            loadLocalNameINodesWithSnapshot(numFiles, in, counter);
          } else {
            loadLocalNameINodes(numFiles, in, counter);
          }
        } else {
          loadFullNameINodes(numFiles, in, counter);
        }

        loadFilesUnderConstruction(in, supportSnapshot, counter);
        prog.endStep(Phase.LOADING_FSIMAGE, step);
        // Now that the step is finished, set counter equal to total to adjust
        // for possible under-counting due to reference inodes.
        prog.setCount(Phase.LOADING_FSIMAGE, step, numFiles);

        loadSecretManagerState(in);

        loadCacheManagerState(in);

        // make sure to read to the end of file
        boolean eof = (in.read() == -1);
        assert eof : "Should have reached the end of image file " + curFile;
      } finally {
        // Closing the outermost stream closes the wrapped digest/file streams.
        in.close();
      }

      imgDigest = new MD5Hash(digester.digest());
      loaded = true;

      LOG.info("Image file " + curFile + " of size " + curFile.length()
          + " bytes loaded in " + (monotonicNow() - startTime) / 1000
          + " seconds.");
    }

    /** Update the root node's attributes */
    private void updateRootAttr(INodeWithAdditionalFields root) {
      final QuotaCounts q = root.getQuotaCounts();
      final long nsQuota = q.getNameSpace();
      final long dsQuota = q.getStorageSpace();
      FSDirectory fsDir = namesystem.dir;
      if (nsQuota != -1 || dsQuota != -1) {
        fsDir.rootDir.getDirectoryWithQuotaFeature().setQuota(nsQuota, dsQuota);
      }
      fsDir.rootDir.cloneModificationTime(root);
      fsDir.rootDir.clonePermissionStatus(root);
    }

    /**
     * Load fsimage files when 1) only local names are stored,
     * and 2) snapshot is supported.
     *
     * @param numFiles number of files expected to be read
     * @param in Image input stream
     * @param counter Counter to increment for namenode startup progress
     */
    private void loadLocalNameINodesWithSnapshot(long numFiles, DataInput in,
        Counter counter) throws IOException {
      assert NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion());
      assert NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.SNAPSHOT, getLayoutVersion());

      // load root
      loadRoot(in, counter);
      // load rest of the nodes recursively
      loadDirectoryWithSnapshot(in, counter);
    }

    /**
     * load fsimage files assuming only local names are stored. Used when
     * snapshots are not supported by the layout version.
487 * 488 * @param numFiles number of files expected to be read 489 * @param in image input stream 490 * @param counter Counter to increment for namenode startup progress 491 * @throws IOException 492 */ 493 private void loadLocalNameINodes(long numFiles, DataInput in, Counter counter) 494 throws IOException { 495 assert NameNodeLayoutVersion.supports( 496 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion()); 497 assert numFiles > 0; 498 499 // load root 500 loadRoot(in, counter); 501 // have loaded the first file (the root) 502 numFiles--; 503 504 // load rest of the nodes directory by directory 505 while (numFiles > 0) { 506 numFiles -= loadDirectory(in, counter); 507 } 508 if (numFiles != 0) { 509 throw new IOException("Read unexpect number of files: " + -numFiles); 510 } 511 } 512 513 /** 514 * Load information about root, and use the information to update the root 515 * directory of NameSystem. 516 * @param in The {@link DataInput} instance to read. 517 * @param counter Counter to increment for namenode startup progress 518 */ 519 private void loadRoot(DataInput in, Counter counter) 520 throws IOException { 521 // load root 522 if (in.readShort() != 0) { 523 throw new IOException("First node is not root"); 524 } 525 final INodeDirectory root = loadINode(null, false, in, counter) 526 .asDirectory(); 527 // update the root's attributes 528 updateRootAttr(root); 529 } 530 531 /** Load children nodes for the parent directory. */ 532 private int loadChildren(INodeDirectory parent, DataInput in, 533 Counter counter) throws IOException { 534 int numChildren = in.readInt(); 535 for (int i = 0; i < numChildren; i++) { 536 // load single inode 537 INode newNode = loadINodeWithLocalName(false, in, true, counter); 538 addToParent(parent, newNode); 539 } 540 return numChildren; 541 } 542 543 /** 544 * Load a directory when snapshot is supported. 545 * @param in The {@link DataInput} instance to read. 
     * @param counter Counter to increment for namenode startup progress
     */
    private void loadDirectoryWithSnapshot(DataInput in, Counter counter)
        throws IOException {
      // Step 1. Identify the parent INode
      long inodeId = in.readLong();
      final INodeDirectory parent = this.namesystem.dir.getInode(inodeId)
          .asDirectory();

      // Check if the whole subtree has been saved (for reference nodes)
      boolean toLoadSubtree = referenceMap.toProcessSubtree(parent.getId());
      if (!toLoadSubtree) {
        return;
      }

      // Step 2. Load snapshots if parent is snapshottable
      // A negative count means the directory is not snapshottable.
      int numSnapshots = in.readInt();
      if (numSnapshots >= 0) {
        // load snapshots and snapshotQuota
        SnapshotFSImageFormat.loadSnapshotList(parent, numSnapshots, in, this);
        if (parent.getDirectorySnapshottableFeature().getSnapshotQuota() > 0) {
          // add the directory to the snapshottable directory list in
          // SnapshotManager. Note that we only add root when its snapshot quota
          // is positive.
          this.namesystem.getSnapshotManager().addSnapshottable(parent);
        }
      }

      // Step 3. Load children nodes under parent
      loadChildren(parent, in, counter);

      // Step 4. load Directory Diff List
      SnapshotFSImageFormat.loadDirectoryDiffList(parent, in, this);

      // Recursively load sub-directories, including snapshot copies of deleted
      // directories
      int numSubTree = in.readInt();
      for (int i = 0; i < numSubTree; i++) {
        loadDirectoryWithSnapshot(in, counter);
      }
    }

    /**
     * Load all children of a directory
     *
     * @param in input to load from
     * @param counter Counter to increment for namenode startup progress
     * @return number of child inodes read
     * @throws IOException
     */
    private int loadDirectory(DataInput in, Counter counter) throws IOException {
      String parentPath = FSImageSerialization.readString(in);
      // Rename .snapshot paths if we're doing an upgrade
      parentPath = renameReservedPathsOnUpgrade(parentPath, getLayoutVersion());
      final INodeDirectory parent = INodeDirectory.valueOf(
          namesystem.dir.getINode(parentPath, true), parentPath);
      return loadChildren(parent, in, counter);
    }

    /**
     * load fsimage files assuming full path names are stored
     *
     * @param numFiles total number of files to load
     * @param in data input stream
     * @param counter Counter to increment for namenode startup progress
     * @throws IOException if any error occurs
     */
    private void loadFullNameINodes(long numFiles, DataInput in, Counter counter)
        throws IOException {
      byte[][] pathComponents;
      byte[][] parentPath = {{}};
      FSDirectory fsDir = namesystem.dir;
      INodeDirectory parentINode = fsDir.rootDir;
      for (long i = 0; i < numFiles; i++) {
        pathComponents = FSImageSerialization.readPathComponents(in);
        for (int j=0; j < pathComponents.length; j++) {
          // Rename any reserved path components when upgrading from an
          // older layout version.
          byte[] newComponent = renameReservedComponentOnUpgrade
              (pathComponents[j], getLayoutVersion());
          if (!Arrays.equals(newComponent, pathComponents[j])) {
            String oldPath = DFSUtil.byteArray2PathString(pathComponents);
            pathComponents[j] = newComponent;
            String newPath = DFSUtil.byteArray2PathString(pathComponents);
            LOG.info("Renaming reserved path " + oldPath + " to " + newPath);
          }
        }
        final INode newNode = loadINode(
            pathComponents[pathComponents.length-1], false, in, counter);

        if (isRoot(pathComponents)) { // it is the root
          // update the root's attributes
          updateRootAttr(newNode.asDirectory());
          continue;
        }

        namesystem.dir.addToInodeMap(newNode);
        // check if the new inode belongs to the same parent
        if(!isParent(pathComponents, parentPath)) {
          parentINode = getParentINodeDirectory(pathComponents);
          parentPath = getParent(pathComponents);
        }

        // add new inode
        addToParent(parentINode, newNode);
      }
    }

    private INodeDirectory getParentINodeDirectory(byte[][] pathComponents
        ) throws FileNotFoundException, PathIsNotDirectoryException,
        UnresolvedLinkException {
      if (pathComponents.length < 2) { // root
        return null;
      }
      // Gets the parent INode
      final INodesInPath inodes = namesystem.dir.getExistingPathINodes(
          pathComponents);
      return INodeDirectory.valueOf(inodes.getINode(-2), pathComponents);
    }

    /**
     * Add the child node to parent and, if child is a file, update block map.
     * This method is only used for image loading so that synchronization,
     * modification time update and space count update are not needed.
668 */ 669 private void addToParent(INodeDirectory parent, INode child) 670 throws IllegalReservedPathException { 671 FSDirectory fsDir = namesystem.dir; 672 if (parent == fsDir.rootDir) { 673 child.setLocalName(renameReservedRootComponentOnUpgrade( 674 child.getLocalNameBytes(), getLayoutVersion())); 675 } 676 // NOTE: This does not update space counts for parents 677 if (!parent.addChild(child)) { 678 return; 679 } 680 namesystem.dir.cacheName(child); 681 682 if (child.isFile()) { 683 updateBlocksMap(child.asFile()); 684 } 685 } 686 687 public void updateBlocksMap(INodeFile file) { 688 // Add file->block mapping 689 final BlockInfoContiguous[] blocks = file.getBlocks(); 690 if (blocks != null) { 691 final BlockManager bm = namesystem.getBlockManager(); 692 for (int i = 0; i < blocks.length; i++) { 693 file.setBlock(i, bm.addBlockCollection(blocks[i], file)); 694 } 695 } 696 } 697 698 /** @return The FSDirectory of the namesystem where the fsimage is loaded */ 699 public FSDirectory getFSDirectoryInLoading() { 700 return namesystem.dir; 701 } 702 703 public INode loadINodeWithLocalName(boolean isSnapshotINode, DataInput in, 704 boolean updateINodeMap) throws IOException { 705 return loadINodeWithLocalName(isSnapshotINode, in, updateINodeMap, null); 706 } 707 708 public INode loadINodeWithLocalName(boolean isSnapshotINode, 709 DataInput in, boolean updateINodeMap, Counter counter) 710 throws IOException { 711 byte[] localName = FSImageSerialization.readLocalName(in); 712 localName = 713 renameReservedComponentOnUpgrade(localName, getLayoutVersion()); 714 INode inode = loadINode(localName, isSnapshotINode, in, counter); 715 if (updateINodeMap) { 716 namesystem.dir.addToInodeMap(inode); 717 } 718 return inode; 719 } 720 721 /** 722 * load an inode from fsimage except for its name 723 * 724 * @param in data input stream from which image is read 725 * @param counter Counter to increment for namenode startup progress 726 * @return an inode 727 */ 728 
    @SuppressWarnings("deprecation")
    INode loadINode(final byte[] localName, boolean isSnapshotINode,
        DataInput in, Counter counter) throws IOException {
      final int imgVersion = getLayoutVersion();
      if (NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.SNAPSHOT, imgVersion)) {
        namesystem.getFSDirectory().verifyINodeName(localName);
      }

      long inodeId = NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.ADD_INODE_ID, imgVersion) ? in.readLong()
          : namesystem.dir.allocateNewInodeId();

      final short replication = namesystem.getBlockManager().adjustReplication(
          in.readShort());
      final long modificationTime = in.readLong();
      long atime = 0;
      if (NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.FILE_ACCESS_TIME, imgVersion)) {
        atime = in.readLong();
      }
      final long blockSize = in.readLong();
      // numBlocks doubles as a type tag: >= 0 file, -1 directory,
      // -2 symlink, -3 reference (see the branches below).
      final int numBlocks = in.readInt();

      if (numBlocks >= 0) {
        // file

        // read blocks
        BlockInfoContiguous[] blocks = new BlockInfoContiguous[numBlocks];
        for (int j = 0; j < numBlocks; j++) {
          blocks[j] = new BlockInfoContiguous(replication);
          blocks[j].readFields(in);
        }

        String clientName = "";
        String clientMachine = "";
        boolean underConstruction = false;
        FileDiffList fileDiffs = null;
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.SNAPSHOT, imgVersion)) {
          // read diffs
          fileDiffs = SnapshotFSImageFormat.loadFileDiffList(in, this);

          if (isSnapshotINode) {
            underConstruction = in.readBoolean();
            if (underConstruction) {
              clientName = FSImageSerialization.readString(in);
              clientMachine = FSImageSerialization.readString(in);
              // convert the last block to BlockUC
              if (blocks.length > 0) {
                BlockInfoContiguous lastBlk = blocks[blocks.length - 1];
                blocks[blocks.length - 1] = new BlockInfoContiguousUnderConstruction(
                    lastBlk, replication);
              }
            }
          }
        }

        final PermissionStatus permissions = PermissionStatus.read(in);

        // return
        if (counter != null) {
          counter.increment();
        }

        final INodeFile file = new INodeFile(inodeId, localName, permissions,
            modificationTime, atime, blocks, replication, blockSize, (byte)0);
        if (underConstruction) {
          file.toUnderConstruction(clientName, clientMachine);
        }
        return fileDiffs == null ? file : new INodeFile(file, fileDiffs);
      } else if (numBlocks == -1) {
        //directory

        //read quotas
        final long nsQuota = in.readLong();
        long dsQuota = -1L;
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.DISKSPACE_QUOTA, imgVersion)) {
          dsQuota = in.readLong();
        }

        //read snapshot info
        boolean snapshottable = false;
        boolean withSnapshot = false;
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.SNAPSHOT, imgVersion)) {
          snapshottable = in.readBoolean();
          if (!snapshottable) {
            withSnapshot = in.readBoolean();
          }
        }

        final PermissionStatus permissions = PermissionStatus.read(in);

        //return
        if (counter != null) {
          counter.increment();
        }
        final INodeDirectory dir = new INodeDirectory(inodeId, localName,
            permissions, modificationTime);
        if (nsQuota >= 0 || dsQuota >= 0) {
          dir.addDirectoryWithQuotaFeature(new DirectoryWithQuotaFeature.Builder().
              nameSpaceQuota(nsQuota).storageSpaceQuota(dsQuota).build());
        }
        if (withSnapshot) {
          dir.addSnapshotFeature(null);
        }
        if (snapshottable) {
          dir.addSnapshottableFeature();
        }
        return dir;
      } else if (numBlocks == -2) {
        //symlink
        if (!FileSystem.areSymlinksEnabled()) {
          throw new IOException("Symlinks not supported - please remove symlink before upgrading to this version of HDFS");
        }

        final String symlink = Text.readString(in);
        final PermissionStatus permissions = PermissionStatus.read(in);
        if (counter != null) {
          counter.increment();
        }
        return new INodeSymlink(inodeId, localName, permissions,
            modificationTime, atime, symlink);
      } else if (numBlocks == -3) {
        //reference
        // Intentionally do not increment counter, because it is too difficult at
        // this point to assess whether or not this is a reference that counts
        // toward quota.

        final boolean isWithName = in.readBoolean();
        // lastSnapshotId for WithName node, dstSnapshotId for DstReference node
        int snapshotId = in.readInt();

        final INodeReference.WithCount withCount
            = referenceMap.loadINodeReferenceWithCount(isSnapshotINode, in, this);

        if (isWithName) {
          return new INodeReference.WithName(null, withCount, localName,
              snapshotId);
        } else {
          final INodeReference ref = new INodeReference.DstReference(null,
              withCount, snapshotId);
          return ref;
        }
      }

      throw new IOException("Unknown inode type: numBlocks=" + numBlocks);
    }

    /** Load {@link INodeFileAttributes}.
     */
    public INodeFileAttributes loadINodeFileAttributes(DataInput in)
        throws IOException {
      final int layoutVersion = getLayoutVersion();

      // Pre-OPTIMIZE_SNAPSHOT_INODES images store a full inode here; delegate
      // to the general inode loader and view the result as a file.
      if (!NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.OPTIMIZE_SNAPSHOT_INODES, layoutVersion)) {
        return loadINodeWithLocalName(true, in, false).asFile();
      }

      // Field order below mirrors the on-disk layout exactly; do not reorder.
      final byte[] name = FSImageSerialization.readLocalName(in);
      final PermissionStatus permissions = PermissionStatus.read(in);
      final long modificationTime = in.readLong();
      final long accessTime = in.readLong();

      // Replication read from the image may be clamped/adjusted by the
      // block manager's configured min/max.
      final short replication = namesystem.getBlockManager().adjustReplication(
          in.readShort());
      final long preferredBlockSize = in.readLong();

      return new INodeFileAttributes.SnapshotCopy(name, permissions, null, modificationTime,
          accessTime, replication, preferredBlockSize, (byte) 0, null);
    }

    /**
     * Load {@link INodeDirectoryAttributes} for a snapshot copy of a
     * directory from the legacy fsimage stream.
     */
    public INodeDirectoryAttributes loadINodeDirectoryAttributes(DataInput in)
        throws IOException {
      final int layoutVersion = getLayoutVersion();

      // Older layouts store a complete inode rather than a slim attributes copy.
      if (!NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.OPTIMIZE_SNAPSHOT_INODES, layoutVersion)) {
        return loadINodeWithLocalName(true, in, false).asDirectory();
      }

      final byte[] name = FSImageSerialization.readLocalName(in);
      final PermissionStatus permissions = PermissionStatus.read(in);
      final long modificationTime = in.readLong();

      // Read quotas: quota by storage type does not need to be processed below.
      // It is handled only in protobuf based FsImagePBINode class for newer
      // fsImages. Tools using this class such as legacy-mode of offline image viewer
      // should only load legacy FSImages without newer features.
      final long nsQuota = in.readLong();
      final long dsQuota = in.readLong();

      // -1/-1 means "no quota set": use the cheaper copy without quota fields.
      return nsQuota == -1L && dsQuota == -1L ?
          new INodeDirectoryAttributes.SnapshotCopy(
              name, permissions, null, modificationTime, null)
          : new INodeDirectoryAttributes.CopyWithQuota(name, permissions,
              null, modificationTime, nsQuota, dsQuota, null, null);
    }

    /**
     * Load the under-construction-files section of the image and re-attach
     * each lease/last-block to the already-loaded namespace inode.
     *
     * @param in image input stream positioned at the UC-files section
     * @param supportSnapshot whether the layout supports snapshots
     *                        (currently unused in this method body)
     * @param counter startup-progress counter, incremented per UC inode read
     */
    private void loadFilesUnderConstruction(DataInput in,
        boolean supportSnapshot, Counter counter) throws IOException {
      FSDirectory fsDir = namesystem.dir;
      int size = in.readInt();

      LOG.info("Number of files under construction = " + size);

      for (int i = 0; i < size; i++) {
        INodeFile cons = FSImageSerialization.readINodeUnderConstruction(in,
            namesystem, getLayoutVersion());
        counter.increment();

        // verify that file exists in namespace
        String path = cons.getLocalName();
        INodeFile oldnode = null;
        boolean inSnapshot = false;
        if (path != null && FSDirectory.isReservedName(path) &&
            NameNodeLayoutVersion.supports(
                LayoutVersion.Feature.ADD_INODE_ID, getLayoutVersion())) {
          // TODO: for HDFS-5428, we use reserved path for those INodeFileUC in
          // snapshot. If we support INode ID in the layout version, we can use
          // the inode id to find the oldnode.
          oldnode = namesystem.dir.getInode(cons.getId()).asFile();
          inSnapshot = true;
        } else {
          // The path may contain names that are reserved in the new layout;
          // rename them before resolving against the loaded namespace.
          path = renameReservedPathsOnUpgrade(path, getLayoutVersion());
          final INodesInPath iip = fsDir.getINodesInPath(path, true);
          oldnode = INodeFile.valueOf(iip.getLastINode(), path);
        }

        FileUnderConstructionFeature uc = cons.getFileUnderConstructionFeature();
        oldnode.toUnderConstruction(uc.getClientName(), uc.getClientMachine());
        if (oldnode.numBlocks() > 0) {
          BlockInfoContiguous ucBlock = cons.getLastBlock();
          // we do not replace the inode, just replace the last block of oldnode
          BlockInfoContiguous info = namesystem.getBlockManager().addBlockCollection(
              ucBlock, oldnode);
          oldnode.setBlock(oldnode.numBlocks() - 1, info);
        }

        // Files that exist only inside a snapshot get no active lease.
        if (!inSnapshot) {
          namesystem.leaseManager.addLease(cons
              .getFileUnderConstructionFeature().getClientName(), path);
        }
      }
    }

    /**
     * Load the delegation-token secret manager state, if the image layout
     * version carries it.
     */
    private void loadSecretManagerState(DataInput in)
        throws IOException {
      int imgVersion = getLayoutVersion();

      if (!NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.DELEGATION_TOKEN, imgVersion)) {
        //SecretManagerState is not available.
        //This must not happen if security is turned on.
        return;
      }
      namesystem.loadSecretManagerStateCompat(in);
    }

    /** Load the cache manager state, if the image layout version carries it. */
    private void loadCacheManagerState(DataInput in) throws IOException {
      int imgVersion = getLayoutVersion();
      if (!NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.CACHING, imgVersion)) {
        return;
      }
      namesystem.getCacheManager().loadStateCompat(in);
    }

    /** Layout version of the image currently being loaded, from storage. */
    private int getLayoutVersion() {
      return namesystem.getFSImage().getStorage().getLayoutVersion();
    }

    /**
     * The root path is represented as a single component whose byte array is
     * null (as produced by the path-splitting used during load).
     */
    private boolean isRoot(byte[][] path) {
      return path.length == 1 &&
          path[0] == null;
    }

    /**
     * True iff {@code parent} is the immediate parent path of {@code path},
     * i.e. path has exactly one more component and shares every parent
     * component byte-for-byte.
     */
    private boolean isParent(byte[][] path, byte[][] parent) {
      if (path == null || parent == null)
        return false;
      if (parent.length == 0 || path.length != parent.length + 1)
        return false;
      boolean isParent = true;
      for (int i = 0; i < parent.length; i++) {
        isParent = isParent && Arrays.equals(path[i], parent[i]);
      }
      return isParent;
    }

    /**
     * Return string representing the parent of the given path.
     * NOTE(review): if {@code path} contains no {@link Path#SEPARATOR},
     * lastIndexOf returns -1 and substring throws — callers appear to pass
     * absolute image paths only; confirm before reusing elsewhere.
     */
    String getParent(String path) {
      return path.substring(0, path.lastIndexOf(Path.SEPARATOR));
    }

    /** Deep-copy all but the last component of {@code path}. */
    byte[][] getParent(byte[][] path) {
      byte[][] result = new byte[path.length - 1][];
      for (int i = 0; i < result.length; i++) {
        result[i] = new byte[path[i].length];
        System.arraycopy(path[i], 0, result[i], 0, path[i].length);
      }
      return result;
    }

    /** Read a snapshot id from the stream and resolve it via the snapshot map. */
    public Snapshot getSnapshot(DataInput in) throws IOException {
      return snapshotMap.get(in.readInt());
    }
  }

  /**
   * Map from reserved path component (e.g. ".snapshot") to the replacement
   * name applied when upgrading an image that predates the reservation.
   * Mutable and static; populated by the setters below before an upgrade.
   */
  @VisibleForTesting
  public static final TreeMap<String, String> renameReservedMap =
      new TreeMap<String, String>();

  /**
   * Use the default key-value pairs that will be used to determine how to
   * rename reserved paths on upgrade.
1046 */ 1047 @VisibleForTesting 1048 public static void useDefaultRenameReservedPairs() { 1049 renameReservedMap.clear(); 1050 for (String key: HdfsConstants.RESERVED_PATH_COMPONENTS) { 1051 renameReservedMap.put( 1052 key, 1053 key + "." + HdfsConstants.NAMENODE_LAYOUT_VERSION + "." 1054 + "UPGRADE_RENAMED"); 1055 } 1056 } 1057 1058 /** 1059 * Set the key-value pairs that will be used to determine how to rename 1060 * reserved paths on upgrade. 1061 */ 1062 @VisibleForTesting 1063 public static void setRenameReservedPairs(String renameReserved) { 1064 // Clear and set the default values 1065 useDefaultRenameReservedPairs(); 1066 // Overwrite with provided values 1067 setRenameReservedMapInternal(renameReserved); 1068 } 1069 1070 private static void setRenameReservedMapInternal(String renameReserved) { 1071 Collection<String> pairs = 1072 StringUtils.getTrimmedStringCollection(renameReserved); 1073 for (String p : pairs) { 1074 String[] pair = StringUtils.split(p, '/', '='); 1075 Preconditions.checkArgument(pair.length == 2, 1076 "Could not parse key-value pair " + p); 1077 String key = pair[0]; 1078 String value = pair[1]; 1079 Preconditions.checkArgument(DFSUtil.isReservedPathComponent(key), 1080 "Unknown reserved path " + key); 1081 Preconditions.checkArgument(DFSUtil.isValidNameForComponent(value), 1082 "Invalid rename path for " + key + ": " + value); 1083 LOG.info("Will rename reserved path " + key + " to " + value); 1084 renameReservedMap.put(key, value); 1085 } 1086 } 1087 1088 /** 1089 * When upgrading from an old version, the filesystem could contain paths 1090 * that are now reserved in the new version (e.g. .snapshot). This renames 1091 * these new reserved paths to a user-specified value to avoid collisions 1092 * with the reserved name. 
1093 * 1094 * @param path Old path potentially containing a reserved path 1095 * @return New path with reserved path components renamed to user value 1096 */ 1097 static String renameReservedPathsOnUpgrade(String path, 1098 final int layoutVersion) throws IllegalReservedPathException { 1099 final String oldPath = path; 1100 // If any known LVs aren't supported, we're doing an upgrade 1101 if (!NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) { 1102 String[] components = INode.getPathNames(path); 1103 // Only need to worry about the root directory 1104 if (components.length > 1) { 1105 components[1] = DFSUtil.bytes2String( 1106 renameReservedRootComponentOnUpgrade( 1107 DFSUtil.string2Bytes(components[1]), 1108 layoutVersion)); 1109 path = DFSUtil.strings2PathString(components); 1110 } 1111 } 1112 if (!NameNodeLayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) { 1113 String[] components = INode.getPathNames(path); 1114 // Special case the root path 1115 if (components.length == 0) { 1116 return path; 1117 } 1118 for (int i=0; i<components.length; i++) { 1119 components[i] = DFSUtil.bytes2String( 1120 renameReservedComponentOnUpgrade( 1121 DFSUtil.string2Bytes(components[i]), 1122 layoutVersion)); 1123 } 1124 path = DFSUtil.strings2PathString(components); 1125 } 1126 1127 if (!path.equals(oldPath)) { 1128 LOG.info("Upgrade process renamed reserved path " + oldPath + " to " 1129 + path); 1130 } 1131 return path; 1132 } 1133 1134 private final static String RESERVED_ERROR_MSG = 1135 FSDirectory.DOT_RESERVED_PATH_PREFIX + " is a reserved path and " 1136 + HdfsConstants.DOT_SNAPSHOT_DIR + " is a reserved path component in" 1137 + " this version of HDFS. 
Please rollback and delete or rename" 1138 + " this path, or upgrade with the " 1139 + StartupOption.RENAMERESERVED.getName() 1140 + " [key-value pairs]" 1141 + " option to automatically rename these paths during upgrade."; 1142 1143 /** 1144 * Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single 1145 * byte array path component. 1146 */ 1147 private static byte[] renameReservedComponentOnUpgrade(byte[] component, 1148 final int layoutVersion) throws IllegalReservedPathException { 1149 // If the LV doesn't support snapshots, we're doing an upgrade 1150 if (!NameNodeLayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) { 1151 if (Arrays.equals(component, HdfsConstants.DOT_SNAPSHOT_DIR_BYTES)) { 1152 if (!renameReservedMap.containsKey(HdfsConstants.DOT_SNAPSHOT_DIR)) { 1153 throw new IllegalReservedPathException(RESERVED_ERROR_MSG); 1154 } 1155 component = 1156 DFSUtil.string2Bytes(renameReservedMap 1157 .get(HdfsConstants.DOT_SNAPSHOT_DIR)); 1158 } 1159 } 1160 return component; 1161 } 1162 1163 /** 1164 * Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single 1165 * byte array path component. 
1166 */ 1167 private static byte[] renameReservedRootComponentOnUpgrade(byte[] component, 1168 final int layoutVersion) throws IllegalReservedPathException { 1169 // If the LV doesn't support inode IDs, we're doing an upgrade 1170 if (!NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) { 1171 if (Arrays.equals(component, FSDirectory.DOT_RESERVED)) { 1172 if (!renameReservedMap.containsKey(HdfsConstants.DOT_SNAPSHOT_DIR)) { 1173 throw new IllegalReservedPathException(RESERVED_ERROR_MSG); 1174 } 1175 final String renameString = renameReservedMap 1176 .get(FSDirectory.DOT_RESERVED_STRING); 1177 component = 1178 DFSUtil.string2Bytes(renameString); 1179 LOG.info("Renamed root path " + FSDirectory.DOT_RESERVED_STRING 1180 + " to " + renameString); 1181 } 1182 } 1183 return component; 1184 } 1185 1186 /** 1187 * A one-shot class responsible for writing an image file. 1188 * The write() function should be called once, after which the getter 1189 * functions may be used to retrieve information about the file that was written. 1190 * 1191 * This is replaced by the PB-based FSImage. The class is to maintain 1192 * compatibility for the external fsimage tool. 
   */
  @Deprecated
  static class Saver {
    // Pinned layout version of the legacy (pre-protobuf) image format this
    // Saver emits; must not track NAMENODE_LAYOUT_VERSION.
    private static final int LAYOUT_VERSION = -51;
    // How many inodes to write between cancellation checks.
    public static final int CHECK_CANCEL_INTERVAL = 4096;
    private final SaveNamespaceContext context;
    /** Set to true once an image has been written */
    private boolean saved = false;
    // Counts inodes written so far, for CHECK_CANCEL_INTERVAL batching.
    private long checkCancelCounter = 0;

    /** The MD5 checksum of the file that was written */
    private MD5Hash savedDigest;
    // Tracks reference inodes so each shared subtree is serialized only once.
    private final ReferenceMap referenceMap = new ReferenceMap();

    // Under-construction files found only inside snapshots, collected while
    // walking the tree and appended in the UC-files section (see HDFS-5428).
    private final Map<Long, INodeFile> snapshotUCMap =
        new HashMap<Long, INodeFile>();

    /** @throws IllegalStateException if the instance has not yet saved an image */
    private void checkSaved() {
      if (!saved) {
        throw new IllegalStateException("FSImageSaver has not saved an image");
      }
    }

    /** @throws IllegalStateException if the instance has already saved an image */
    private void checkNotSaved() {
      if (saved) {
        throw new IllegalStateException("FSImageSaver has already saved an image");
      }
    }


    Saver(SaveNamespaceContext context) {
      this.context = context;
    }

    /**
     * Return the MD5 checksum of the image file that was saved.
     */
    MD5Hash getSavedDigest() {
      checkSaved();
      return savedDigest;
    }

    /**
     * Write the entire namespace to {@code newFile} in the legacy image
     * format. One-shot: throws IllegalStateException on a second call.
     * The write order below IS the on-disk format — do not reorder fields.
     *
     * @param newFile destination image file
     * @param compression codec used for the body (header is uncompressed)
     * @throws IOException on any write failure or cancellation
     */
    void save(File newFile, FSImageCompression compression) throws IOException {
      checkNotSaved();

      final FSNamesystem sourceNamesystem = context.getSourceNamesystem();
      final INodeDirectory rootDir = sourceNamesystem.dir.rootDir;
      final long numINodes = rootDir.getDirectoryWithQuotaFeature()
          .getSpaceConsumed().getNameSpace();
      String sdPath = newFile.getParentFile().getParentFile().getAbsolutePath();
      Step step = new Step(StepType.INODES, sdPath);
      StartupProgress prog = NameNode.getStartupProgress();
      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
      prog.setTotal(Phase.SAVING_CHECKPOINT, step, numINodes);
      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
      long startTime = monotonicNow();
      //
      // Write out data
      //
      // DigestOutputStream accumulates the MD5 of every byte written, so the
      // final digest covers the whole file including the header.
      MessageDigest digester = MD5Hash.getDigester();
      FileOutputStream fout = new FileOutputStream(newFile);
      DigestOutputStream fos = new DigestOutputStream(fout, digester);
      DataOutputStream out = new DataOutputStream(fos);
      try {
        out.writeInt(LAYOUT_VERSION);
        LayoutFlags.write(out);
        // We use the non-locked version of getNamespaceInfo here since
        // the coordinating thread of saveNamespace already has read-locked
        // the namespace for us. If we attempt to take another readlock
        // from the actual saver thread, there's a potential of a
        // fairness-related deadlock. See the comments on HDFS-2223.
        out.writeInt(sourceNamesystem.unprotectedGetNamespaceInfo()
            .getNamespaceID());
        out.writeLong(numINodes);
        out.writeLong(sourceNamesystem.getBlockIdManager().getGenerationStampV1());
        out.writeLong(sourceNamesystem.getBlockIdManager().getGenerationStampV2());
        out.writeLong(sourceNamesystem.getBlockIdManager().getGenerationStampAtblockIdSwitch());
        out.writeLong(sourceNamesystem.getBlockIdManager().getLastAllocatedBlockId());
        out.writeLong(context.getTxId());
        out.writeLong(sourceNamesystem.dir.getLastInodeId());


        sourceNamesystem.getSnapshotManager().write(out);

        // write compression info and set up compressed stream
        // NOTE: 'out' is rebound to the (possibly) compressed wrapper around
        // fos; everything before this point is always uncompressed.
        out = compression.writeHeaderAndWrapStream(fos);
        LOG.info("Saving image file " + newFile +
                 " using " + compression);

        // save the root
        saveINode2Image(rootDir, out, false, referenceMap, counter);
        // save the rest of the nodes
        saveImage(rootDir, out, true, false, counter);
        prog.endStep(Phase.SAVING_CHECKPOINT, step);
        // Now that the step is finished, set counter equal to total to adjust
        // for possible under-counting due to reference inodes.
        prog.setCount(Phase.SAVING_CHECKPOINT, step, numINodes);
        // save files under construction
        // TODO: for HDFS-5428, since we cannot break the compatibility of
        // fsimage, we store part of the under-construction files that are only
        // in snapshots in this "under-construction-file" section. As a
        // temporary solution, we use "/.reserved/.inodes/<inodeid>" as their
        // paths, so that when loading fsimage we do not put them into the lease
        // map. In the future, we can remove this hack when we can bump the
        // layout version.
        sourceNamesystem.saveFilesUnderConstruction(out, snapshotUCMap);

        context.checkCancelled();
        sourceNamesystem.saveSecretManagerStateCompat(out, sdPath);
        context.checkCancelled();
        sourceNamesystem.getCacheManager().saveStateCompat(out, sdPath);
        context.checkCancelled();
        out.flush();
        context.checkCancelled();
        // fsync to disk before close so a crash cannot leave a torn image.
        fout.getChannel().force(true);
      } finally {
        // Closing the outermost wrapper closes the whole fos/fout chain.
        out.close();
      }

      saved = true;
      // set md5 of the saved image
      savedDigest = new MD5Hash(digester.digest());

      LOG.info("Image file " + newFile + " of size " + newFile.length()
          + " bytes saved in " + (monotonicNow() - startTime) / 1000
          + " seconds.");
    }

    /**
     * Save children INodes.
     * @param children The list of children INodes
     * @param out The DataOutputStream to write
     * @param inSnapshot Whether the parent directory or its ancestor is in
     *                   the deleted list of some snapshot (caused by rename or
     *                   deletion)
     * @param counter Counter to increment for namenode startup progress
     * @return Number of children that are directory
     */
    private int saveChildren(ReadOnlyList<INode> children,
        DataOutputStream out, boolean inSnapshot, Counter counter)
        throws IOException {
      // Write normal children INode.
      out.writeInt(children.size());
      int dirNum = 0;
      for(INode child : children) {
        // print all children first
        // TODO: for HDFS-5428, we cannot change the format/content of fsimage
        // here, thus even if the parent directory is in snapshot, we still
        // do not handle INodeUC as those stored in deleted list
        saveINode2Image(child, out, false, referenceMap, counter);
        if (child.isDirectory()) {
          dirNum++;
        } else if (inSnapshot && child.isFile()
            && child.asFile().isUnderConstruction()) {
          // Remember snapshot-only UC files; they are written later in the
          // under-construction section (see save()).
          this.snapshotUCMap.put(child.getId(), child.asFile());
        }
        // Periodically give the coordinator a chance to cancel the save.
        if (checkCancelCounter++ % CHECK_CANCEL_INTERVAL == 0) {
          context.checkCancelled();
        }
      }
      return dirNum;
    }

    /**
     * Save file tree image starting from the given root.
     * This is a recursive procedure, which first saves all children and
     * snapshot diffs of a current directory and then moves inside the
     * sub-directories.
     *
     * @param current The current node
     * @param out The DataoutputStream to write the image
     * @param toSaveSubtree Whether or not to save the subtree to fsimage. For
     *                      reference node, its subtree may already have been
     *                      saved before.
     * @param inSnapshot Whether the current directory is in snapshot
     * @param counter Counter to increment for namenode startup progress
     */
    private void saveImage(INodeDirectory current, DataOutputStream out,
        boolean toSaveSubtree, boolean inSnapshot, Counter counter)
        throws IOException {
      // write the inode id of the directory
      out.writeLong(current.getId());

      // A reference whose subtree was already serialized writes only its id.
      if (!toSaveSubtree) {
        return;
      }

      final ReadOnlyList<INode> children = current
          .getChildrenList(Snapshot.CURRENT_STATE_ID);
      int dirNum = 0;
      List<INodeDirectory> snapshotDirs = null;
      DirectoryWithSnapshotFeature sf = current.getDirectoryWithSnapshotFeature();
      if (sf != null) {
        // Directories that exist only in snapshots (deleted/renamed in the
        // current tree) still need their subtrees serialized below.
        snapshotDirs = new ArrayList<INodeDirectory>();
        sf.getSnapshotDirectory(snapshotDirs);
        dirNum += snapshotDirs.size();
      }

      // 2. Write INodeDirectorySnapshottable#snapshotsByNames to record all
      // Snapshots
      if (current.isDirectory() && current.asDirectory().isSnapshottable()) {
        SnapshotFSImageFormat.saveSnapshots(current.asDirectory(), out);
      } else {
        out.writeInt(-1); // # of snapshots
      }

      // 3. Write children INode
      dirNum += saveChildren(children, out, inSnapshot, counter);

      // 4. Write DirectoryDiff lists, if there is any.
      SnapshotFSImageFormat.saveDirectoryDiffList(current, out, referenceMap);

      // Write sub-tree of sub-directories, including possible snapshots of
      // deleted sub-directories
      out.writeInt(dirNum); // the number of sub-directories
      for(INode child : children) {
        if(!child.isDirectory()) {
          continue;
        }
        // make sure we only save the subtree under a reference node once
        boolean toSave = child.isReference() ?
            referenceMap.toProcessSubtree(child.getId()) : true;
        saveImage(child.asDirectory(), out, toSave, inSnapshot, counter);
      }
      if (snapshotDirs != null) {
        for (INodeDirectory subDir : snapshotDirs) {
          // make sure we only save the subtree under a reference node once
          boolean toSave = subDir.getParentReference() != null ?
              referenceMap.toProcessSubtree(subDir.getId()) : true;
          // Subtrees reached via a snapshot diff are always inSnapshot=true.
          saveImage(subDir, out, toSave, true, counter);
        }
      }
    }

    /**
     * Saves inode and increments progress counter.
     *
     * @param inode INode to save
     * @param out DataOutputStream to receive inode
     * @param writeUnderConstruction boolean true if this is under construction
     * @param referenceMap ReferenceMap containing reference inodes
     * @param counter Counter to increment for namenode startup progress
     * @throws IOException thrown if there is an I/O error
     */
    private void saveINode2Image(INode inode, DataOutputStream out,
        boolean writeUnderConstruction, ReferenceMap referenceMap,
        Counter counter) throws IOException {
      FSImageSerialization.saveINode2Image(inode, out, writeUnderConstruction,
          referenceMap);
      // Intentionally do not increment counter for reference inodes, because it
      // is too difficult at this point to assess whether or not this is a
      // reference that counts toward quota.
      if (!(inode instanceof INodeReference)) {
        counter.increment();
      }
    }
  }
}