001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import static org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; 021import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 022import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 023import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; 024import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; 025import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT; 026import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY; 027import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT; 028import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY; 029import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_DEFAULT; 030import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY; 031import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT; 032import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_KEY; 033import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_DEFAULT; 034import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY; 035import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT; 036import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY; 037import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT; 038import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY; 039import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY; 040import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT; 041import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY; 042import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT; 043import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY; 044import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT; 045import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY; 046import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME; 047import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT; 048import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY; 049import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT; 050import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY; 051import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT; 052import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY; 053import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT; 054import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY; 055import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY; 056import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY; 057import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS; 058import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT; 059import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD; 060import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT; 061import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT; 062import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY; 063import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC; 064import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT; 065import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_INODE_ATTRIBUTES_PROVIDER_KEY; 066import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT; 067import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY; 068import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY; 069import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT; 070import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY; 071import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY; 072import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT; 073import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY; 074import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT; 075import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY; 076import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT; 077import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY; 078import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY; 079import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT; 080import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY; 081import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT; 082import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY; 083import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; 084import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT; 085import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY; 086import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT; 087import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY; 088import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT; 089import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; 090import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT; 091import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY; 092import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; 093import static org.apache.hadoop.util.Time.now; 094import static org.apache.hadoop.util.Time.monotonicNow; 095import static 
org.apache.hadoop.hdfs.server.namenode.top.metrics.TopMetrics.TOPMETRICS_METRICS_SOURCE_NAME; 096 097import java.io.BufferedWriter; 098import java.io.ByteArrayInputStream; 099import java.io.DataInput; 100import java.io.DataInputStream; 101import java.io.DataOutputStream; 102import java.io.File; 103import java.io.FileNotFoundException; 104import java.io.FileOutputStream; 105import java.io.IOException; 106import java.io.OutputStreamWriter; 107import java.io.PrintWriter; 108import java.io.StringWriter; 109import java.lang.management.ManagementFactory; 110import java.net.InetAddress; 111import java.net.URI; 112import java.security.GeneralSecurityException; 113import java.util.ArrayList; 114import java.util.Arrays; 115import java.util.Collection; 116import java.util.Collections; 117import java.util.Date; 118import java.util.EnumSet; 119import java.util.HashMap; 120import java.util.HashSet; 121import java.util.Iterator; 122import java.util.LinkedHashSet; 123import java.util.List; 124import java.util.Map; 125import java.util.Set; 126import java.util.TreeMap; 127import java.util.concurrent.TimeUnit; 128import java.util.concurrent.locks.Condition; 129import java.util.concurrent.locks.ReentrantLock; 130import java.util.concurrent.locks.ReentrantReadWriteLock; 131 132import javax.management.NotCompliantMBeanException; 133import javax.management.ObjectName; 134import javax.management.StandardMBean; 135 136import org.apache.commons.logging.Log; 137import org.apache.commons.logging.LogFactory; 138import org.apache.commons.logging.impl.Log4JLogger; 139import org.apache.hadoop.HadoopIllegalArgumentException; 140import org.apache.hadoop.classification.InterfaceAudience; 141import org.apache.hadoop.conf.Configuration; 142import org.apache.hadoop.crypto.CipherSuite; 143import org.apache.hadoop.crypto.CryptoProtocolVersion; 144import org.apache.hadoop.crypto.key.KeyProvider; 145import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; 146import 
org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; 147import org.apache.hadoop.fs.CacheFlag; 148import org.apache.hadoop.fs.ContentSummary; 149import org.apache.hadoop.fs.CreateFlag; 150import org.apache.hadoop.fs.FileAlreadyExistsException; 151import org.apache.hadoop.fs.FileEncryptionInfo; 152import org.apache.hadoop.fs.FileStatus; 153import org.apache.hadoop.fs.FileSystem; 154import org.apache.hadoop.fs.FsServerDefaults; 155import org.apache.hadoop.fs.InvalidPathException; 156import org.apache.hadoop.fs.Options; 157import org.apache.hadoop.fs.ParentNotDirectoryException; 158import org.apache.hadoop.fs.Path; 159import org.apache.hadoop.fs.UnresolvedLinkException; 160import org.apache.hadoop.fs.XAttr; 161import org.apache.hadoop.fs.XAttrSetFlag; 162import org.apache.hadoop.fs.permission.AclEntry; 163import org.apache.hadoop.fs.permission.AclStatus; 164import org.apache.hadoop.fs.permission.FsAction; 165import org.apache.hadoop.fs.permission.FsPermission; 166import org.apache.hadoop.fs.permission.PermissionStatus; 167import org.apache.hadoop.fs.StorageType; 168import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 169import org.apache.hadoop.ha.ServiceFailedException; 170import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; 171import org.apache.hadoop.hdfs.DFSConfigKeys; 172import org.apache.hadoop.hdfs.DFSUtil; 173import org.apache.hadoop.hdfs.HAUtil; 174import org.apache.hadoop.hdfs.HdfsConfiguration; 175import org.apache.hadoop.hdfs.UnknownCryptoProtocolVersionException; 176import org.apache.hadoop.hdfs.XAttrHelper; 177import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; 178import org.apache.hadoop.hdfs.protocol.Block; 179import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; 180import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; 181import org.apache.hadoop.hdfs.protocol.CachePoolEntry; 182import org.apache.hadoop.hdfs.protocol.CachePoolInfo; 183import org.apache.hadoop.hdfs.protocol.ClientProtocol; 184import 
org.apache.hadoop.hdfs.protocol.DatanodeID; 185import org.apache.hadoop.hdfs.protocol.DatanodeInfo; 186import org.apache.hadoop.hdfs.protocol.DirectoryListing; 187import org.apache.hadoop.hdfs.protocol.EncryptionZone; 188import org.apache.hadoop.hdfs.protocol.ExtendedBlock; 189import org.apache.hadoop.hdfs.protocol.HdfsConstants; 190import org.apache.hadoop.hdfs.protocol.LastBlockWithStatus; 191import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; 192import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; 193import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; 194import org.apache.hadoop.hdfs.protocol.LocatedBlock; 195import org.apache.hadoop.hdfs.protocol.LocatedBlocks; 196import org.apache.hadoop.hdfs.protocol.QuotaExceededException; 197import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException; 198import org.apache.hadoop.hdfs.protocol.RollingUpgradeException; 199import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; 200import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException; 201import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; 202import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; 203import org.apache.hadoop.hdfs.protocol.datatransfer.ReplaceDatanodeOnFailure; 204import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; 205import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode; 206import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; 207import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; 208import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState; 209import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection; 210import org.apache.hadoop.hdfs.server.blockmanagement.BlockIdManager; 211import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous; 212import 
org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguousUnderConstruction; 213import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; 214import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; 215import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; 216import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics; 217import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; 218import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; 219import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 220import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption; 221import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 222import org.apache.hadoop.hdfs.server.common.Storage; 223import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType; 224import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; 225import org.apache.hadoop.hdfs.server.common.Util; 226import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; 227import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; 228import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; 229import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; 230import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; 231import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; 232import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; 233import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; 234import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer; 235import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; 236import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 237import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; 238import 
org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager; 239import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; 240import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 241import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; 242import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status; 243import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; 244import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; 245import org.apache.hadoop.hdfs.server.namenode.top.TopAuditLogger; 246import org.apache.hadoop.hdfs.server.namenode.top.TopConf; 247import org.apache.hadoop.hdfs.server.namenode.top.metrics.TopMetrics; 248import org.apache.hadoop.hdfs.server.namenode.top.window.RollingWindowManager; 249import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; 250import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; 251import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; 252import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; 253import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; 254import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; 255import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; 256import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; 257import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; 258import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; 259import org.apache.hadoop.hdfs.server.protocol.StorageReport; 260import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; 261import org.apache.hadoop.io.EnumSetWritable; 262import org.apache.hadoop.io.IOUtils; 263import org.apache.hadoop.io.Text; 264import org.apache.hadoop.ipc.RetriableException; 265import org.apache.hadoop.ipc.RetryCache; 266import org.apache.hadoop.ipc.Server; 267import org.apache.hadoop.ipc.StandbyException; 268import 
org.apache.hadoop.metrics2.annotation.Metric; 269import org.apache.hadoop.metrics2.annotation.Metrics; 270import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 271import org.apache.hadoop.metrics2.lib.MetricsRegistry; 272import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation; 273import org.apache.hadoop.metrics2.util.MBeans; 274import org.apache.hadoop.net.NetworkTopology; 275import org.apache.hadoop.net.Node; 276import org.apache.hadoop.net.NodeBase; 277import org.apache.hadoop.security.AccessControlException; 278import org.apache.hadoop.security.UserGroupInformation; 279import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; 280import org.apache.hadoop.security.token.SecretManager.InvalidToken; 281import org.apache.hadoop.security.token.Token; 282import org.apache.hadoop.security.token.TokenIdentifier; 283import org.apache.hadoop.security.token.delegation.DelegationKey; 284import org.apache.hadoop.util.ChunkedArrayList; 285import org.apache.hadoop.util.Daemon; 286import org.apache.hadoop.util.DataChecksum; 287import org.apache.hadoop.util.ReflectionUtils; 288import org.apache.hadoop.util.StringUtils; 289import org.apache.hadoop.util.VersionInfo; 290import org.apache.log4j.Appender; 291import org.apache.log4j.AsyncAppender; 292import org.apache.log4j.Logger; 293import org.codehaus.jackson.map.ObjectMapper; 294import org.mortbay.util.ajax.JSON; 295 296import com.google.common.annotations.VisibleForTesting; 297import com.google.common.base.Charsets; 298import com.google.common.base.Preconditions; 299import com.google.common.collect.ImmutableMap; 300import com.google.common.collect.Lists; 301 302/*************************************************** 303 * FSNamesystem does the actual bookkeeping work for the 304 * DataNode. 305 * 306 * It tracks several important tables. 
307 * 308 * 1) valid fsname --> blocklist (kept on disk, logged) 309 * 2) Set of all valid blocks (inverted #1) 310 * 3) block --> machinelist (kept in memory, rebuilt dynamically from reports) 311 * 4) machine --> blocklist (inverted #2) 312 * 5) LRU cache of updated-heartbeat machines 313 ***************************************************/ 314@InterfaceAudience.Private 315@Metrics(context="dfs") 316public class FSNamesystem implements Namesystem, FSNamesystemMBean, 317 NameNodeMXBean { 318 public static final Log LOG = LogFactory.getLog(FSNamesystem.class); 319 private final MetricsRegistry registry = new MetricsRegistry("FSNamesystem"); 320 @Metric final MutableRatesWithAggregation detailedLockHoldTimeMetrics = 321 registry.newRatesWithAggregation("detailedLockHoldTimeMetrics"); 322 323 private static final ThreadLocal<StringBuilder> auditBuffer = 324 new ThreadLocal<StringBuilder>() { 325 @Override 326 protected StringBuilder initialValue() { 327 return new StringBuilder(); 328 } 329 }; 330 331 private final BlockIdManager blockIdManager; 332 333 @VisibleForTesting 334 public boolean isAuditEnabled() { 335 return !isDefaultAuditLogger || auditLog.isInfoEnabled(); 336 } 337 338 private void logAuditEvent(boolean succeeded, String cmd, String src) 339 throws IOException { 340 logAuditEvent(succeeded, cmd, src, null, null); 341 } 342 343 private void logAuditEvent(boolean succeeded, String cmd, String src, 344 String dst, HdfsFileStatus stat) throws IOException { 345 if (isAuditEnabled() && isExternalInvocation()) { 346 logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(), 347 cmd, src, dst, stat); 348 } 349 } 350 351 private void logAuditEvent(boolean succeeded, 352 UserGroupInformation ugi, InetAddress addr, String cmd, String src, 353 String dst, HdfsFileStatus stat) { 354 FileStatus status = null; 355 if (stat != null) { 356 Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null; 357 Path path = dst != null ? 
new Path(dst) : new Path(src); 358 status = new FileStatus(stat.getLen(), stat.isDir(), 359 stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(), 360 stat.getAccessTime(), stat.getPermission(), stat.getOwner(), 361 stat.getGroup(), symlink, path); 362 } 363 for (AuditLogger logger : auditLoggers) { 364 if (logger instanceof HdfsAuditLogger) { 365 HdfsAuditLogger hdfsLogger = (HdfsAuditLogger) logger; 366 hdfsLogger.logAuditEvent(succeeded, ugi.toString(), addr, cmd, src, dst, 367 status, ugi, dtSecretManager); 368 } else { 369 logger.logAuditEvent(succeeded, ugi.toString(), addr, 370 cmd, src, dst, status); 371 } 372 } 373 } 374 375 /** 376 * Logger for audit events, noting successful FSNamesystem operations. Emits 377 * to FSNamesystem.audit at INFO. Each event causes a set of tab-separated 378 * <code>key=value</code> pairs to be written for the following properties: 379 * <code> 380 * ugi=<ugi in RPC> 381 * ip=<remote IP> 382 * cmd=<command> 383 * src=<src path> 384 * dst=<dst path (optional)> 385 * perm=<permissions (optional)> 386 * </code> 387 */ 388 public static final Log auditLog = LogFactory.getLog( 389 FSNamesystem.class.getName() + ".audit"); 390 391 static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100; 392 static int BLOCK_DELETION_INCREMENT = 1000; 393 private final boolean isPermissionEnabled; 394 private final UserGroupInformation fsOwner; 395 private final String supergroup; 396 private final boolean standbyShouldCheckpoint; 397 398 // Scan interval is not configurable. 
399 private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL = 400 TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS); 401 final DelegationTokenSecretManager dtSecretManager; 402 private final boolean alwaysUseDelegationTokensForTests; 403 404 private static final Step STEP_AWAITING_REPORTED_BLOCKS = 405 new Step(StepType.AWAITING_REPORTED_BLOCKS); 406 407 // Tracks whether the default audit logger is the only configured audit 408 // logger; this allows isAuditEnabled() to return false in case the 409 // underlying logger is disabled, and avoid some unnecessary work. 410 private final boolean isDefaultAuditLogger; 411 private final List<AuditLogger> auditLoggers; 412 413 /** The namespace tree. */ 414 FSDirectory dir; 415 private final BlockManager blockManager; 416 private final SnapshotManager snapshotManager; 417 private final CacheManager cacheManager; 418 private final DatanodeStatistics datanodeStatistics; 419 420 private String nameserviceId; 421 422 private volatile RollingUpgradeInfo rollingUpgradeInfo = null; 423 /** 424 * A flag that indicates whether the checkpointer should checkpoint a rollback 425 * fsimage. The edit log tailer sets this flag. The checkpoint will create a 426 * rollback fsimage if the flag is true, and then change the flag to false. 427 */ 428 private volatile boolean needRollbackFsImage; 429 430 // Block pool ID used by this namenode 431 private String blockPoolId; 432 433 final LeaseManager leaseManager = new LeaseManager(this); 434 435 volatile Daemon smmthread = null; // SafeModeMonitor thread 436 437 Daemon nnrmthread = null; // NamenodeResourceMonitor thread 438 439 Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread 440 441 // A daemon to periodically clean up corrupt lazyPersist files 442 // from the name space. 
443 Daemon lazyPersistFileScrubber = null; 444 /** 445 * When an active namenode will roll its own edit log, in # edits 446 */ 447 private final long editLogRollerThreshold; 448 /** 449 * Check interval of an active namenode's edit log roller thread 450 */ 451 private final int editLogRollerInterval; 452 453 /** 454 * How frequently we scan and unlink corrupt lazyPersist files. 455 * (In seconds) 456 */ 457 private final int lazyPersistFileScrubIntervalSec; 458 459 private volatile boolean hasResourcesAvailable = false; 460 private volatile boolean fsRunning = true; 461 462 /** The start time of the namesystem. */ 463 private final long startTime = now(); 464 465 /** The interval of namenode checking for the disk space availability */ 466 private final long resourceRecheckInterval; 467 468 // The actual resource checker instance. 469 NameNodeResourceChecker nnResourceChecker; 470 471 private final FsServerDefaults serverDefaults; 472 private final boolean supportAppends; 473 private final ReplaceDatanodeOnFailure dtpReplaceDatanodeOnFailure; 474 475 private volatile SafeModeInfo safeMode; // safe mode information 476 477 private final long maxFsObjects; // maximum number of fs objects 478 479 private final long minBlockSize; // minimum block size 480 private final long maxBlocksPerFile; // maximum # of blocks per file 481 482 // precision of access times. 483 private final long accessTimePrecision; 484 485 /** Lock to protect FSNamesystem. */ 486 private final FSNamesystemLock fsLock; 487 488 /** 489 * Checkpoint lock to protect FSNamesystem modification on standby NNs. 490 * Unlike fsLock, it does not affect block updates. On active NNs, this lock 491 * does not provide proper protection, because there are operations that 492 * modify both block and name system state. Even on standby, fsLock is 493 * used when block state changes need to be blocked. 
494 */ 495 private final ReentrantLock cpLock; 496 497 /** 498 * Used when this NN is in standby state to read from the shared edit log. 499 */ 500 private EditLogTailer editLogTailer = null; 501 502 /** 503 * Used when this NN is in standby state to perform checkpoints. 504 */ 505 private StandbyCheckpointer standbyCheckpointer; 506 507 /** 508 * Reference to the NN's HAContext object. This is only set once 509 * {@link #startCommonServices(Configuration, HAContext)} is called. 510 */ 511 private HAContext haContext; 512 513 private final boolean haEnabled; 514 515 /** flag indicating whether replication queues have been initialized */ 516 boolean initializedReplQueues = false; 517 518 /** 519 * Whether the namenode is in the middle of starting the active service 520 */ 521 private volatile boolean startingActiveService = false; 522 523 private final RetryCache retryCache; 524 525 private KeyProviderCryptoExtension provider = null; 526 527 private volatile boolean imageLoaded = false; 528 private final Condition cond; 529 530 private final FSImage fsImage; 531 532 private final TopConf topConf; 533 private TopMetrics topMetrics; 534 535 private INodeAttributeProvider inodeAttributeProvider; 536 537 /** 538 * Notify that loading of this FSDirectory is complete, and 539 * it is imageLoaded for use 540 */ 541 void imageLoadComplete() { 542 Preconditions.checkState(!imageLoaded, "FSDirectory already loaded"); 543 setImageLoaded(); 544 } 545 546 void setImageLoaded() { 547 if(imageLoaded) return; 548 writeLock(); 549 try { 550 setImageLoaded(true); 551 dir.markNameCacheInitialized(); 552 cond.signalAll(); 553 } finally { 554 writeUnlock("setImageLoaded"); 555 } 556 } 557 558 //This is for testing purposes only 559 @VisibleForTesting 560 boolean isImageLoaded() { 561 return imageLoaded; 562 } 563 564 // exposed for unit tests 565 protected void setImageLoaded(boolean flag) { 566 imageLoaded = flag; 567 } 568 569 /** 570 * Block until the object is imageLoaded to be 
used. 571 */ 572 void waitForLoadingFSImage() { 573 if (!imageLoaded) { 574 writeLock(); 575 try { 576 while (!imageLoaded) { 577 try { 578 cond.await(5000, TimeUnit.MILLISECONDS); 579 } catch (InterruptedException ignored) { 580 } 581 } 582 } finally { 583 writeUnlock(); 584 } 585 } 586 } 587 588 /** 589 * Clear all loaded data 590 */ 591 void clear() { 592 dir.reset(); 593 dtSecretManager.reset(); 594 blockIdManager.clear(); 595 leaseManager.removeAllLeases(); 596 snapshotManager.clearSnapshottableDirs(); 597 cacheManager.clear(); 598 setImageLoaded(false); 599 blockManager.clear(); 600 } 601 602 @VisibleForTesting 603 LeaseManager getLeaseManager() { 604 return leaseManager; 605 } 606 607 boolean isHaEnabled() { 608 return haEnabled; 609 } 610 611 /** 612 * Check the supplied configuration for correctness. 613 * @param conf Supplies the configuration to validate. 614 * @throws IOException if the configuration could not be queried. 615 * @throws IllegalArgumentException if the configuration is invalid. 616 */ 617 private static void checkConfiguration(Configuration conf) 618 throws IOException { 619 620 final Collection<URI> namespaceDirs = 621 FSNamesystem.getNamespaceDirs(conf); 622 final Collection<URI> editsDirs = 623 FSNamesystem.getNamespaceEditsDirs(conf); 624 final Collection<URI> requiredEditsDirs = 625 FSNamesystem.getRequiredNamespaceEditsDirs(conf); 626 final Collection<URI> sharedEditsDirs = 627 FSNamesystem.getSharedEditsDirs(conf); 628 629 for (URI u : requiredEditsDirs) { 630 if (u.toString().compareTo( 631 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT) == 0) { 632 continue; 633 } 634 635 // Each required directory must also be in editsDirs or in 636 // sharedEditsDirs. 637 if (!editsDirs.contains(u) && 638 !sharedEditsDirs.contains(u)) { 639 throw new IllegalArgumentException( 640 "Required edits directory " + u.toString() + " not present in " + 641 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + ". 
" + 642 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY + "=" + 643 editsDirs.toString() + "; " + 644 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY + "=" + 645 requiredEditsDirs.toString() + ". " + 646 DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY + "=" + 647 sharedEditsDirs.toString() + "."); 648 } 649 } 650 651 if (namespaceDirs.size() == 1) { 652 LOG.warn("Only one image storage directory (" 653 + DFS_NAMENODE_NAME_DIR_KEY + ") configured. Beware of data loss" 654 + " due to lack of redundant storage directories!"); 655 } 656 if (editsDirs.size() == 1) { 657 LOG.warn("Only one namespace edits storage directory (" 658 + DFS_NAMENODE_EDITS_DIR_KEY + ") configured. Beware of data loss" 659 + " due to lack of redundant storage directories!"); 660 } 661 } 662 663 /** 664 * Instantiates an FSNamesystem loaded from the image and edits 665 * directories specified in the passed Configuration. 666 * 667 * @param conf the Configuration which specifies the storage directories 668 * from which to load 669 * @return an FSNamesystem which contains the loaded namespace 670 * @throws IOException if loading fails 671 */ 672 static FSNamesystem loadFromDisk(Configuration conf) throws IOException { 673 674 checkConfiguration(conf); 675 FSImage fsImage = new FSImage(conf, 676 FSNamesystem.getNamespaceDirs(conf), 677 FSNamesystem.getNamespaceEditsDirs(conf)); 678 FSNamesystem namesystem = new FSNamesystem(conf, fsImage, false); 679 StartupOption startOpt = NameNode.getStartupOption(conf); 680 if (startOpt == StartupOption.RECOVER) { 681 namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER); 682 } 683 684 long loadStart = monotonicNow(); 685 try { 686 namesystem.loadFSImage(startOpt); 687 } catch (IOException ioe) { 688 LOG.warn("Encountered exception loading fsimage", ioe); 689 fsImage.close(); 690 throw ioe; 691 } 692 long timeTakenToLoadFSImage = monotonicNow() - loadStart; 693 LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs"); 694 NameNodeMetrics 
nnMetrics = NameNode.getNameNodeMetrics(); 695 if (nnMetrics != null) { 696 nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage); 697 } 698 return namesystem; 699 } 700 701 FSNamesystem(Configuration conf, FSImage fsImage) throws IOException { 702 this(conf, fsImage, false); 703 } 704 705 /** 706 * Create an FSNamesystem associated with the specified image. 707 * 708 * Note that this does not load any data off of disk -- if you would 709 * like that behavior, use {@link #loadFromDisk(Configuration)} 710 * 711 * @param conf configuration 712 * @param fsImage The FSImage to associate with 713 * @param ignoreRetryCache Whether or not should ignore the retry cache setup 714 * step. For Secondary NN this should be set to true. 715 * @throws IOException on bad configuration 716 */ 717 FSNamesystem(Configuration conf, FSImage fsImage, boolean ignoreRetryCache) 718 throws IOException { 719 provider = DFSUtil.createKeyProviderCryptoExtension(conf); 720 if (provider == null) { 721 LOG.info("No KeyProvider found."); 722 } else { 723 LOG.info("Found KeyProvider: " + provider.toString()); 724 } 725 if (conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY, 726 DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT)) { 727 LOG.info("Enabling async auditlog"); 728 enableAsyncAuditLog(); 729 } 730 fsLock = new FSNamesystemLock(conf, detailedLockHoldTimeMetrics); 731 cond = fsLock.newWriteLockCondition(); 732 cpLock = new ReentrantLock(); 733 734 this.fsImage = fsImage; 735 try { 736 resourceRecheckInterval = conf.getLong( 737 DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 738 DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT); 739 740 this.blockManager = new BlockManager(this, conf); 741 this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics(); 742 this.blockIdManager = new BlockIdManager(blockManager); 743 744 this.fsOwner = UserGroupInformation.getCurrentUser(); 745 this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY, 746 DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT); 747 
this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY, 748 DFS_PERMISSIONS_ENABLED_DEFAULT); 749 LOG.info("fsOwner = " + fsOwner); 750 LOG.info("supergroup = " + supergroup); 751 LOG.info("isPermissionEnabled = " + isPermissionEnabled); 752 753 // block allocation has to be persisted in HA using a shared edits directory 754 // so that the standby has up-to-date namespace information 755 nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); 756 this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId); 757 758 // Sanity check the HA-related config. 759 if (nameserviceId != null) { 760 LOG.info("Determined nameservice ID: " + nameserviceId); 761 } 762 LOG.info("HA Enabled: " + haEnabled); 763 if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) { 764 LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf)); 765 throw new IOException("Invalid configuration: a shared edits dir " + 766 "must not be specified if HA is not enabled."); 767 } 768 769 // Get the checksum type from config 770 String checksumTypeStr = conf.get(DFS_CHECKSUM_TYPE_KEY, DFS_CHECKSUM_TYPE_DEFAULT); 771 DataChecksum.Type checksumType; 772 try { 773 checksumType = DataChecksum.Type.valueOf(checksumTypeStr); 774 } catch (IllegalArgumentException iae) { 775 throw new IOException("Invalid checksum type in " 776 + DFS_CHECKSUM_TYPE_KEY + ": " + checksumTypeStr); 777 } 778 779 this.serverDefaults = new FsServerDefaults( 780 conf.getLongBytes(DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE_DEFAULT), 781 conf.getInt(DFS_BYTES_PER_CHECKSUM_KEY, DFS_BYTES_PER_CHECKSUM_DEFAULT), 782 conf.getInt(DFS_CLIENT_WRITE_PACKET_SIZE_KEY, DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT), 783 (short) conf.getInt(DFS_REPLICATION_KEY, DFS_REPLICATION_DEFAULT), 784 conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT), 785 conf.getBoolean(DFS_ENCRYPT_DATA_TRANSFER_KEY, DFS_ENCRYPT_DATA_TRANSFER_DEFAULT), 786 conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT), 787 checksumType); 788 789 
this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY, 790 DFS_NAMENODE_MAX_OBJECTS_DEFAULT); 791 792 this.minBlockSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 793 DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_DEFAULT); 794 this.maxBlocksPerFile = conf.getLong(DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY, 795 DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_DEFAULT); 796 this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 797 DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT); 798 this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT); 799 LOG.info("Append Enabled: " + supportAppends); 800 801 this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf); 802 803 this.standbyShouldCheckpoint = conf.getBoolean( 804 DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT); 805 // # edit autoroll threshold is a multiple of the checkpoint threshold 806 this.editLogRollerThreshold = (long) 807 (conf.getFloat( 808 DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD, 809 DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) * 810 conf.getLong( 811 DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 812 DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT)); 813 this.editLogRollerInterval = conf.getInt( 814 DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS, 815 DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT); 816 817 this.lazyPersistFileScrubIntervalSec = conf.getInt( 818 DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC, 819 DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT); 820 821 if (this.lazyPersistFileScrubIntervalSec == 0) { 822 throw new IllegalArgumentException( 823 DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC + " must be non-zero."); 824 } 825 826 // For testing purposes, allow the DT secret manager to be started regardless 827 // of whether security is enabled. 
828 alwaysUseDelegationTokensForTests = conf.getBoolean( 829 DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, 830 DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT); 831 832 this.dtSecretManager = createDelegationTokenSecretManager(conf); 833 this.dir = new FSDirectory(this, conf); 834 this.snapshotManager = new SnapshotManager(dir); 835 this.cacheManager = new CacheManager(this, conf, blockManager); 836 this.safeMode = new SafeModeInfo(conf); 837 this.topConf = new TopConf(conf); 838 this.auditLoggers = initAuditLoggers(conf); 839 this.isDefaultAuditLogger = auditLoggers.size() == 1 && 840 auditLoggers.get(0) instanceof DefaultAuditLogger; 841 this.retryCache = ignoreRetryCache ? null : initRetryCache(conf); 842 Class<? extends INodeAttributeProvider> klass = conf.getClass( 843 DFS_NAMENODE_INODE_ATTRIBUTES_PROVIDER_KEY, 844 null, INodeAttributeProvider.class); 845 if (klass != null) { 846 inodeAttributeProvider = ReflectionUtils.newInstance(klass, conf); 847 LOG.info("Using INode attribute provider: " + klass.getName()); 848 } 849 } catch(IOException e) { 850 LOG.error(getClass().getSimpleName() + " initialization failed.", e); 851 close(); 852 throw e; 853 } catch (RuntimeException re) { 854 LOG.error(getClass().getSimpleName() + " initialization failed.", re); 855 close(); 856 throw re; 857 } 858 } 859 860 @VisibleForTesting 861 public List<AuditLogger> getAuditLoggers() { 862 return auditLoggers; 863 } 864 865 @VisibleForTesting 866 public RetryCache getRetryCache() { 867 return retryCache; 868 } 869 870 void lockRetryCache() { 871 if (retryCache != null) { 872 retryCache.lock(); 873 } 874 } 875 876 void unlockRetryCache() { 877 if (retryCache != null) { 878 retryCache.unlock(); 879 } 880 } 881 882 /** Whether or not retry cache is enabled */ 883 boolean hasRetryCache() { 884 return retryCache != null; 885 } 886 887 void addCacheEntryWithPayload(byte[] clientId, int callId, Object payload) { 888 if (retryCache != null) { 889 
retryCache.addCacheEntryWithPayload(clientId, callId, payload); 890 } 891 } 892 893 void addCacheEntry(byte[] clientId, int callId) { 894 if (retryCache != null) { 895 retryCache.addCacheEntry(clientId, callId); 896 } 897 } 898 899 @VisibleForTesting 900 public KeyProviderCryptoExtension getProvider() { 901 return provider; 902 } 903 904 @VisibleForTesting 905 static RetryCache initRetryCache(Configuration conf) { 906 boolean enable = conf.getBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY, 907 DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT); 908 LOG.info("Retry cache on namenode is " + (enable ? "enabled" : "disabled")); 909 if (enable) { 910 float heapPercent = conf.getFloat( 911 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY, 912 DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT); 913 long entryExpiryMillis = conf.getLong( 914 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_KEY, 915 DFS_NAMENODE_RETRY_CACHE_EXPIRYTIME_MILLIS_DEFAULT); 916 LOG.info("Retry cache will use " + heapPercent 917 + " of total heap and retry cache entry expiry time is " 918 + entryExpiryMillis + " millis"); 919 long entryExpiryNanos = entryExpiryMillis * 1000 * 1000; 920 return new RetryCache("NameNodeRetryCache", heapPercent, 921 entryExpiryNanos); 922 } 923 return null; 924 } 925 926 private List<AuditLogger> initAuditLoggers(Configuration conf) { 927 // Initialize the custom access loggers if configured. 
928 Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY); 929 List<AuditLogger> auditLoggers = Lists.newArrayList(); 930 if (alClasses != null && !alClasses.isEmpty()) { 931 for (String className : alClasses) { 932 try { 933 AuditLogger logger; 934 if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) { 935 logger = new DefaultAuditLogger(); 936 } else { 937 logger = (AuditLogger) Class.forName(className).newInstance(); 938 } 939 logger.initialize(conf); 940 auditLoggers.add(logger); 941 } catch (RuntimeException re) { 942 throw re; 943 } catch (Exception e) { 944 throw new RuntimeException(e); 945 } 946 } 947 } 948 949 // Make sure there is at least one logger installed. 950 if (auditLoggers.isEmpty()) { 951 auditLoggers.add(new DefaultAuditLogger()); 952 } 953 954 // Add audit logger to calculate top users 955 if (topConf.isEnabled) { 956 topMetrics = new TopMetrics(conf, topConf.nntopReportingPeriodsMs); 957 if (DefaultMetricsSystem.instance().getSource( 958 TOPMETRICS_METRICS_SOURCE_NAME) == null) { 959 DefaultMetricsSystem.instance().register(TOPMETRICS_METRICS_SOURCE_NAME, 960 "Top N operations by user", topMetrics); 961 } 962 auditLoggers.add(new TopAuditLogger(topMetrics)); 963 } 964 965 return Collections.unmodifiableList(auditLoggers); 966 } 967 968 private void loadFSImage(StartupOption startOpt) throws IOException { 969 final FSImage fsImage = getFSImage(); 970 971 // format before starting up if requested 972 if (startOpt == StartupOption.FORMAT) { 973 974 fsImage.format(this, fsImage.getStorage().determineClusterId());// reuse current id 975 976 startOpt = StartupOption.REGULAR; 977 } 978 boolean success = false; 979 writeLock(); 980 try { 981 // We shouldn't be calling saveNamespace if we've come up in standby state. 
982 MetaRecoveryContext recovery = startOpt.createRecoveryContext(); 983 final boolean staleImage 984 = fsImage.recoverTransitionRead(startOpt, this, recovery); 985 if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt) || 986 RollingUpgradeStartupOption.DOWNGRADE.matches(startOpt)) { 987 rollingUpgradeInfo = null; 988 } 989 final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade(); 990 LOG.info("Need to save fs image? " + needToSave 991 + " (staleImage=" + staleImage + ", haEnabled=" + haEnabled 992 + ", isRollingUpgrade=" + isRollingUpgrade() + ")"); 993 if (needToSave) { 994 fsImage.saveNamespace(this); 995 } else { 996 updateStorageVersionForRollingUpgrade(fsImage.getLayoutVersion(), 997 startOpt); 998 // No need to save, so mark the phase done. 999 StartupProgress prog = NameNode.getStartupProgress(); 1000 prog.beginPhase(Phase.SAVING_CHECKPOINT); 1001 prog.endPhase(Phase.SAVING_CHECKPOINT); 1002 } 1003 // This will start a new log segment and write to the seen_txid file, so 1004 // we shouldn't do it when coming up in standby state 1005 if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE) 1006 || (haEnabled && startOpt == StartupOption.UPGRADEONLY)) { 1007 fsImage.openEditLogForWrite(); 1008 } 1009 success = true; 1010 } finally { 1011 if (!success) { 1012 fsImage.close(); 1013 } 1014 writeUnlock("loadFSImage"); 1015 } 1016 imageLoadComplete(); 1017 } 1018 1019 private void updateStorageVersionForRollingUpgrade(final long layoutVersion, 1020 StartupOption startOpt) throws IOException { 1021 boolean rollingStarted = RollingUpgradeStartupOption.STARTED 1022 .matches(startOpt) && layoutVersion > HdfsConstants 1023 .NAMENODE_LAYOUT_VERSION; 1024 boolean rollingRollback = RollingUpgradeStartupOption.ROLLBACK 1025 .matches(startOpt); 1026 if (rollingRollback || rollingStarted) { 1027 fsImage.updateStorageVersion(); 1028 } 1029 } 1030 1031 private void startSecretManager() { 1032 if (dtSecretManager != null) { 1033 try { 1034 
dtSecretManager.startThreads(); 1035 } catch (IOException e) { 1036 // Inability to start secret manager 1037 // can't be recovered from. 1038 throw new RuntimeException(e); 1039 } 1040 } 1041 } 1042 1043 private void startSecretManagerIfNecessary() { 1044 boolean shouldRun = shouldUseDelegationTokens() && 1045 !isInSafeMode() && getEditLog().isOpenForWrite(); 1046 boolean running = dtSecretManager.isRunning(); 1047 if (shouldRun && !running) { 1048 startSecretManager(); 1049 } 1050 } 1051 1052 private void stopSecretManager() { 1053 if (dtSecretManager != null) { 1054 dtSecretManager.stopThreads(); 1055 } 1056 } 1057 1058 /** 1059 * Start services common to both active and standby states 1060 */ 1061 void startCommonServices(Configuration conf, HAContext haContext) throws IOException { 1062 this.registerMBean(); // register the MBean for the FSNamesystemState 1063 writeLock(); 1064 this.haContext = haContext; 1065 try { 1066 nnResourceChecker = new NameNodeResourceChecker(conf); 1067 checkAvailableResources(); 1068 assert safeMode != null && !isPopulatingReplQueues(); 1069 StartupProgress prog = NameNode.getStartupProgress(); 1070 prog.beginPhase(Phase.SAFEMODE); 1071 prog.setTotal(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS, 1072 getCompleteBlocksTotal()); 1073 setBlockTotal(); 1074 blockManager.activate(conf); 1075 } finally { 1076 writeUnlock("startCommonServices"); 1077 } 1078 1079 registerMXBean(); 1080 DefaultMetricsSystem.instance().register(this); 1081 if (inodeAttributeProvider != null) { 1082 inodeAttributeProvider.start(); 1083 dir.setINodeAttributeProvider(inodeAttributeProvider); 1084 } 1085 snapshotManager.registerMXBean(); 1086 } 1087 1088 /** 1089 * Stop services common to both active and standby states 1090 */ 1091 void stopCommonServices() { 1092 writeLock(); 1093 if (inodeAttributeProvider != null) { 1094 dir.setINodeAttributeProvider(null); 1095 inodeAttributeProvider.stop(); 1096 } 1097 try { 1098 if (blockManager != null) 
blockManager.close(); 1099 } finally { 1100 writeUnlock("stopCommonServices"); 1101 } 1102 RetryCache.clear(retryCache); 1103 } 1104 1105 /** 1106 * Start services required in active state 1107 * @throws IOException 1108 */ 1109 void startActiveServices() throws IOException { 1110 startingActiveService = true; 1111 LOG.info("Starting services required for active state"); 1112 writeLock(); 1113 try { 1114 FSEditLog editLog = getFSImage().getEditLog(); 1115 1116 if (!editLog.isOpenForWrite()) { 1117 // During startup, we're already open for write during initialization. 1118 editLog.initJournalsForWrite(); 1119 // May need to recover 1120 editLog.recoverUnclosedStreams(); 1121 1122 LOG.info("Catching up to latest edits from old active before " + 1123 "taking over writer role in edits logs"); 1124 editLogTailer.catchupDuringFailover(); 1125 1126 blockManager.setPostponeBlocksFromFuture(false); 1127 blockManager.getDatanodeManager().markAllDatanodesStale(); 1128 blockManager.clearQueues(); 1129 blockManager.processAllPendingDNMessages(); 1130 1131 // Only need to re-process the queue, If not in SafeMode. 1132 if (!isInSafeMode()) { 1133 LOG.info("Reprocessing replication and invalidation queues"); 1134 initializeReplQueues(); 1135 } 1136 1137 if (LOG.isDebugEnabled()) { 1138 LOG.debug("NameNode metadata after re-processing " + 1139 "replication and invalidation queues during failover:\n" + 1140 metaSaveAsString()); 1141 } 1142 1143 long nextTxId = getFSImage().getLastAppliedTxId() + 1; 1144 LOG.info("Will take over writing edit logs at txnid " + 1145 nextTxId); 1146 editLog.setNextTxId(nextTxId); 1147 1148 getFSImage().editLog.openForWrite(); 1149 } 1150 1151 // Enable quota checks. 1152 dir.enableQuotaChecks(); 1153 if (haEnabled) { 1154 // Renew all of the leases before becoming active. 1155 // This is because, while we were in standby mode, 1156 // the leases weren't getting renewed on this NN. 1157 // Give them all a fresh start here. 
1158 leaseManager.renewAllLeases(); 1159 } 1160 leaseManager.startMonitor(); 1161 startSecretManagerIfNecessary(); 1162 1163 //ResourceMonitor required only at ActiveNN. See HDFS-2914 1164 this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); 1165 nnrmthread.start(); 1166 1167 nnEditLogRoller = new Daemon(new NameNodeEditLogRoller( 1168 editLogRollerThreshold, editLogRollerInterval)); 1169 nnEditLogRoller.start(); 1170 1171 if (lazyPersistFileScrubIntervalSec > 0) { 1172 lazyPersistFileScrubber = new Daemon(new LazyPersistFileScrubber( 1173 lazyPersistFileScrubIntervalSec)); 1174 lazyPersistFileScrubber.start(); 1175 } 1176 1177 cacheManager.startMonitorThread(); 1178 blockManager.getDatanodeManager().setShouldSendCachingCommands(true); 1179 } finally { 1180 startingActiveService = false; 1181 checkSafeMode(); 1182 writeUnlock("startActiveServices"); 1183 } 1184 } 1185 1186 /** 1187 * Initialize replication queues. 1188 */ 1189 private void initializeReplQueues() { 1190 LOG.info("initializing replication queues"); 1191 blockManager.processMisReplicatedBlocks(); 1192 initializedReplQueues = true; 1193 } 1194 1195 private boolean inActiveState() { 1196 return haContext != null && 1197 haContext.getState().getServiceState() == HAServiceState.ACTIVE; 1198 } 1199 1200 /** 1201 * @return Whether the namenode is transitioning to active state and is in the 1202 * middle of the {@link #startActiveServices()} 1203 */ 1204 public boolean inTransitionToActive() { 1205 return haEnabled && inActiveState() && startingActiveService; 1206 } 1207 1208 private boolean shouldUseDelegationTokens() { 1209 return UserGroupInformation.isSecurityEnabled() || 1210 alwaysUseDelegationTokensForTests; 1211 } 1212 1213 /** 1214 * Stop services required in active state 1215 */ 1216 void stopActiveServices() { 1217 LOG.info("Stopping services started for active state"); 1218 writeLock(); 1219 try { 1220 stopSecretManager(); 1221 leaseManager.stopMonitor(); 1222 if (nnrmthread != null) { 
1223 ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor(); 1224 nnrmthread.interrupt(); 1225 } 1226 if (nnEditLogRoller != null) { 1227 ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop(); 1228 nnEditLogRoller.interrupt(); 1229 } 1230 if (lazyPersistFileScrubber != null) { 1231 ((LazyPersistFileScrubber) lazyPersistFileScrubber.getRunnable()).stop(); 1232 lazyPersistFileScrubber.interrupt(); 1233 } 1234 if (dir != null && getFSImage() != null) { 1235 if (getFSImage().editLog != null) { 1236 getFSImage().editLog.close(); 1237 } 1238 // Update the fsimage with the last txid that we wrote 1239 // so that the tailer starts from the right spot. 1240 getFSImage().updateLastAppliedTxIdFromWritten(); 1241 } 1242 if (cacheManager != null) { 1243 cacheManager.stopMonitorThread(); 1244 cacheManager.clearDirectiveStats(); 1245 } 1246 blockManager.getDatanodeManager().clearPendingCachingCommands(); 1247 blockManager.getDatanodeManager().setShouldSendCachingCommands(false); 1248 // Don't want to keep replication queues when not in Active. 1249 blockManager.clearQueues(); 1250 initializedReplQueues = false; 1251 } finally { 1252 writeUnlock("stopActiveServices"); 1253 } 1254 } 1255 1256 /** 1257 * Start services required in standby state 1258 * 1259 * @throws IOException 1260 */ 1261 void startStandbyServices(final Configuration conf) throws IOException { 1262 LOG.info("Starting services required for standby state"); 1263 if (!getFSImage().editLog.isOpenForRead()) { 1264 // During startup, we're already open for read. 1265 getFSImage().editLog.initSharedJournalsForRead(); 1266 } 1267 1268 blockManager.setPostponeBlocksFromFuture(true); 1269 1270 // Disable quota checks while in standby. 
1271 dir.disableQuotaChecks(); 1272 editLogTailer = new EditLogTailer(this, conf); 1273 editLogTailer.start(); 1274 if (standbyShouldCheckpoint) { 1275 standbyCheckpointer = new StandbyCheckpointer(conf, this); 1276 standbyCheckpointer.start(); 1277 } 1278 } 1279 1280 /** 1281 * Called when the NN is in Standby state and the editlog tailer tails the 1282 * OP_ROLLING_UPGRADE_START. 1283 */ 1284 void triggerRollbackCheckpoint() { 1285 setNeedRollbackFsImage(true); 1286 if (standbyCheckpointer != null) { 1287 standbyCheckpointer.triggerRollbackCheckpoint(); 1288 } 1289 } 1290 1291 /** 1292 * Called while the NN is in Standby state, but just about to be 1293 * asked to enter Active state. This cancels any checkpoints 1294 * currently being taken. 1295 */ 1296 void prepareToStopStandbyServices() throws ServiceFailedException { 1297 if (standbyCheckpointer != null) { 1298 standbyCheckpointer.cancelAndPreventCheckpoints( 1299 "About to leave standby state"); 1300 } 1301 } 1302 1303 /** Stop services required in standby state */ 1304 void stopStandbyServices() throws IOException { 1305 LOG.info("Stopping services started for standby state"); 1306 if (standbyCheckpointer != null) { 1307 standbyCheckpointer.stop(); 1308 } 1309 if (editLogTailer != null) { 1310 editLogTailer.stop(); 1311 } 1312 if (dir != null && getFSImage() != null && getFSImage().editLog != null) { 1313 getFSImage().editLog.close(); 1314 } 1315 } 1316 1317 @Override 1318 public void checkOperation(OperationCategory op) throws StandbyException { 1319 if (haContext != null) { 1320 // null in some unit tests 1321 haContext.checkOperation(op); 1322 } 1323 } 1324 1325 /** 1326 * @throws RetriableException 1327 * If 1) The NameNode is in SafeMode, 2) HA is enabled, and 3) 1328 * NameNode is in active state 1329 * @throws SafeModeException 1330 * Otherwise if NameNode is in SafeMode. 
1331 */ 1332 void checkNameNodeSafeMode(String errorMsg) 1333 throws RetriableException, SafeModeException { 1334 if (isInSafeMode()) { 1335 SafeModeException se = new SafeModeException(errorMsg, safeMode); 1336 if (haEnabled && haContext != null 1337 && haContext.getState().getServiceState() == HAServiceState.ACTIVE 1338 && shouldRetrySafeMode(this.safeMode)) { 1339 throw new RetriableException(se); 1340 } else { 1341 throw se; 1342 } 1343 } 1344 } 1345 1346 boolean isPermissionEnabled() { 1347 return isPermissionEnabled; 1348 } 1349 1350 /** 1351 * We already know that the safemode is on. We will throw a RetriableException 1352 * if the safemode is not manual or caused by low resource. 1353 */ 1354 private boolean shouldRetrySafeMode(SafeModeInfo safeMode) { 1355 if (safeMode == null) { 1356 return false; 1357 } else { 1358 return !safeMode.isManual() && !safeMode.areResourcesLow(); 1359 } 1360 } 1361 1362 public static Collection<URI> getNamespaceDirs(Configuration conf) { 1363 return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY); 1364 } 1365 1366 /** 1367 * Get all edits dirs which are required. If any shared edits dirs are 1368 * configured, these are also included in the set of required dirs. 1369 * 1370 * @param conf the HDFS configuration. 1371 * @return all required dirs. 
1372 */ 1373 public static Collection<URI> getRequiredNamespaceEditsDirs(Configuration conf) { 1374 Set<URI> ret = new HashSet<URI>(); 1375 ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY)); 1376 ret.addAll(getSharedEditsDirs(conf)); 1377 return ret; 1378 } 1379 1380 private static Collection<URI> getStorageDirs(Configuration conf, 1381 String propertyName) { 1382 Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName); 1383 StartupOption startOpt = NameNode.getStartupOption(conf); 1384 if(startOpt == StartupOption.IMPORT) { 1385 // In case of IMPORT this will get rid of default directories 1386 // but will retain directories specified in hdfs-site.xml 1387 // When importing image from a checkpoint, the name-node can 1388 // start with empty set of storage directories. 1389 Configuration cE = new HdfsConfiguration(false); 1390 cE.addResource("core-default.xml"); 1391 cE.addResource("core-site.xml"); 1392 cE.addResource("hdfs-default.xml"); 1393 Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName); 1394 dirNames.removeAll(dirNames2); 1395 if(dirNames.isEmpty()) 1396 LOG.warn("!!! WARNING !!!" + 1397 "\n\tThe NameNode currently runs without persistent storage." + 1398 "\n\tAny changes to the file system meta-data may be lost." + 1399 "\n\tRecommended actions:" + 1400 "\n\t\t- shutdown and restart NameNode with configured \"" 1401 + propertyName + "\" in hdfs-site.xml;" + 1402 "\n\t\t- use Backup Node as a persistent and up-to-date storage " + 1403 "of the file system meta-data."); 1404 } else if (dirNames.isEmpty()) { 1405 dirNames = Collections.singletonList( 1406 DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_DEFAULT); 1407 } 1408 return Util.stringCollectionAsURIs(dirNames); 1409 } 1410 1411 /** 1412 * Return an ordered list of edits directories to write to. 1413 * The list is ordered such that all shared edits directories 1414 * are ordered before non-shared directories, and any duplicates 1415 * are removed. 
The order they are specified in the configuration 1416 * is retained. 1417 * @return Collection of shared edits directories. 1418 * @throws IOException if multiple shared edits directories are configured 1419 */ 1420 public static List<URI> getNamespaceEditsDirs(Configuration conf) 1421 throws IOException { 1422 return getNamespaceEditsDirs(conf, true); 1423 } 1424 1425 public static List<URI> getNamespaceEditsDirs(Configuration conf, 1426 boolean includeShared) 1427 throws IOException { 1428 // Use a LinkedHashSet so that order is maintained while we de-dup 1429 // the entries. 1430 LinkedHashSet<URI> editsDirs = new LinkedHashSet<URI>(); 1431 1432 if (includeShared) { 1433 List<URI> sharedDirs = getSharedEditsDirs(conf); 1434 1435 // Fail until multiple shared edits directories are supported (HDFS-2782) 1436 if (sharedDirs.size() > 1) { 1437 throw new IOException( 1438 "Multiple shared edits directories are not yet supported"); 1439 } 1440 1441 // First add the shared edits dirs. It's critical that the shared dirs 1442 // are added first, since JournalSet syncs them in the order they are listed, 1443 // and we need to make sure all edits are in place in the shared storage 1444 // before they are replicated locally. See HDFS-2874. 1445 for (URI dir : sharedDirs) { 1446 if (!editsDirs.add(dir)) { 1447 LOG.warn("Edits URI " + dir + " listed multiple times in " + 1448 DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates."); 1449 } 1450 } 1451 } 1452 // Now add the non-shared dirs. 1453 for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) { 1454 if (!editsDirs.add(dir)) { 1455 LOG.warn("Edits URI " + dir + " listed multiple times in " + 1456 DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " + 1457 DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates."); 1458 } 1459 } 1460 1461 if (editsDirs.isEmpty()) { 1462 // If this is the case, no edit dirs have been explicitly configured. 1463 // Image dirs are to be used for edits too. 
1464 return Lists.newArrayList(getNamespaceDirs(conf)); 1465 } else { 1466 return Lists.newArrayList(editsDirs); 1467 } 1468 } 1469 1470 /** 1471 * Returns edit directories that are shared between primary and secondary. 1472 * @param conf configuration 1473 * @return collection of edit directories from {@code conf} 1474 */ 1475 public static List<URI> getSharedEditsDirs(Configuration conf) { 1476 // don't use getStorageDirs here, because we want an empty default 1477 // rather than the dir in /tmp 1478 Collection<String> dirNames = conf.getTrimmedStringCollection( 1479 DFS_NAMENODE_SHARED_EDITS_DIR_KEY); 1480 return Util.stringCollectionAsURIs(dirNames); 1481 } 1482 1483 @Override 1484 public void readLock() { 1485 this.fsLock.readLock(); 1486 } 1487 @Override 1488 public void readUnlock() { 1489 this.fsLock.readUnlock(); 1490 } 1491 public void readUnlock(String opName) { 1492 this.fsLock.readUnlock(opName); 1493 } 1494 @Override 1495 public void writeLock() { 1496 this.fsLock.writeLock(); 1497 } 1498 @Override 1499 public void writeLockInterruptibly() throws InterruptedException { 1500 this.fsLock.writeLockInterruptibly(); 1501 } 1502 @Override 1503 public void writeUnlock() { 1504 this.fsLock.writeUnlock(); 1505 } 1506 public void writeUnlock(String opName) { 1507 this.fsLock.writeUnlock(opName); 1508 } 1509 @Override 1510 public boolean hasWriteLock() { 1511 return this.fsLock.isWriteLockedByCurrentThread(); 1512 } 1513 @Override 1514 public boolean hasReadLock() { 1515 return this.fsLock.getReadHoldCount() > 0 || hasWriteLock(); 1516 } 1517 1518 public int getReadHoldCount() { 1519 return this.fsLock.getReadHoldCount(); 1520 } 1521 1522 public int getWriteHoldCount() { 1523 return this.fsLock.getWriteHoldCount(); 1524 } 1525 1526 /** Lock the checkpoint lock */ 1527 public void cpLock() { 1528 this.cpLock.lock(); 1529 } 1530 1531 /** Lock the checkpoint lock interrupibly */ 1532 public void cpLockInterruptibly() throws InterruptedException { 1533 
this.cpLock.lockInterruptibly(); 1534 } 1535 1536 /** Unlock the checkpoint lock */ 1537 public void cpUnlock() { 1538 this.cpLock.unlock(); 1539 } 1540 1541 1542 NamespaceInfo getNamespaceInfo() { 1543 readLock(); 1544 try { 1545 return unprotectedGetNamespaceInfo(); 1546 } finally { 1547 readUnlock("getNamespaceInfo"); 1548 } 1549 } 1550 1551 /** 1552 * Version of @see #getNamespaceInfo() that is not protected by a lock. 1553 */ 1554 NamespaceInfo unprotectedGetNamespaceInfo() { 1555 return new NamespaceInfo(getFSImage().getStorage().getNamespaceID(), 1556 getClusterId(), getBlockPoolId(), 1557 getFSImage().getStorage().getCTime()); 1558 } 1559 1560 /** 1561 * Close down this file system manager. 1562 * Causes heartbeat and lease daemons to stop; waits briefly for 1563 * them to finish, but a short timeout returns control back to caller. 1564 */ 1565 void close() { 1566 fsRunning = false; 1567 try { 1568 stopCommonServices(); 1569 if (smmthread != null) smmthread.interrupt(); 1570 } finally { 1571 // using finally to ensure we also wait for lease daemon 1572 try { 1573 stopActiveServices(); 1574 stopStandbyServices(); 1575 } catch (IOException ie) { 1576 } finally { 1577 IOUtils.cleanup(LOG, dir); 1578 IOUtils.cleanup(LOG, fsImage); 1579 } 1580 } 1581 } 1582 1583 @Override 1584 public boolean isRunning() { 1585 return fsRunning; 1586 } 1587 1588 @Override 1589 public boolean isInStandbyState() { 1590 if (haContext == null || haContext.getState() == null) { 1591 // We're still starting up. In this case, if HA is 1592 // on for the cluster, we always start in standby. Otherwise 1593 // start in active. 
1594 return haEnabled; 1595 } 1596 1597 return HAServiceState.STANDBY == haContext.getState().getServiceState(); 1598 } 1599 1600 /** 1601 * Dump all metadata into specified file 1602 */ 1603 void metaSave(String filename) throws IOException { 1604 checkSuperuserPrivilege(); 1605 checkOperation(OperationCategory.UNCHECKED); 1606 writeLock(); 1607 try { 1608 checkOperation(OperationCategory.UNCHECKED); 1609 File file = new File(System.getProperty("hadoop.log.dir"), filename); 1610 PrintWriter out = new PrintWriter(new BufferedWriter( 1611 new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8))); 1612 metaSave(out); 1613 out.flush(); 1614 out.close(); 1615 } finally { 1616 writeUnlock("metaSave"); 1617 } 1618 } 1619 1620 private void metaSave(PrintWriter out) { 1621 assert hasWriteLock(); 1622 long totalInodes = this.dir.totalInodes(); 1623 long totalBlocks = this.getBlocksTotal(); 1624 out.println(totalInodes + " files and directories, " + totalBlocks 1625 + " blocks = " + (totalInodes + totalBlocks) + " total"); 1626 1627 blockManager.metaSave(out); 1628 } 1629 1630 private String metaSaveAsString() { 1631 StringWriter sw = new StringWriter(); 1632 PrintWriter pw = new PrintWriter(sw); 1633 metaSave(pw); 1634 pw.flush(); 1635 return sw.toString(); 1636 } 1637 1638 FsServerDefaults getServerDefaults() throws StandbyException { 1639 checkOperation(OperationCategory.READ); 1640 return serverDefaults; 1641 } 1642 1643 long getAccessTimePrecision() { 1644 return accessTimePrecision; 1645 } 1646 1647 private boolean isAccessTimeSupported() { 1648 return accessTimePrecision > 0; 1649 } 1650 1651 ///////////////////////////////////////////////////////// 1652 // 1653 // These methods are called by HadoopFS clients 1654 // 1655 ///////////////////////////////////////////////////////// 1656 /** 1657 * Set permissions for an existing file. 
1658 * @throws IOException 1659 */ 1660 void setPermission(String src, FsPermission permission) throws IOException { 1661 final String operationName = "setPermission"; 1662 HdfsFileStatus auditStat; 1663 checkOperation(OperationCategory.WRITE); 1664 writeLock(); 1665 try { 1666 checkOperation(OperationCategory.WRITE); 1667 checkNameNodeSafeMode("Cannot set permission for " + src); 1668 auditStat = FSDirAttrOp.setPermission(dir, src, permission); 1669 } catch (AccessControlException e) { 1670 logAuditEvent(false, operationName, src); 1671 throw e; 1672 } finally { 1673 writeUnlock(operationName); 1674 } 1675 getEditLog().logSync(); 1676 logAuditEvent(true, operationName, src, null, auditStat); 1677 } 1678 1679 /** 1680 * Set owner for an existing file. 1681 * @throws IOException 1682 */ 1683 void setOwner(String src, String username, String group) 1684 throws IOException { 1685 final String operationName = "setOwner"; 1686 HdfsFileStatus auditStat; 1687 checkOperation(OperationCategory.WRITE); 1688 writeLock(); 1689 try { 1690 checkOperation(OperationCategory.WRITE); 1691 checkNameNodeSafeMode("Cannot set owner for " + src); 1692 auditStat = FSDirAttrOp.setOwner(dir, src, username, group); 1693 } catch (AccessControlException e) { 1694 logAuditEvent(false, operationName, src); 1695 throw e; 1696 } finally { 1697 writeUnlock(operationName); 1698 } 1699 getEditLog().logSync(); 1700 logAuditEvent(true, operationName, src, null, auditStat); 1701 } 1702 1703 static class GetBlockLocationsResult { 1704 final boolean updateAccessTime; 1705 final LocatedBlocks blocks; 1706 boolean updateAccessTime() { 1707 return updateAccessTime; 1708 } 1709 private GetBlockLocationsResult( 1710 boolean updateAccessTime, LocatedBlocks blocks) { 1711 this.updateAccessTime = updateAccessTime; 1712 this.blocks = blocks; 1713 } 1714 } 1715 1716 /** 1717 * Get block locations within the specified range. 
* @see ClientProtocol#getBlockLocations(String, long, long)
   *
   * The lookup runs under the namesystem read lock and is audited as
   * operation "open". If the lookup reports that the access time is stale,
   * the write lock is taken afterwards for a best-effort access time update.
   * Finally the located blocks are sorted with respect to
   * {@code clientMachine}.
   *
   * @param clientMachine client host passed to the datanode manager when
   *                      sorting block locations
   * @param srcArg path of the file as supplied by the caller
   * @param offset start offset of the requested range
   * @param length length of the requested range
   * @return the located blocks of the lookup result
   * @throws IOException if the lookup fails; an AccessControlException is
   *         audited as a failed event before being rethrown
   */
  LocatedBlocks getBlockLocations(String clientMachine, String srcArg,
      long offset, long length) throws IOException {
    final String operationName = "open";
    checkOperation(OperationCategory.READ);
    GetBlockLocationsResult res = null;
    FSPermissionChecker pc = getPermissionChecker();
    readLock();
    try {
      checkOperation(OperationCategory.READ);
      res = getBlockLocations(pc, srcArg, offset, length, true, true);
    } catch (AccessControlException e) {
      logAuditEvent(false, operationName, srcArg);
      throw e;
    } finally {
      readUnlock(operationName);
    }

    logAuditEvent(true, operationName, srcArg);

    if (res.updateAccessTime()) {
      String src = srcArg;
      writeLock();
      final long now = now();
      try {
        checkOperation(OperationCategory.WRITE);
        /**
         * Resolve the path again and update the atime only when the file
         * exists.
         *
         * XXX: Races can still occur even after resolving the path again.
         * For example:
         *
         * <ul>
         *   <li>Get the block location for "/a/b"</li>
         *   <li>Rename "/a/b" to "/c/b"</li>
         *   <li>The second resolution still points to "/a/b", which is
         *   wrong.</li>
         * </ul>
         *
         * The behavior is incorrect but consistent with the one before
         * HDFS-7463. A better fix is to change the edit log of SetTime to
         * use inode id instead of a path.
         */
        final INodesInPath iip = dir.resolvePath(pc, src);
        src = iip.getPath();
        INode inode = iip.getLastINode();
        // Re-check staleness under the write lock; the inode may be gone.
        boolean updateAccessTime = inode != null &&
            now > inode.getAccessTime() + getAccessTimePrecision();
        if (!isInSafeMode() && updateAccessTime) {
          boolean changed = FSDirAttrOp.setTimes(dir,
              inode, -1, now, false, iip.getLatestSnapshotId());
          if (changed) {
            getEditLog().logTimes(src, -1, now);
          }
        }
      } catch (Throwable e) {
        // The atime update is best effort: any failure is logged and the
        // already computed block locations are still returned.
        LOG.warn("Failed to update the access time of " + src, e);
      } finally {
        writeUnlock(operationName);
      }
    }

    LocatedBlocks blocks = res.blocks;
    if (blocks != null) {
      blockManager.getDatanodeManager().sortLocatedBlocks(
          clientMachine, blocks.getLocatedBlocks());

      // lastBlock is not part of getLocatedBlocks(), might need to sort it too
      LocatedBlock lastBlock = blocks.getLastLocatedBlock();
      if (lastBlock != null) {
        ArrayList<LocatedBlock> lastBlockList = Lists.newArrayList(lastBlock);
        blockManager.getDatanodeManager().sortLocatedBlocks(
            clientMachine, lastBlockList);
      }
    }
    return blocks;
  }

  /**
   * Get block locations within the specified range.
   * @see ClientProtocol#getBlockLocations(String, long, long)
   *
   * Validates that {@code offset} and {@code length} are non-negative,
   * performs the lookup and, when {@code checkSafeMode} is set and the
   * namesystem is in safe mode, rejects results that contain a block without
   * any location.
   *
   * @param pc permission checker of the caller
   * @param src path of the file
   * @param offset start offset of the requested range; must not be negative
   * @param length length of the requested range; must not be negative
   * @param needBlockToken forwarded to the block manager when the located
   *                       blocks are created
   * @param checkSafeMode whether to fail on blocks without locations while
   *                      in safe mode
   * @return the located blocks together with the access-time update flag
   * @throws IOException on lookup failure; a SafeModeException (wrapped in a
   *         RetriableException when HA is enabled and this node is active)
   *         if a block has no locations while in safe mode
   */
  GetBlockLocationsResult getBlockLocations(
      FSPermissionChecker pc, String src, long offset, long length,
      boolean needBlockToken, boolean checkSafeMode) throws IOException {
    if (offset < 0) {
      throw new HadoopIllegalArgumentException(
          "Negative offset is not supported. File: " + src);
    }
    if (length < 0) {
      throw new HadoopIllegalArgumentException(
          "Negative length is not supported. File: " + src);
    }
    final GetBlockLocationsResult ret = getBlockLocationsInt(
        pc, src, offset, length, needBlockToken);

    if (checkSafeMode && isInSafeMode()) {
      for (LocatedBlock b : ret.blocks.getLocatedBlocks()) {
        // if safemode & no block locations yet then throw safemodeException
        if ((b.getLocations() == null) || (b.getLocations().length == 0)) {
          SafeModeException se = new SafeModeException(
              "Zero blocklocations for " + src, safeMode);
          if (haEnabled && haContext != null &&
              haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
            throw new RetriableException(se);
          } else {
            throw se;
          }
        }
      }
    }
    return ret;
  }

  /**
   * Resolve {@code srcArg}, check read access when permissions are enabled,
   * and build the located blocks for the requested range.
   *
   * For a snapshot path the length is clamped to the snapshot file size and
   * the file is treated as not under construction. No encryption info is
   * attached when {@code srcArg} is a reserved raw name.
   *
   * @return the located blocks plus a flag that is true when access times
   *         are supported, the namesystem is not in safe mode, the path is
   *         not a snapshot path and the stored access time is older than the
   *         access time precision
   */
  private GetBlockLocationsResult getBlockLocationsInt(
      FSPermissionChecker pc, final String srcArg, long offset, long length,
      boolean needBlockToken)
      throws IOException {
    String src = srcArg;
    final INodesInPath iip = dir.resolvePath(pc, src);
    src = iip.getPath();
    final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src);
    if (isPermissionEnabled) {
      dir.checkPathAccess(pc, iip, FsAction.READ);
      checkUnreadableBySuperuser(pc, inode, iip.getPathSnapshotId());
    }

    final long fileSize = iip.isSnapshot()
        ? inode.computeFileSize(iip.getPathSnapshotId())
        : inode.computeFileSizeNotIncludingLastUcBlock();
    boolean isUc = inode.isUnderConstruction();
    if (iip.isSnapshot()) {
      // if src indicates a snapshot file, we need to make sure the returned
      // blocks do not exceed the size of the snapshot file.
      // NOTE(review): fileSize - offset is negative when offset exceeds the
      // snapshot file size - confirm createLocatedBlocks tolerates that.
      length = Math.min(length, fileSize - offset);
      isUc = false;
    }

    final FileEncryptionInfo feInfo =
        FSDirectory.isReservedRawName(srcArg) ? null
            : dir.getFileEncryptionInfo(inode, iip.getPathSnapshotId(), iip);

    final LocatedBlocks blocks = blockManager.createLocatedBlocks(
        inode.getBlocks(iip.getPathSnapshotId()), fileSize,
        isUc, offset, length, needBlockToken, iip.isSnapshot(), feInfo);

    // Set caching information for the located blocks.
    for (LocatedBlock lb : blocks.getLocatedBlocks()) {
      cacheManager.setCachedLocations(lb);
    }

    final long now = now();
    boolean updateAccessTime = isAccessTimeSupported() && !isInSafeMode()
        && !iip.isSnapshot()
        && now > inode.getAccessTime() + getAccessTimePrecision();
    return new GetBlockLocationsResult(updateAccessTime, blocks);
  }

  /**
   * Moves all the blocks from {@code srcs} and appends them to {@code target}
   * To avoid rollbacks we will verify validity of ALL of the args
   * before we start actual move.
   *
   * This does not support ".inodes" relative path
   *
   * The audit event is emitted for both success and failure, and the edit
   * log is synced only on success.
   *
   * @param target target to concat into
   * @param srcs file that will be concatenated
   * @param logRetryCache forwarded to the concat operation
   * @throws IOException on error
   */
  void concat(String target, String [] srcs, boolean logRetryCache)
      throws IOException {
    waitForLoadingFSImage();
    final String operationName = "concat";
    HdfsFileStatus stat = null;
    boolean success = false;
    // NOTE(review): unlike the sibling write operations there is no
    // checkOperation(WRITE) before the write lock is taken - confirm whether
    // that is intentional.
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      checkNameNodeSafeMode("Cannot concat " + target);
      stat = FSDirConcatOp.concat(dir, target, srcs, logRetryCache);
      success = true;
    } finally {
      writeUnlock(operationName);
      if (success) {
        getEditLog().logSync();
      }
      logAuditEvent(success, operationName, Arrays.toString(srcs),
          target, stat);
    }
  }

  /**
   * Stores the modification and access time for this inode.
   * The access time is precise up to an hour. The transaction, if needed, is
   * written to the edits log, which is synced after the namesystem write
   * lock has been released.
1915 */ 1916 void setTimes(String src, long mtime, long atime) throws IOException { 1917 final String operationName = "setTimes"; 1918 HdfsFileStatus auditStat; 1919 checkOperation(OperationCategory.WRITE); 1920 writeLock(); 1921 try { 1922 checkOperation(OperationCategory.WRITE); 1923 checkNameNodeSafeMode("Cannot set times " + src); 1924 auditStat = FSDirAttrOp.setTimes(dir, src, mtime, atime); 1925 } catch (AccessControlException e) { 1926 logAuditEvent(false, operationName, src); 1927 throw e; 1928 } finally { 1929 writeUnlock(operationName); 1930 } 1931 getEditLog().logSync(); 1932 logAuditEvent(true, operationName, src, null, auditStat); 1933 } 1934 1935 /** 1936 * Create a symbolic link. 1937 */ 1938 @SuppressWarnings("deprecation") 1939 void createSymlink(String target, String link, 1940 PermissionStatus dirPerms, boolean createParent, boolean logRetryCache) 1941 throws IOException { 1942 final String operationName = "createSymlink"; 1943 if (!FileSystem.areSymlinksEnabled()) { 1944 throw new UnsupportedOperationException("Symlinks not supported"); 1945 } 1946 HdfsFileStatus auditStat = null; 1947 checkOperation(OperationCategory.WRITE); 1948 writeLock(); 1949 try { 1950 checkOperation(OperationCategory.WRITE); 1951 checkNameNodeSafeMode("Cannot create symlink " + link); 1952 auditStat = FSDirSymlinkOp.createSymlinkInt(this, target, link, dirPerms, 1953 createParent, logRetryCache); 1954 } catch (AccessControlException e) { 1955 logAuditEvent(false, operationName, link, target, null); 1956 throw e; 1957 } finally { 1958 writeUnlock(operationName); 1959 } 1960 getEditLog().logSync(); 1961 logAuditEvent(true, operationName, link, target, auditStat); 1962 } 1963 1964 /** 1965 * Set replication for an existing file. 1966 * 1967 * The NameNode sets new replication and schedules either replication of 1968 * under-replicated data blocks or removal of the excessive block copies 1969 * if the blocks are over-replicated. 
1970 * 1971 * @see ClientProtocol#setReplication(String, short) 1972 * @param src file name 1973 * @param replication new replication 1974 * @return true if successful; 1975 * false if file does not exist or is a directory 1976 */ 1977 boolean setReplication(final String src, final short replication) 1978 throws IOException { 1979 final String operationName = "setReplication"; 1980 boolean success = false; 1981 waitForLoadingFSImage(); 1982 checkOperation(OperationCategory.WRITE); 1983 writeLock(); 1984 try { 1985 checkOperation(OperationCategory.WRITE); 1986 checkNameNodeSafeMode("Cannot set replication for " + src); 1987 success = FSDirAttrOp.setReplication(dir, blockManager, src, replication); 1988 } catch (AccessControlException e) { 1989 logAuditEvent(false, operationName, src); 1990 throw e; 1991 } finally { 1992 writeUnlock(operationName); 1993 } 1994 if (success) { 1995 getEditLog().logSync(); 1996 logAuditEvent(true, operationName, src); 1997 } 1998 return success; 1999 } 2000 2001 /** 2002 * Truncate file to a lower length. 2003 * Truncate cannot be reverted / recovered from as it causes data loss. 2004 * Truncation at block boundary is atomic, otherwise it requires 2005 * block recovery to truncate the last block of the file. 2006 * 2007 * @return true if client does not need to wait for block recovery, 2008 * false if client needs to wait for block recovery. 
2009 */ 2010 boolean truncate(String src, long newLength, 2011 String clientName, String clientMachine, 2012 long mtime) 2013 throws IOException, UnresolvedLinkException { 2014 boolean ret; 2015 try { 2016 ret = truncateInt(src, newLength, clientName, clientMachine, mtime); 2017 } catch (AccessControlException e) { 2018 logAuditEvent(false, "truncate", src); 2019 throw e; 2020 } 2021 return ret; 2022 } 2023 2024 boolean truncateInt(String srcArg, long newLength, 2025 String clientName, String clientMachine, 2026 long mtime) 2027 throws IOException, UnresolvedLinkException { 2028 final String operationName = "truncate"; 2029 String src = srcArg; 2030 NameNode.stateChangeLog.debug( 2031 "DIR* NameSystem.truncate: src={} newLength={}", src, newLength); 2032 if (newLength < 0) { 2033 throw new HadoopIllegalArgumentException( 2034 "Cannot truncate to a negative file size: " + newLength + "."); 2035 } 2036 HdfsFileStatus stat = null; 2037 FSPermissionChecker pc = getPermissionChecker(); 2038 checkOperation(OperationCategory.WRITE); 2039 boolean res; 2040 writeLock(); 2041 BlocksMapUpdateInfo toRemoveBlocks = new BlocksMapUpdateInfo(); 2042 try { 2043 checkOperation(OperationCategory.WRITE); 2044 checkNameNodeSafeMode("Cannot truncate for " + src); 2045 INodesInPath iip = dir.resolvePath(pc, src); 2046 src = iip.getPath(); 2047 res = truncateInternal(src, newLength, clientName, 2048 clientMachine, mtime, pc, toRemoveBlocks); 2049 stat = dir.getAuditFileInfo(dir.getINodesInPath4Write(src, false)); 2050 } finally { 2051 writeUnlock(operationName); 2052 } 2053 getEditLog().logSync(); 2054 if (!toRemoveBlocks.getToDeleteList().isEmpty()) { 2055 removeBlocks(toRemoveBlocks); 2056 toRemoveBlocks.clear(); 2057 } 2058 logAuditEvent(true, operationName, src, null, stat); 2059 return res; 2060 } 2061 2062 /** 2063 * Truncate a file to a given size 2064 * Update the count at each ancestor directory with quota 2065 */ 2066 boolean truncateInternal(String src, long newLength, 2067 
String clientName, String clientMachine, 2068 long mtime, FSPermissionChecker pc, 2069 BlocksMapUpdateInfo toRemoveBlocks) 2070 throws IOException, UnresolvedLinkException { 2071 assert hasWriteLock(); 2072 INodesInPath iip = dir.getINodesInPath4Write(src, true); 2073 if (isPermissionEnabled) { 2074 dir.checkPathAccess(pc, iip, FsAction.WRITE); 2075 } 2076 INodeFile file = INodeFile.valueOf(iip.getLastINode(), src); 2077 final BlockStoragePolicy lpPolicy = 2078 blockManager.getStoragePolicy("LAZY_PERSIST"); 2079 2080 if (lpPolicy != null && 2081 lpPolicy.getId() == file.getStoragePolicyID()) { 2082 throw new UnsupportedOperationException( 2083 "Cannot truncate lazy persist file " + src); 2084 } 2085 2086 // Check if the file is already being truncated with the same length 2087 final BlockInfoContiguous last = file.getLastBlock(); 2088 if (last != null && last.getBlockUCState() == BlockUCState.UNDER_RECOVERY) { 2089 final Block truncateBlock 2090 = ((BlockInfoContiguousUnderConstruction)last).getTruncateBlock(); 2091 if (truncateBlock != null) { 2092 final long truncateLength = file.computeFileSize(false, false) 2093 + truncateBlock.getNumBytes(); 2094 if (newLength == truncateLength) { 2095 return false; 2096 } 2097 } 2098 } 2099 2100 // Opening an existing file for truncate. May need lease recovery. 2101 recoverLeaseInternal(RecoverLeaseOp.TRUNCATE_FILE, 2102 iip, src, clientName, clientMachine, false); 2103 // Truncate length check. 2104 long oldLength = file.computeFileSize(); 2105 if(oldLength == newLength) { 2106 return true; 2107 } 2108 if(oldLength < newLength) { 2109 throw new HadoopIllegalArgumentException( 2110 "Cannot truncate to a larger file size. Current size: " + oldLength + 2111 ", truncate size: " + newLength + "."); 2112 } 2113 // Perform INodeFile truncation. 
2114 final QuotaCounts delta = new QuotaCounts.Builder().build(); 2115 boolean onBlockBoundary = dir.truncate(iip, newLength, toRemoveBlocks, 2116 mtime, delta); 2117 Block truncateBlock = null; 2118 if(!onBlockBoundary) { 2119 // Open file for write, but don't log into edits 2120 long lastBlockDelta = file.computeFileSize() - newLength; 2121 assert lastBlockDelta > 0 : "delta is 0 only if on block bounday"; 2122 truncateBlock = prepareFileForTruncate(iip, clientName, clientMachine, 2123 lastBlockDelta, null); 2124 } 2125 2126 // update the quota: use the preferred block size for UC block 2127 dir.writeLock(); 2128 try { 2129 dir.updateCountNoQuotaCheck(iip, iip.length() - 1, delta); 2130 } finally { 2131 dir.writeUnlock(); 2132 } 2133 2134 getEditLog().logTruncate(src, clientName, clientMachine, newLength, mtime, 2135 truncateBlock); 2136 return onBlockBoundary; 2137 } 2138 2139 /** 2140 * Convert current INode to UnderConstruction. 2141 * Recreate lease. 2142 * Create new block for the truncated copy. 2143 * Schedule truncation of the replicas. 2144 * 2145 * @return the returned block will be written to editLog and passed back into 2146 * this method upon loading. 2147 */ 2148 Block prepareFileForTruncate(INodesInPath iip, 2149 String leaseHolder, 2150 String clientMachine, 2151 long lastBlockDelta, 2152 Block newBlock) 2153 throws IOException { 2154 INodeFile file = iip.getLastINode().asFile(); 2155 String src = iip.getPath(); 2156 file.recordModification(iip.getLatestSnapshotId()); 2157 file.toUnderConstruction(leaseHolder, clientMachine); 2158 assert file.isUnderConstruction() : "inode should be under construction."; 2159 leaseManager.addLease( 2160 file.getFileUnderConstructionFeature().getClientName(), src); 2161 boolean shouldRecoverNow = (newBlock == null); 2162 BlockInfoContiguous oldBlock = file.getLastBlock(); 2163 boolean shouldCopyOnTruncate = shouldCopyOnTruncate(file, oldBlock); 2164 if(newBlock == null) { 2165 newBlock = (shouldCopyOnTruncate) ? 
createNewBlock() : 2166 new Block(oldBlock.getBlockId(), oldBlock.getNumBytes(), 2167 nextGenerationStamp(blockIdManager.isLegacyBlock(oldBlock))); 2168 } 2169 2170 BlockInfoContiguousUnderConstruction truncatedBlockUC; 2171 if(shouldCopyOnTruncate) { 2172 // Add new truncateBlock into blocksMap and 2173 // use oldBlock as a source for copy-on-truncate recovery 2174 truncatedBlockUC = new BlockInfoContiguousUnderConstruction(newBlock, 2175 file.getBlockReplication()); 2176 truncatedBlockUC.setNumBytes(oldBlock.getNumBytes() - lastBlockDelta); 2177 truncatedBlockUC.setTruncateBlock(oldBlock); 2178 file.setLastBlock(truncatedBlockUC, blockManager.getStorages(oldBlock)); 2179 getBlockManager().addBlockCollection(truncatedBlockUC, file); 2180 2181 NameNode.stateChangeLog.debug( 2182 "BLOCK* prepareFileForTruncate: Scheduling copy-on-truncate to new" + 2183 " size {} new block {} old block {}", truncatedBlockUC.getNumBytes(), 2184 newBlock, truncatedBlockUC.getTruncateBlock()); 2185 } else { 2186 // Use new generation stamp for in-place truncate recovery 2187 blockManager.convertLastBlockToUnderConstruction(file, lastBlockDelta); 2188 oldBlock = file.getLastBlock(); 2189 assert !oldBlock.isComplete() : "oldBlock should be under construction"; 2190 truncatedBlockUC = (BlockInfoContiguousUnderConstruction) oldBlock; 2191 truncatedBlockUC.setTruncateBlock(new Block(oldBlock)); 2192 truncatedBlockUC.getTruncateBlock().setNumBytes( 2193 oldBlock.getNumBytes() - lastBlockDelta); 2194 truncatedBlockUC.getTruncateBlock().setGenerationStamp( 2195 newBlock.getGenerationStamp()); 2196 2197 NameNode.stateChangeLog.debug( 2198 "BLOCK* prepareFileForTruncate: {} Scheduling in-place block " + 2199 "truncate to new size {}", 2200 truncatedBlockUC.getTruncateBlock().getNumBytes(), truncatedBlockUC); 2201 } 2202 if (shouldRecoverNow) { 2203 truncatedBlockUC.initializeBlockRecovery(newBlock.getGenerationStamp()); 2204 } 2205 2206 return newBlock; 2207 } 2208 2209 /** 2210 * Defines if a 
replica needs to be copied on truncate or 2211 * can be truncated in place. 2212 */ 2213 boolean shouldCopyOnTruncate(INodeFile file, BlockInfoContiguous blk) { 2214 if(!isUpgradeFinalized()) { 2215 return true; 2216 } 2217 if (isRollingUpgrade()) { 2218 return true; 2219 } 2220 return file.isBlockInLatestSnapshot(blk); 2221 } 2222 2223 /** 2224 * Set the storage policy for a file or a directory. 2225 * 2226 * @param src file/directory path 2227 * @param policyName storage policy name 2228 */ 2229 void setStoragePolicy(String src, String policyName) throws IOException { 2230 HdfsFileStatus auditStat; 2231 waitForLoadingFSImage(); 2232 checkOperation(OperationCategory.WRITE); 2233 final String operationName = "setStoragePolicy"; 2234 writeLock(); 2235 try { 2236 checkOperation(OperationCategory.WRITE); 2237 checkNameNodeSafeMode("Cannot set storage policy for " + src); 2238 auditStat = FSDirAttrOp.setStoragePolicy( 2239 dir, blockManager, src, policyName); 2240 } catch (AccessControlException e) { 2241 logAuditEvent(false, operationName, src); 2242 throw e; 2243 } finally { 2244 writeUnlock(operationName); 2245 } 2246 getEditLog().logSync(); 2247 logAuditEvent(true, operationName, src, null, auditStat); 2248 } 2249 2250 /** 2251 * @return All the existing block storage policies 2252 */ 2253 BlockStoragePolicy[] getStoragePolicies() throws IOException { 2254 checkOperation(OperationCategory.READ); 2255 waitForLoadingFSImage(); 2256 readLock(); 2257 try { 2258 checkOperation(OperationCategory.READ); 2259 return FSDirAttrOp.getStoragePolicies(blockManager); 2260 } finally { 2261 readUnlock("getStoragePolicies"); 2262 } 2263 } 2264 2265 long getPreferredBlockSize(String src) throws IOException { 2266 checkOperation(OperationCategory.READ); 2267 readLock(); 2268 try { 2269 checkOperation(OperationCategory.READ); 2270 return FSDirAttrOp.getPreferredBlockSize(dir, src); 2271 } finally { 2272 readUnlock("getPreferredBlockSize"); 2273 } 2274 } 2275 2276 /** 2277 * If the 
file is within an encryption zone, select the appropriate 2278 * CryptoProtocolVersion from the list provided by the client. Since the 2279 * client may be newer, we need to handle unknown versions. 2280 * 2281 * @param zone EncryptionZone of the file 2282 * @param supportedVersions List of supported protocol versions 2283 * @return chosen protocol version 2284 * @throws IOException 2285 */ 2286 private CryptoProtocolVersion chooseProtocolVersion(EncryptionZone zone, 2287 CryptoProtocolVersion[] supportedVersions) 2288 throws UnknownCryptoProtocolVersionException, UnresolvedLinkException, 2289 SnapshotAccessControlException { 2290 Preconditions.checkNotNull(zone); 2291 Preconditions.checkNotNull(supportedVersions); 2292 // Right now, we only support a single protocol version, 2293 // so simply look for it in the list of provided options 2294 final CryptoProtocolVersion required = zone.getVersion(); 2295 2296 for (CryptoProtocolVersion c : supportedVersions) { 2297 if (c.equals(CryptoProtocolVersion.UNKNOWN)) { 2298 if (LOG.isDebugEnabled()) { 2299 LOG.debug("Ignoring unknown CryptoProtocolVersion provided by " + 2300 "client: " + c.getUnknownValue()); 2301 } 2302 continue; 2303 } 2304 if (c.equals(required)) { 2305 return c; 2306 } 2307 } 2308 throw new UnknownCryptoProtocolVersionException( 2309 "No crypto protocol versions provided by the client are supported." 2310 + " Client provided: " + Arrays.toString(supportedVersions) 2311 + " NameNode supports: " + Arrays.toString(CryptoProtocolVersion 2312 .values())); 2313 } 2314 2315 /** 2316 * Invoke KeyProvider APIs to generate an encrypted data encryption key for an 2317 * encryption zone. Should not be called with any locks held. 
2318 * 2319 * @param ezKeyName key name of an encryption zone 2320 * @return New EDEK, or null if ezKeyName is null 2321 * @throws IOException 2322 */ 2323 private EncryptedKeyVersion generateEncryptedDataEncryptionKey(String 2324 ezKeyName) throws IOException { 2325 if (ezKeyName == null) { 2326 return null; 2327 } 2328 EncryptedKeyVersion edek = null; 2329 try { 2330 edek = provider.generateEncryptedKey(ezKeyName); 2331 } catch (GeneralSecurityException e) { 2332 throw new IOException(e); 2333 } 2334 Preconditions.checkNotNull(edek); 2335 return edek; 2336 } 2337 2338 /** 2339 * Create a new file entry in the namespace. 2340 * 2341 * For description of parameters and exceptions thrown see 2342 * {@link ClientProtocol#create}, except it returns valid file status upon 2343 * success 2344 */ 2345 HdfsFileStatus startFile(String src, PermissionStatus permissions, 2346 String holder, String clientMachine, EnumSet<CreateFlag> flag, 2347 boolean createParent, short replication, long blockSize, 2348 CryptoProtocolVersion[] supportedVersions, boolean logRetryCache) 2349 throws AccessControlException, SafeModeException, 2350 FileAlreadyExistsException, UnresolvedLinkException, 2351 FileNotFoundException, ParentNotDirectoryException, IOException { 2352 2353 HdfsFileStatus status = null; 2354 try { 2355 status = startFileInt(src, permissions, holder, clientMachine, flag, 2356 createParent, replication, blockSize, supportedVersions, 2357 logRetryCache); 2358 } catch (AccessControlException e) { 2359 logAuditEvent(false, "create", src); 2360 throw e; 2361 } 2362 return status; 2363 } 2364 2365 private HdfsFileStatus startFileInt(final String srcArg, 2366 PermissionStatus permissions, String holder, String clientMachine, 2367 EnumSet<CreateFlag> flag, boolean createParent, short replication, 2368 long blockSize, CryptoProtocolVersion[] supportedVersions, 2369 boolean logRetryCache) 2370 throws AccessControlException, SafeModeException, 2371 FileAlreadyExistsException, 
UnresolvedLinkException, 2372 FileNotFoundException, ParentNotDirectoryException, IOException { 2373 String src = srcArg; 2374 final String operationName = "create"; 2375 if (NameNode.stateChangeLog.isDebugEnabled()) { 2376 StringBuilder builder = new StringBuilder(); 2377 builder.append("DIR* NameSystem.startFile: src=" + src 2378 + ", holder=" + holder 2379 + ", clientMachine=" + clientMachine 2380 + ", createParent=" + createParent 2381 + ", replication=" + replication 2382 + ", createFlag=" + flag.toString() 2383 + ", blockSize=" + blockSize); 2384 builder.append(", supportedVersions="); 2385 if (supportedVersions != null) { 2386 builder.append(Arrays.toString(supportedVersions)); 2387 } else { 2388 builder.append("null"); 2389 } 2390 NameNode.stateChangeLog.debug(builder.toString()); 2391 } 2392 if (!DFSUtil.isValidName(src)) { 2393 throw new InvalidPathException(src); 2394 } 2395 blockManager.verifyReplication(src, replication, clientMachine); 2396 2397 boolean skipSync = false; 2398 HdfsFileStatus stat = null; 2399 FSPermissionChecker pc = getPermissionChecker(); 2400 if (blockSize < minBlockSize) { 2401 throw new IOException("Specified block size is less than configured" + 2402 " minimum value (" + DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY 2403 + "): " + blockSize + " < " + minBlockSize); 2404 } 2405 boolean create = flag.contains(CreateFlag.CREATE); 2406 boolean overwrite = flag.contains(CreateFlag.OVERWRITE); 2407 boolean isLazyPersist = flag.contains(CreateFlag.LAZY_PERSIST); 2408 2409 waitForLoadingFSImage(); 2410 2411 /** 2412 * If the file is in an encryption zone, we optimistically create an 2413 * EDEK for the file by calling out to the configured KeyProvider. 2414 * Since this typically involves doing an RPC, we take the readLock 2415 * initially, then drop it to do the RPC. 
2416 * 2417 * Since the path can flip-flop between being in an encryption zone and not 2418 * in the meantime, we need to recheck the preconditions when we retake the 2419 * lock to do the create. If the preconditions are not met, we throw a 2420 * special RetryStartFileException to ask the DFSClient to try the create 2421 * again later. 2422 */ 2423 CryptoProtocolVersion protocolVersion = null; 2424 CipherSuite suite = null; 2425 String ezKeyName = null; 2426 EncryptedKeyVersion edek = null; 2427 2428 if (provider != null) { 2429 readLock(); 2430 try { 2431 INodesInPath iip = dir.resolvePathForWrite(pc, src); 2432 src = iip.getPath(); 2433 // Nothing to do if the path is not within an EZ 2434 final EncryptionZone zone = dir.getEZForPath(iip); 2435 if (zone != null) { 2436 protocolVersion = chooseProtocolVersion(zone, supportedVersions); 2437 suite = zone.getSuite(); 2438 ezKeyName = zone.getKeyName(); 2439 2440 Preconditions.checkNotNull(protocolVersion); 2441 Preconditions.checkNotNull(suite); 2442 Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN), 2443 "Chose an UNKNOWN CipherSuite!"); 2444 Preconditions.checkNotNull(ezKeyName); 2445 } 2446 } finally { 2447 readUnlock(operationName); 2448 } 2449 2450 Preconditions.checkState( 2451 (suite == null && ezKeyName == null) || 2452 (suite != null && ezKeyName != null), 2453 "Both suite and ezKeyName should both be null or not null"); 2454 2455 // Generate EDEK if necessary while not holding the lock 2456 edek = generateEncryptedDataEncryptionKey(ezKeyName); 2457 EncryptionFaultInjector.getInstance().startFileAfterGenerateKey(); 2458 } 2459 2460 // Proceed with the create, using the computed cipher suite and 2461 // generated EDEK 2462 BlocksMapUpdateInfo toRemoveBlocks = null; 2463 writeLock(); 2464 try { 2465 checkOperation(OperationCategory.WRITE); 2466 checkNameNodeSafeMode("Cannot create file" + src); 2467 dir.writeLock(); 2468 try { 2469 final INodesInPath iip = dir.resolvePathForWrite(pc, src); 2470 
src = iip.getPath(); 2471 toRemoveBlocks = startFileInternal( 2472 pc, iip, permissions, holder, 2473 clientMachine, create, overwrite, 2474 createParent, replication, blockSize, 2475 isLazyPersist, suite, protocolVersion, edek, 2476 logRetryCache); 2477 stat = FSDirStatAndListingOp.getFileInfo( 2478 dir, src, false, FSDirectory.isReservedRawName(srcArg)); 2479 } finally { 2480 dir.writeUnlock(); 2481 } 2482 } catch (StandbyException se) { 2483 skipSync = true; 2484 throw se; 2485 } finally { 2486 writeUnlock(operationName); 2487 // There might be transactions logged while trying to recover the lease. 2488 // They need to be sync'ed even when an exception was thrown. 2489 if (!skipSync) { 2490 getEditLog().logSync(); 2491 if (toRemoveBlocks != null) { 2492 removeBlocks(toRemoveBlocks); 2493 toRemoveBlocks.clear(); 2494 } 2495 } 2496 } 2497 2498 logAuditEvent(true, operationName, srcArg, null, stat); 2499 return stat; 2500 } 2501 2502 /** 2503 * Create a new file or overwrite an existing file<br> 2504 * 2505 * Once the file is create the client then allocates a new block with the next 2506 * call using {@link ClientProtocol#addBlock}. 2507 * <p> 2508 * For description of parameters and exceptions thrown see 2509 * {@link ClientProtocol#create} 2510 */ 2511 private BlocksMapUpdateInfo startFileInternal(FSPermissionChecker pc, 2512 INodesInPath iip, PermissionStatus permissions, String holder, 2513 String clientMachine, boolean create, boolean overwrite, 2514 boolean createParent, short replication, long blockSize, 2515 boolean isLazyPersist, CipherSuite suite, CryptoProtocolVersion version, 2516 EncryptedKeyVersion edek, boolean logRetryEntry) 2517 throws IOException { 2518 assert hasWriteLock(); 2519 // Verify that the destination does not exist as a directory already. 
2520 final INode inode = iip.getLastINode(); 2521 final String src = iip.getPath(); 2522 if (inode != null && inode.isDirectory()) { 2523 throw new FileAlreadyExistsException(src + 2524 " already exists as a directory"); 2525 } 2526 2527 final INodeFile myFile = INodeFile.valueOf(inode, src, true); 2528 if (isPermissionEnabled) { 2529 if (overwrite && myFile != null) { 2530 dir.checkPathAccess(pc, iip, FsAction.WRITE); 2531 } 2532 /* 2533 * To overwrite existing file, need to check 'w' permission 2534 * of parent (equals to ancestor in this case) 2535 */ 2536 dir.checkAncestorAccess(pc, iip, FsAction.WRITE); 2537 } 2538 if (!createParent) { 2539 dir.verifyParentDir(iip, src); 2540 } 2541 2542 FileEncryptionInfo feInfo = null; 2543 2544 final EncryptionZone zone = dir.getEZForPath(iip); 2545 if (zone != null) { 2546 // The path is now within an EZ, but we're missing encryption parameters 2547 if (suite == null || edek == null) { 2548 throw new RetryStartFileException(); 2549 } 2550 // Path is within an EZ and we have provided encryption parameters. 2551 // Make sure that the generated EDEK matches the settings of the EZ. 
2552 final String ezKeyName = zone.getKeyName(); 2553 if (!ezKeyName.equals(edek.getEncryptionKeyName())) { 2554 throw new RetryStartFileException(); 2555 } 2556 feInfo = new FileEncryptionInfo(suite, version, 2557 edek.getEncryptedKeyVersion().getMaterial(), 2558 edek.getEncryptedKeyIv(), 2559 ezKeyName, edek.getEncryptionKeyVersionName()); 2560 } 2561 2562 try { 2563 BlocksMapUpdateInfo toRemoveBlocks = null; 2564 if (myFile == null) { 2565 if (!create) { 2566 throw new FileNotFoundException("Can't overwrite non-existent " + 2567 src + " for client " + clientMachine); 2568 } 2569 } else { 2570 if (overwrite) { 2571 toRemoveBlocks = new BlocksMapUpdateInfo(); 2572 List<INode> toRemoveINodes = new ChunkedArrayList<INode>(); 2573 long ret = FSDirDeleteOp.delete(dir, iip, toRemoveBlocks, 2574 toRemoveINodes, now()); 2575 if (ret >= 0) { 2576 iip = INodesInPath.replace(iip, iip.length() - 1, null); 2577 FSDirDeleteOp.incrDeletedFileCount(ret); 2578 removeLeasesAndINodes(src, toRemoveINodes, true); 2579 } 2580 } else { 2581 // If lease soft limit time is expired, recover the lease 2582 recoverLeaseInternal(RecoverLeaseOp.CREATE_FILE, 2583 iip, src, holder, clientMachine, false); 2584 throw new FileAlreadyExistsException(src + " for client " + 2585 clientMachine + " already exists"); 2586 } 2587 } 2588 2589 checkFsObjectLimit(); 2590 INodeFile newNode = null; 2591 2592 // Always do an implicit mkdirs for parent directory tree. 2593 Map.Entry<INodesInPath, String> parent = FSDirMkdirOp 2594 .createAncestorDirectories(dir, iip, permissions); 2595 if (parent != null) { 2596 iip = dir.addFile(parent.getKey(), parent.getValue(), permissions, 2597 replication, blockSize, holder, clientMachine); 2598 newNode = iip != null ? 
iip.getLastINode().asFile() : null; 2599 } 2600 2601 if (newNode == null) { 2602 throw new IOException("Unable to add " + src + " to namespace"); 2603 } 2604 leaseManager.addLease(newNode.getFileUnderConstructionFeature() 2605 .getClientName(), src); 2606 2607 // Set encryption attributes if necessary 2608 if (feInfo != null) { 2609 dir.setFileEncryptionInfo(src, feInfo); 2610 newNode = dir.getInode(newNode.getId()).asFile(); 2611 } 2612 2613 setNewINodeStoragePolicy(newNode, iip, isLazyPersist); 2614 2615 // record file record in log, record new generation stamp 2616 getEditLog().logOpenFile(src, newNode, overwrite, logRetryEntry); 2617 NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: added {}" + 2618 " inode {} holder {}", src, newNode.getId(), holder); 2619 return toRemoveBlocks; 2620 } catch (IOException ie) { 2621 NameNode.stateChangeLog.warn("DIR* NameSystem.startFile: " + src + " " + 2622 ie.getMessage()); 2623 throw ie; 2624 } 2625 } 2626 2627 private void setNewINodeStoragePolicy(INodeFile inode, 2628 INodesInPath iip, 2629 boolean isLazyPersist) 2630 throws IOException { 2631 2632 if (isLazyPersist) { 2633 BlockStoragePolicy lpPolicy = 2634 blockManager.getStoragePolicy("LAZY_PERSIST"); 2635 2636 // Set LAZY_PERSIST storage policy if the flag was passed to 2637 // CreateFile. 2638 if (lpPolicy == null) { 2639 throw new HadoopIllegalArgumentException( 2640 "The LAZY_PERSIST storage policy has been disabled " + 2641 "by the administrator."); 2642 } 2643 inode.setStoragePolicyID(lpPolicy.getId(), 2644 iip.getLatestSnapshotId()); 2645 } else { 2646 BlockStoragePolicy effectivePolicy = 2647 blockManager.getStoragePolicy(inode.getStoragePolicyID()); 2648 2649 if (effectivePolicy != null && 2650 effectivePolicy.isCopyOnCreateFile()) { 2651 // Copy effective policy from ancestor directory to current file. 
2652 inode.setStoragePolicyID(effectivePolicy.getId(), 2653 iip.getLatestSnapshotId()); 2654 } 2655 } 2656 } 2657 2658 /** 2659 * Append to an existing file for append. 2660 * <p> 2661 * 2662 * The method returns the last block of the file if this is a partial block, 2663 * which can still be used for writing more data. The client uses the returned 2664 * block locations to form the data pipeline for this block.<br> 2665 * The method returns null if the last block is full. The client then 2666 * allocates a new block with the next call using 2667 * {@link ClientProtocol#addBlock}. 2668 * <p> 2669 * 2670 * For description of parameters and exceptions thrown see 2671 * {@link ClientProtocol#append(String, String, EnumSetWritable)} 2672 * 2673 * @return the last block locations if the block is partial or null otherwise 2674 */ 2675 private LocatedBlock appendFileInternal(FSPermissionChecker pc, 2676 INodesInPath iip, String holder, String clientMachine, boolean newBlock, 2677 boolean logRetryCache) throws IOException { 2678 assert hasWriteLock(); 2679 // Verify that the destination does not exist as a directory already. 
2680 final INode inode = iip.getLastINode(); 2681 final String src = iip.getPath(); 2682 if (inode != null && inode.isDirectory()) { 2683 throw new FileAlreadyExistsException("Cannot append to directory " + src 2684 + "; already exists as a directory."); 2685 } 2686 if (isPermissionEnabled) { 2687 dir.checkPathAccess(pc, iip, FsAction.WRITE); 2688 } 2689 2690 try { 2691 if (inode == null) { 2692 throw new FileNotFoundException("failed to append to non-existent file " 2693 + src + " for client " + clientMachine); 2694 } 2695 INodeFile myFile = INodeFile.valueOf(inode, src, true); 2696 final BlockStoragePolicy lpPolicy = 2697 blockManager.getStoragePolicy("LAZY_PERSIST"); 2698 if (lpPolicy != null && 2699 lpPolicy.getId() == myFile.getStoragePolicyID()) { 2700 throw new UnsupportedOperationException( 2701 "Cannot append to lazy persist file " + src); 2702 } 2703 // Opening an existing file for append - may need to recover lease. 2704 recoverLeaseInternal(RecoverLeaseOp.APPEND_FILE, 2705 iip, src, holder, clientMachine, false); 2706 2707 final BlockInfoContiguous lastBlock = myFile.getLastBlock(); 2708 // Check that the block has at least minimum replication. 2709 if(lastBlock != null && lastBlock.isComplete() && 2710 !getBlockManager().isSufficientlyReplicated(lastBlock)) { 2711 throw new IOException("append: lastBlock=" + lastBlock + 2712 " of src=" + src + " is not sufficiently replicated yet."); 2713 } 2714 return prepareFileForAppend(src, iip, holder, clientMachine, newBlock, 2715 true, logRetryCache); 2716 } catch (IOException ie) { 2717 NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage()); 2718 throw ie; 2719 } 2720 } 2721 2722 /** 2723 * Convert current node to under construction. 2724 * Recreate in-memory lease record. 
2725 * 2726 * @param src path to the file 2727 * @param leaseHolder identifier of the lease holder on this file 2728 * @param clientMachine identifier of the client machine 2729 * @param newBlock if the data is appended to a new block 2730 * @param writeToEditLog whether to persist this change to the edit log 2731 * @param logRetryCache whether to record RPC ids in editlog for retry cache 2732 * rebuilding 2733 * @return the last block locations if the block is partial or null otherwise 2734 * @throws UnresolvedLinkException 2735 * @throws IOException 2736 */ 2737 LocatedBlock prepareFileForAppend(String src, INodesInPath iip, 2738 String leaseHolder, String clientMachine, boolean newBlock, 2739 boolean writeToEditLog, boolean logRetryCache) throws IOException { 2740 final INodeFile file = iip.getLastINode().asFile(); 2741 final QuotaCounts delta = verifyQuotaForUCBlock(file, iip); 2742 2743 file.recordModification(iip.getLatestSnapshotId()); 2744 file.toUnderConstruction(leaseHolder, clientMachine); 2745 2746 leaseManager.addLease( 2747 file.getFileUnderConstructionFeature().getClientName(), src); 2748 2749 LocatedBlock ret = null; 2750 if (!newBlock) { 2751 ret = blockManager.convertLastBlockToUnderConstruction(file, 0); 2752 if (ret != null && delta != null) { 2753 Preconditions.checkState(delta.getStorageSpace() >= 0, 2754 "appending to a block with size larger than the preferred block size"); 2755 dir.writeLock(); 2756 try { 2757 dir.updateCountNoQuotaCheck(iip, iip.length() - 1, delta); 2758 } finally { 2759 dir.writeUnlock(); 2760 } 2761 } 2762 } else { 2763 BlockInfoContiguous lastBlock = file.getLastBlock(); 2764 if (lastBlock != null) { 2765 ExtendedBlock blk = new ExtendedBlock(this.getBlockPoolId(), lastBlock); 2766 ret = new LocatedBlock(blk, new DatanodeInfo[0]); 2767 } 2768 } 2769 2770 if (writeToEditLog) { 2771 getEditLog().logAppendFile(src, file, newBlock, logRetryCache); 2772 } 2773 return ret; 2774 } 2775 2776 /** 2777 * Verify quota when using 
the preferred block size for UC block. This is 2778 * usually used by append and truncate 2779 * @throws QuotaExceededException when violating the storage quota 2780 * @return expected quota usage update. null means no change or no need to 2781 * update quota usage later 2782 */ 2783 private QuotaCounts verifyQuotaForUCBlock(INodeFile file, INodesInPath iip) 2784 throws QuotaExceededException { 2785 if (!isImageLoaded() || dir.shouldSkipQuotaChecks()) { 2786 // Do not check quota if editlog is still being processed 2787 return null; 2788 } 2789 if (file.getLastBlock() != null) { 2790 final QuotaCounts delta = computeQuotaDeltaForUCBlock(file); 2791 dir.readLock(); 2792 try { 2793 FSDirectory.verifyQuota(iip, iip.length() - 1, delta, null); 2794 return delta; 2795 } finally { 2796 dir.readUnlock(); 2797 } 2798 } 2799 return null; 2800 } 2801 2802 /** Compute quota change for converting a complete block to a UC block */ 2803 private QuotaCounts computeQuotaDeltaForUCBlock(INodeFile file) { 2804 final QuotaCounts delta = new QuotaCounts.Builder().build(); 2805 final BlockInfoContiguous lastBlock = file.getLastBlock(); 2806 if (lastBlock != null) { 2807 final long diff = file.getPreferredBlockSize() - lastBlock.getNumBytes(); 2808 final short repl = file.getBlockReplication(); 2809 delta.addStorageSpace(diff * repl); 2810 final BlockStoragePolicy policy = dir.getBlockStoragePolicySuite() 2811 .getPolicy(file.getStoragePolicyID()); 2812 List<StorageType> types = policy.chooseStorageTypes(repl); 2813 for (StorageType t : types) { 2814 if (t.supportTypeQuota()) { 2815 delta.addTypeSpace(t, diff); 2816 } 2817 } 2818 } 2819 return delta; 2820 } 2821 2822 /** 2823 * Recover lease; 2824 * Immediately revoke the lease of the current lease holder and start lease 2825 * recovery so that the file can be forced to be closed. 
2826 * 2827 * @param src the path of the file to start lease recovery 2828 * @param holder the lease holder's name 2829 * @param clientMachine the client machine's name 2830 * @return true if the file is already closed or 2831 * if the lease can be released and the file can be closed. 2832 * @throws IOException 2833 */ 2834 boolean recoverLease(String src, String holder, String clientMachine) 2835 throws IOException { 2836 if (!DFSUtil.isValidName(src)) { 2837 throw new IOException("Invalid file name: " + src); 2838 } 2839 2840 boolean skipSync = false; 2841 FSPermissionChecker pc = getPermissionChecker(); 2842 checkOperation(OperationCategory.WRITE); 2843 writeLock(); 2844 try { 2845 checkOperation(OperationCategory.WRITE); 2846 checkNameNodeSafeMode("Cannot recover the lease of " + src); 2847 final INodesInPath iip = dir.resolvePathForWrite(pc, src); 2848 src = iip.getPath(); 2849 final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src); 2850 if (!inode.isUnderConstruction()) { 2851 return true; 2852 } 2853 if (isPermissionEnabled) { 2854 dir.checkPathAccess(pc, iip, FsAction.WRITE); 2855 } 2856 2857 return recoverLeaseInternal(RecoverLeaseOp.RECOVER_LEASE, 2858 iip, src, holder, clientMachine, true); 2859 } catch (StandbyException se) { 2860 skipSync = true; 2861 throw se; 2862 } finally { 2863 writeUnlock("recoverLease"); 2864 // There might be transactions logged while trying to recover the lease. 2865 // They need to be sync'ed even when an exception was thrown. 
2866 if (!skipSync) { 2867 getEditLog().logSync(); 2868 } 2869 } 2870 } 2871 2872 private enum RecoverLeaseOp { 2873 CREATE_FILE, 2874 APPEND_FILE, 2875 TRUNCATE_FILE, 2876 RECOVER_LEASE; 2877 2878 private String getExceptionMessage(String src, String holder, 2879 String clientMachine, String reason) { 2880 return "Failed to " + this + " " + src + " for " + holder + 2881 " on " + clientMachine + " because " + reason; 2882 } 2883 } 2884 2885 boolean recoverLeaseInternal(RecoverLeaseOp op, INodesInPath iip, 2886 String src, String holder, String clientMachine, boolean force) 2887 throws IOException { 2888 assert hasWriteLock(); 2889 INodeFile file = iip.getLastINode().asFile(); 2890 if (file.isUnderConstruction()) { 2891 // 2892 // If the file is under construction , then it must be in our 2893 // leases. Find the appropriate lease record. 2894 // 2895 Lease lease = leaseManager.getLease(holder); 2896 2897 if (!force && lease != null) { 2898 Lease leaseFile = leaseManager.getLeaseByPath(src); 2899 if (leaseFile != null && leaseFile.equals(lease)) { 2900 // We found the lease for this file but the original 2901 // holder is trying to obtain it again. 2902 throw new AlreadyBeingCreatedException( 2903 op.getExceptionMessage(src, holder, clientMachine, 2904 holder + " is already the current lease holder.")); 2905 } 2906 } 2907 // 2908 // Find the original holder. 
2909 // 2910 FileUnderConstructionFeature uc = file.getFileUnderConstructionFeature(); 2911 String clientName = uc.getClientName(); 2912 lease = leaseManager.getLease(clientName); 2913 if (lease == null) { 2914 throw new AlreadyBeingCreatedException( 2915 op.getExceptionMessage(src, holder, clientMachine, 2916 "the file is under construction but no leases found.")); 2917 } 2918 if (force) { 2919 // close now: no need to wait for soft lease expiration and 2920 // close only the file src 2921 LOG.info("recoverLease: " + lease + ", src=" + src + 2922 " from client " + clientName); 2923 return internalReleaseLease(lease, src, iip, holder); 2924 } else { 2925 assert lease.getHolder().equals(clientName) : 2926 "Current lease holder " + lease.getHolder() + 2927 " does not match file creator " + clientName; 2928 // 2929 // If the original holder has not renewed in the last SOFTLIMIT 2930 // period, then start lease recovery. 2931 // 2932 if (lease.expiredSoftLimit()) { 2933 LOG.info("startFile: recover " + lease + ", src=" + src + " client " 2934 + clientName); 2935 if (internalReleaseLease(lease, src, iip, null)) { 2936 return true; 2937 } else { 2938 throw new RecoveryInProgressException( 2939 op.getExceptionMessage(src, holder, clientMachine, 2940 "lease recovery is in progress. 
Try again later.")); 2941 } 2942 } else { 2943 final BlockInfoContiguous lastBlock = file.getLastBlock(); 2944 if (lastBlock != null 2945 && lastBlock.getBlockUCState() == BlockUCState.UNDER_RECOVERY) { 2946 throw new RecoveryInProgressException( 2947 op.getExceptionMessage(src, holder, clientMachine, 2948 "another recovery is in progress by " 2949 + clientName + " on " + uc.getClientMachine())); 2950 } else { 2951 throw new AlreadyBeingCreatedException( 2952 op.getExceptionMessage(src, holder, clientMachine, 2953 "this file lease is currently owned by " 2954 + clientName + " on " + uc.getClientMachine())); 2955 } 2956 } 2957 } 2958 } else { 2959 return true; 2960 } 2961 } 2962 2963 /** 2964 * Append to an existing file in the namespace. 2965 */ 2966 LastBlockWithStatus appendFile(String src, String holder, 2967 String clientMachine, EnumSet<CreateFlag> flag, boolean logRetryCache) 2968 throws IOException { 2969 try { 2970 return appendFileInt(src, holder, clientMachine, 2971 flag.contains(CreateFlag.NEW_BLOCK), logRetryCache); 2972 } catch (AccessControlException e) { 2973 logAuditEvent(false, "append", src); 2974 throw e; 2975 } 2976 } 2977 2978 private LastBlockWithStatus appendFileInt(final String srcArg, String holder, 2979 String clientMachine, boolean newBlock, boolean logRetryCache) 2980 throws IOException { 2981 String src = srcArg; 2982 final String operationName = "append"; 2983 NameNode.stateChangeLog.debug( 2984 "DIR* NameSystem.appendFile: src={}, holder={}, clientMachine={}", 2985 src, holder, clientMachine); 2986 boolean skipSync = false; 2987 if (!supportAppends) { 2988 throw new UnsupportedOperationException( 2989 "Append is not enabled on this NameNode. 
Use the " + 2990 DFS_SUPPORT_APPEND_KEY + " configuration option to enable it."); 2991 } 2992 2993 LocatedBlock lb = null; 2994 HdfsFileStatus stat = null; 2995 FSPermissionChecker pc = getPermissionChecker(); 2996 writeLock(); 2997 try { 2998 checkOperation(OperationCategory.WRITE); 2999 checkNameNodeSafeMode("Cannot append to file" + src); 3000 final INodesInPath iip = dir.resolvePathForWrite(pc, src); 3001 src = iip.getPath(); 3002 lb = appendFileInternal(pc, iip, holder, clientMachine, newBlock, 3003 logRetryCache); 3004 stat = FSDirStatAndListingOp.getFileInfo(dir, src, false, 3005 FSDirectory.isReservedRawName(srcArg)); 3006 } catch (StandbyException se) { 3007 skipSync = true; 3008 throw se; 3009 } finally { 3010 writeUnlock(operationName); 3011 // There might be transactions logged while trying to recover the lease. 3012 // They need to be sync'ed even when an exception was thrown. 3013 if (!skipSync) { 3014 getEditLog().logSync(); 3015 } 3016 } 3017 if (lb != null) { 3018 NameNode.stateChangeLog.debug( 3019 "DIR* NameSystem.appendFile: file {} for {} at {} block {} block" + 3020 " size {}", src, holder, clientMachine, lb.getBlock(), 3021 lb.getBlock().getNumBytes()); 3022 } 3023 logAuditEvent(true, operationName, srcArg); 3024 return new LastBlockWithStatus(lb, stat); 3025 } 3026 3027 ExtendedBlock getExtendedBlock(Block blk) { 3028 return new ExtendedBlock(blockPoolId, blk); 3029 } 3030 3031 void setBlockPoolId(String bpid) { 3032 blockPoolId = bpid; 3033 blockManager.setBlockPoolId(blockPoolId); 3034 } 3035 3036 /** 3037 * The client would like to obtain an additional block for the indicated 3038 * filename (which is being written-to). Return an array that consists 3039 * of the block, plus a set of machines. The first on this list should 3040 * be where the client writes data. Subsequent items in the list must 3041 * be provided in the connection to the first datanode. 
3042 * 3043 * Make sure the previous blocks have been reported by datanodes and 3044 * are replicated. Will return an empty 2-elt array if we want the 3045 * client to "try again later". 3046 */ 3047 LocatedBlock getAdditionalBlock(String src, long fileId, String clientName, 3048 ExtendedBlock previous, Set<Node> excludedNodes, 3049 List<String> favoredNodes) throws IOException { 3050 LocatedBlock[] onRetryBlock = new LocatedBlock[1]; 3051 DatanodeStorageInfo targets[] = getNewBlockTargets(src, fileId, 3052 clientName, previous, excludedNodes, favoredNodes, onRetryBlock); 3053 if (targets == null) { 3054 assert onRetryBlock[0] != null : "Retry block is null"; 3055 // This is a retry. Just return the last block. 3056 return onRetryBlock[0]; 3057 } 3058 LocatedBlock newBlock = storeAllocatedBlock( 3059 src, fileId, clientName, previous, targets); 3060 return newBlock; 3061 } 3062 3063 /** 3064 * Part I of getAdditionalBlock(). 3065 * Analyze the state of the file under read lock to determine if the client 3066 * can add a new block, detect potential retries, lease mismatches, 3067 * and minimal replication of the penultimate block. 3068 * 3069 * Generate target DataNode locations for the new block, 3070 * but do not create the new block yet. 
3071 */ 3072 DatanodeStorageInfo[] getNewBlockTargets(String src, long fileId, 3073 String clientName, ExtendedBlock previous, Set<Node> excludedNodes, 3074 List<String> favoredNodes, LocatedBlock[] onRetryBlock) throws IOException { 3075 final long blockSize; 3076 final int replication; 3077 final byte storagePolicyID; 3078 Node clientNode = null; 3079 String clientMachine = null; 3080 3081 NameNode.stateChangeLog.debug("BLOCK* getAdditionalBlock: {} inodeId {}" + 3082 " for {}", src, fileId, clientName); 3083 3084 checkOperation(OperationCategory.READ); 3085 FSPermissionChecker pc = getPermissionChecker(); 3086 readLock(); 3087 try { 3088 checkOperation(OperationCategory.READ); 3089 INodesInPath iip = dir.resolvePath(pc, src, fileId); 3090 src = iip.getPath(); 3091 FileState fileState = analyzeFileState( 3092 iip, fileId, clientName, previous, onRetryBlock); 3093 if (onRetryBlock[0] != null && onRetryBlock[0].getLocations().length > 0) { 3094 // This is a retry. No need to generate new locations. 3095 // Use the last block if it has locations. 
3096 return null; 3097 } 3098 3099 final INodeFile pendingFile = fileState.inode; 3100 if (!checkFileProgress(src, pendingFile, false)) { 3101 throw new NotReplicatedYetException("Not replicated yet: " + src); 3102 } 3103 src = fileState.path; 3104 3105 if (pendingFile.getBlocks().length >= maxBlocksPerFile) { 3106 throw new IOException("File has reached the limit on maximum number of" 3107 + " blocks (" + DFSConfigKeys.DFS_NAMENODE_MAX_BLOCKS_PER_FILE_KEY 3108 + "): " + pendingFile.getBlocks().length + " >= " 3109 + maxBlocksPerFile); 3110 } 3111 blockSize = pendingFile.getPreferredBlockSize(); 3112 clientMachine = pendingFile.getFileUnderConstructionFeature() 3113 .getClientMachine(); 3114 clientNode = blockManager.getDatanodeManager().getDatanodeByHost( 3115 clientMachine); 3116 replication = pendingFile.getFileReplication(); 3117 storagePolicyID = pendingFile.getStoragePolicyID(); 3118 } finally { 3119 readUnlock("getNewBlockTargets"); 3120 } 3121 3122 if (clientNode == null) { 3123 clientNode = getClientNode(clientMachine); 3124 } 3125 3126 // choose targets for the new block to be allocated. 3127 return getBlockManager().chooseTarget4NewBlock( 3128 src, replication, clientNode, excludedNodes, blockSize, favoredNodes, 3129 storagePolicyID); 3130 } 3131 3132 /** 3133 * Part II of getAdditionalBlock(). 3134 * Should repeat the same analysis of the file state as in Part 1, 3135 * but under the write lock. 3136 * If the conditions still hold, then allocate a new block with 3137 * the new targets, add it to the INode and to the BlocksMap. 
3138 */ 3139 LocatedBlock storeAllocatedBlock(String src, long fileId, String clientName, 3140 ExtendedBlock previous, DatanodeStorageInfo[] targets) throws IOException { 3141 Block newBlock = null; 3142 long offset; 3143 checkOperation(OperationCategory.WRITE); 3144 waitForLoadingFSImage(); 3145 writeLock(); 3146 try { 3147 checkOperation(OperationCategory.WRITE); 3148 // Run the full analysis again, since things could have changed 3149 // while chooseTarget() was executing. 3150 LocatedBlock[] onRetryBlock = new LocatedBlock[1]; 3151 final INodesInPath iip = dir.resolvePath(null, src, fileId); 3152 FileState fileState = 3153 analyzeFileState(iip, fileId, clientName, previous, onRetryBlock); 3154 final INodeFile pendingFile = fileState.inode; 3155 src = fileState.path; 3156 3157 if (onRetryBlock[0] != null) { 3158 if (onRetryBlock[0].getLocations().length > 0) { 3159 // This is a retry. Just return the last block if having locations. 3160 return onRetryBlock[0]; 3161 } else { 3162 // add new chosen targets to already allocated block and return 3163 BlockInfoContiguous lastBlockInFile = pendingFile.getLastBlock(); 3164 ((BlockInfoContiguousUnderConstruction) lastBlockInFile) 3165 .setExpectedLocations(targets); 3166 offset = pendingFile.computeFileSize(); 3167 return makeLocatedBlock(lastBlockInFile, targets, offset); 3168 } 3169 } 3170 3171 // commit the last block and complete it if it has minimum replicas 3172 commitOrCompleteLastBlock(pendingFile, fileState.iip, 3173 ExtendedBlock.getLocalBlock(previous)); 3174 3175 // allocate new block, record block locations in INode. 
3176 newBlock = createNewBlock(); 3177 INodesInPath inodesInPath = INodesInPath.fromINode(pendingFile); 3178 saveAllocatedBlock(src, inodesInPath, newBlock, targets); 3179 3180 persistNewBlock(src, pendingFile); 3181 offset = pendingFile.computeFileSize(); 3182 } finally { 3183 writeUnlock("storeAllocatedBlock"); 3184 } 3185 getEditLog().logSync(); 3186 3187 // Return located block 3188 return makeLocatedBlock(newBlock, targets, offset); 3189 } 3190 3191 /* 3192 * Resolve clientmachine address to get a network location path 3193 */ 3194 private Node getClientNode(String clientMachine) { 3195 List<String> hosts = new ArrayList<String>(1); 3196 hosts.add(clientMachine); 3197 List<String> rName = getBlockManager().getDatanodeManager() 3198 .resolveNetworkLocation(hosts); 3199 Node clientNode = null; 3200 if (rName != null) { 3201 // Able to resolve clientMachine mapping. 3202 // Create a temp node to findout the rack local nodes 3203 clientNode = new NodeBase(rName.get(0) + NodeBase.PATH_SEPARATOR_STR 3204 + clientMachine); 3205 } 3206 return clientNode; 3207 } 3208 3209 static class FileState { 3210 public final INodeFile inode; 3211 public final String path; 3212 public final INodesInPath iip; 3213 3214 public FileState(INodeFile inode, String fullPath, INodesInPath iip) { 3215 this.inode = inode; 3216 this.path = fullPath; 3217 this.iip = iip; 3218 } 3219 } 3220 3221 private FileState analyzeFileState( 3222 INodesInPath iip, long fileId, String clientName, 3223 ExtendedBlock previous, LocatedBlock[] onRetryBlock) 3224 throws IOException { 3225 assert hasReadLock(); 3226 String src = iip.getPath(); 3227 checkBlock(previous); 3228 onRetryBlock[0] = null; 3229 checkNameNodeSafeMode("Cannot add block to " + src); 3230 3231 // have we exceeded the configured limit of fs objects. 
3232 checkFsObjectLimit(); 3233 3234 Block previousBlock = ExtendedBlock.getLocalBlock(previous); 3235 final INodeFile pendingFile = checkLease(iip, clientName, fileId); 3236 BlockInfoContiguous lastBlockInFile = pendingFile.getLastBlock(); 3237 if (!Block.matchingIdAndGenStamp(previousBlock, lastBlockInFile)) { 3238 // The block that the client claims is the current last block 3239 // doesn't match up with what we think is the last block. There are 3240 // four possibilities: 3241 // 1) This is the first block allocation of an append() pipeline 3242 // which started appending exactly at or exceeding the block boundary. 3243 // In this case, the client isn't passed the previous block, 3244 // so it makes the allocateBlock() call with previous=null. 3245 // We can distinguish this since the last block of the file 3246 // will be exactly a full block. 3247 // 2) This is a retry from a client that missed the response of a 3248 // prior getAdditionalBlock() call, perhaps because of a network 3249 // timeout, or because of an HA failover. In that case, we know 3250 // by the fact that the client is re-issuing the RPC that it 3251 // never began to write to the old block. Hence it is safe to 3252 // to return the existing block. 3253 // 3) This is an entirely bogus request/bug -- we should error out 3254 // rather than potentially appending a new block with an empty 3255 // one in the middle, etc 3256 // 4) This is a retry from a client that timed out while 3257 // the prior getAdditionalBlock() is still being processed, 3258 // currently working on chooseTarget(). 3259 // There are no means to distinguish between the first and 3260 // the second attempts in Part I, because the first one hasn't 3261 // changed the namesystem state yet. 3262 // We run this analysis again in Part II where case 4 is impossible. 
3263 3264 BlockInfoContiguous penultimateBlock = pendingFile.getPenultimateBlock(); 3265 if (previous == null && 3266 lastBlockInFile != null && 3267 lastBlockInFile.getNumBytes() >= pendingFile.getPreferredBlockSize() && 3268 lastBlockInFile.isComplete()) { 3269 // Case 1 3270 NameNode.stateChangeLog.debug( 3271 "BLOCK* NameSystem.allocateBlock: handling block allocation" + 3272 " writing to a file with a complete previous block: src={}" + 3273 " lastBlock={}", src, lastBlockInFile); 3274 } else if (Block.matchingIdAndGenStamp(penultimateBlock, previousBlock)) { 3275 if (lastBlockInFile.getNumBytes() != 0) { 3276 throw new IOException( 3277 "Request looked like a retry to allocate block " + 3278 lastBlockInFile + " but it already contains " + 3279 lastBlockInFile.getNumBytes() + " bytes"); 3280 } 3281 3282 // Case 2 3283 // Return the last block. 3284 NameNode.stateChangeLog.info("BLOCK* allocateBlock: " + 3285 "caught retry for allocation of a new block in " + 3286 src + ". Returning previously allocated block " + lastBlockInFile); 3287 long offset = pendingFile.computeFileSize(); 3288 onRetryBlock[0] = makeLocatedBlock(lastBlockInFile, 3289 ((BlockInfoContiguousUnderConstruction)lastBlockInFile).getExpectedStorageLocations(), 3290 offset); 3291 return new FileState(pendingFile, src, iip); 3292 } else { 3293 // Case 3 3294 throw new IOException("Cannot allocate block in " + src + ": " + 3295 "passed 'previous' block " + previous + " does not match actual " + 3296 "last block in file " + lastBlockInFile); 3297 } 3298 } 3299 return new FileState(pendingFile, src, iip); 3300 } 3301 3302 LocatedBlock makeLocatedBlock(Block blk, DatanodeStorageInfo[] locs, 3303 long offset) throws IOException { 3304 LocatedBlock lBlk = new LocatedBlock( 3305 getExtendedBlock(blk), locs, offset, false); 3306 getBlockManager().setBlockToken( 3307 lBlk, BlockTokenSecretManager.AccessMode.WRITE); 3308 return lBlk; 3309 } 3310 3311 /** @see ClientProtocol#getAdditionalDatanode */ 3312 
LocatedBlock getAdditionalDatanode(String src, long fileId, 3313 final ExtendedBlock blk, final DatanodeInfo[] existings, 3314 final String[] storageIDs, 3315 final Set<Node> excludes, 3316 final int numAdditionalNodes, final String clientName 3317 ) throws IOException { 3318 //check if the feature is enabled 3319 dtpReplaceDatanodeOnFailure.checkEnabled(); 3320 3321 Node clientnode = null; 3322 String clientMachine; 3323 final long preferredblocksize; 3324 final byte storagePolicyID; 3325 final List<DatanodeStorageInfo> chosen; 3326 checkOperation(OperationCategory.READ); 3327 FSPermissionChecker pc = getPermissionChecker(); 3328 readLock(); 3329 try { 3330 checkOperation(OperationCategory.READ); 3331 //check safe mode 3332 checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk); 3333 final INodesInPath iip = dir.resolvePath(pc, src, fileId); 3334 src = iip.getPath(); 3335 3336 //check lease 3337 final INodeFile file = checkLease(iip, clientName, fileId); 3338 clientMachine = file.getFileUnderConstructionFeature().getClientMachine(); 3339 clientnode = blockManager.getDatanodeManager().getDatanodeByHost(clientMachine); 3340 preferredblocksize = file.getPreferredBlockSize(); 3341 storagePolicyID = file.getStoragePolicyID(); 3342 3343 //find datanode storages 3344 final DatanodeManager dm = blockManager.getDatanodeManager(); 3345 chosen = Arrays.asList(dm.getDatanodeStorageInfos(existings, storageIDs, 3346 "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s", 3347 src, fileId, blk, clientName, clientMachine)); 3348 } finally { 3349 readUnlock("getAdditionalDatanode"); 3350 } 3351 3352 if (clientnode == null) { 3353 clientnode = getClientNode(clientMachine); 3354 } 3355 3356 // choose new datanodes. 
3357 final DatanodeStorageInfo[] targets = blockManager.chooseTarget4AdditionalDatanode( 3358 src, numAdditionalNodes, clientnode, chosen, 3359 excludes, preferredblocksize, storagePolicyID); 3360 final LocatedBlock lb = new LocatedBlock(blk, targets); 3361 blockManager.setBlockToken(lb, AccessMode.COPY); 3362 return lb; 3363 } 3364 3365 /** 3366 * The client would like to let go of the given block 3367 */ 3368 boolean abandonBlock(ExtendedBlock b, long fileId, String src, String holder) 3369 throws IOException { 3370 NameNode.stateChangeLog.debug( 3371 "BLOCK* NameSystem.abandonBlock: {} of file {}", b, src); 3372 checkOperation(OperationCategory.WRITE); 3373 FSPermissionChecker pc = getPermissionChecker(); 3374 waitForLoadingFSImage(); 3375 writeLock(); 3376 final INodesInPath iip = dir.resolvePath(pc, src, fileId); 3377 src = iip.getPath(); 3378 try { 3379 checkOperation(OperationCategory.WRITE); 3380 checkNameNodeSafeMode("Cannot abandon block " + b + " for file" + src); 3381 final INodeFile file = checkLease(iip, holder, fileId); 3382 3383 // Remove the block from the pending creates list 3384 boolean removed = dir.removeBlock(src, iip, file, 3385 ExtendedBlock.getLocalBlock(b)); 3386 if (!removed) { 3387 return true; 3388 } 3389 NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: {} is " + 3390 "removed from pendingCreates", b); 3391 persistBlocks(src, file, false); 3392 } finally { 3393 writeUnlock("abandonBlock"); 3394 } 3395 getEditLog().logSync(); 3396 3397 return true; 3398 } 3399 3400 private INodeFile checkLease(INodesInPath iip, String holder, long fileId) 3401 throws LeaseExpiredException, FileNotFoundException { 3402 String src = iip.getPath(); 3403 INode inode = iip.getLastINode(); 3404 assert hasReadLock(); 3405 final String ident = src + " (inode " + fileId + ")"; 3406 if (inode == null) { 3407 Lease lease = leaseManager.getLease(holder); 3408 throw new LeaseExpiredException( 3409 "No lease on " + ident + ": File does not exist. 
" 3410 + (lease != null ? lease.toString() 3411 : "Holder " + holder + " does not have any open files.")); 3412 } 3413 if (!inode.isFile()) { 3414 Lease lease = leaseManager.getLease(holder); 3415 throw new LeaseExpiredException( 3416 "No lease on " + ident + ": INode is not a regular file. " 3417 + (lease != null ? lease.toString() 3418 : "Holder " + holder + " does not have any open files.")); 3419 } 3420 final INodeFile file = inode.asFile(); 3421 if (!file.isUnderConstruction()) { 3422 Lease lease = leaseManager.getLease(holder); 3423 throw new LeaseExpiredException( 3424 "No lease on " + ident + ": File is not open for writing. " 3425 + (lease != null ? lease.toString() 3426 : "Holder " + holder + " does not have any open files.")); 3427 } 3428 // No further modification is allowed on a deleted file. 3429 // A file is considered deleted, if it is not in the inodeMap or is marked 3430 // as deleted in the snapshot feature. 3431 if (isFileDeleted(file)) { 3432 throw new FileNotFoundException(src); 3433 } 3434 String clientName = file.getFileUnderConstructionFeature().getClientName(); 3435 if (holder != null && !clientName.equals(holder)) { 3436 throw new LeaseExpiredException("Lease mismatch on " + ident + 3437 " owned by " + clientName + " but is accessed by " + holder); 3438 } 3439 return file; 3440 } 3441 3442 /** 3443 * Complete in-progress write to the given file. 
3444 * @return true if successful, false if the client should continue to retry 3445 * (e.g if not all blocks have reached minimum replication yet) 3446 * @throws IOException on error (eg lease mismatch, file not open, file deleted) 3447 */ 3448 boolean completeFile(final String srcArg, String holder, 3449 ExtendedBlock last, long fileId) 3450 throws SafeModeException, UnresolvedLinkException, IOException { 3451 String src = srcArg; 3452 NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: {} for {}", 3453 src, holder); 3454 checkBlock(last); 3455 boolean success = false; 3456 checkOperation(OperationCategory.WRITE); 3457 waitForLoadingFSImage(); 3458 writeLock(); 3459 try { 3460 checkOperation(OperationCategory.WRITE); 3461 checkNameNodeSafeMode("Cannot complete file " + src); 3462 success = completeFileInternal(src, holder, 3463 ExtendedBlock.getLocalBlock(last), fileId); 3464 } finally { 3465 writeUnlock("completeFile"); 3466 } 3467 getEditLog().logSync(); 3468 if (success) { 3469 NameNode.stateChangeLog.info("DIR* completeFile: " + srcArg 3470 + " is closed by " + holder); 3471 } 3472 return success; 3473 } 3474 3475 private boolean completeFileInternal(String src, String holder, Block last, 3476 long fileId) throws IOException { 3477 assert hasWriteLock(); 3478 final INodeFile pendingFile; 3479 FSPermissionChecker pc = getPermissionChecker(); 3480 final INodesInPath iip = dir.resolvePath(pc, src, fileId); 3481 src = iip.getPath(); 3482 INode inode = null; 3483 try { 3484 inode = iip.getLastINode(); 3485 pendingFile = checkLease(iip, holder, fileId); 3486 } catch (LeaseExpiredException lee) { 3487 if (inode != null && inode.isFile() && 3488 !inode.asFile().isUnderConstruction()) { 3489 // This could be a retry RPC - i.e the client tried to close 3490 // the file, but missed the RPC response. Thus, it is trying 3491 // again to close the file. 
If the file still exists and 3492 // the client's view of the last block matches the actual 3493 // last block, then we'll treat it as a successful close. 3494 // See HDFS-3031. 3495 final Block realLastBlock = inode.asFile().getLastBlock(); 3496 if (Block.matchingIdAndGenStamp(last, realLastBlock)) { 3497 NameNode.stateChangeLog.info("DIR* completeFile: " + 3498 "request from " + holder + " to complete inode " + fileId + 3499 "(" + src + ") which is already closed. But, it appears to be " + 3500 "an RPC retry. Returning success"); 3501 return true; 3502 } 3503 } 3504 throw lee; 3505 } 3506 // Check the state of the penultimate block. It should be completed 3507 // before attempting to complete the last one. 3508 if (!checkFileProgress(src, pendingFile, false)) { 3509 return false; 3510 } 3511 3512 // commit the last block and complete it if it has minimum replicas 3513 commitOrCompleteLastBlock(pendingFile, iip, last); 3514 3515 if (!checkFileProgress(src, pendingFile, true)) { 3516 return false; 3517 } 3518 3519 finalizeINodeFileUnderConstruction(src, pendingFile, 3520 Snapshot.CURRENT_STATE_ID); 3521 return true; 3522 } 3523 3524 /** 3525 * Save allocated block at the given pending filename 3526 * 3527 * @param src path to the file 3528 * @param inodesInPath representing each of the components of src. 3529 * The last INode is the INode for {@code src} file. 
3530 * @param newBlock newly allocated block to be save 3531 * @param targets target datanodes where replicas of the new block is placed 3532 * @throws QuotaExceededException If addition of block exceeds space quota 3533 */ 3534 BlockInfoContiguous saveAllocatedBlock(String src, INodesInPath inodesInPath, 3535 Block newBlock, DatanodeStorageInfo[] targets) 3536 throws IOException { 3537 assert hasWriteLock(); 3538 BlockInfoContiguous b = dir.addBlock(src, inodesInPath, newBlock, targets); 3539 NameNode.stateChangeLog.info("BLOCK* allocate " + b + " for " + src); 3540 DatanodeStorageInfo.incrementBlocksScheduled(targets); 3541 return b; 3542 } 3543 3544 /** 3545 * Create new block with a unique block id and a new generation stamp. 3546 */ 3547 Block createNewBlock() throws IOException { 3548 assert hasWriteLock(); 3549 Block b = new Block(nextBlockId(), 0, 0); 3550 // Increment the generation stamp for every new block. 3551 b.setGenerationStamp(nextGenerationStamp(false)); 3552 return b; 3553 } 3554 3555 /** 3556 * Check that the indicated file's blocks are present and 3557 * replicated. If not, return false. If checkall is true, then check 3558 * all blocks, otherwise check only penultimate block. 3559 */ 3560 boolean checkFileProgress(String src, INodeFile v, boolean checkall) { 3561 if (checkall) { 3562 // check all blocks of the file. 
3563 for (BlockInfoContiguous block: v.getBlocks()) { 3564 if (!isCompleteBlock(src, block, blockManager.minReplication)) { 3565 return false; 3566 } 3567 } 3568 } else { 3569 // check the penultimate block of this file 3570 BlockInfoContiguous b = v.getPenultimateBlock(); 3571 if (b != null 3572 && !isCompleteBlock(src, b, blockManager.minReplication)) { 3573 return false; 3574 } 3575 } 3576 return true; 3577 } 3578 3579 private static boolean isCompleteBlock(String src, BlockInfoContiguous b, int minRepl) { 3580 if (!b.isComplete()) { 3581 final BlockInfoContiguousUnderConstruction uc = (BlockInfoContiguousUnderConstruction)b; 3582 final int numNodes = b.numNodes(); 3583 LOG.info("BLOCK* " + b + " is not COMPLETE (ucState = " 3584 + uc.getBlockUCState() + ", replication# = " + numNodes 3585 + (numNodes < minRepl? " < ": " >= ") 3586 + " minimum = " + minRepl + ") in file " + src); 3587 return false; 3588 } 3589 return true; 3590 } 3591 3592 //////////////////////////////////////////////////////////////// 3593 // Here's how to handle block-copy failure during client write: 3594 // -- As usual, the client's write should result in a streaming 3595 // backup write to a k-machine sequence. 3596 // -- If one of the backup machines fails, no worries. Fail silently. 3597 // -- Before client is allowed to close and finalize file, make sure 3598 // that the blocks are backed up. Namenode may have to issue specific backup 3599 // commands to make up for earlier datanode failures. Once all copies 3600 // are made, edit namespace and return to client. 3601 //////////////////////////////////////////////////////////////// 3602 3603 /** 3604 * Change the indicated filename. 3605 * @deprecated Use {@link #renameTo(String, String, boolean, 3606 * Options.Rename...)} instead. 
3607 */ 3608 @Deprecated 3609 boolean renameTo(String src, String dst, boolean logRetryCache) 3610 throws IOException { 3611 final String operationName = "rename"; 3612 waitForLoadingFSImage(); 3613 FSDirRenameOp.RenameOldResult ret = null; 3614 writeLock(); 3615 try { 3616 checkOperation(OperationCategory.WRITE); 3617 checkNameNodeSafeMode("Cannot rename " + src); 3618 ret = FSDirRenameOp.renameToInt(dir, src, dst, logRetryCache); 3619 } catch (AccessControlException e) { 3620 logAuditEvent(false, operationName, src, dst, null); 3621 throw e; 3622 } finally { 3623 writeUnlock(operationName); 3624 } 3625 boolean success = ret != null && ret.success; 3626 if (success) { 3627 getEditLog().logSync(); 3628 } 3629 logAuditEvent(success, "rename", src, dst, 3630 ret == null ? null : ret.auditStat); 3631 return success; 3632 } 3633 3634 void renameTo(final String src, final String dst, 3635 boolean logRetryCache, Options.Rename... options) 3636 throws IOException { 3637 final String operationName = "rename"; 3638 waitForLoadingFSImage(); 3639 Map.Entry<BlocksMapUpdateInfo, HdfsFileStatus> res = null; 3640 writeLock(); 3641 try { 3642 checkOperation(OperationCategory.WRITE); 3643 checkNameNodeSafeMode("Cannot rename " + src); 3644 res = FSDirRenameOp.renameToInt(dir, src, dst, logRetryCache, options); 3645 } catch (AccessControlException e) { 3646 logAuditEvent(false, operationName + " (options=" + 3647 Arrays.toString(options) + ")", src, dst, null); 3648 throw e; 3649 } finally { 3650 writeUnlock(operationName); 3651 } 3652 3653 getEditLog().logSync(); 3654 3655 BlocksMapUpdateInfo collectedBlocks = res.getKey(); 3656 HdfsFileStatus auditStat = res.getValue(); 3657 if (!collectedBlocks.getToDeleteList().isEmpty()) { 3658 removeBlocks(collectedBlocks); 3659 collectedBlocks.clear(); 3660 } 3661 3662 logAuditEvent(true, operationName + " (options=" + 3663 Arrays.toString(options) + ")", src, dst, auditStat); 3664 } 3665 3666 /** 3667 * Remove the indicated file from 
namespace. 3668 * 3669 * @see ClientProtocol#delete(String, boolean) for detailed description and 3670 * description of exceptions 3671 */ 3672 boolean delete(String src, boolean recursive, boolean logRetryCache) 3673 throws IOException { 3674 waitForLoadingFSImage(); 3675 final String operationName = "delete"; 3676 BlocksMapUpdateInfo toRemovedBlocks = null; 3677 writeLock(); 3678 boolean ret = false; 3679 try { 3680 checkOperation(OperationCategory.WRITE); 3681 checkNameNodeSafeMode("Cannot delete " + src); 3682 toRemovedBlocks = FSDirDeleteOp.delete( 3683 this, src, recursive, logRetryCache); 3684 ret = toRemovedBlocks != null; 3685 } catch (AccessControlException e) { 3686 logAuditEvent(false, operationName, src); 3687 throw e; 3688 } finally { 3689 writeUnlock(operationName); 3690 } 3691 getEditLog().logSync(); 3692 if (toRemovedBlocks != null) { 3693 removeBlocks(toRemovedBlocks); // Incremental deletion of blocks 3694 } 3695 logAuditEvent(true, operationName, src); 3696 return ret; 3697 } 3698 3699 FSPermissionChecker getPermissionChecker() 3700 throws AccessControlException { 3701 return dir.getPermissionChecker(); 3702 } 3703 3704 /** 3705 * From the given list, incrementally remove the blocks from blockManager 3706 * Writelock is dropped and reacquired every BLOCK_DELETION_INCREMENT to 3707 * ensure that other waiters on the lock can get in. 
See HDFS-2938 3708 * 3709 * @param blocks 3710 * An instance of {@link BlocksMapUpdateInfo} which contains a list 3711 * of blocks that need to be removed from blocksMap 3712 */ 3713 void removeBlocks(BlocksMapUpdateInfo blocks) { 3714 List<Block> toDeleteList = blocks.getToDeleteList(); 3715 Iterator<Block> iter = toDeleteList.iterator(); 3716 while (iter.hasNext()) { 3717 writeLock(); 3718 try { 3719 for (int i = 0; i < BLOCK_DELETION_INCREMENT && iter.hasNext(); i++) { 3720 blockManager.removeBlock(iter.next()); 3721 } 3722 } finally { 3723 writeUnlock("removeBlocks"); 3724 } 3725 } 3726 } 3727 3728 /** 3729 * Remove leases and inodes related to a given path 3730 * @param src The given path 3731 * @param removedINodes Containing the list of inodes to be removed from 3732 * inodesMap 3733 * @param acquireINodeMapLock Whether to acquire the lock for inode removal 3734 */ 3735 void removeLeasesAndINodes(String src, List<INode> removedINodes, 3736 final boolean acquireINodeMapLock) { 3737 assert hasWriteLock(); 3738 leaseManager.removeLeaseWithPrefixPath(src); 3739 // remove inodes from inodesMap 3740 if (removedINodes != null) { 3741 if (acquireINodeMapLock) { 3742 dir.writeLock(); 3743 } 3744 try { 3745 dir.removeFromInodeMap(removedINodes); 3746 } finally { 3747 if (acquireINodeMapLock) { 3748 dir.writeUnlock(); 3749 } 3750 } 3751 removedINodes.clear(); 3752 } 3753 } 3754 3755 /** 3756 * Removes the blocks from blocksmap and updates the safemode blocks total 3757 * 3758 * @param blocks 3759 * An instance of {@link BlocksMapUpdateInfo} which contains a list 3760 * of blocks that need to be removed from blocksMap 3761 */ 3762 void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) { 3763 assert hasWriteLock(); 3764 // In the case that we are a Standby tailing edits from the 3765 // active while in safe-mode, we need to track the total number 3766 // of blocks and safe blocks in the system. 
3767 boolean trackBlockCounts = isSafeModeTrackingBlocks(); 3768 int numRemovedComplete = 0, numRemovedSafe = 0; 3769 3770 for (Block b : blocks.getToDeleteList()) { 3771 if (trackBlockCounts) { 3772 BlockInfoContiguous bi = getStoredBlock(b); 3773 if (bi.isComplete()) { 3774 numRemovedComplete++; 3775 if (bi.numNodes() >= blockManager.minReplication) { 3776 numRemovedSafe++; 3777 } 3778 } 3779 } 3780 blockManager.removeBlock(b); 3781 } 3782 if (trackBlockCounts) { 3783 if (LOG.isDebugEnabled()) { 3784 LOG.debug("Adjusting safe-mode totals for deletion." 3785 + "decreasing safeBlocks by " + numRemovedSafe 3786 + ", totalBlocks by " + numRemovedComplete); 3787 } 3788 adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete); 3789 } 3790 } 3791 3792 /** 3793 * @see SafeModeInfo#shouldIncrementallyTrackBlocks 3794 */ 3795 private boolean isSafeModeTrackingBlocks() { 3796 if (!haEnabled) { 3797 // Never track blocks incrementally in non-HA code. 3798 return false; 3799 } 3800 SafeModeInfo sm = this.safeMode; 3801 return sm != null && sm.shouldIncrementallyTrackBlocks(); 3802 } 3803 3804 /** 3805 * Get the file info for a specific file. 3806 * 3807 * @param src The string representation of the path to the file 3808 * @param resolveLink whether to throw UnresolvedLinkException 3809 * if src refers to a symlink 3810 * 3811 * @throws AccessControlException if access is denied 3812 * @throws UnresolvedLinkException if a symlink is encountered. 
3813 * 3814 * @return object containing information regarding the file 3815 * or null if file not found 3816 * @throws StandbyException 3817 */ 3818 HdfsFileStatus getFileInfo(final String src, boolean resolveLink) 3819 throws IOException { 3820 final String operationName = "getfileinfo"; 3821 checkOperation(OperationCategory.READ); 3822 HdfsFileStatus stat = null; 3823 readLock(); 3824 try { 3825 checkOperation(OperationCategory.READ); 3826 stat = FSDirStatAndListingOp.getFileInfo(dir, src, resolveLink); 3827 } catch (AccessControlException e) { 3828 logAuditEvent(false, operationName, src); 3829 throw e; 3830 } finally { 3831 readUnlock(operationName); 3832 } 3833 logAuditEvent(true, operationName, src); 3834 return stat; 3835 } 3836 3837 /** 3838 * Returns true if the file is closed 3839 */ 3840 boolean isFileClosed(final String src) throws IOException { 3841 final String operationName = "isFileClosed"; 3842 checkOperation(OperationCategory.READ); 3843 readLock(); 3844 try { 3845 checkOperation(OperationCategory.READ); 3846 return FSDirStatAndListingOp.isFileClosed(dir, src); 3847 } catch (AccessControlException e) { 3848 logAuditEvent(false, operationName, src); 3849 throw e; 3850 } finally { 3851 readUnlock(operationName); 3852 } 3853 } 3854 3855 /** 3856 * Create all the necessary directories 3857 */ 3858 boolean mkdirs(String src, PermissionStatus permissions, 3859 boolean createParent) throws IOException { 3860 final String operationName = "mkdirs"; 3861 HdfsFileStatus auditStat = null; 3862 checkOperation(OperationCategory.WRITE); 3863 writeLock(); 3864 try { 3865 checkOperation(OperationCategory.WRITE); 3866 checkNameNodeSafeMode("Cannot create directory " + src); 3867 auditStat = FSDirMkdirOp.mkdirs(this, src, permissions, createParent); 3868 } catch (AccessControlException e) { 3869 logAuditEvent(false, operationName, src); 3870 throw e; 3871 } finally { 3872 writeUnlock(operationName); 3873 } 3874 getEditLog().logSync(); 3875 logAuditEvent(true, 
operationName, src, null, auditStat); 3876 return true; 3877 } 3878 3879 /** 3880 * Get the content summary for a specific file/dir. 3881 * 3882 * @param src The string representation of the path to the file 3883 * 3884 * @throws AccessControlException if access is denied 3885 * @throws UnresolvedLinkException if a symlink is encountered. 3886 * @throws FileNotFoundException if no file exists 3887 * @throws StandbyException 3888 * @throws IOException for issues with writing to the audit log 3889 * 3890 * @return object containing information regarding the file 3891 * or null if file not found 3892 */ 3893 ContentSummary getContentSummary(final String src) throws IOException { 3894 checkOperation(OperationCategory.READ); 3895 final String operationName = "contentSummary"; 3896 readLock(); 3897 boolean success = true; 3898 try { 3899 checkOperation(OperationCategory.READ); 3900 return FSDirStatAndListingOp.getContentSummary(dir, src); 3901 } catch (AccessControlException ace) { 3902 success = false; 3903 throw ace; 3904 } finally { 3905 readUnlock(operationName); 3906 logAuditEvent(success, operationName, src); 3907 } 3908 } 3909 3910 /** 3911 * Set the namespace quota and storage space quota for a directory. 3912 * See {@link ClientProtocol#setQuota(String, long, long, StorageType)} for the 3913 * contract. 3914 * 3915 * Note: This does not support ".inodes" relative path. 
3916 */ 3917 void setQuota(String src, long nsQuota, long ssQuota, StorageType type) 3918 throws IOException { 3919 checkOperation(OperationCategory.WRITE); 3920 final String operationName = "setQuota"; 3921 writeLock(); 3922 boolean success = false; 3923 try { 3924 checkOperation(OperationCategory.WRITE); 3925 checkNameNodeSafeMode("Cannot set quota on " + src); 3926 FSDirAttrOp.setQuota(dir, src, nsQuota, ssQuota, type); 3927 success = true; 3928 } finally { 3929 writeUnlock(operationName); 3930 if (success) { 3931 getEditLog().logSync(); 3932 } 3933 logAuditEvent(success, operationName, src); 3934 } 3935 } 3936 3937 /** Persist all metadata about this file. 3938 * @param src The string representation of the path 3939 * @param fileId The inode ID that we're fsyncing. Older clients will pass 3940 * INodeId.GRANDFATHER_INODE_ID here. 3941 * @param clientName The string representation of the client 3942 * @param lastBlockLength The length of the last block 3943 * under construction reported from client. 
3944 * @throws IOException if path does not exist 3945 */ 3946 void fsync(String src, long fileId, String clientName, long lastBlockLength) 3947 throws IOException { 3948 NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); 3949 checkOperation(OperationCategory.WRITE); 3950 3951 FSPermissionChecker pc = getPermissionChecker(); 3952 waitForLoadingFSImage(); 3953 writeLock(); 3954 try { 3955 checkOperation(OperationCategory.WRITE); 3956 checkNameNodeSafeMode("Cannot fsync file " + src); 3957 INodesInPath iip = dir.resolvePath(pc, src, fileId); 3958 src = iip.getPath(); 3959 final INodeFile pendingFile = checkLease(iip, clientName, fileId); 3960 if (lastBlockLength > 0) { 3961 pendingFile.getFileUnderConstructionFeature().updateLengthOfLastBlock( 3962 pendingFile, lastBlockLength); 3963 } 3964 persistBlocks(src, pendingFile, false); 3965 } finally { 3966 writeUnlock("fsync"); 3967 } 3968 getEditLog().logSync(); 3969 } 3970 3971 /** 3972 * Move a file that is being written to be immutable. 3973 * @param src The filename 3974 * @param lease The lease for the client creating the file 3975 * @param recoveryLeaseHolder reassign lease to this holder if the last block 3976 * needs recovery; keep current holder if null. 3977 * @throws AlreadyBeingCreatedException if file is waiting to achieve minimal 3978 * replication;<br> 3979 * RecoveryInProgressException if lease recovery is in progress.<br> 3980 * IOException in case of an error. 3981 * @return true if file has been successfully finalized and closed or 3982 * false if block recovery has been initiated. Since the lease owner 3983 * has been changed and logged, caller should call logSync(). 
3984 */ 3985 boolean internalReleaseLease(Lease lease, String src, INodesInPath iip, 3986 String recoveryLeaseHolder) throws IOException { 3987 LOG.info("Recovering " + lease + ", src=" + src); 3988 assert !isInSafeMode(); 3989 assert hasWriteLock(); 3990 3991 final INodeFile pendingFile = iip.getLastINode().asFile(); 3992 int nrBlocks = pendingFile.numBlocks(); 3993 BlockInfoContiguous[] blocks = pendingFile.getBlocks(); 3994 3995 int nrCompleteBlocks; 3996 BlockInfoContiguous curBlock = null; 3997 for(nrCompleteBlocks = 0; nrCompleteBlocks < nrBlocks; nrCompleteBlocks++) { 3998 curBlock = blocks[nrCompleteBlocks]; 3999 if(!curBlock.isComplete()) 4000 break; 4001 assert blockManager.checkMinReplication(curBlock) : 4002 "A COMPLETE block is not minimally replicated in " + src; 4003 } 4004 4005 // If there are no incomplete blocks associated with this file, 4006 // then reap lease immediately and close the file. 4007 if(nrCompleteBlocks == nrBlocks) { 4008 finalizeINodeFileUnderConstruction(src, pendingFile, 4009 iip.getLatestSnapshotId()); 4010 NameNode.stateChangeLog.warn("BLOCK*" 4011 + " internalReleaseLease: All existing blocks are COMPLETE," 4012 + " lease removed, file closed."); 4013 return true; // closed! 4014 } 4015 4016 // Only the last and the penultimate blocks may be in non COMPLETE state. 4017 // If the penultimate block is not COMPLETE, then it must be COMMITTED. 
4018 if(nrCompleteBlocks < nrBlocks - 2 || 4019 nrCompleteBlocks == nrBlocks - 2 && 4020 curBlock != null && 4021 curBlock.getBlockUCState() != BlockUCState.COMMITTED) { 4022 final String message = "DIR* NameSystem.internalReleaseLease: " 4023 + "attempt to release a create lock on " 4024 + src + " but file is already closed."; 4025 NameNode.stateChangeLog.warn(message); 4026 throw new IOException(message); 4027 } 4028 4029 // The last block is not COMPLETE, and 4030 // that the penultimate block if exists is either COMPLETE or COMMITTED 4031 final BlockInfoContiguous lastBlock = pendingFile.getLastBlock(); 4032 BlockUCState lastBlockState = lastBlock.getBlockUCState(); 4033 BlockInfoContiguous penultimateBlock = pendingFile.getPenultimateBlock(); 4034 4035 // If penultimate block doesn't exist then its minReplication is met 4036 boolean penultimateBlockMinReplication = penultimateBlock == null ? true : 4037 blockManager.checkMinReplication(penultimateBlock); 4038 4039 switch(lastBlockState) { 4040 case COMPLETE: 4041 assert false : "Already checked that the last block is incomplete"; 4042 break; 4043 case COMMITTED: 4044 // Close file if committed blocks are minimally replicated 4045 if(penultimateBlockMinReplication && 4046 blockManager.checkMinReplication(lastBlock)) { 4047 finalizeINodeFileUnderConstruction(src, pendingFile, 4048 iip.getLatestSnapshotId()); 4049 NameNode.stateChangeLog.warn("BLOCK*" 4050 + " internalReleaseLease: Committed blocks are minimally replicated," 4051 + " lease removed, file closed."); 4052 return true; // closed! 4053 } 4054 // Cannot close file right now, since some blocks 4055 // are not yet minimally replicated. 4056 // This may potentially cause infinite loop in lease recovery 4057 // if there are no valid replicas on data-nodes. 4058 String message = "DIR* NameSystem.internalReleaseLease: " + 4059 "Failed to release lease for file " + src + 4060 ". Committed blocks are waiting to be minimally replicated." 
+ 4061 " Try again later."; 4062 NameNode.stateChangeLog.warn(message); 4063 throw new AlreadyBeingCreatedException(message); 4064 case UNDER_CONSTRUCTION: 4065 case UNDER_RECOVERY: 4066 final BlockInfoContiguousUnderConstruction uc = (BlockInfoContiguousUnderConstruction)lastBlock; 4067 // determine if last block was intended to be truncated 4068 Block recoveryBlock = uc.getTruncateBlock(); 4069 boolean truncateRecovery = recoveryBlock != null; 4070 boolean copyOnTruncate = truncateRecovery && 4071 recoveryBlock.getBlockId() != uc.getBlockId(); 4072 assert !copyOnTruncate || 4073 recoveryBlock.getBlockId() < uc.getBlockId() && 4074 recoveryBlock.getGenerationStamp() < uc.getGenerationStamp() && 4075 recoveryBlock.getNumBytes() > uc.getNumBytes() : 4076 "wrong recoveryBlock"; 4077 4078 // setup the last block locations from the blockManager if not known 4079 if (uc.getNumExpectedLocations() == 0) { 4080 uc.setExpectedLocations(blockManager.getStorages(lastBlock)); 4081 } 4082 4083 if (uc.getNumExpectedLocations() == 0 && uc.getNumBytes() == 0) { 4084 // There is no datanode reported to this block. 4085 // may be client have crashed before writing data to pipeline. 4086 // This blocks doesn't need any recovery. 4087 // We can remove this block and close the file. 
4088 pendingFile.removeLastBlock(lastBlock); 4089 finalizeINodeFileUnderConstruction(src, pendingFile, 4090 iip.getLatestSnapshotId()); 4091 NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: " 4092 + "Removed empty last block and closed file."); 4093 return true; 4094 } 4095 // start recovery of the last block for this file 4096 long blockRecoveryId = nextGenerationStamp(blockIdManager.isLegacyBlock(uc)); 4097 lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile); 4098 if(copyOnTruncate) { 4099 uc.setGenerationStamp(blockRecoveryId); 4100 } else if(truncateRecovery) { 4101 recoveryBlock.setGenerationStamp(blockRecoveryId); 4102 } 4103 uc.initializeBlockRecovery(blockRecoveryId); 4104 leaseManager.renewLease(lease); 4105 // Cannot close file right now, since the last block requires recovery. 4106 // This may potentially cause infinite loop in lease recovery 4107 // if there are no valid replicas on data-nodes. 4108 NameNode.stateChangeLog.warn( 4109 "DIR* NameSystem.internalReleaseLease: " + 4110 "File " + src + " has not been closed." + 4111 " Lease recovery is in progress. " + 4112 "RecoveryId = " + blockRecoveryId + " for block " + lastBlock); 4113 break; 4114 } 4115 return false; 4116 } 4117 4118 private Lease reassignLease(Lease lease, String src, String newHolder, 4119 INodeFile pendingFile) { 4120 assert hasWriteLock(); 4121 if(newHolder == null) 4122 return lease; 4123 // The following transaction is not synced. Make sure it's sync'ed later. 
4124 logReassignLease(lease.getHolder(), src, newHolder); 4125 return reassignLeaseInternal(lease, src, newHolder, pendingFile); 4126 } 4127 4128 Lease reassignLeaseInternal(Lease lease, String src, String newHolder, 4129 INodeFile pendingFile) { 4130 assert hasWriteLock(); 4131 pendingFile.getFileUnderConstructionFeature().setClientName(newHolder); 4132 return leaseManager.reassignLease(lease, src, newHolder); 4133 } 4134 4135 private void commitOrCompleteLastBlock(final INodeFile fileINode, 4136 final INodesInPath iip, final Block commitBlock) throws IOException { 4137 assert hasWriteLock(); 4138 Preconditions.checkArgument(fileINode.isUnderConstruction()); 4139 blockManager.commitOrCompleteLastBlock(fileINode, commitBlock, iip); 4140 } 4141 4142 private void finalizeINodeFileUnderConstruction(String src, 4143 INodeFile pendingFile, int latestSnapshot) throws IOException { 4144 assert hasWriteLock(); 4145 4146 FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature(); 4147 if (uc == null) { 4148 throw new IOException("Cannot finalize file " + src 4149 + " because it is not under construction"); 4150 } 4151 4152 pendingFile.recordModification(latestSnapshot); 4153 4154 // The file is no longer pending. 4155 // Create permanent INode, update blocks. 
No need to replace the inode here 4156 // since we just remove the uc feature from pendingFile 4157 pendingFile.toCompleteFile(now()); 4158 4159 leaseManager.removeLease(uc.getClientName(), src); 4160 4161 waitForLoadingFSImage(); 4162 // close file and persist block allocations for this file 4163 closeFile(src, pendingFile); 4164 4165 blockManager.checkReplication(pendingFile); 4166 } 4167 4168 @VisibleForTesting 4169 BlockInfoContiguous getStoredBlock(Block block) { 4170 return blockManager.getStoredBlock(block); 4171 } 4172 4173 @Override 4174 public boolean isInSnapshot(BlockInfoContiguousUnderConstruction blockUC) { 4175 assert hasReadLock(); 4176 final BlockCollection bc = blockUC.getBlockCollection(); 4177 if (bc == null || !(bc instanceof INodeFile) 4178 || !bc.isUnderConstruction()) { 4179 return false; 4180 } 4181 4182 String fullName = bc.getName(); 4183 try { 4184 if (fullName != null && fullName.startsWith(Path.SEPARATOR) 4185 && dir.getINode(fullName) == bc) { 4186 // If file exists in normal path then no need to look in snapshot 4187 return false; 4188 } 4189 } catch (UnresolvedLinkException e) { 4190 LOG.error("Error while resolving the link : " + fullName, e); 4191 return false; 4192 } 4193 /* 4194 * 1. if bc is under construction and also with snapshot, and 4195 * bc is not in the current fsdirectory tree, bc must represent a snapshot 4196 * file. 4197 * 2. if fullName is not an absolute path, bc cannot be existent in the 4198 * current fsdirectory tree. 4199 * 3. if bc is not the current node associated with fullName, bc must be a 4200 * snapshot inode. 
4201 */ 4202 return true; 4203 } 4204 4205 void commitBlockSynchronization(ExtendedBlock oldBlock, 4206 long newgenerationstamp, long newlength, 4207 boolean closeFile, boolean deleteblock, DatanodeID[] newtargets, 4208 String[] newtargetstorages) throws IOException { 4209 LOG.info("commitBlockSynchronization(oldBlock=" + oldBlock 4210 + ", newgenerationstamp=" + newgenerationstamp 4211 + ", newlength=" + newlength 4212 + ", newtargets=" + Arrays.asList(newtargets) 4213 + ", closeFile=" + closeFile 4214 + ", deleteBlock=" + deleteblock 4215 + ")"); 4216 checkOperation(OperationCategory.WRITE); 4217 final String src; 4218 waitForLoadingFSImage(); 4219 writeLock(); 4220 try { 4221 checkOperation(OperationCategory.WRITE); 4222 // If a DN tries to commit to the standby, the recovery will 4223 // fail, and the next retry will succeed on the new NN. 4224 4225 checkNameNodeSafeMode( 4226 "Cannot commitBlockSynchronization while in safe mode"); 4227 final BlockInfoContiguous storedBlock = getStoredBlock( 4228 ExtendedBlock.getLocalBlock(oldBlock)); 4229 if (storedBlock == null) { 4230 if (deleteblock) { 4231 // This may be a retry attempt so ignore the failure 4232 // to locate the block. 4233 if (LOG.isDebugEnabled()) { 4234 LOG.debug("Block (=" + oldBlock + ") not found"); 4235 } 4236 return; 4237 } else { 4238 throw new IOException("Block (=" + oldBlock + ") not found"); 4239 } 4240 } 4241 final long oldGenerationStamp = storedBlock.getGenerationStamp(); 4242 final long oldNumBytes = storedBlock.getNumBytes(); 4243 // 4244 // The implementation of delete operation (see @deleteInternal method) 4245 // first removes the file paths from namespace, and delays the removal 4246 // of blocks to later time for better performance. 
When 4247 // commitBlockSynchronization (this method) is called in between, the 4248 // blockCollection of storedBlock could have been assigned to null by 4249 // the delete operation, throw IOException here instead of NPE; if the 4250 // file path is already removed from namespace by the delete operation, 4251 // throw FileNotFoundException here, so not to proceed to the end of 4252 // this method to add a CloseOp to the edit log for an already deleted 4253 // file (See HDFS-6825). 4254 // 4255 BlockCollection blockCollection = storedBlock.getBlockCollection(); 4256 if (blockCollection == null) { 4257 throw new IOException("The blockCollection of " + storedBlock 4258 + " is null, likely because the file owning this block was" 4259 + " deleted and the block removal is delayed"); 4260 } 4261 INodeFile iFile = ((INode)blockCollection).asFile(); 4262 src = iFile.getFullPathName(); 4263 if (isFileDeleted(iFile)) { 4264 throw new FileNotFoundException("File not found: " 4265 + src + ", likely due to delayed block removal"); 4266 } 4267 if ((!iFile.isUnderConstruction() || storedBlock.isComplete()) && 4268 iFile.getLastBlock().isComplete()) { 4269 if (LOG.isDebugEnabled()) { 4270 LOG.debug("Unexpected block (=" + oldBlock 4271 + ") since the file (=" + iFile.getLocalName() 4272 + ") is not under construction"); 4273 } 4274 return; 4275 } 4276 4277 BlockInfoContiguousUnderConstruction truncatedBlock = 4278 (BlockInfoContiguousUnderConstruction) iFile.getLastBlock(); 4279 long recoveryId = truncatedBlock.getBlockRecoveryId(); 4280 boolean copyTruncate = 4281 truncatedBlock.getBlockId() != storedBlock.getBlockId(); 4282 if(recoveryId != newgenerationstamp) { 4283 throw new IOException("The recovery id " + newgenerationstamp 4284 + " does not match current recovery id " 4285 + recoveryId + " for block " + oldBlock); 4286 } 4287 4288 if (deleteblock) { 4289 Block blockToDel = ExtendedBlock.getLocalBlock(oldBlock); 4290 boolean remove = iFile.removeLastBlock(blockToDel); 4291 
if (remove) { 4292 blockManager.removeBlock(storedBlock); 4293 } 4294 } 4295 else { 4296 // update last block 4297 if(!copyTruncate) { 4298 storedBlock.setGenerationStamp(newgenerationstamp); 4299 storedBlock.setNumBytes(newlength); 4300 } 4301 4302 // find the DatanodeDescriptor objects 4303 ArrayList<DatanodeDescriptor> trimmedTargets = 4304 new ArrayList<DatanodeDescriptor>(newtargets.length); 4305 ArrayList<String> trimmedStorages = 4306 new ArrayList<String>(newtargets.length); 4307 if (newtargets.length > 0) { 4308 for (int i = 0; i < newtargets.length; ++i) { 4309 // try to get targetNode 4310 DatanodeDescriptor targetNode = 4311 blockManager.getDatanodeManager().getDatanode(newtargets[i]); 4312 if (targetNode != null) { 4313 trimmedTargets.add(targetNode); 4314 trimmedStorages.add(newtargetstorages[i]); 4315 } else if (LOG.isDebugEnabled()) { 4316 LOG.debug("DatanodeDescriptor (=" + newtargets[i] + ") not found"); 4317 } 4318 } 4319 } 4320 if ((closeFile) && !trimmedTargets.isEmpty()) { 4321 // the file is getting closed. Insert block locations into blockManager. 4322 // Otherwise fsck will report these blocks as MISSING, especially if the 4323 // blocksReceived from Datanodes take a long time to arrive. 
4324 for (int i = 0; i < trimmedTargets.size(); i++) { 4325 DatanodeStorageInfo storageInfo = 4326 trimmedTargets.get(i).getStorageInfo(trimmedStorages.get(i)); 4327 if (storageInfo != null) { 4328 if(copyTruncate) { 4329 storageInfo.addBlock(truncatedBlock); 4330 } else { 4331 storageInfo.addBlock(storedBlock); 4332 } 4333 } 4334 } 4335 } 4336 4337 // add pipeline locations into the INodeUnderConstruction 4338 DatanodeStorageInfo[] trimmedStorageInfos = 4339 blockManager.getDatanodeManager().getDatanodeStorageInfos( 4340 trimmedTargets.toArray(new DatanodeID[trimmedTargets.size()]), 4341 trimmedStorages.toArray(new String[trimmedStorages.size()]), 4342 "src=%s, oldBlock=%s, newgenerationstamp=%d, newlength=%d", 4343 src, oldBlock, newgenerationstamp, newlength); 4344 4345 if(copyTruncate) { 4346 iFile.setLastBlock(truncatedBlock, trimmedStorageInfos); 4347 } else { 4348 iFile.setLastBlock(storedBlock, trimmedStorageInfos); 4349 if (closeFile) { 4350 blockManager.markBlockReplicasAsCorrupt(storedBlock, 4351 oldGenerationStamp, oldNumBytes, trimmedStorageInfos); 4352 } 4353 } 4354 } 4355 4356 if (closeFile) { 4357 if(copyTruncate) { 4358 closeFileCommitBlocks(src, iFile, truncatedBlock); 4359 if(!iFile.isBlockInLatestSnapshot(storedBlock)) { 4360 blockManager.removeBlock(storedBlock); 4361 } 4362 } else { 4363 closeFileCommitBlocks(src, iFile, storedBlock); 4364 } 4365 } else { 4366 // If this commit does not want to close the file, persist blocks 4367 persistBlocks(src, iFile, false); 4368 } 4369 } finally { 4370 writeUnlock("commitBlockSynchronization"); 4371 } 4372 getEditLog().logSync(); 4373 if (closeFile) { 4374 LOG.info("commitBlockSynchronization(oldBlock=" + oldBlock 4375 + ", file=" + src 4376 + ", newgenerationstamp=" + newgenerationstamp 4377 + ", newlength=" + newlength 4378 + ", newtargets=" + Arrays.asList(newtargets) + ") successful"); 4379 } else { 4380 LOG.info("commitBlockSynchronization(" + oldBlock + ") successful"); 4381 } 4382 } 4383 4384 /** 
4385 * @param pendingFile open file that needs to be closed 4386 * @param storedBlock last block 4387 * @throws IOException on error 4388 */ 4389 @VisibleForTesting 4390 void closeFileCommitBlocks(String src, INodeFile pendingFile, 4391 BlockInfoContiguous storedBlock) throws IOException { 4392 final INodesInPath iip = INodesInPath.fromINode(pendingFile); 4393 4394 // commit the last block and complete it if it has minimum replicas 4395 commitOrCompleteLastBlock(pendingFile, iip, storedBlock); 4396 4397 //remove lease, close file 4398 finalizeINodeFileUnderConstruction(src, pendingFile, 4399 Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID)); 4400 } 4401 4402 /** 4403 * Renew the lease(s) held by the given client 4404 */ 4405 void renewLease(String holder) throws IOException { 4406 checkOperation(OperationCategory.WRITE); 4407 readLock(); 4408 try { 4409 checkOperation(OperationCategory.WRITE); 4410 checkNameNodeSafeMode("Cannot renew lease for " + holder); 4411 leaseManager.renewLease(holder); 4412 } finally { 4413 readUnlock("renewLease"); 4414 } 4415 } 4416 4417 /** 4418 * Get a partial listing of the indicated directory 4419 * 4420 * @param src the directory name 4421 * @param startAfter the name to start after 4422 * @param needLocation if blockLocations need to be returned 4423 * @return a partial listing starting after startAfter 4424 * 4425 * @throws AccessControlException if access is denied 4426 * @throws UnresolvedLinkException if symbolic link is encountered 4427 * @throws IOException if other I/O error occurred 4428 */ 4429 DirectoryListing getListing(String src, byte[] startAfter, 4430 boolean needLocation) 4431 throws IOException { 4432 checkOperation(OperationCategory.READ); 4433 final String operationName = "listStatus"; 4434 DirectoryListing dl = null; 4435 readLock(); 4436 try { 4437 checkOperation(NameNode.OperationCategory.READ); 4438 dl = FSDirStatAndListingOp.getListingInt(dir, src, startAfter, 4439 needLocation); 4440 } 
catch (AccessControlException e) { 4441 logAuditEvent(false, operationName, src); 4442 throw e; 4443 } finally { 4444 readUnlock(operationName); 4445 } 4446 logAuditEvent(true, operationName, src); 4447 return dl; 4448 } 4449 4450 ///////////////////////////////////////////////////////// 4451 // 4452 // These methods are called by datanodes 4453 // 4454 ///////////////////////////////////////////////////////// 4455 /** 4456 * Register Datanode. 4457 * <p> 4458 * The purpose of registration is to identify whether the new datanode 4459 * serves a new data storage, and will report new data block copies, 4460 * which the namenode was not aware of; or the datanode is a replacement 4461 * node for the data storage that was previously served by a different 4462 * or the same (in terms of host:port) datanode. 4463 * The data storages are distinguished by their storageIDs. When a new 4464 * data storage is reported the namenode issues a new unique storageID. 4465 * <p> 4466 * Finally, the namenode returns its namespaceID as the registrationID 4467 * for the datanodes. 4468 * namespaceID is a persistent attribute of the name space. 4469 * The registrationID is checked every time the datanode is communicating 4470 * with the namenode. 4471 * Datanodes with inappropriate registrationID are rejected. 4472 * If the namenode stops, and then restarts it can restore its 4473 * namespaceID and will continue serving the datanodes that has previously 4474 * registered with the namenode without restarting the whole cluster. 4475 * 4476 * @see org.apache.hadoop.hdfs.server.datanode.DataNode 4477 */ 4478 void registerDatanode(DatanodeRegistration nodeReg) throws IOException { 4479 writeLock(); 4480 try { 4481 getBlockManager().getDatanodeManager().registerDatanode(nodeReg); 4482 checkSafeMode(); 4483 } finally { 4484 writeUnlock("registerDatanode"); 4485 } 4486 } 4487 4488 /** 4489 * Get registrationID for datanodes based on the namespaceID. 
4490 * 4491 * @see #registerDatanode(DatanodeRegistration) 4492 * @return registration ID 4493 */ 4494 String getRegistrationID() { 4495 return Storage.getRegistrationID(getFSImage().getStorage()); 4496 } 4497 4498 /** 4499 * The given node has reported in. This method should: 4500 * 1) Record the heartbeat, so the datanode isn't timed out 4501 * 2) Adjust usage stats for future block allocation 4502 * 4503 * If a substantial amount of time passed since the last datanode 4504 * heartbeat then request an immediate block report. 4505 * 4506 * @return an array of datanode commands 4507 * @throws IOException 4508 */ 4509 HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, 4510 StorageReport[] reports, long cacheCapacity, long cacheUsed, 4511 int xceiverCount, int xmitsInProgress, int failedVolumes, 4512 VolumeFailureSummary volumeFailureSummary) throws IOException { 4513 readLock(); 4514 try { 4515 //get datanode commands 4516 final int maxTransfer = blockManager.getMaxReplicationStreams() 4517 - xmitsInProgress; 4518 DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat( 4519 nodeReg, reports, blockPoolId, cacheCapacity, cacheUsed, 4520 xceiverCount, maxTransfer, failedVolumes, volumeFailureSummary); 4521 4522 //create ha status 4523 final NNHAStatusHeartbeat haState = new NNHAStatusHeartbeat( 4524 haContext.getState().getServiceState(), 4525 getFSImage().getCorrectLastAppliedOrWrittenTxId()); 4526 4527 return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo); 4528 } finally { 4529 readUnlock("handleHeartbeat"); 4530 } 4531 } 4532 4533 /** 4534 * Returns whether or not there were available resources at the last check of 4535 * resources. 4536 * 4537 * @return true if there were sufficient resources available, false otherwise. 4538 */ 4539 boolean nameNodeHasResourcesAvailable() { 4540 return hasResourcesAvailable; 4541 } 4542 4543 /** 4544 * Perform resource checks and cache the results. 
4545 */ 4546 void checkAvailableResources() { 4547 Preconditions.checkState(nnResourceChecker != null, 4548 "nnResourceChecker not initialized"); 4549 hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace(); 4550 } 4551 4552 /** 4553 * Persist the block list for the inode. 4554 * @param path 4555 * @param file 4556 * @param logRetryCache 4557 */ 4558 private void persistBlocks(String path, INodeFile file, 4559 boolean logRetryCache) { 4560 assert hasWriteLock(); 4561 Preconditions.checkArgument(file.isUnderConstruction()); 4562 getEditLog().logUpdateBlocks(path, file, logRetryCache); 4563 NameNode.stateChangeLog.debug("persistBlocks: {} with {} blocks is" + 4564 " peristed to the file system", path, file.getBlocks().length); 4565 } 4566 4567 /** 4568 * Close file. 4569 * @param path 4570 * @param file 4571 */ 4572 private void closeFile(String path, INodeFile file) { 4573 assert hasWriteLock(); 4574 waitForLoadingFSImage(); 4575 // file is closed 4576 getEditLog().logCloseFile(path, file); 4577 NameNode.stateChangeLog.debug("closeFile: {} with {} blocks is persisted" + 4578 " to the file system", path, file.getBlocks().length); 4579 } 4580 4581 /** 4582 * Periodically calls hasAvailableResources of NameNodeResourceChecker, and if 4583 * there are found to be insufficient resources available, causes the NN to 4584 * enter safe mode. If resources are later found to have returned to 4585 * acceptable levels, this daemon will cause the NN to exit safe mode. 4586 */ 4587 class NameNodeResourceMonitor implements Runnable { 4588 boolean shouldNNRmRun = true; 4589 @Override 4590 public void run () { 4591 try { 4592 while (fsRunning && shouldNNRmRun) { 4593 checkAvailableResources(); 4594 if(!nameNodeHasResourcesAvailable()) { 4595 String lowResourcesMsg = "NameNode low on available disk space. 
"; 4596 if (!isInSafeMode()) { 4597 LOG.warn(lowResourcesMsg + "Entering safe mode."); 4598 } else { 4599 LOG.warn(lowResourcesMsg + "Already in safe mode."); 4600 } 4601 enterSafeMode(true); 4602 } 4603 try { 4604 Thread.sleep(resourceRecheckInterval); 4605 } catch (InterruptedException ie) { 4606 // Deliberately ignore 4607 } 4608 } 4609 } catch (Exception e) { 4610 FSNamesystem.LOG.error("Exception in NameNodeResourceMonitor: ", e); 4611 } 4612 } 4613 4614 public void stopMonitor() { 4615 shouldNNRmRun = false; 4616 } 4617 } 4618 4619 class NameNodeEditLogRoller implements Runnable { 4620 4621 private boolean shouldRun = true; 4622 private final long rollThreshold; 4623 private final long sleepIntervalMs; 4624 4625 public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) { 4626 this.rollThreshold = rollThreshold; 4627 this.sleepIntervalMs = sleepIntervalMs; 4628 } 4629 4630 @Override 4631 public void run() { 4632 while (fsRunning && shouldRun) { 4633 try { 4634 FSEditLog editLog = getFSImage().getEditLog(); 4635 long numEdits = 4636 editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId(); 4637 if (numEdits > rollThreshold) { 4638 FSNamesystem.LOG.info("NameNode rolling its own edit log because" 4639 + " number of edits in open segment exceeds threshold of " 4640 + rollThreshold); 4641 rollEditLog(); 4642 } 4643 } catch (Exception e) { 4644 FSNamesystem.LOG.error("Swallowing exception in " 4645 + NameNodeEditLogRoller.class.getSimpleName() + ":", e); 4646 } 4647 try { 4648 Thread.sleep(sleepIntervalMs); 4649 } catch (InterruptedException e) { 4650 FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName() 4651 + " was interrupted, exiting"); 4652 break; 4653 } 4654 } 4655 } 4656 4657 public void stop() { 4658 shouldRun = false; 4659 } 4660 } 4661 4662 /** 4663 * Daemon to periodically scan the namespace for lazyPersist files 4664 * with missing blocks and unlink them. 
4665 */ 4666 class LazyPersistFileScrubber implements Runnable { 4667 private volatile boolean shouldRun = true; 4668 final int scrubIntervalSec; 4669 public LazyPersistFileScrubber(final int scrubIntervalSec) { 4670 this.scrubIntervalSec = scrubIntervalSec; 4671 } 4672 4673 /** 4674 * Periodically go over the list of lazyPersist files with missing 4675 * blocks and unlink them from the namespace. 4676 */ 4677 private void clearCorruptLazyPersistFiles() 4678 throws IOException { 4679 4680 BlockStoragePolicy lpPolicy = blockManager.getStoragePolicy("LAZY_PERSIST"); 4681 4682 List<BlockCollection> filesToDelete = new ArrayList<>(); 4683 boolean changed = false; 4684 writeLock(); 4685 try { 4686 final Iterator<Block> it = blockManager.getCorruptReplicaBlockIterator(); 4687 4688 while (it.hasNext()) { 4689 Block b = it.next(); 4690 BlockInfoContiguous blockInfo = blockManager.getStoredBlock(b); 4691 if (blockInfo == null) { 4692 LOG.info("Cannot find block info for block " + b); 4693 } else { 4694 if (blockInfo.getBlockCollection().getStoragePolicyID() 4695 == lpPolicy.getId()) { 4696 filesToDelete.add(blockInfo.getBlockCollection()); 4697 } 4698 } 4699 } 4700 4701 for (BlockCollection bc : filesToDelete) { 4702 LOG.warn("Removing lazyPersist file " + bc.getName() + " with no replicas."); 4703 BlocksMapUpdateInfo toRemoveBlocks = 4704 FSDirDeleteOp.deleteInternal( 4705 FSNamesystem.this, bc.getName(), 4706 INodesInPath.fromINode((INodeFile) bc), false); 4707 changed |= toRemoveBlocks != null; 4708 if (toRemoveBlocks != null) { 4709 removeBlocks(toRemoveBlocks); // Incremental deletion of blocks 4710 } 4711 } 4712 } finally { 4713 writeUnlock("clearCorruptLazyPersistFiles"); 4714 } 4715 if (changed) { 4716 getEditLog().logSync(); 4717 } 4718 } 4719 4720 @Override 4721 public void run() { 4722 while (fsRunning && shouldRun) { 4723 try { 4724 clearCorruptLazyPersistFiles(); 4725 } catch (Exception e) { 4726 FSNamesystem.LOG.error( 4727 "Ignoring exception in 
LazyPersistFileScrubber:", e); 4728 } 4729 4730 try { 4731 Thread.sleep(scrubIntervalSec * 1000); 4732 } catch (InterruptedException e) { 4733 FSNamesystem.LOG.info( 4734 "LazyPersistFileScrubber was interrupted, exiting"); 4735 break; 4736 } 4737 } 4738 } 4739 4740 public void stop() { 4741 shouldRun = false; 4742 } 4743 } 4744 4745 public FSImage getFSImage() { 4746 return fsImage; 4747 } 4748 4749 public FSEditLog getEditLog() { 4750 return getFSImage().getEditLog(); 4751 } 4752 4753 private void checkBlock(ExtendedBlock block) throws IOException { 4754 if (block != null && !this.blockPoolId.equals(block.getBlockPoolId())) { 4755 throw new IOException("Unexpected BlockPoolId " + block.getBlockPoolId() 4756 + " - expected " + blockPoolId); 4757 } 4758 } 4759 4760 @Metric({"MissingBlocks", "Number of missing blocks"}) 4761 public long getMissingBlocksCount() { 4762 // not locking 4763 return blockManager.getMissingBlocksCount(); 4764 } 4765 4766 @Metric({"MissingReplOneBlocks", "Number of missing blocks " + 4767 "with replication factor 1"}) 4768 public long getMissingReplOneBlocksCount() { 4769 // not locking 4770 return blockManager.getMissingReplOneBlocksCount(); 4771 } 4772 4773 @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"}) 4774 public int getExpiredHeartbeats() { 4775 return datanodeStatistics.getExpiredHeartbeats(); 4776 } 4777 4778 @Metric({"TransactionsSinceLastCheckpoint", 4779 "Number of transactions since last checkpoint"}) 4780 public long getTransactionsSinceLastCheckpoint() { 4781 return getEditLog().getLastWrittenTxIdWithoutLock() - 4782 getFSImage().getStorage().getMostRecentCheckpointTxId(); 4783 } 4784 4785 @Metric({"TransactionsSinceLastLogRoll", 4786 "Number of transactions since last edit log roll"}) 4787 public long getTransactionsSinceLastLogRoll() { 4788 if (isInStandbyState() || !getEditLog().isSegmentOpenWithoutLock()) { 4789 return 0; 4790 } else { 4791 return getEditLog().getLastWrittenTxIdWithoutLock() - 4792 
getEditLog().getCurSegmentTxIdWithoutLock() + 1; 4793 } 4794 } 4795 4796 @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"}) 4797 public long getLastWrittenTransactionId() { 4798 return getEditLog().getLastWrittenTxIdWithoutLock(); 4799 } 4800 4801 @Metric({"LastCheckpointTime", 4802 "Time in milliseconds since the epoch of the last checkpoint"}) 4803 public long getLastCheckpointTime() { 4804 return getFSImage().getStorage().getMostRecentCheckpointTime(); 4805 } 4806 4807 /** @see ClientProtocol#getStats() */ 4808 long[] getStats() { 4809 final long[] stats = datanodeStatistics.getStats(); 4810 stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks(); 4811 stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks(); 4812 stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount(); 4813 stats[ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX] = 4814 getMissingReplOneBlocksCount(); 4815 return stats; 4816 } 4817 4818 @Override // FSNamesystemMBean 4819 @Metric({"CapacityTotal", 4820 "Total raw capacity of data nodes in bytes"}) 4821 public long getCapacityTotal() { 4822 return datanodeStatistics.getCapacityTotal(); 4823 } 4824 4825 @Metric({"CapacityTotalGB", 4826 "Total raw capacity of data nodes in GB"}) 4827 public float getCapacityTotalGB() { 4828 return DFSUtil.roundBytesToGB(getCapacityTotal()); 4829 } 4830 4831 @Override // FSNamesystemMBean 4832 @Metric({"CapacityUsed", 4833 "Total used capacity across all data nodes in bytes"}) 4834 public long getCapacityUsed() { 4835 return datanodeStatistics.getCapacityUsed(); 4836 } 4837 4838 @Metric({"CapacityUsedGB", 4839 "Total used capacity across all data nodes in GB"}) 4840 public float getCapacityUsedGB() { 4841 return DFSUtil.roundBytesToGB(getCapacityUsed()); 4842 } 4843 4844 @Override // FSNamesystemMBean 4845 @Metric({"CapacityRemaining", "Remaining capacity in bytes"}) 4846 public long getCapacityRemaining() { 4847 
return datanodeStatistics.getCapacityRemaining(); 4848 } 4849 4850 @Metric({"CapacityRemainingGB", "Remaining capacity in GB"}) 4851 public float getCapacityRemainingGB() { 4852 return DFSUtil.roundBytesToGB(getCapacityRemaining()); 4853 } 4854 4855 @Metric({"CapacityUsedNonDFS", 4856 "Total space used by data nodes for non DFS purposes in bytes"}) 4857 public long getCapacityUsedNonDFS() { 4858 return datanodeStatistics.getCapacityUsedNonDFS(); 4859 } 4860 4861 /** 4862 * Total number of connections. 4863 */ 4864 @Override // FSNamesystemMBean 4865 @Metric 4866 public int getTotalLoad() { 4867 return datanodeStatistics.getXceiverCount(); 4868 } 4869 4870 @Metric({ "SnapshottableDirectories", "Number of snapshottable directories" }) 4871 public int getNumSnapshottableDirs() { 4872 return this.snapshotManager.getNumSnapshottableDirs(); 4873 } 4874 4875 @Metric({ "Snapshots", "The number of snapshots" }) 4876 public int getNumSnapshots() { 4877 return this.snapshotManager.getNumSnapshots(); 4878 } 4879 4880 @Override 4881 public String getSnapshotStats() { 4882 Map<String, Object> info = new HashMap<String, Object>(); 4883 info.put("SnapshottableDirectories", this.getNumSnapshottableDirs()); 4884 info.put("Snapshots", this.getNumSnapshots()); 4885 return JSON.toString(info); 4886 } 4887 4888 @Override // FSNamesystemMBean 4889 @Metric({ "NumEncryptionZones", "The number of encryption zones" }) 4890 public int getNumEncryptionZones() { 4891 return dir.ezManager.getNumEncryptionZones(); 4892 } 4893 4894 /** 4895 * Returns the length of the wait Queue for the FSNameSystemLock. 4896 * 4897 * A larger number here indicates lots of threads are waiting for 4898 * FSNameSystemLock. 
4899 * 4900 * @return int - Number of Threads waiting to acquire FSNameSystemLock 4901 */ 4902 @Override 4903 @Metric({"LockQueueLength", "Number of threads waiting to " + 4904 "acquire FSNameSystemLock"}) 4905 public int getFsLockQueueLength() { 4906 return fsLock.getQueueLength(); 4907 } 4908 4909 int getNumberOfDatanodes(DatanodeReportType type) { 4910 readLock(); 4911 try { 4912 return getBlockManager().getDatanodeManager().getDatanodeListForReport( 4913 type).size(); 4914 } finally { 4915 readUnlock("getNumberOfDatanodes"); 4916 } 4917 } 4918 4919 DatanodeInfo[] datanodeReport(final DatanodeReportType type 4920 ) throws AccessControlException, StandbyException { 4921 checkSuperuserPrivilege(); 4922 checkOperation(OperationCategory.UNCHECKED); 4923 readLock(); 4924 try { 4925 checkOperation(OperationCategory.UNCHECKED); 4926 final DatanodeManager dm = getBlockManager().getDatanodeManager(); 4927 final List<DatanodeDescriptor> results = dm.getDatanodeListForReport(type); 4928 4929 DatanodeInfo[] arr = new DatanodeInfo[results.size()]; 4930 for (int i=0; i<arr.length; i++) { 4931 arr[i] = new DatanodeInfo(results.get(i)); 4932 } 4933 return arr; 4934 } finally { 4935 readUnlock("datanodeReport"); 4936 } 4937 } 4938 4939 DatanodeStorageReport[] getDatanodeStorageReport(final DatanodeReportType type 4940 ) throws AccessControlException, StandbyException { 4941 checkSuperuserPrivilege(); 4942 checkOperation(OperationCategory.UNCHECKED); 4943 readLock(); 4944 try { 4945 checkOperation(OperationCategory.UNCHECKED); 4946 final DatanodeManager dm = getBlockManager().getDatanodeManager(); 4947 final List<DatanodeDescriptor> datanodes = dm.getDatanodeListForReport(type); 4948 4949 DatanodeStorageReport[] reports = new DatanodeStorageReport[datanodes.size()]; 4950 for (int i = 0; i < reports.length; i++) { 4951 final DatanodeDescriptor d = datanodes.get(i); 4952 reports[i] = new DatanodeStorageReport(new DatanodeInfo(d), 4953 d.getStorageReports()); 4954 } 4955 return 
reports; 4956 } finally { 4957 readUnlock("getDatanodeStorageReport"); 4958 } 4959 } 4960 4961 /** 4962 * Save namespace image. 4963 * This will save current namespace into fsimage file and empty edits file. 4964 * Requires superuser privilege and safe mode. 4965 * 4966 * @throws AccessControlException if superuser privilege is violated. 4967 * @throws IOException if 4968 */ 4969 void saveNamespace() throws AccessControlException, IOException { 4970 checkOperation(OperationCategory.UNCHECKED); 4971 checkSuperuserPrivilege(); 4972 4973 cpLock(); // Block if a checkpointing is in progress on standby. 4974 readLock(); 4975 try { 4976 checkOperation(OperationCategory.UNCHECKED); 4977 4978 if (!isInSafeMode()) { 4979 throw new IOException("Safe mode should be turned ON " 4980 + "in order to create namespace image."); 4981 } 4982 getFSImage().saveNamespace(this); 4983 } finally { 4984 readUnlock("saveNamespace"); 4985 cpUnlock(); 4986 } 4987 LOG.info("New namespace image has been created"); 4988 } 4989 4990 /** 4991 * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again. 4992 * Requires superuser privilege. 4993 * 4994 * @throws AccessControlException if superuser privilege is violated. 4995 */ 4996 boolean restoreFailedStorage(String arg) throws AccessControlException, 4997 StandbyException { 4998 checkSuperuserPrivilege(); 4999 checkOperation(OperationCategory.UNCHECKED); 5000 cpLock(); // Block if a checkpointing is in progress on standby. 5001 writeLock(); 5002 try { 5003 checkOperation(OperationCategory.UNCHECKED); 5004 5005 // if it is disabled - enable it and vice versa. 
5006 if(arg.equals("check")) 5007 return getFSImage().getStorage().getRestoreFailedStorage(); 5008 5009 boolean val = arg.equals("true"); // false if not 5010 getFSImage().getStorage().setRestoreFailedStorage(val); 5011 5012 return val; 5013 } finally { 5014 writeUnlock("restoreFailedStorage"); 5015 cpUnlock(); 5016 } 5017 } 5018 5019 Date getStartTime() { 5020 return new Date(startTime); 5021 } 5022 5023 void finalizeUpgrade() throws IOException { 5024 checkSuperuserPrivilege(); 5025 checkOperation(OperationCategory.UNCHECKED); 5026 cpLock(); // Block if a checkpointing is in progress on standby. 5027 writeLock(); 5028 try { 5029 checkOperation(OperationCategory.UNCHECKED); 5030 getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState()); 5031 } finally { 5032 writeUnlock("finalizeUpgrade"); 5033 cpUnlock(); 5034 } 5035 } 5036 5037 void refreshNodes() throws IOException { 5038 checkOperation(OperationCategory.UNCHECKED); 5039 checkSuperuserPrivilege(); 5040 getBlockManager().getDatanodeManager().refreshNodes(new HdfsConfiguration()); 5041 } 5042 5043 void setBalancerBandwidth(long bandwidth) throws IOException { 5044 checkOperation(OperationCategory.UNCHECKED); 5045 checkSuperuserPrivilege(); 5046 getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth); 5047 } 5048 5049 /** 5050 * Persist the new block (the last block of the given file). 5051 * @param path 5052 * @param file 5053 */ 5054 private void persistNewBlock(String path, INodeFile file) { 5055 Preconditions.checkArgument(file.isUnderConstruction()); 5056 getEditLog().logAddBlock(path, file); 5057 NameNode.stateChangeLog.debug("persistNewBlock: {} with new block {}," + 5058 " current total block count is {}", path, 5059 file.getLastBlock().toString(), file.getBlocks().length); 5060 } 5061 5062 /** 5063 * SafeModeInfo contains information related to the safe mode. 5064 * <p> 5065 * An instance of {@link SafeModeInfo} is created when the name node 5066 * enters safe mode. 
5067 * <p> 5068 * During name node startup {@link SafeModeInfo} counts the number of 5069 * <em>safe blocks</em>, those that have at least the minimal number of 5070 * replicas, and calculates the ratio of safe blocks to the total number 5071 * of blocks in the system, which is the size of blocks in 5072 * {@link FSNamesystem#blockManager}. When the ratio reaches the 5073 * {@link #threshold} it starts the SafeModeMonitor daemon in order 5074 * to monitor whether the safe mode {@link #extension} is passed. 5075 * Then it leaves safe mode and destroys itself. 5076 * <p> 5077 * If safe mode is turned on manually then the number of safe blocks is 5078 * not tracked because the name node is not intended to leave safe mode 5079 * automatically in the case. 5080 * 5081 * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction, boolean) 5082 */ 5083 public class SafeModeInfo { 5084 // configuration fields 5085 /** Safe mode threshold condition %.*/ 5086 private final double threshold; 5087 /** Safe mode minimum number of datanodes alive */ 5088 private final int datanodeThreshold; 5089 /** 5090 * Safe mode extension after the threshold. 5091 * Make it volatile so that getSafeModeTip can read the latest value 5092 * without taking a lock. 5093 */ 5094 private volatile int extension; 5095 /** Min replication required by safe mode. */ 5096 private final int safeReplication; 5097 /** threshold for populating needed replication queues */ 5098 private final double replQueueThreshold; 5099 // internal fields 5100 /** Time when threshold was reached. 5101 * <br> -1 safe mode is off 5102 * <br> 0 safe mode is on, and threshold is not reached yet 5103 * <br> >0 safe mode is on, but we are in extension period 5104 */ 5105 private long reached = -1; 5106 private long reachedTimestamp = -1; 5107 /** Total number of blocks. */ 5108 int blockTotal; 5109 /** Number of safe blocks. 
*/ 5110 int blockSafe; 5111 /** Number of blocks needed to satisfy safe mode threshold condition */ 5112 private int blockThreshold; 5113 /** Number of blocks needed before populating replication queues */ 5114 private int blockReplQueueThreshold; 5115 /** time of the last status printout */ 5116 private long lastStatusReport = 0; 5117 /** 5118 * Was safemode entered automatically because available resources were low. 5119 * Make it volatile so that getSafeModeTip can read the latest value 5120 * without taking a lock. 5121 */ 5122 private volatile boolean resourcesLow = false; 5123 /** Should safemode adjust its block totals as blocks come in */ 5124 private boolean shouldIncrementallyTrackBlocks = false; 5125 /** counter for tracking startup progress of reported blocks */ 5126 private Counter awaitingReportedBlocksCounter; 5127 5128 /** 5129 * Creates SafeModeInfo when the name node enters 5130 * automatic safe mode at startup. 5131 * 5132 * @param conf configuration 5133 */ 5134 private SafeModeInfo(Configuration conf) { 5135 this.threshold = conf.getFloat(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, 5136 DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT); 5137 if(threshold > 1.0) { 5138 LOG.warn("The threshold value should't be greater than 1, threshold: " + threshold); 5139 } 5140 this.datanodeThreshold = conf.getInt( 5141 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 5142 DFS_NAMENODE_SAFEMODE_MIN_DATANODES_DEFAULT); 5143 this.extension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0); 5144 this.safeReplication = conf.getInt(DFS_NAMENODE_REPLICATION_MIN_KEY, 5145 DFS_NAMENODE_REPLICATION_MIN_DEFAULT); 5146 5147 LOG.info(DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY + " = " + threshold); 5148 LOG.info(DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY + " = " + datanodeThreshold); 5149 LOG.info(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + " = " + extension); 5150 5151 // default to safe mode threshold (i.e., don't populate queues before leaving safe mode) 5152 this.replQueueThreshold = 5153 
conf.getFloat(DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 5154 (float) threshold); 5155 this.blockTotal = 0; 5156 this.blockSafe = 0; 5157 } 5158 5159 /** 5160 * In the HA case, the StandbyNode can be in safemode while the namespace 5161 * is modified by the edit log tailer. In this case, the number of total 5162 * blocks changes as edits are processed (eg blocks are added and deleted). 5163 * However, we don't want to do the incremental tracking during the 5164 * startup-time loading process -- only once the initial total has been 5165 * set after the image has been loaded. 5166 */ 5167 private boolean shouldIncrementallyTrackBlocks() { 5168 return shouldIncrementallyTrackBlocks; 5169 } 5170 5171 /** 5172 * Creates SafeModeInfo when safe mode is entered manually, or because 5173 * available resources are low. 5174 * 5175 * The {@link #threshold} is set to 1.5 so that it could never be reached. 5176 * {@link #blockTotal} is set to -1 to indicate that safe mode is manual. 5177 * 5178 * @see SafeModeInfo 5179 */ 5180 private SafeModeInfo(boolean resourcesLow) { 5181 this.threshold = 1.5f; // this threshold can never be reached 5182 this.datanodeThreshold = Integer.MAX_VALUE; 5183 this.extension = Integer.MAX_VALUE; 5184 this.safeReplication = Short.MAX_VALUE + 1; // more than maxReplication 5185 this.replQueueThreshold = 1.5f; // can never be reached 5186 this.blockTotal = -1; 5187 this.blockSafe = -1; 5188 this.resourcesLow = resourcesLow; 5189 enter(); 5190 reportStatus("STATE* Safe mode is ON.", true); 5191 } 5192 5193 /** 5194 * Check if safe mode is on. 5195 * @return true if in safe mode 5196 */ 5197 private synchronized boolean isOn() { 5198 doConsistencyCheck(); 5199 return this.reached >= 0; 5200 } 5201 5202 /** 5203 * Enter safe mode. 5204 */ 5205 private void enter() { 5206 this.reached = 0; 5207 this.reachedTimestamp = 0; 5208 } 5209 5210 /** 5211 * Leave safe mode. 5212 * <p> 5213 * Check for invalid, under- & over-replicated blocks in the end of startup. 
5214 */ 5215 private synchronized void leave() { 5216 // if not done yet, initialize replication queues. 5217 // In the standby, do not populate repl queues 5218 if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) { 5219 initializeReplQueues(); 5220 } 5221 long timeInSafemode = now() - startTime; 5222 NameNode.stateChangeLog.info("STATE* Leaving safe mode after " 5223 + timeInSafemode/1000 + " secs"); 5224 NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode); 5225 5226 //Log the following only once (when transitioning from ON -> OFF) 5227 if (reached >= 0) { 5228 NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); 5229 } 5230 reached = -1; 5231 reachedTimestamp = -1; 5232 safeMode = null; 5233 final NetworkTopology nt = blockManager.getDatanodeManager().getNetworkTopology(); 5234 NameNode.stateChangeLog.info("STATE* Network topology has " 5235 + nt.getNumOfRacks() + " racks and " 5236 + nt.getNumOfLeaves() + " datanodes"); 5237 NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has " 5238 + blockManager.numOfUnderReplicatedBlocks() + " blocks"); 5239 5240 startSecretManagerIfNecessary(); 5241 5242 // If startup has not yet completed, end safemode phase. 5243 StartupProgress prog = NameNode.getStartupProgress(); 5244 if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) { 5245 prog.endStep(Phase.SAFEMODE, STEP_AWAITING_REPORTED_BLOCKS); 5246 prog.endPhase(Phase.SAFEMODE); 5247 } 5248 } 5249 5250 /** 5251 * Check whether we have reached the threshold for 5252 * initializing replication queues. 5253 */ 5254 private synchronized boolean canInitializeReplQueues() { 5255 return shouldPopulateReplQueues() 5256 && blockSafe >= blockReplQueueThreshold; 5257 } 5258 5259 /** 5260 * Safe mode can be turned off iff 5261 * the threshold is reached and 5262 * the extension time have passed. 5263 * @return true if can leave or false otherwise. 
5264 */ 5265 private synchronized boolean canLeave() { 5266 if (reached == 0) { 5267 return false; 5268 } 5269 5270 if (monotonicNow() - reached < extension) { 5271 reportStatus("STATE* Safe mode ON, in safe mode extension.", false); 5272 return false; 5273 } 5274 5275 if (needEnter()) { 5276 reportStatus("STATE* Safe mode ON, thresholds not met.", false); 5277 return false; 5278 } 5279 5280 return true; 5281 } 5282 5283 /** 5284 * There is no need to enter safe mode 5285 * if DFS is empty or {@link #threshold} == 0 5286 */ 5287 private boolean needEnter() { 5288 return (threshold != 0 && blockSafe < blockThreshold) || 5289 (datanodeThreshold != 0 && getNumLiveDataNodes() < datanodeThreshold) || 5290 (!nameNodeHasResourcesAvailable()); 5291 } 5292 5293 /** 5294 * Check and trigger safe mode if needed. 5295 */ 5296 private void checkMode() { 5297 // Have to have write-lock since leaving safemode initializes 5298 // repl queues, which requires write lock 5299 assert hasWriteLock(); 5300 if (inTransitionToActive()) { 5301 return; 5302 } 5303 // if smmthread is already running, the block threshold must have been 5304 // reached before, there is no need to enter the safe mode again 5305 if (smmthread == null && needEnter()) { 5306 enter(); 5307 // check if we are ready to initialize replication queues 5308 if (canInitializeReplQueues() && !isPopulatingReplQueues() 5309 && !haEnabled) { 5310 initializeReplQueues(); 5311 } 5312 reportStatus("STATE* Safe mode ON.", false); 5313 return; 5314 } 5315 // the threshold is reached or was reached before 5316 if (!isOn() || // safe mode is off 5317 extension <= 0 || threshold <= 0) { // don't need to wait 5318 this.leave(); // leave safe mode 5319 return; 5320 } 5321 if (reached > 0) { // threshold has already been reached before 5322 reportStatus("STATE* Safe mode ON.", false); 5323 return; 5324 } 5325 // start monitor 5326 reached = monotonicNow(); 5327 reachedTimestamp = now(); 5328 if (smmthread == null) { 5329 smmthread = new 
Daemon(new SafeModeMonitor()); 5330 smmthread.start(); 5331 reportStatus("STATE* Safe mode extension entered.", true); 5332 } 5333 5334 // check if we are ready to initialize replication queues 5335 if (canInitializeReplQueues() && !isPopulatingReplQueues() && !haEnabled) { 5336 initializeReplQueues(); 5337 } 5338 } 5339 5340 /** 5341 * Set total number of blocks. 5342 */ 5343 private synchronized void setBlockTotal(int total) { 5344 this.blockTotal = total; 5345 this.blockThreshold = (int) (blockTotal * threshold); 5346 this.blockReplQueueThreshold = 5347 (int) (blockTotal * replQueueThreshold); 5348 if (haEnabled) { 5349 // After we initialize the block count, any further namespace 5350 // modifications done while in safe mode need to keep track 5351 // of the number of total blocks in the system. 5352 this.shouldIncrementallyTrackBlocks = true; 5353 } 5354 if(blockSafe < 0) 5355 this.blockSafe = 0; 5356 checkMode(); 5357 } 5358 5359 /** 5360 * Increment number of safe blocks if current block has 5361 * reached minimal replication. 5362 * @param replication current replication 5363 */ 5364 private synchronized void incrementSafeBlockCount(short replication) { 5365 if (replication == safeReplication) { 5366 this.blockSafe++; 5367 5368 // Report startup progress only if we haven't completed startup yet. 5369 StartupProgress prog = NameNode.getStartupProgress(); 5370 if (prog.getStatus(Phase.SAFEMODE) != Status.COMPLETE) { 5371 if (this.awaitingReportedBlocksCounter == null) { 5372 this.awaitingReportedBlocksCounter = prog.getCounter(Phase.SAFEMODE, 5373 STEP_AWAITING_REPORTED_BLOCKS); 5374 } 5375 this.awaitingReportedBlocksCounter.increment(); 5376 } 5377 5378 checkMode(); 5379 } 5380 } 5381 5382 /** 5383 * Decrement number of safe blocks if current block has 5384 * fallen below minimal replication. 
5385 * @param replication current replication 5386 */ 5387 private synchronized void decrementSafeBlockCount(short replication) { 5388 if (replication == safeReplication-1) { 5389 this.blockSafe--; 5390 //blockSafe is set to -1 in manual / low resources safemode 5391 assert blockSafe >= 0 || isManual() || areResourcesLow(); 5392 checkMode(); 5393 } 5394 } 5395 5396 /** 5397 * Check if safe mode was entered manually 5398 */ 5399 private boolean isManual() { 5400 return extension == Integer.MAX_VALUE; 5401 } 5402 5403 /** 5404 * Set manual safe mode. 5405 */ 5406 private synchronized void setManual() { 5407 extension = Integer.MAX_VALUE; 5408 } 5409 5410 /** 5411 * Check if safe mode was entered due to resources being low. 5412 */ 5413 private boolean areResourcesLow() { 5414 return resourcesLow; 5415 } 5416 5417 /** 5418 * Set that resources are low for this instance of safe mode. 5419 */ 5420 private void setResourcesLow() { 5421 resourcesLow = true; 5422 } 5423 5424 /** 5425 * A tip on how safe mode is to be turned off: manually or automatically. 5426 */ 5427 String getTurnOffTip() { 5428 if(!isOn()) { 5429 return "Safe mode is OFF."; 5430 } 5431 5432 //Manual OR low-resource safemode. (Admin intervention required) 5433 String adminMsg = "It was turned on manually. "; 5434 if (areResourcesLow()) { 5435 adminMsg = "Resources are low on NN. Please add or free up more " 5436 + "resources then turn off safe mode manually. NOTE: If you turn off" 5437 + " safe mode before adding resources, " 5438 + "the NN will immediately return to safe mode. 
"; 5439 } 5440 if (isManual() || areResourcesLow()) { 5441 return adminMsg 5442 + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off."; 5443 } 5444 5445 boolean thresholdsMet = true; 5446 int numLive = getNumLiveDataNodes(); 5447 String msg = ""; 5448 if (blockSafe < blockThreshold) { 5449 msg += String.format( 5450 "The reported blocks %d needs additional %d" 5451 + " blocks to reach the threshold %.4f of total blocks %d.%n", 5452 blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal); 5453 thresholdsMet = false; 5454 } else { 5455 msg += String.format("The reported blocks %d has reached the threshold" 5456 + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal); 5457 } 5458 if (numLive < datanodeThreshold) { 5459 msg += String.format( 5460 "The number of live datanodes %d needs an additional %d live " 5461 + "datanodes to reach the minimum number %d.%n", 5462 numLive, (datanodeThreshold - numLive), datanodeThreshold); 5463 thresholdsMet = false; 5464 } else { 5465 msg += String.format("The number of live datanodes %d has reached " 5466 + "the minimum number %d. ", 5467 numLive, datanodeThreshold); 5468 } 5469 msg += (reached > 0) ? "In safe mode extension. " : ""; 5470 msg += "Safe mode will be turned off automatically "; 5471 5472 if (!thresholdsMet) { 5473 msg += "once the thresholds have been reached."; 5474 } else if (reached + extension - monotonicNow() > 0) { 5475 msg += ("in " + (reached + extension - monotonicNow()) / 1000 + " seconds."); 5476 } else { 5477 msg += "soon."; 5478 } 5479 5480 return msg; 5481 } 5482 5483 /** 5484 * Print status every 20 seconds. 
5485 */ 5486 private void reportStatus(String msg, boolean rightNow) { 5487 long curTime = now(); 5488 if(!rightNow && (curTime - lastStatusReport < 20 * 1000)) 5489 return; 5490 NameNode.stateChangeLog.info(msg + " \n" + getTurnOffTip()); 5491 lastStatusReport = curTime; 5492 } 5493 5494 @Override 5495 public String toString() { 5496 String resText = "Current safe blocks = " 5497 + blockSafe 5498 + ". Target blocks = " + blockThreshold + " for threshold = %" + threshold 5499 + ". Minimal replication = " + safeReplication + "."; 5500 if (reached > 0) 5501 resText += " Threshold was reached " + new Date(reachedTimestamp) + "."; 5502 return resText; 5503 } 5504 5505 /** 5506 * Checks consistency of the class state. 5507 * This is costly so only runs if asserts are enabled. 5508 */ 5509 private void doConsistencyCheck() { 5510 boolean assertsOn = false; 5511 assert assertsOn = true; // set to true if asserts are on 5512 if (!assertsOn) return; 5513 5514 if (blockTotal == -1 && blockSafe == -1) { 5515 return; // manual safe mode 5516 } 5517 int activeBlocks = blockManager.getActiveBlockCount(); 5518 if ((blockTotal != activeBlocks) && 5519 !(blockSafe >= 0 && blockSafe <= blockTotal)) { 5520 throw new AssertionError( 5521 " SafeMode: Inconsistent filesystem state: " 5522 + "SafeMode data: blockTotal=" + blockTotal 5523 + " blockSafe=" + blockSafe + "; " 5524 + "BlockManager data: active=" + activeBlocks); 5525 } 5526 } 5527 5528 private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) { 5529 if (!shouldIncrementallyTrackBlocks) { 5530 return; 5531 } 5532 assert haEnabled; 5533 5534 if (LOG.isDebugEnabled()) { 5535 LOG.debug("Adjusting block totals from " + 5536 blockSafe + "/" + blockTotal + " to " + 5537 (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal)); 5538 } 5539 assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " + 5540 blockSafe + " by " + deltaSafe + ": would be negative"; 5541 assert blockTotal + deltaTotal >= 0 : "Can't 
reduce blockTotal " + 5542 blockTotal + " by " + deltaTotal + ": would be negative"; 5543 5544 blockSafe += deltaSafe; 5545 setBlockTotal(blockTotal + deltaTotal); 5546 } 5547 } 5548 5549 /** 5550 * Periodically check whether it is time to leave safe mode. 5551 * This thread starts when the threshold level is reached. 5552 * 5553 */ 5554 class SafeModeMonitor implements Runnable { 5555 /** interval in msec for checking safe mode: {@value} */ 5556 private static final long recheckInterval = 1000; 5557 5558 /** 5559 */ 5560 @Override 5561 public void run() { 5562 while (fsRunning) { 5563 writeLock(); 5564 try { 5565 if (safeMode == null) { // Not in safe mode. 5566 break; 5567 } 5568 if (safeMode.canLeave()) { 5569 // Leave safe mode. 5570 safeMode.leave(); 5571 smmthread = null; 5572 break; 5573 } 5574 } finally { 5575 writeUnlock(); 5576 } 5577 5578 try { 5579 Thread.sleep(recheckInterval); 5580 } catch (InterruptedException ie) { 5581 // Ignored 5582 } 5583 } 5584 if (!fsRunning) { 5585 LOG.info("NameNode is being shutdown, exit SafeModeMonitor thread"); 5586 } 5587 } 5588 } 5589 5590 boolean setSafeMode(SafeModeAction action) throws IOException { 5591 if (action != SafeModeAction.SAFEMODE_GET) { 5592 checkSuperuserPrivilege(); 5593 switch(action) { 5594 case SAFEMODE_LEAVE: // leave safe mode 5595 leaveSafeMode(); 5596 break; 5597 case SAFEMODE_ENTER: // enter safe mode 5598 enterSafeMode(false); 5599 break; 5600 default: 5601 LOG.error("Unexpected safe mode action"); 5602 } 5603 } 5604 return isInSafeMode(); 5605 } 5606 5607 @Override 5608 public void checkSafeMode() { 5609 // safeMode is volatile, and may be set to null at any time 5610 SafeModeInfo safeMode = this.safeMode; 5611 if (safeMode != null) { 5612 safeMode.checkMode(); 5613 } 5614 } 5615 5616 @Override 5617 public boolean isInSafeMode() { 5618 // safeMode is volatile, and may be set to null at any time 5619 SafeModeInfo safeMode = this.safeMode; 5620 if (safeMode == null) 5621 return false; 5622 
return safeMode.isOn(); 5623 } 5624 5625 @Override 5626 public boolean isInStartupSafeMode() { 5627 // safeMode is volatile, and may be set to null at any time 5628 SafeModeInfo safeMode = this.safeMode; 5629 if (safeMode == null) 5630 return false; 5631 // If the NN is in safemode, and not due to manual / low resources, we 5632 // assume it must be because of startup. If the NN had low resources during 5633 // startup, we assume it came out of startup safemode and it is now in low 5634 // resources safemode 5635 return !safeMode.isManual() && !safeMode.areResourcesLow() 5636 && safeMode.isOn(); 5637 } 5638 5639 /** 5640 * Check if replication queues are to be populated 5641 * @return true when node is HAState.Active and not in the very first safemode 5642 */ 5643 @Override 5644 public boolean isPopulatingReplQueues() { 5645 if (!shouldPopulateReplQueues()) { 5646 return false; 5647 } 5648 return initializedReplQueues; 5649 } 5650 5651 private boolean shouldPopulateReplQueues() { 5652 if(haContext == null || haContext.getState() == null) 5653 return false; 5654 return haContext.getState().shouldPopulateReplQueues(); 5655 } 5656 5657 @Override 5658 public void incrementSafeBlockCount(int replication) { 5659 // safeMode is volatile, and may be set to null at any time 5660 SafeModeInfo safeMode = this.safeMode; 5661 if (safeMode == null) 5662 return; 5663 safeMode.incrementSafeBlockCount((short)replication); 5664 } 5665 5666 @Override 5667 public void decrementSafeBlockCount(Block b) { 5668 // safeMode is volatile, and may be set to null at any time 5669 SafeModeInfo safeMode = this.safeMode; 5670 if (safeMode == null) // mostly true 5671 return; 5672 BlockInfoContiguous storedBlock = getStoredBlock(b); 5673 if (storedBlock.isComplete()) { 5674 safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas()); 5675 } 5676 } 5677 5678 /** 5679 * Adjust the total number of blocks safe and expected during safe mode. 
5680 * If safe mode is not currently on, this is a no-op. 5681 * @param deltaSafe the change in number of safe blocks 5682 * @param deltaTotal the change i nnumber of total blocks expected 5683 */ 5684 @Override 5685 public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) { 5686 // safeMode is volatile, and may be set to null at any time 5687 SafeModeInfo safeMode = this.safeMode; 5688 if (safeMode == null) 5689 return; 5690 safeMode.adjustBlockTotals(deltaSafe, deltaTotal); 5691 } 5692 5693 /** 5694 * Set the total number of blocks in the system. 5695 */ 5696 public void setBlockTotal() { 5697 // safeMode is volatile, and may be set to null at any time 5698 SafeModeInfo safeMode = this.safeMode; 5699 if (safeMode == null) 5700 return; 5701 safeMode.setBlockTotal((int)getCompleteBlocksTotal()); 5702 } 5703 5704 /** 5705 * Get the total number of blocks in the system. 5706 */ 5707 @Override // FSNamesystemMBean 5708 @Metric 5709 public long getBlocksTotal() { 5710 return blockManager.getTotalBlocks(); 5711 } 5712 5713 /** 5714 * Get the total number of COMPLETE blocks in the system. 5715 * For safe mode only complete blocks are counted. 5716 */ 5717 private long getCompleteBlocksTotal() { 5718 // Calculate number of blocks under construction 5719 long numUCBlocks = 0; 5720 readLock(); 5721 numUCBlocks = leaseManager.getNumUnderConstructionBlocks(); 5722 try { 5723 return getBlocksTotal() - numUCBlocks; 5724 } finally { 5725 readUnlock("getCompleteBlocksTotal"); 5726 } 5727 } 5728 5729 /** 5730 * Enter safe mode. If resourcesLow is false, then we assume it is manual 5731 * @throws IOException 5732 */ 5733 void enterSafeMode(boolean resourcesLow) throws IOException { 5734 writeLock(); 5735 try { 5736 // Stop the secret manager, since rolling the master key would 5737 // try to write to the edit log 5738 stopSecretManager(); 5739 5740 // Ensure that any concurrent operations have been fully synced 5741 // before entering safe mode. 
This ensures that the FSImage 5742 // is entirely stable on disk as soon as we're in safe mode. 5743 boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite(); 5744 // Before Editlog is in OpenForWrite mode, editLogStream will be null. So, 5745 // logSyncAll call can be called only when Edlitlog is in OpenForWrite mode 5746 if (isEditlogOpenForWrite) { 5747 getEditLog().logSyncAll(); 5748 } 5749 if (!isInSafeMode()) { 5750 safeMode = new SafeModeInfo(resourcesLow); 5751 return; 5752 } 5753 if (resourcesLow) { 5754 safeMode.setResourcesLow(); 5755 } else { 5756 safeMode.setManual(); 5757 } 5758 if (isEditlogOpenForWrite) { 5759 getEditLog().logSyncAll(); 5760 } 5761 NameNode.stateChangeLog.info("STATE* Safe mode is ON" 5762 + safeMode.getTurnOffTip()); 5763 } finally { 5764 writeUnlock("enterSafeMode"); 5765 } 5766 } 5767 5768 /** 5769 * Leave safe mode. 5770 */ 5771 void leaveSafeMode() { 5772 writeLock(); 5773 try { 5774 if (!isInSafeMode()) { 5775 NameNode.stateChangeLog.info("STATE* Safe mode is already OFF"); 5776 return; 5777 } 5778 safeMode.leave(); 5779 } finally { 5780 writeUnlock("leaveSafeMode"); 5781 } 5782 } 5783 5784 String getSafeModeTip() { 5785 // There is no need to take readLock. 5786 // Don't use isInSafeMode as this.safeMode might be set to null. 5787 // after isInSafeMode returns. 
5788 boolean inSafeMode; 5789 SafeModeInfo safeMode = this.safeMode; 5790 if (safeMode == null) { 5791 inSafeMode = false; 5792 } else { 5793 inSafeMode = safeMode.isOn(); 5794 } 5795 5796 if (!inSafeMode) { 5797 return ""; 5798 } else { 5799 return safeMode.getTurnOffTip(); 5800 } 5801 } 5802 5803 CheckpointSignature rollEditLog() throws IOException { 5804 checkSuperuserPrivilege(); 5805 checkOperation(OperationCategory.JOURNAL); 5806 writeLock(); 5807 try { 5808 checkOperation(OperationCategory.JOURNAL); 5809 checkNameNodeSafeMode("Log not rolled"); 5810 if (Server.isRpcInvocation()) { 5811 LOG.info("Roll Edit Log from " + Server.getRemoteAddress()); 5812 } 5813 return getFSImage().rollEditLog(); 5814 } finally { 5815 writeUnlock("rollEditLog"); 5816 } 5817 } 5818 5819 NamenodeCommand startCheckpoint(NamenodeRegistration backupNode, 5820 NamenodeRegistration activeNamenode) throws IOException { 5821 checkOperation(OperationCategory.CHECKPOINT); 5822 writeLock(); 5823 try { 5824 checkOperation(OperationCategory.CHECKPOINT); 5825 checkNameNodeSafeMode("Checkpoint not started"); 5826 5827 LOG.info("Start checkpoint for " + backupNode.getAddress()); 5828 NamenodeCommand cmd = getFSImage().startCheckpoint(backupNode, 5829 activeNamenode); 5830 getEditLog().logSync(); 5831 return cmd; 5832 } finally { 5833 writeUnlock("startCheckpoint"); 5834 } 5835 } 5836 5837 public void processIncrementalBlockReport(final DatanodeID nodeID, 5838 final StorageReceivedDeletedBlocks srdb) 5839 throws IOException { 5840 writeLock(); 5841 try { 5842 blockManager.processIncrementalBlockReport(nodeID, srdb); 5843 } finally { 5844 writeUnlock("processIncrementalBlockReport"); 5845 } 5846 } 5847 5848 void endCheckpoint(NamenodeRegistration registration, 5849 CheckpointSignature sig) throws IOException { 5850 checkOperation(OperationCategory.CHECKPOINT); 5851 readLock(); 5852 try { 5853 checkOperation(OperationCategory.CHECKPOINT); 5854 checkNameNodeSafeMode("Checkpoint not ended"); 5855 
LOG.info("End checkpoint for " + registration.getAddress()); 5856 getFSImage().endCheckpoint(sig); 5857 } finally { 5858 readUnlock("endCheckpoint"); 5859 } 5860 } 5861 5862 PermissionStatus createFsOwnerPermissions(FsPermission permission) { 5863 return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission); 5864 } 5865 5866 private void checkUnreadableBySuperuser(FSPermissionChecker pc, 5867 INode inode, int snapshotId) 5868 throws IOException { 5869 if (pc.isSuperUser()) { 5870 for (XAttr xattr : FSDirXAttrOp.getXAttrs(dir, inode, snapshotId)) { 5871 if (XAttrHelper.getPrefixName(xattr). 5872 equals(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER)) { 5873 throw new AccessControlException("Access is denied for " + 5874 pc.getUser() + " since the superuser is not allowed to " + 5875 "perform this operation."); 5876 } 5877 } 5878 } 5879 } 5880 5881 @Override 5882 public void checkSuperuserPrivilege() 5883 throws AccessControlException { 5884 if (isPermissionEnabled) { 5885 FSPermissionChecker pc = getPermissionChecker(); 5886 pc.checkSuperuserPrivilege(); 5887 } 5888 } 5889 5890 /** 5891 * Check to see if we have exceeded the limit on the number 5892 * of inodes. 5893 */ 5894 void checkFsObjectLimit() throws IOException { 5895 if (maxFsObjects != 0 && 5896 maxFsObjects <= dir.totalInodes() + getBlocksTotal()) { 5897 throw new IOException("Exceeded the configured number of objects " + 5898 maxFsObjects + " in the filesystem."); 5899 } 5900 } 5901 5902 /** 5903 * Get the total number of objects in the system. 5904 */ 5905 @Override // FSNamesystemMBean 5906 public long getMaxObjects() { 5907 return maxFsObjects; 5908 } 5909 5910 @Override // FSNamesystemMBean 5911 @Metric 5912 public long getFilesTotal() { 5913 // There is no need to take fSNamesystem's lock as 5914 // FSDirectory has its own lock. 
5915 return this.dir.totalInodes(); 5916 } 5917 5918 @Override // FSNamesystemMBean 5919 @Metric 5920 public long getPendingReplicationBlocks() { 5921 return blockManager.getPendingReplicationBlocksCount(); 5922 } 5923 5924 @Override // FSNamesystemMBean 5925 @Metric 5926 public long getUnderReplicatedBlocks() { 5927 return blockManager.getUnderReplicatedBlocksCount(); 5928 } 5929 5930 /** Returns number of blocks with corrupt replicas */ 5931 @Metric({"CorruptBlocks", "Number of blocks with corrupt replicas"}) 5932 public long getCorruptReplicaBlocks() { 5933 return blockManager.getCorruptReplicaBlocksCount(); 5934 } 5935 5936 @Override // FSNamesystemMBean 5937 @Metric 5938 public long getScheduledReplicationBlocks() { 5939 return blockManager.getScheduledReplicationBlocksCount(); 5940 } 5941 5942 @Override 5943 @Metric 5944 public long getPendingDeletionBlocks() { 5945 return blockManager.getPendingDeletionBlocksCount(); 5946 } 5947 5948 @Override 5949 public long getBlockDeletionStartTime() { 5950 return startTime + blockManager.getStartupDelayBlockDeletionInMs(); 5951 } 5952 5953 @Metric 5954 public long getExcessBlocks() { 5955 return blockManager.getExcessBlocksCount(); 5956 } 5957 5958 // HA-only metric 5959 @Metric 5960 public long getPostponedMisreplicatedBlocks() { 5961 return blockManager.getPostponedMisreplicatedBlocksCount(); 5962 } 5963 5964 // HA-only metric 5965 @Metric 5966 public int getPendingDataNodeMessageCount() { 5967 return blockManager.getPendingDataNodeMessageCount(); 5968 } 5969 5970 // HA-only metric 5971 @Metric 5972 public String getHAState() { 5973 return haContext.getState().toString(); 5974 } 5975 5976 // HA-only metric 5977 @Metric 5978 public long getMillisSinceLastLoadedEdits() { 5979 if (isInStandbyState() && editLogTailer != null) { 5980 return monotonicNow() - editLogTailer.getLastLoadTimeMs(); 5981 } else { 5982 return 0; 5983 } 5984 } 5985 5986 @Metric 5987 public int getBlockCapacity() { 5988 return 
blockManager.getCapacity(); 5989 } 5990 5991 @Override // FSNamesystemMBean 5992 public String getFSState() { 5993 return isInSafeMode() ? "safeMode" : "Operational"; 5994 } 5995 5996 private ObjectName mbeanName; 5997 private ObjectName mxbeanName; 5998 5999 /** 6000 * Register the FSNamesystem MBean using the name 6001 * "hadoop:service=NameNode,name=FSNamesystemState" 6002 */ 6003 private void registerMBean() { 6004 // We can only implement one MXBean interface, so we keep the old one. 6005 try { 6006 StandardMBean bean = new StandardMBean(this, FSNamesystemMBean.class); 6007 mbeanName = MBeans.register("NameNode", "FSNamesystemState", bean); 6008 } catch (NotCompliantMBeanException e) { 6009 throw new RuntimeException("Bad MBean setup", e); 6010 } 6011 6012 LOG.info("Registered FSNamesystemState MBean"); 6013 } 6014 6015 /** 6016 * shutdown FSNamesystem 6017 */ 6018 void shutdown() { 6019 if (snapshotManager != null) { 6020 snapshotManager.shutdown(); 6021 } 6022 if (mbeanName != null) { 6023 MBeans.unregister(mbeanName); 6024 mbeanName = null; 6025 } 6026 if (mxbeanName != null) { 6027 MBeans.unregister(mxbeanName); 6028 mxbeanName = null; 6029 } 6030 if (dir != null) { 6031 dir.shutdown(); 6032 } 6033 if (blockManager != null) { 6034 blockManager.shutdown(); 6035 } 6036 } 6037 6038 @Override // FSNamesystemMBean 6039 @Metric({"NumLiveDataNodes", "Number of datanodes which are currently live"}) 6040 public int getNumLiveDataNodes() { 6041 return getBlockManager().getDatanodeManager().getNumLiveDataNodes(); 6042 } 6043 6044 @Override // FSNamesystemMBean 6045 @Metric({"NumDeadDataNodes", "Number of datanodes which are currently dead"}) 6046 public int getNumDeadDataNodes() { 6047 return getBlockManager().getDatanodeManager().getNumDeadDataNodes(); 6048 } 6049 6050 @Override // FSNamesystemMBean 6051 @Metric({"NumDecomLiveDataNodes", 6052 "Number of datanodes which have been decommissioned and are now live"}) 6053 public int getNumDecomLiveDataNodes() { 6054 
final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6055 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); 6056 int liveDecommissioned = 0; 6057 for (DatanodeDescriptor node : live) { 6058 liveDecommissioned += node.isDecommissioned() ? 1 : 0; 6059 } 6060 return liveDecommissioned; 6061 } 6062 6063 @Override // FSNamesystemMBean 6064 @Metric({"NumDecomDeadDataNodes", 6065 "Number of datanodes which have been decommissioned and are now dead"}) 6066 public int getNumDecomDeadDataNodes() { 6067 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>(); 6068 getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, false); 6069 int deadDecommissioned = 0; 6070 for (DatanodeDescriptor node : dead) { 6071 deadDecommissioned += node.isDecommissioned() ? 1 : 0; 6072 } 6073 return deadDecommissioned; 6074 } 6075 6076 @Override // FSNamesystemMBean 6077 @Metric({"VolumeFailuresTotal", 6078 "Total number of volume failures across all Datanodes"}) 6079 public int getVolumeFailuresTotal() { 6080 List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6081 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); 6082 int volumeFailuresTotal = 0; 6083 for (DatanodeDescriptor node: live) { 6084 volumeFailuresTotal += node.getVolumeFailures(); 6085 } 6086 return volumeFailuresTotal; 6087 } 6088 6089 @Override // FSNamesystemMBean 6090 @Metric({"EstimatedCapacityLostTotal", 6091 "An estimate of the total capacity lost due to volume failures"}) 6092 public long getEstimatedCapacityLostTotal() { 6093 List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6094 getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); 6095 long estimatedCapacityLostTotal = 0; 6096 for (DatanodeDescriptor node: live) { 6097 VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary(); 6098 if (volumeFailureSummary != null) { 6099 estimatedCapacityLostTotal += 
6100 volumeFailureSummary.getEstimatedCapacityLostTotal(); 6101 } 6102 } 6103 return estimatedCapacityLostTotal; 6104 } 6105 6106 @Override // FSNamesystemMBean 6107 @Metric({"NumDecommissioningDataNodes", 6108 "Number of datanodes in decommissioning state"}) 6109 public int getNumDecommissioningDataNodes() { 6110 return getBlockManager().getDatanodeManager().getDecommissioningNodes() 6111 .size(); 6112 } 6113 6114 @Override // FSNamesystemMBean 6115 @Metric({"StaleDataNodes", 6116 "Number of datanodes marked stale due to delayed heartbeat"}) 6117 public int getNumStaleDataNodes() { 6118 return getBlockManager().getDatanodeManager().getNumStaleNodes(); 6119 } 6120 6121 /** 6122 * Storages are marked as "content stale" after NN restart or fails over and 6123 * before NN receives the first Heartbeat followed by the first Blockreport. 6124 */ 6125 @Override // FSNamesystemMBean 6126 @Metric({"NumStaleStorages", 6127 "Number of storages marked as content stale"}) 6128 public int getNumStaleStorages() { 6129 return getBlockManager().getDatanodeManager().getNumStaleStorages(); 6130 } 6131 6132 @Override // FSNamesystemMBean 6133 public String getTopUserOpCounts() { 6134 if (!topConf.isEnabled) { 6135 return null; 6136 } 6137 6138 Date now = new Date(); 6139 final List<RollingWindowManager.TopWindow> topWindows = 6140 topMetrics.getTopWindows(); 6141 Map<String, Object> topMap = new TreeMap<String, Object>(); 6142 topMap.put("windows", topWindows); 6143 topMap.put("timestamp", DFSUtil.dateToIso8601String(now)); 6144 ObjectMapper mapper = new ObjectMapper(); 6145 try { 6146 return mapper.writeValueAsString(topMap); 6147 } catch (IOException e) { 6148 LOG.warn("Failed to fetch TopUser metrics", e); 6149 } 6150 return null; 6151 } 6152 6153 /** 6154 * Increments, logs and then returns the stamp 6155 */ 6156 long nextGenerationStamp(boolean legacyBlock) 6157 throws IOException, SafeModeException { 6158 assert hasWriteLock(); 6159 checkNameNodeSafeMode("Cannot get next 
generation stamp"); 6160 6161 long gs = blockIdManager.nextGenerationStamp(legacyBlock); 6162 if (legacyBlock) { 6163 getEditLog().logGenerationStampV1(gs); 6164 } else { 6165 getEditLog().logGenerationStampV2(gs); 6166 } 6167 6168 // NB: callers sync the log 6169 return gs; 6170 } 6171 6172 /** 6173 * Increments, logs and then returns the block ID 6174 */ 6175 private long nextBlockId() throws IOException { 6176 assert hasWriteLock(); 6177 checkNameNodeSafeMode("Cannot get next block ID"); 6178 final long blockId = blockIdManager.nextBlockId(); 6179 getEditLog().logAllocateBlockId(blockId); 6180 // NB: callers sync the log 6181 return blockId; 6182 } 6183 6184 private boolean isFileDeleted(INodeFile file) { 6185 // Not in the inodeMap or in the snapshot but marked deleted. 6186 if (dir.getInode(file.getId()) == null) { 6187 return true; 6188 } 6189 6190 // look at the path hierarchy to see if one parent is deleted by recursive 6191 // deletion 6192 INode tmpChild = file; 6193 INodeDirectory tmpParent = file.getParent(); 6194 while (true) { 6195 if (tmpParent == null) { 6196 return true; 6197 } 6198 6199 INode childINode = tmpParent.getChild(tmpChild.getLocalNameBytes(), 6200 Snapshot.CURRENT_STATE_ID); 6201 if (childINode == null || !childINode.equals(tmpChild)) { 6202 // a newly created INode with the same name as an already deleted one 6203 // would be a different INode than the deleted one 6204 return true; 6205 } 6206 6207 if (tmpParent.isRoot()) { 6208 break; 6209 } 6210 6211 tmpChild = tmpParent; 6212 tmpParent = tmpParent.getParent(); 6213 } 6214 6215 if (file.isWithSnapshot() && 6216 file.getFileWithSnapshotFeature().isCurrentFileDeleted()) { 6217 return true; 6218 } 6219 return false; 6220 } 6221 6222 private INodeFile checkUCBlock(ExtendedBlock block, 6223 String clientName) throws IOException { 6224 assert hasWriteLock(); 6225 checkNameNodeSafeMode("Cannot get a new generation stamp and an " 6226 + "access token for block " + block); 6227 6228 // check 
stored block state 6229 BlockInfoContiguous storedBlock = getStoredBlock(ExtendedBlock.getLocalBlock(block)); 6230 if (storedBlock == null || 6231 storedBlock.getBlockUCState() != BlockUCState.UNDER_CONSTRUCTION) { 6232 throw new IOException(block + 6233 " does not exist or is not under Construction" + storedBlock); 6234 } 6235 6236 // check file inode 6237 final INodeFile file = ((INode)storedBlock.getBlockCollection()).asFile(); 6238 if (file == null || !file.isUnderConstruction() || isFileDeleted(file)) { 6239 throw new IOException("The file " + storedBlock + 6240 " belonged to does not exist or it is not under construction."); 6241 } 6242 6243 // check lease 6244 if (clientName == null 6245 || !clientName.equals(file.getFileUnderConstructionFeature() 6246 .getClientName())) { 6247 throw new LeaseExpiredException("Lease mismatch: " + block + 6248 " is accessed by a non lease holder " + clientName); 6249 } 6250 6251 return file; 6252 } 6253 6254 /** 6255 * Client is reporting some bad block locations. 6256 */ 6257 void reportBadBlocks(LocatedBlock[] blocks) throws IOException { 6258 checkOperation(OperationCategory.WRITE); 6259 writeLock(); 6260 try { 6261 checkOperation(OperationCategory.WRITE); 6262 for (int i = 0; i < blocks.length; i++) { 6263 ExtendedBlock blk = blocks[i].getBlock(); 6264 DatanodeInfo[] nodes = blocks[i].getLocations(); 6265 String[] storageIDs = blocks[i].getStorageIDs(); 6266 for (int j = 0; j < nodes.length; j++) { 6267 NameNode.stateChangeLog.info("*DIR* reportBadBlocks for block: {} on" 6268 + " datanode: {}", blk, nodes[j].getXferAddr()); 6269 blockManager.findAndMarkBlockAsCorrupt(blk, nodes[j], 6270 storageIDs == null ? 
null: storageIDs[j], 6271 "client machine reported it"); 6272 } 6273 } 6274 } finally { 6275 writeUnlock("reportBadBlocks"); 6276 } 6277 } 6278 6279 /** 6280 * Get a new generation stamp together with an access token for 6281 * a block under construction 6282 * 6283 * This method is called for recovering a failed pipeline or setting up 6284 * a pipeline to append to a block. 6285 * 6286 * @param block a block 6287 * @param clientName the name of a client 6288 * @return a located block with a new generation stamp and an access token 6289 * @throws IOException if any error occurs 6290 */ 6291 LocatedBlock updateBlockForPipeline(ExtendedBlock block, 6292 String clientName) throws IOException { 6293 LocatedBlock locatedBlock; 6294 checkOperation(OperationCategory.WRITE); 6295 writeLock(); 6296 try { 6297 checkOperation(OperationCategory.WRITE); 6298 6299 // check vadility of parameters 6300 checkUCBlock(block, clientName); 6301 6302 // get a new generation stamp and an access token 6303 block.setGenerationStamp(nextGenerationStamp(blockIdManager.isLegacyBlock(block.getLocalBlock()))); 6304 locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]); 6305 blockManager.setBlockToken(locatedBlock, AccessMode.WRITE); 6306 } finally { 6307 writeUnlock("bumpBlockGenerationStamp"); 6308 } 6309 // Ensure we record the new generation stamp 6310 getEditLog().logSync(); 6311 return locatedBlock; 6312 } 6313 6314 /** 6315 * Update a pipeline for a block under construction 6316 * 6317 * @param clientName the name of the client 6318 * @param oldBlock and old block 6319 * @param newBlock a new block with a new generation stamp and length 6320 * @param newNodes datanodes in the pipeline 6321 * @throws IOException if any error occurs 6322 */ 6323 void updatePipeline( 6324 String clientName, ExtendedBlock oldBlock, ExtendedBlock newBlock, 6325 DatanodeID[] newNodes, String[] newStorageIDs, boolean logRetryCache) 6326 throws IOException { 6327 LOG.info("updatePipeline(" + 
oldBlock.getLocalBlock() 6328 + ", newGS=" + newBlock.getGenerationStamp() 6329 + ", newLength=" + newBlock.getNumBytes() 6330 + ", newNodes=" + Arrays.asList(newNodes) 6331 + ", client=" + clientName 6332 + ")"); 6333 waitForLoadingFSImage(); 6334 writeLock(); 6335 try { 6336 checkOperation(OperationCategory.WRITE); 6337 checkNameNodeSafeMode("Pipeline not updated"); 6338 assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and " 6339 + oldBlock + " has different block identifier"; 6340 updatePipelineInternal(clientName, oldBlock, newBlock, newNodes, 6341 newStorageIDs, logRetryCache); 6342 } finally { 6343 writeUnlock("updatePipeline"); 6344 } 6345 getEditLog().logSync(); 6346 LOG.info("updatePipeline(" + oldBlock.getLocalBlock() + " => " 6347 + newBlock.getLocalBlock() + ") success"); 6348 } 6349 6350 private void updatePipelineInternal(String clientName, ExtendedBlock oldBlock, 6351 ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs, 6352 boolean logRetryCache) 6353 throws IOException { 6354 assert hasWriteLock(); 6355 // check the vadility of the block and lease holder name 6356 final INodeFile pendingFile = checkUCBlock(oldBlock, clientName); 6357 final String src = pendingFile.getFullPathName(); 6358 final BlockInfoContiguousUnderConstruction blockinfo 6359 = (BlockInfoContiguousUnderConstruction)pendingFile.getLastBlock(); 6360 6361 // check new GS & length: this is not expected 6362 if (newBlock.getGenerationStamp() <= blockinfo.getGenerationStamp() || 6363 newBlock.getNumBytes() < blockinfo.getNumBytes()) { 6364 String msg = "Update " + oldBlock + " (len = " + 6365 blockinfo.getNumBytes() + ") to an older state: " + newBlock + 6366 " (len = " + newBlock.getNumBytes() +")"; 6367 LOG.warn(msg); 6368 throw new IOException(msg); 6369 } 6370 6371 // Update old block with the new generation stamp and new length 6372 blockManager.updateLastBlock(blockinfo, newBlock); 6373 6374 // find the DatanodeDescriptor objects 6375 final 
DatanodeStorageInfo[] storages = blockManager.getDatanodeManager() 6376 .getDatanodeStorageInfos(newNodes, newStorageIDs, 6377 "src=%s, oldBlock=%s, newBlock=%s, clientName=%s", 6378 src, oldBlock, newBlock, clientName); 6379 blockinfo.setExpectedLocations(storages); 6380 6381 persistBlocks(src, pendingFile, logRetryCache); 6382 } 6383 6384 // rename was successful. If any part of the renamed subtree had 6385 // files that were being written to, update with new filename. 6386 void unprotectedChangeLease(String src, String dst) { 6387 assert hasWriteLock(); 6388 leaseManager.changeLease(src, dst); 6389 } 6390 6391 /** 6392 * Serializes leases. 6393 */ 6394 void saveFilesUnderConstruction(DataOutputStream out, 6395 Map<Long, INodeFile> snapshotUCMap) throws IOException { 6396 // This is run by an inferior thread of saveNamespace, which holds a read 6397 // lock on our behalf. If we took the read lock here, we could block 6398 // for fairness if a writer is waiting on the lock. 6399 synchronized (leaseManager) { 6400 Map<String, INodeFile> nodes = leaseManager.getINodesUnderConstruction(); 6401 for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) { 6402 // TODO: for HDFS-5428, because of rename operations, some 6403 // under-construction files that are 6404 // in the current fs directory can also be captured in the 6405 // snapshotUCMap. We should remove them from the snapshotUCMap. 
6406 snapshotUCMap.remove(entry.getValue().getId()); 6407 } 6408 6409 out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size 6410 for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) { 6411 FSImageSerialization.writeINodeUnderConstruction( 6412 out, entry.getValue(), entry.getKey()); 6413 } 6414 for (Map.Entry<Long, INodeFile> entry : snapshotUCMap.entrySet()) { 6415 // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>" 6416 // as their paths 6417 StringBuilder b = new StringBuilder(); 6418 b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX) 6419 .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING) 6420 .append(Path.SEPARATOR).append(entry.getValue().getId()); 6421 FSImageSerialization.writeINodeUnderConstruction( 6422 out, entry.getValue(), b.toString()); 6423 } 6424 } 6425 } 6426 6427 /** 6428 * @return all the under-construction files in the lease map 6429 */ 6430 Map<String, INodeFile> getFilesUnderConstruction() { 6431 synchronized (leaseManager) { 6432 return leaseManager.getINodesUnderConstruction(); 6433 } 6434 } 6435 6436 /** 6437 * Register a Backup name-node, verifying that it belongs 6438 * to the correct namespace, and adding it to the set of 6439 * active journals if necessary. 
6440 * 6441 * @param bnReg registration of the new BackupNode 6442 * @param nnReg registration of this NameNode 6443 * @throws IOException if the namespace IDs do not match 6444 */ 6445 void registerBackupNode(NamenodeRegistration bnReg, 6446 NamenodeRegistration nnReg) throws IOException { 6447 writeLock(); 6448 try { 6449 if(getFSImage().getStorage().getNamespaceID() 6450 != bnReg.getNamespaceID()) 6451 throw new IOException("Incompatible namespaceIDs: " 6452 + " Namenode namespaceID = " 6453 + getFSImage().getStorage().getNamespaceID() + "; " 6454 + bnReg.getRole() + 6455 " node namespaceID = " + bnReg.getNamespaceID()); 6456 if (bnReg.getRole() == NamenodeRole.BACKUP) { 6457 getFSImage().getEditLog().registerBackupNode( 6458 bnReg, nnReg); 6459 } 6460 } finally { 6461 writeUnlock("registerBackupNode"); 6462 } 6463 } 6464 6465 /** 6466 * Release (unregister) backup node. 6467 * <p> 6468 * Find and remove the backup stream corresponding to the node. 6469 * @throws IOException 6470 */ 6471 void releaseBackupNode(NamenodeRegistration registration) 6472 throws IOException { 6473 checkOperation(OperationCategory.WRITE); 6474 writeLock(); 6475 try { 6476 checkOperation(OperationCategory.WRITE); 6477 if(getFSImage().getStorage().getNamespaceID() 6478 != registration.getNamespaceID()) 6479 throw new IOException("Incompatible namespaceIDs: " 6480 + " Namenode namespaceID = " 6481 + getFSImage().getStorage().getNamespaceID() + "; " 6482 + registration.getRole() + 6483 " node namespaceID = " + registration.getNamespaceID()); 6484 getEditLog().releaseBackupStream(registration); 6485 } finally { 6486 writeUnlock("releaseBackupNode"); 6487 } 6488 } 6489 6490 static class CorruptFileBlockInfo { 6491 final String path; 6492 final Block block; 6493 6494 public CorruptFileBlockInfo(String p, Block b) { 6495 path = p; 6496 block = b; 6497 } 6498 6499 @Override 6500 public String toString() { 6501 return block.getBlockName() + "\t" + path; 6502 } 6503 } 6504 /** 6505 * @param path 
Restrict corrupt files to this portion of namespace. 6506 * @param cookieTab Support for continuation; cookieTab tells where 6507 * to start from 6508 * @return a list in which each entry describes a corrupt file/block 6509 * @throws IOException 6510 */ 6511 Collection<CorruptFileBlockInfo> listCorruptFileBlocks(String path, 6512 String[] cookieTab) throws IOException { 6513 checkSuperuserPrivilege(); 6514 checkOperation(OperationCategory.READ); 6515 6516 int count = 0; 6517 ArrayList<CorruptFileBlockInfo> corruptFiles = 6518 new ArrayList<CorruptFileBlockInfo>(); 6519 if (cookieTab == null) { 6520 cookieTab = new String[] { null }; 6521 } 6522 6523 // Do a quick check if there are any corrupt files without taking the lock 6524 if (blockManager.getMissingBlocksCount() == 0) { 6525 if (cookieTab[0] == null) { 6526 cookieTab[0] = String.valueOf(getIntCookie(cookieTab[0])); 6527 } 6528 if (LOG.isDebugEnabled()) { 6529 LOG.debug("there are no corrupt file blocks."); 6530 } 6531 return corruptFiles; 6532 } 6533 6534 readLock(); 6535 try { 6536 checkOperation(OperationCategory.READ); 6537 if (!isPopulatingReplQueues()) { 6538 throw new IOException("Cannot run listCorruptFileBlocks because " + 6539 "replication queues have not been initialized."); 6540 } 6541 // print a limited # of corrupt files per call 6542 6543 final Iterator<Block> blkIterator = blockManager.getCorruptReplicaBlockIterator(); 6544 6545 int skip = getIntCookie(cookieTab[0]); 6546 for (int i = 0; i < skip && blkIterator.hasNext(); i++) { 6547 blkIterator.next(); 6548 } 6549 6550 while (blkIterator.hasNext()) { 6551 Block blk = blkIterator.next(); 6552 final INode inode = (INode)blockManager.getBlockCollection(blk); 6553 skip++; 6554 if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) { 6555 String src = inode.getFullPathName(); 6556 if (src.startsWith(path)){ 6557 corruptFiles.add(new CorruptFileBlockInfo(src, blk)); 6558 count++; 6559 if (count >= 
DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED) 6560 break; 6561 } 6562 } 6563 } 6564 cookieTab[0] = String.valueOf(skip); 6565 if (LOG.isDebugEnabled()) { 6566 LOG.debug("list corrupt file blocks returned: " + count); 6567 } 6568 return corruptFiles; 6569 } finally { 6570 readUnlock("listCorruptFileBlocks"); 6571 } 6572 } 6573 6574 /** 6575 * Convert string cookie to integer. 6576 */ 6577 private static int getIntCookie(String cookie){ 6578 int c; 6579 if(cookie == null){ 6580 c = 0; 6581 } else { 6582 try{ 6583 c = Integer.parseInt(cookie); 6584 }catch (NumberFormatException e) { 6585 c = 0; 6586 } 6587 } 6588 c = Math.max(0, c); 6589 return c; 6590 } 6591 6592 /** 6593 * Create delegation token secret manager 6594 */ 6595 private DelegationTokenSecretManager createDelegationTokenSecretManager( 6596 Configuration conf) { 6597 return new DelegationTokenSecretManager(conf.getLong( 6598 DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY, 6599 DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT), 6600 conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 6601 DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT), 6602 conf.getLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 6603 DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT), 6604 DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL, 6605 conf.getBoolean(DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY, 6606 DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT), 6607 this); 6608 } 6609 6610 /** 6611 * Returns the DelegationTokenSecretManager instance in the namesystem. 
6612 * @return delegation token secret manager object 6613 */ 6614 DelegationTokenSecretManager getDelegationTokenSecretManager() { 6615 return dtSecretManager; 6616 } 6617 6618 /** 6619 * @param renewer Renewer information 6620 * @return delegation toek 6621 * @throws IOException on error 6622 */ 6623 Token<DelegationTokenIdentifier> getDelegationToken(Text renewer) 6624 throws IOException { 6625 Token<DelegationTokenIdentifier> token; 6626 checkOperation(OperationCategory.WRITE); 6627 writeLock(); 6628 try { 6629 checkOperation(OperationCategory.WRITE); 6630 checkNameNodeSafeMode("Cannot issue delegation token"); 6631 if (!isAllowedDelegationTokenOp()) { 6632 throw new IOException( 6633 "Delegation Token can be issued only with kerberos or web authentication"); 6634 } 6635 if (dtSecretManager == null || !dtSecretManager.isRunning()) { 6636 LOG.warn("trying to get DT with no secret manager running"); 6637 return null; 6638 } 6639 6640 UserGroupInformation ugi = getRemoteUser(); 6641 String user = ugi.getUserName(); 6642 Text owner = new Text(user); 6643 Text realUser = null; 6644 if (ugi.getRealUser() != null) { 6645 realUser = new Text(ugi.getRealUser().getUserName()); 6646 } 6647 DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(owner, 6648 renewer, realUser); 6649 token = new Token<DelegationTokenIdentifier>( 6650 dtId, dtSecretManager); 6651 long expiryTime = dtSecretManager.getTokenExpiryTime(dtId); 6652 getEditLog().logGetDelegationToken(dtId, expiryTime); 6653 } finally { 6654 writeUnlock("getDelegationToken"); 6655 } 6656 getEditLog().logSync(); 6657 return token; 6658 } 6659 6660 /** 6661 * 6662 * @param token token to renew 6663 * @return new expiryTime of the token 6664 * @throws InvalidToken if {@code token} is invalid 6665 * @throws IOException on other errors 6666 */ 6667 long renewDelegationToken(Token<DelegationTokenIdentifier> token) 6668 throws InvalidToken, IOException { 6669 long expiryTime; 6670 
checkOperation(OperationCategory.WRITE); 6671 writeLock(); 6672 try { 6673 checkOperation(OperationCategory.WRITE); 6674 6675 checkNameNodeSafeMode("Cannot renew delegation token"); 6676 if (!isAllowedDelegationTokenOp()) { 6677 throw new IOException( 6678 "Delegation Token can be renewed only with kerberos or web authentication"); 6679 } 6680 String renewer = getRemoteUser().getShortUserName(); 6681 expiryTime = dtSecretManager.renewToken(token, renewer); 6682 DelegationTokenIdentifier id = new DelegationTokenIdentifier(); 6683 ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); 6684 DataInputStream in = new DataInputStream(buf); 6685 id.readFields(in); 6686 getEditLog().logRenewDelegationToken(id, expiryTime); 6687 } finally { 6688 writeUnlock("renewDelegationToken"); 6689 } 6690 getEditLog().logSync(); 6691 return expiryTime; 6692 } 6693 6694 /** 6695 * 6696 * @param token token to cancel 6697 * @throws IOException on error 6698 */ 6699 void cancelDelegationToken(Token<DelegationTokenIdentifier> token) 6700 throws IOException { 6701 checkOperation(OperationCategory.WRITE); 6702 writeLock(); 6703 try { 6704 checkOperation(OperationCategory.WRITE); 6705 6706 checkNameNodeSafeMode("Cannot cancel delegation token"); 6707 String canceller = getRemoteUser().getUserName(); 6708 DelegationTokenIdentifier id = dtSecretManager 6709 .cancelToken(token, canceller); 6710 getEditLog().logCancelDelegationToken(id); 6711 } finally { 6712 writeUnlock("cancelDelegationToken"); 6713 } 6714 getEditLog().logSync(); 6715 } 6716 6717 /** 6718 * @param out save state of the secret manager 6719 * @param sdPath String storage directory path 6720 */ 6721 void saveSecretManagerStateCompat(DataOutputStream out, String sdPath) 6722 throws IOException { 6723 dtSecretManager.saveSecretManagerStateCompat(out, sdPath); 6724 } 6725 6726 SecretManagerState saveSecretManagerState() { 6727 return dtSecretManager.saveSecretManagerState(); 6728 } 6729 6730 /** 6731 * @param in 
load the state of secret manager from input stream 6732 */ 6733 void loadSecretManagerStateCompat(DataInput in) throws IOException { 6734 dtSecretManager.loadSecretManagerStateCompat(in); 6735 } 6736 6737 void loadSecretManagerState(SecretManagerSection s, 6738 List<SecretManagerSection.DelegationKey> keys, 6739 List<SecretManagerSection.PersistToken> tokens) throws IOException { 6740 dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens)); 6741 } 6742 6743 /** 6744 * Log the updateMasterKey operation to edit logs 6745 * 6746 * @param key new delegation key. 6747 */ 6748 public void logUpdateMasterKey(DelegationKey key) { 6749 6750 assert !isInSafeMode() : 6751 "this should never be called while in safemode, since we stop " + 6752 "the DT manager before entering safemode!"; 6753 // No need to hold FSN lock since we don't access any internal 6754 // structures, and this is stopped before the FSN shuts itself 6755 // down, etc. 6756 getEditLog().logUpdateMasterKey(key); 6757 getEditLog().logSync(); 6758 } 6759 6760 /** 6761 * Log the cancellation of expired tokens to edit logs 6762 * 6763 * @param id token identifier to cancel 6764 */ 6765 public void logExpireDelegationToken(DelegationTokenIdentifier id) { 6766 assert !isInSafeMode() : 6767 "this should never be called while in safemode, since we stop " + 6768 "the DT manager before entering safemode!"; 6769 // No need to hold FSN lock since we don't access any internal 6770 // structures, and this is stopped before the FSN shuts itself 6771 // down, etc. 
6772 getEditLog().logCancelDelegationToken(id); 6773 } 6774 6775 private void logReassignLease(String leaseHolder, String src, 6776 String newHolder) { 6777 assert hasWriteLock(); 6778 getEditLog().logReassignLease(leaseHolder, src, newHolder); 6779 } 6780 6781 /** 6782 * 6783 * @return true if delegation token operation is allowed 6784 */ 6785 private boolean isAllowedDelegationTokenOp() throws IOException { 6786 AuthenticationMethod authMethod = getConnectionAuthenticationMethod(); 6787 if (UserGroupInformation.isSecurityEnabled() 6788 && (authMethod != AuthenticationMethod.KERBEROS) 6789 && (authMethod != AuthenticationMethod.KERBEROS_SSL) 6790 && (authMethod != AuthenticationMethod.CERTIFICATE)) { 6791 return false; 6792 } 6793 return true; 6794 } 6795 6796 /** 6797 * Returns authentication method used to establish the connection 6798 * @return AuthenticationMethod used to establish connection 6799 * @throws IOException 6800 */ 6801 private AuthenticationMethod getConnectionAuthenticationMethod() 6802 throws IOException { 6803 UserGroupInformation ugi = getRemoteUser(); 6804 AuthenticationMethod authMethod = ugi.getAuthenticationMethod(); 6805 if (authMethod == AuthenticationMethod.PROXY) { 6806 authMethod = ugi.getRealUser().getAuthenticationMethod(); 6807 } 6808 return authMethod; 6809 } 6810 6811 /** 6812 * Client invoked methods are invoked over RPC and will be in 6813 * RPC call context even if the client exits. 
6814 */ 6815 boolean isExternalInvocation() { 6816 return Server.isRpcInvocation() || NamenodeWebHdfsMethods.isWebHdfsInvocation(); 6817 } 6818 6819 private static InetAddress getRemoteIp() { 6820 InetAddress ip = Server.getRemoteIp(); 6821 if (ip != null) { 6822 return ip; 6823 } 6824 return NamenodeWebHdfsMethods.getRemoteIp(); 6825 } 6826 6827 // optimize ugi lookup for RPC operations to avoid a trip through 6828 // UGI.getCurrentUser which is synch'ed 6829 private static UserGroupInformation getRemoteUser() throws IOException { 6830 return NameNode.getRemoteUser(); 6831 } 6832 6833 /** 6834 * Log fsck event in the audit log 6835 */ 6836 void logFsckEvent(String src, InetAddress remoteAddress) throws IOException { 6837 if (isAuditEnabled()) { 6838 logAuditEvent(true, getRemoteUser(), 6839 remoteAddress, 6840 "fsck", src, null, null); 6841 } 6842 } 6843 /** 6844 * Register NameNodeMXBean 6845 */ 6846 private void registerMXBean() { 6847 mxbeanName = MBeans.register("NameNode", "NameNodeInfo", this); 6848 } 6849 6850 /** 6851 * Class representing Namenode information for JMX interfaces 6852 */ 6853 @Override // NameNodeMXBean 6854 public String getVersion() { 6855 return VersionInfo.getVersion() + ", r" + VersionInfo.getRevision(); 6856 } 6857 6858 @Override // NameNodeMXBean 6859 public long getUsed() { 6860 return this.getCapacityUsed(); 6861 } 6862 6863 @Override // NameNodeMXBean 6864 public long getFree() { 6865 return this.getCapacityRemaining(); 6866 } 6867 6868 @Override // NameNodeMXBean 6869 public long getTotal() { 6870 return this.getCapacityTotal(); 6871 } 6872 6873 @Override // NameNodeMXBean 6874 public String getSafemode() { 6875 if (!this.isInSafeMode()) 6876 return ""; 6877 return "Safe mode is ON. 
" + this.getSafeModeTip(); 6878 } 6879 6880 @Override // NameNodeMXBean 6881 public boolean isUpgradeFinalized() { 6882 return this.getFSImage().isUpgradeFinalized(); 6883 } 6884 6885 @Override // NameNodeMXBean 6886 public long getNonDfsUsedSpace() { 6887 return datanodeStatistics.getCapacityUsedNonDFS(); 6888 } 6889 6890 @Override // NameNodeMXBean 6891 public float getPercentUsed() { 6892 return datanodeStatistics.getCapacityUsedPercent(); 6893 } 6894 6895 @Override // NameNodeMXBean 6896 public long getBlockPoolUsedSpace() { 6897 return datanodeStatistics.getBlockPoolUsed(); 6898 } 6899 6900 @Override // NameNodeMXBean 6901 public float getPercentBlockPoolUsed() { 6902 return datanodeStatistics.getPercentBlockPoolUsed(); 6903 } 6904 6905 @Override // NameNodeMXBean 6906 public float getPercentRemaining() { 6907 return datanodeStatistics.getCapacityRemainingPercent(); 6908 } 6909 6910 @Override // NameNodeMXBean 6911 public long getCacheCapacity() { 6912 return datanodeStatistics.getCacheCapacity(); 6913 } 6914 6915 @Override // NameNodeMXBean 6916 public long getCacheUsed() { 6917 return datanodeStatistics.getCacheUsed(); 6918 } 6919 6920 @Override // NameNodeMXBean 6921 public long getTotalBlocks() { 6922 return getBlocksTotal(); 6923 } 6924 6925 @Override // NameNodeMXBean 6926 @Metric 6927 public long getTotalFiles() { 6928 return getFilesTotal(); 6929 } 6930 6931 @Override // NameNodeMXBean 6932 public long getNumberOfMissingBlocks() { 6933 return getMissingBlocksCount(); 6934 } 6935 6936 @Override // NameNodeMXBean 6937 public long getNumberOfMissingBlocksWithReplicationFactorOne() { 6938 return getMissingReplOneBlocksCount(); 6939 } 6940 6941 @Override // NameNodeMXBean 6942 public int getThreads() { 6943 return ManagementFactory.getThreadMXBean().getThreadCount(); 6944 } 6945 6946 /** 6947 * Returned information is a JSON representation of map with host name as the 6948 * key and value is a map of live node attribute keys to its values 6949 */ 6950 
@Override // NameNodeMXBean 6951 public String getLiveNodes() { 6952 final Map<String, Map<String,Object>> info = 6953 new HashMap<String, Map<String,Object>>(); 6954 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 6955 blockManager.getDatanodeManager().fetchDatanodes(live, null, false); 6956 for (DatanodeDescriptor node : live) { 6957 ImmutableMap.Builder<String, Object> innerinfo = 6958 ImmutableMap.<String,Object>builder(); 6959 innerinfo 6960 .put("infoAddr", node.getInfoAddr()) 6961 .put("infoSecureAddr", node.getInfoSecureAddr()) 6962 .put("xferaddr", node.getXferAddr()) 6963 .put("lastContact", getLastContact(node)) 6964 .put("usedSpace", getDfsUsed(node)) 6965 .put("adminState", node.getAdminState().toString()) 6966 .put("nonDfsUsedSpace", node.getNonDfsUsed()) 6967 .put("capacity", node.getCapacity()) 6968 .put("numBlocks", node.numBlocks()) 6969 .put("version", node.getSoftwareVersion()) 6970 .put("used", node.getDfsUsed()) 6971 .put("remaining", node.getRemaining()) 6972 .put("blockScheduled", node.getBlocksScheduled()) 6973 .put("blockPoolUsed", node.getBlockPoolUsed()) 6974 .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent()) 6975 .put("volfails", node.getVolumeFailures()); 6976 VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary(); 6977 if (volumeFailureSummary != null) { 6978 innerinfo 6979 .put("failedStorageLocations", 6980 volumeFailureSummary.getFailedStorageLocations()) 6981 .put("lastVolumeFailureDate", 6982 volumeFailureSummary.getLastVolumeFailureDate()) 6983 .put("estimatedCapacityLostTotal", 6984 volumeFailureSummary.getEstimatedCapacityLostTotal()); 6985 } 6986 info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo.build()); 6987 } 6988 return JSON.toString(info); 6989 } 6990 6991 /** 6992 * Returned information is a JSON representation of map with host name as the 6993 * key and value is a map of dead node attribute keys to its values 6994 */ 6995 @Override // 
NameNodeMXBean 6996 public String getDeadNodes() { 6997 final Map<String, Map<String, Object>> info = 6998 new HashMap<String, Map<String, Object>>(); 6999 final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>(); 7000 blockManager.getDatanodeManager().fetchDatanodes(null, dead, false); 7001 for (DatanodeDescriptor node : dead) { 7002 Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder() 7003 .put("lastContact", getLastContact(node)) 7004 .put("decommissioned", node.isDecommissioned()) 7005 .put("xferaddr", node.getXferAddr()) 7006 .build(); 7007 info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo); 7008 } 7009 return JSON.toString(info); 7010 } 7011 7012 /** 7013 * Returned information is a JSON representation of map with host name as the 7014 * key and value is a map of decommissioning node attribute keys to its 7015 * values 7016 */ 7017 @Override // NameNodeMXBean 7018 public String getDecomNodes() { 7019 final Map<String, Map<String, Object>> info = 7020 new HashMap<String, Map<String, Object>>(); 7021 final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager( 7022 ).getDecommissioningNodes(); 7023 for (DatanodeDescriptor node : decomNodeList) { 7024 Map<String, Object> innerinfo = ImmutableMap 7025 .<String, Object> builder() 7026 .put("xferaddr", node.getXferAddr()) 7027 .put("underReplicatedBlocks", 7028 node.decommissioningStatus.getUnderReplicatedBlocks()) 7029 .put("decommissionOnlyReplicas", 7030 node.decommissioningStatus.getDecommissionOnlyReplicas()) 7031 .put("underReplicateInOpenFiles", 7032 node.decommissioningStatus.getUnderReplicatedInOpenFiles()) 7033 .build(); 7034 info.put(node.getHostName() + ":" + node.getXferPort(), innerinfo); 7035 } 7036 return JSON.toString(info); 7037 } 7038 7039 private long getLastContact(DatanodeDescriptor alivenode) { 7040 return (monotonicNow() - alivenode.getLastUpdateMonotonic())/1000; 7041 } 7042 7043 private long getDfsUsed(DatanodeDescriptor 
alivenode) { 7044 return alivenode.getDfsUsed(); 7045 } 7046 7047 @Override // NameNodeMXBean 7048 public String getClusterId() { 7049 return getFSImage().getStorage().getClusterID(); 7050 } 7051 7052 @Override // NameNodeMXBean 7053 public String getBlockPoolId() { 7054 return blockPoolId; 7055 } 7056 7057 @Override // NameNodeMXBean 7058 public String getNameDirStatuses() { 7059 Map<String, Map<File, StorageDirType>> statusMap = 7060 new HashMap<String, Map<File, StorageDirType>>(); 7061 7062 Map<File, StorageDirType> activeDirs = new HashMap<File, StorageDirType>(); 7063 for (Iterator<StorageDirectory> it 7064 = getFSImage().getStorage().dirIterator(); it.hasNext();) { 7065 StorageDirectory st = it.next(); 7066 activeDirs.put(st.getRoot(), st.getStorageDirType()); 7067 } 7068 statusMap.put("active", activeDirs); 7069 7070 List<Storage.StorageDirectory> removedStorageDirs 7071 = getFSImage().getStorage().getRemovedStorageDirs(); 7072 Map<File, StorageDirType> failedDirs = new HashMap<File, StorageDirType>(); 7073 for (StorageDirectory st : removedStorageDirs) { 7074 failedDirs.put(st.getRoot(), st.getStorageDirType()); 7075 } 7076 statusMap.put("failed", failedDirs); 7077 7078 return JSON.toString(statusMap); 7079 } 7080 7081 @Override // NameNodeMXBean 7082 public String getNodeUsage() { 7083 float median = 0; 7084 float max = 0; 7085 float min = 0; 7086 float dev = 0; 7087 7088 final Map<String, Map<String,Object>> info = 7089 new HashMap<String, Map<String,Object>>(); 7090 final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); 7091 blockManager.getDatanodeManager().fetchDatanodes(live, null, true); 7092 7093 if (live.size() > 0) { 7094 float totalDfsUsed = 0; 7095 float[] usages = new float[live.size()]; 7096 int i = 0; 7097 for (DatanodeDescriptor dn : live) { 7098 usages[i++] = dn.getDfsUsedPercent(); 7099 totalDfsUsed += dn.getDfsUsedPercent(); 7100 } 7101 totalDfsUsed /= live.size(); 7102 Arrays.sort(usages); 7103 median = 
usages[usages.length / 2]; 7104 max = usages[usages.length - 1]; 7105 min = usages[0]; 7106 7107 for (i = 0; i < usages.length; i++) { 7108 dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed); 7109 } 7110 dev = (float) Math.sqrt(dev / usages.length); 7111 } 7112 7113 final Map<String, Object> innerInfo = new HashMap<String, Object>(); 7114 innerInfo.put("min", StringUtils.format("%.2f%%", min)); 7115 innerInfo.put("median", StringUtils.format("%.2f%%", median)); 7116 innerInfo.put("max", StringUtils.format("%.2f%%", max)); 7117 innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev)); 7118 info.put("nodeUsage", innerInfo); 7119 7120 return JSON.toString(info); 7121 } 7122 7123 @Override // NameNodeMXBean 7124 public String getNameJournalStatus() { 7125 List<Map<String, String>> jasList = new ArrayList<Map<String, String>>(); 7126 FSEditLog log = getFSImage().getEditLog(); 7127 if (log != null) { 7128 // This flag can be false because we cannot hold a lock of FSEditLog 7129 // for metrics. 
7130 boolean openForWrite = log.isOpenForWriteWithoutLock(); 7131 for (JournalAndStream jas : log.getJournals()) { 7132 final Map<String, String> jasMap = new HashMap<String, String>(); 7133 String manager = jas.getManager().toString(); 7134 7135 jasMap.put("required", String.valueOf(jas.isRequired())); 7136 jasMap.put("disabled", String.valueOf(jas.isDisabled())); 7137 jasMap.put("manager", manager); 7138 7139 if (jas.isDisabled()) { 7140 jasMap.put("stream", "Failed"); 7141 } else if (openForWrite) { 7142 EditLogOutputStream elos = jas.getCurrentStream(); 7143 if (elos != null) { 7144 jasMap.put("stream", elos.generateReport()); 7145 } else { 7146 jasMap.put("stream", "not currently writing"); 7147 } 7148 } else { 7149 jasMap.put("stream", "open for read"); 7150 } 7151 jasList.add(jasMap); 7152 } 7153 } 7154 return JSON.toString(jasList); 7155 } 7156 7157 @Override // NameNodeMxBean 7158 public String getJournalTransactionInfo() { 7159 Map<String, String> txnIdMap = new HashMap<String, String>(); 7160 txnIdMap.put("LastAppliedOrWrittenTxId", 7161 Long.toString(this.getFSImage().getLastAppliedOrWrittenTxId())); 7162 txnIdMap.put("MostRecentCheckpointTxId", 7163 Long.toString(this.getFSImage().getMostRecentCheckpointTxId())); 7164 return JSON.toString(txnIdMap); 7165 } 7166 7167 @Override // NameNodeMXBean 7168 public String getNNStarted() { 7169 return getStartTime().toString(); 7170 } 7171 7172 @Override // NameNodeMXBean 7173 public String getCompileInfo() { 7174 return VersionInfo.getDate() + " by " + VersionInfo.getUser() + 7175 " from " + VersionInfo.getBranch(); 7176 } 7177 7178 /** @return the block manager. */ 7179 public BlockManager getBlockManager() { 7180 return blockManager; 7181 } 7182 7183 public BlockIdManager getBlockIdManager() { 7184 return blockIdManager; 7185 } 7186 7187 /** @return the FSDirectory. */ 7188 @Override 7189 public FSDirectory getFSDirectory() { 7190 return dir; 7191 } 7192 /** Set the FSDirectory. 
*/ 7193 @VisibleForTesting 7194 public void setFSDirectory(FSDirectory dir) { 7195 this.dir = dir; 7196 } 7197 /** @return the cache manager. */ 7198 public CacheManager getCacheManager() { 7199 return cacheManager; 7200 } 7201 7202 @Override // NameNodeMXBean 7203 public String getCorruptFiles() { 7204 List<String> list = new ArrayList<String>(); 7205 Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks; 7206 try { 7207 corruptFileBlocks = listCorruptFileBlocks("/", null); 7208 int corruptFileCount = corruptFileBlocks.size(); 7209 if (corruptFileCount != 0) { 7210 for (FSNamesystem.CorruptFileBlockInfo c : corruptFileBlocks) { 7211 list.add(c.toString()); 7212 } 7213 } 7214 } catch (IOException e) { 7215 LOG.warn("Get corrupt file blocks returned error: " + e.getMessage()); 7216 } 7217 return JSON.toString(list); 7218 } 7219 7220 @Override //NameNodeMXBean 7221 public int getDistinctVersionCount() { 7222 return blockManager.getDatanodeManager().getDatanodesSoftwareVersions() 7223 .size(); 7224 } 7225 7226 @Override //NameNodeMXBean 7227 public Map<String, Integer> getDistinctVersions() { 7228 return blockManager.getDatanodeManager().getDatanodesSoftwareVersions(); 7229 } 7230 7231 @Override //NameNodeMXBean 7232 public String getSoftwareVersion() { 7233 return VersionInfo.getVersion(); 7234 } 7235 7236 /** 7237 * Verifies that the given identifier and password are valid and match. 7238 * @param identifier Token identifier. 7239 * @param password Password in the token. 
7240 */ 7241 public synchronized void verifyToken(DelegationTokenIdentifier identifier, 7242 byte[] password) throws InvalidToken, RetriableException { 7243 try { 7244 getDelegationTokenSecretManager().verifyToken(identifier, password); 7245 } catch (InvalidToken it) { 7246 if (inTransitionToActive()) { 7247 throw new RetriableException(it); 7248 } 7249 throw it; 7250 } 7251 } 7252 7253 @Override 7254 public boolean isGenStampInFuture(Block block) { 7255 return blockIdManager.isGenStampInFuture(block); 7256 } 7257 7258 @VisibleForTesting 7259 public EditLogTailer getEditLogTailer() { 7260 return editLogTailer; 7261 } 7262 7263 @VisibleForTesting 7264 public void setEditLogTailerForTests(EditLogTailer tailer) { 7265 this.editLogTailer = tailer; 7266 } 7267 7268 @VisibleForTesting 7269 void setFsLockForTests(ReentrantReadWriteLock lock) { 7270 this.fsLock.coarseLock = lock; 7271 } 7272 7273 @VisibleForTesting 7274 public ReentrantReadWriteLock getFsLockForTests() { 7275 return fsLock.coarseLock; 7276 } 7277 7278 @VisibleForTesting 7279 public ReentrantLock getCpLockForTests() { 7280 return cpLock; 7281 } 7282 7283 @VisibleForTesting 7284 public SafeModeInfo getSafeModeInfoForTests() { 7285 return safeMode; 7286 } 7287 7288 @VisibleForTesting 7289 public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) { 7290 this.nnResourceChecker = nnResourceChecker; 7291 } 7292 7293 public SnapshotManager getSnapshotManager() { 7294 return snapshotManager; 7295 } 7296 7297 /** Allow snapshot on a directory. 
*/ 7298 void allowSnapshot(String path) throws IOException { 7299 checkOperation(OperationCategory.WRITE); 7300 final String operationName = "allowSnapshot"; 7301 boolean success = false; 7302 writeLock(); 7303 try { 7304 checkOperation(OperationCategory.WRITE); 7305 checkNameNodeSafeMode("Cannot allow snapshot for " + path); 7306 checkSuperuserPrivilege(); 7307 FSDirSnapshotOp.allowSnapshot(dir, snapshotManager, path); 7308 success = true; 7309 } finally { 7310 writeUnlock(operationName); 7311 } 7312 getEditLog().logSync(); 7313 logAuditEvent(success, operationName, path, null, null); 7314 } 7315 7316 /** Disallow snapshot on a directory. */ 7317 void disallowSnapshot(String path) throws IOException { 7318 checkOperation(OperationCategory.WRITE); 7319 final String operationName = "disallowSnapshot"; 7320 boolean success = false; 7321 writeLock(); 7322 try { 7323 checkOperation(OperationCategory.WRITE); 7324 checkNameNodeSafeMode("Cannot disallow snapshot for " + path); 7325 checkSuperuserPrivilege(); 7326 FSDirSnapshotOp.disallowSnapshot(dir, snapshotManager, path); 7327 success = true; 7328 } finally { 7329 writeUnlock(operationName); 7330 } 7331 getEditLog().logSync(); 7332 logAuditEvent(success, operationName, path, null, null); 7333 } 7334 7335 /** 7336 * Create a snapshot 7337 * @param snapshotRoot The directory path where the snapshot is taken 7338 * @param snapshotName The name of the snapshot 7339 */ 7340 String createSnapshot(String snapshotRoot, String snapshotName, 7341 boolean logRetryCache) throws IOException { 7342 final String operationName = "createSnapshot"; 7343 String snapshotPath = null; 7344 writeLock(); 7345 try { 7346 checkOperation(OperationCategory.WRITE); 7347 checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot); 7348 snapshotPath = FSDirSnapshotOp.createSnapshot(dir, 7349 snapshotManager, snapshotRoot, snapshotName, logRetryCache); 7350 } finally { 7351 writeUnlock(operationName); 7352 } 7353 getEditLog().logSync(); 7354 
logAuditEvent(snapshotPath != null, operationName, snapshotRoot, 7355 snapshotPath, null); 7356 return snapshotPath; 7357 } 7358 7359 /** 7360 * Rename a snapshot 7361 * @param path The directory path where the snapshot was taken 7362 * @param snapshotOldName Old snapshot name 7363 * @param snapshotNewName New snapshot name 7364 * @throws SafeModeException 7365 * @throws IOException 7366 */ 7367 void renameSnapshot( 7368 String path, String snapshotOldName, String snapshotNewName, 7369 boolean logRetryCache) throws IOException { 7370 final String operationName = "renameSnapshot"; 7371 boolean success = false; 7372 writeLock(); 7373 try { 7374 checkOperation(OperationCategory.WRITE); 7375 checkNameNodeSafeMode("Cannot rename snapshot for " + path); 7376 FSDirSnapshotOp.renameSnapshot(dir, snapshotManager, path, 7377 snapshotOldName, snapshotNewName, logRetryCache); 7378 success = true; 7379 } finally { 7380 writeUnlock(operationName); 7381 } 7382 getEditLog().logSync(); 7383 String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName); 7384 String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName); 7385 logAuditEvent(success, operationName, oldSnapshotRoot, 7386 newSnapshotRoot, null); 7387 } 7388 7389 /** 7390 * Get the list of snapshottable directories that are owned 7391 * by the current user. Return all the snapshottable directories if the 7392 * current user is a super user. 
7393 * @return The list of all the current snapshottable directories 7394 * @throws IOException 7395 */ 7396 public SnapshottableDirectoryStatus[] getSnapshottableDirListing() 7397 throws IOException { 7398 final String operationName = "listSnapshottableDirectory"; 7399 SnapshottableDirectoryStatus[] status = null; 7400 checkOperation(OperationCategory.READ); 7401 boolean success = false; 7402 readLock(); 7403 try { 7404 checkOperation(OperationCategory.READ); 7405 status = FSDirSnapshotOp.getSnapshottableDirListing(dir, snapshotManager); 7406 success = true; 7407 } finally { 7408 readUnlock(operationName); 7409 } 7410 logAuditEvent(success, operationName, null, null, null); 7411 return status; 7412 } 7413 7414 /** 7415 * Get the difference between two snapshots (or between a snapshot and the 7416 * current status) of a snapshottable directory. 7417 * 7418 * @param path The full path of the snapshottable directory. 7419 * @param fromSnapshot Name of the snapshot to calculate the diff from. Null 7420 * or empty string indicates the current tree. 7421 * @param toSnapshot Name of the snapshot to calculated the diff to. Null or 7422 * empty string indicates the current tree. 7423 * @return A report about the difference between {@code fromSnapshot} and 7424 * {@code toSnapshot}. Modified/deleted/created/renamed files and 7425 * directories belonging to the snapshottable directories are listed 7426 * and labeled as M/-/+/R respectively. 
7427 * @throws IOException 7428 */ 7429 SnapshotDiffReport getSnapshotDiffReport(String path, 7430 String fromSnapshot, String toSnapshot) throws IOException { 7431 final String operationName = "computeSnapshotDiff"; 7432 SnapshotDiffReport diffs = null; 7433 checkOperation(OperationCategory.READ); 7434 readLock(); 7435 try { 7436 checkOperation(OperationCategory.READ); 7437 diffs = FSDirSnapshotOp.getSnapshotDiffReport(dir, snapshotManager, 7438 path, fromSnapshot, toSnapshot); 7439 } finally { 7440 readUnlock(operationName); 7441 } 7442 7443 logAuditEvent(diffs != null, operationName, null, null, null); 7444 return diffs; 7445 } 7446 7447 /** 7448 * Delete a snapshot of a snapshottable directory 7449 * @param snapshotRoot The snapshottable directory 7450 * @param snapshotName The name of the to-be-deleted snapshot 7451 * @throws SafeModeException 7452 * @throws IOException 7453 */ 7454 void deleteSnapshot(String snapshotRoot, String snapshotName, 7455 boolean logRetryCache) throws IOException { 7456 final String operationName = "deleteSnapshot"; 7457 boolean success = false; 7458 writeLock(); 7459 BlocksMapUpdateInfo blocksToBeDeleted = null; 7460 try { 7461 checkOperation(OperationCategory.WRITE); 7462 checkNameNodeSafeMode("Cannot delete snapshot for " + snapshotRoot); 7463 7464 blocksToBeDeleted = FSDirSnapshotOp.deleteSnapshot(dir, snapshotManager, 7465 snapshotRoot, snapshotName, logRetryCache); 7466 success = true; 7467 } finally { 7468 writeUnlock(operationName); 7469 } 7470 getEditLog().logSync(); 7471 7472 // Breaking the pattern as removing blocks have to happen outside of the 7473 // global lock 7474 if (blocksToBeDeleted != null) { 7475 removeBlocks(blocksToBeDeleted); 7476 } 7477 7478 String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName); 7479 logAuditEvent(success, operationName, rootPath, null, null); 7480 } 7481 7482 /** 7483 * Remove a list of INodeDirectorySnapshottable from the SnapshotManager 7484 * @param toRemove the list of 
INodeDirectorySnapshottable to be removed 7485 */ 7486 void removeSnapshottableDirs(List<INodeDirectory> toRemove) { 7487 if (snapshotManager != null) { 7488 snapshotManager.removeSnapshottable(toRemove); 7489 } 7490 } 7491 7492 RollingUpgradeInfo queryRollingUpgrade() throws IOException { 7493 checkSuperuserPrivilege(); 7494 checkOperation(OperationCategory.READ); 7495 readLock(); 7496 try { 7497 if (!isRollingUpgrade()) { 7498 return null; 7499 } 7500 Preconditions.checkNotNull(rollingUpgradeInfo); 7501 boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage(); 7502 rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage); 7503 return rollingUpgradeInfo; 7504 } finally { 7505 readUnlock("queryRollingUpgrade"); 7506 } 7507 } 7508 7509 RollingUpgradeInfo startRollingUpgrade() throws IOException { 7510 final String operationName = "startRollingUpgrade"; 7511 checkSuperuserPrivilege(); 7512 checkOperation(OperationCategory.WRITE); 7513 writeLock(); 7514 try { 7515 checkOperation(OperationCategory.WRITE); 7516 if (isRollingUpgrade()) { 7517 return rollingUpgradeInfo; 7518 } 7519 long startTime = now(); 7520 if (!haEnabled) { // for non-HA, we require NN to be in safemode 7521 startRollingUpgradeInternalForNonHA(startTime); 7522 } else { // for HA, NN cannot be in safemode 7523 checkNameNodeSafeMode("Failed to start rolling upgrade"); 7524 startRollingUpgradeInternal(startTime); 7525 } 7526 7527 getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime()); 7528 if (haEnabled) { 7529 // roll the edit log to make sure the standby NameNode can tail 7530 getFSImage().rollEditLog(); 7531 } 7532 } finally { 7533 writeUnlock(operationName); 7534 } 7535 7536 getEditLog().logSync(); 7537 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 7538 logAuditEvent(true, operationName, null, null, null); 7539 } 7540 return rollingUpgradeInfo; 7541 } 7542 7543 /** 7544 * Update internal state to indicate that a rolling upgrade is in progress. 
7545 * @param startTime rolling upgrade start time 7546 */ 7547 void startRollingUpgradeInternal(long startTime) 7548 throws IOException { 7549 checkRollingUpgrade("start rolling upgrade"); 7550 getFSImage().checkUpgrade(); 7551 setRollingUpgradeInfo(false, startTime); 7552 } 7553 7554 /** 7555 * Update internal state to indicate that a rolling upgrade is in progress for 7556 * non-HA setup. This requires the namesystem is in SafeMode and after doing a 7557 * checkpoint for rollback the namesystem will quit the safemode automatically 7558 */ 7559 private void startRollingUpgradeInternalForNonHA(long startTime) 7560 throws IOException { 7561 Preconditions.checkState(!haEnabled); 7562 if (!isInSafeMode()) { 7563 throw new IOException("Safe mode should be turned ON " 7564 + "in order to create namespace image."); 7565 } 7566 checkRollingUpgrade("start rolling upgrade"); 7567 getFSImage().checkUpgrade(); 7568 // in non-HA setup, we do an extra checkpoint to generate a rollback image 7569 getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null); 7570 LOG.info("Successfully saved namespace for preparing rolling upgrade."); 7571 7572 // leave SafeMode automatically 7573 setSafeMode(SafeModeAction.SAFEMODE_LEAVE); 7574 setRollingUpgradeInfo(true, startTime); 7575 } 7576 7577 void setRollingUpgradeInfo(boolean createdRollbackImages, long startTime) { 7578 rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId, 7579 createdRollbackImages, startTime, 0L); 7580 } 7581 7582 public void setCreatedRollbackImages(boolean created) { 7583 if (rollingUpgradeInfo != null) { 7584 rollingUpgradeInfo.setCreatedRollbackImages(created); 7585 } 7586 } 7587 7588 public RollingUpgradeInfo getRollingUpgradeInfo() { 7589 return rollingUpgradeInfo; 7590 } 7591 7592 public boolean isNeedRollbackFsImage() { 7593 return needRollbackFsImage; 7594 } 7595 7596 public void setNeedRollbackFsImage(boolean needRollbackFsImage) { 7597 this.needRollbackFsImage = needRollbackFsImage; 7598 } 7599 
7600 @Override // NameNodeMXBean 7601 public RollingUpgradeInfo.Bean getRollingUpgradeStatus() { 7602 if (!isRollingUpgrade()) { 7603 return null; 7604 } 7605 RollingUpgradeInfo upgradeInfo = getRollingUpgradeInfo(); 7606 if (upgradeInfo.createdRollbackImages()) { 7607 return new RollingUpgradeInfo.Bean(upgradeInfo); 7608 } 7609 readLock(); 7610 try { 7611 // check again after acquiring the read lock. 7612 upgradeInfo = getRollingUpgradeInfo(); 7613 if (upgradeInfo == null) { 7614 return null; 7615 } 7616 if (!upgradeInfo.createdRollbackImages()) { 7617 boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage(); 7618 upgradeInfo.setCreatedRollbackImages(hasRollbackImage); 7619 } 7620 } catch (IOException ioe) { 7621 LOG.warn("Encountered exception setting Rollback Image", ioe); 7622 } finally { 7623 readUnlock("getRollingUpgradeStatus"); 7624 } 7625 return new RollingUpgradeInfo.Bean(upgradeInfo); 7626 } 7627 7628 /** Is rolling upgrade in progress? */ 7629 public boolean isRollingUpgrade() { 7630 return rollingUpgradeInfo != null && !rollingUpgradeInfo.isFinalized(); 7631 } 7632 7633 void checkRollingUpgrade(String action) throws RollingUpgradeException { 7634 if (isRollingUpgrade()) { 7635 throw new RollingUpgradeException("Failed to " + action 7636 + " since a rolling upgrade is already in progress." 
7637 + " Existing rolling upgrade info:\n" + rollingUpgradeInfo); 7638 } 7639 } 7640 7641 RollingUpgradeInfo finalizeRollingUpgrade() throws IOException { 7642 final String operationName = "finalizeRollingUpgrade"; 7643 checkSuperuserPrivilege(); 7644 checkOperation(OperationCategory.WRITE); 7645 writeLock(); 7646 try { 7647 checkOperation(OperationCategory.WRITE); 7648 if (!isRollingUpgrade()) { 7649 return null; 7650 } 7651 checkNameNodeSafeMode("Failed to finalize rolling upgrade"); 7652 7653 finalizeRollingUpgradeInternal(now()); 7654 getEditLog().logFinalizeRollingUpgrade(rollingUpgradeInfo.getFinalizeTime()); 7655 if (haEnabled) { 7656 // roll the edit log to make sure the standby NameNode can tail 7657 getFSImage().rollEditLog(); 7658 } 7659 getFSImage().updateStorageVersion(); 7660 getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK, 7661 NameNodeFile.IMAGE); 7662 } finally { 7663 writeUnlock(operationName); 7664 } 7665 7666 if (!haEnabled) { 7667 // Sync not needed for ha since the edit was rolled after logging. 
7668 getEditLog().logSync(); 7669 } 7670 7671 if (auditLog.isInfoEnabled() && isExternalInvocation()) { 7672 logAuditEvent(true, operationName, null, null, null); 7673 } 7674 return rollingUpgradeInfo; 7675 } 7676 7677 void finalizeRollingUpgradeInternal(long finalizeTime) { 7678 // Set the finalize time 7679 rollingUpgradeInfo.finalize(finalizeTime); 7680 } 7681 7682 long addCacheDirective(CacheDirectiveInfo directive, 7683 EnumSet<CacheFlag> flags, boolean logRetryCache) 7684 throws IOException { 7685 final String operationName = "addCacheDirective"; 7686 CacheDirectiveInfo effectiveDirective = null; 7687 if (!flags.contains(CacheFlag.FORCE)) { 7688 cacheManager.waitForRescanIfNeeded(); 7689 } 7690 writeLock(); 7691 try { 7692 checkOperation(OperationCategory.WRITE); 7693 if (isInSafeMode()) { 7694 throw new SafeModeException( 7695 "Cannot add cache directive", safeMode); 7696 } 7697 effectiveDirective = FSNDNCacheOp.addCacheDirective(this, cacheManager, 7698 directive, flags, logRetryCache); 7699 } finally { 7700 writeUnlock(operationName); 7701 boolean success = effectiveDirective != null; 7702 if (success) { 7703 getEditLog().logSync(); 7704 } 7705 7706 String effectiveDirectiveStr = effectiveDirective != null ? 7707 effectiveDirective.toString() : null; 7708 logAuditEvent(success, operationName, effectiveDirectiveStr, 7709 null, null); 7710 } 7711 return effectiveDirective != null ? 
effectiveDirective.getId() : 0; 7712 } 7713 7714 void modifyCacheDirective(CacheDirectiveInfo directive, 7715 EnumSet<CacheFlag> flags, boolean logRetryCache) throws IOException { 7716 final String operationName = "modifyCacheDirective"; 7717 boolean success = false; 7718 if (!flags.contains(CacheFlag.FORCE)) { 7719 cacheManager.waitForRescanIfNeeded(); 7720 } 7721 writeLock(); 7722 try { 7723 checkOperation(OperationCategory.WRITE); 7724 if (isInSafeMode()) { 7725 throw new SafeModeException( 7726 "Cannot add cache directive", safeMode); 7727 } 7728 FSNDNCacheOp.modifyCacheDirective(this, cacheManager, directive, flags, 7729 logRetryCache); 7730 success = true; 7731 } finally { 7732 writeUnlock(operationName); 7733 if (success) { 7734 getEditLog().logSync(); 7735 } 7736 String idStr = "{id: " + directive.getId().toString() + "}"; 7737 logAuditEvent(success, "modifyCacheDirective", idStr, 7738 directive.toString(), null); 7739 } 7740 } 7741 7742 void removeCacheDirective(long id, boolean logRetryCache) throws IOException { 7743 final String operationName = "removeCacheDirective"; 7744 boolean success = false; 7745 writeLock(); 7746 try { 7747 checkOperation(OperationCategory.WRITE); 7748 if (isInSafeMode()) { 7749 throw new SafeModeException( 7750 "Cannot remove cache directives", safeMode); 7751 } 7752 FSNDNCacheOp.removeCacheDirective(this, cacheManager, id, logRetryCache); 7753 success = true; 7754 } finally { 7755 writeUnlock(operationName); 7756 String idStr = "{id: " + Long.toString(id) + "}"; 7757 logAuditEvent(success, operationName, idStr, null, 7758 null); 7759 } 7760 getEditLog().logSync(); 7761 } 7762 7763 BatchedListEntries<CacheDirectiveEntry> listCacheDirectives( 7764 long startId, CacheDirectiveInfo filter) throws IOException { 7765 final String operationName = "listCacheDirectives"; 7766 checkOperation(OperationCategory.READ); 7767 BatchedListEntries<CacheDirectiveEntry> results; 7768 cacheManager.waitForRescanIfNeeded(); 7769 readLock(); 7770 
boolean success = false; 7771 try { 7772 checkOperation(OperationCategory.READ); 7773 results = FSNDNCacheOp.listCacheDirectives(this, cacheManager, startId, 7774 filter); 7775 success = true; 7776 } finally { 7777 readUnlock(operationName); 7778 logAuditEvent(success, operationName, filter.toString(), null, 7779 null); 7780 } 7781 return results; 7782 } 7783 7784 void addCachePool(CachePoolInfo req, boolean logRetryCache) 7785 throws IOException { 7786 final String operationName = "addCachePool"; 7787 writeLock(); 7788 boolean success = false; 7789 String poolInfoStr = null; 7790 try { 7791 checkOperation(OperationCategory.WRITE); 7792 if (isInSafeMode()) { 7793 throw new SafeModeException( 7794 "Cannot add cache pool " + req.getPoolName(), safeMode); 7795 } 7796 CachePoolInfo info = FSNDNCacheOp.addCachePool(this, cacheManager, req, 7797 logRetryCache); 7798 poolInfoStr = info.toString(); 7799 success = true; 7800 } finally { 7801 writeUnlock(operationName); 7802 logAuditEvent(success, operationName, poolInfoStr, null, null); 7803 } 7804 7805 getEditLog().logSync(); 7806 } 7807 7808 void modifyCachePool(CachePoolInfo req, boolean logRetryCache) 7809 throws IOException { 7810 final String operationName = "modifyCachePool"; 7811 writeLock(); 7812 boolean success = false; 7813 try { 7814 checkOperation(OperationCategory.WRITE); 7815 if (isInSafeMode()) { 7816 throw new SafeModeException( 7817 "Cannot modify cache pool " + req.getPoolName(), safeMode); 7818 } 7819 FSNDNCacheOp.modifyCachePool(this, cacheManager, req, logRetryCache); 7820 success = true; 7821 } finally { 7822 writeUnlock(operationName); 7823 String poolNameStr = "{poolName: " + 7824 (req == null ? null : req.getPoolName()) + "}"; 7825 logAuditEvent(success, operationName, poolNameStr, 7826 req == null ? 
null : req.toString(), null); 7827 } 7828 7829 getEditLog().logSync(); 7830 } 7831 7832 void removeCachePool(String cachePoolName, boolean logRetryCache) 7833 throws IOException { 7834 final String operationName = "removeCachePool"; 7835 writeLock(); 7836 boolean success = false; 7837 try { 7838 checkOperation(OperationCategory.WRITE); 7839 if (isInSafeMode()) { 7840 throw new SafeModeException( 7841 "Cannot remove cache pool " + cachePoolName, safeMode); 7842 } 7843 FSNDNCacheOp.removeCachePool(this, cacheManager, cachePoolName, 7844 logRetryCache); 7845 success = true; 7846 } finally { 7847 writeUnlock(operationName); 7848 String poolNameStr = "{poolName: " + cachePoolName + "}"; 7849 logAuditEvent(success, operationName, poolNameStr, null, null); 7850 } 7851 7852 getEditLog().logSync(); 7853 } 7854 7855 BatchedListEntries<CachePoolEntry> listCachePools(String prevKey) 7856 throws IOException { 7857 final String operationName = "listCachePools"; 7858 BatchedListEntries<CachePoolEntry> results; 7859 checkOperation(OperationCategory.READ); 7860 boolean success = false; 7861 cacheManager.waitForRescanIfNeeded(); 7862 readLock(); 7863 try { 7864 checkOperation(OperationCategory.READ); 7865 results = FSNDNCacheOp.listCachePools(this, cacheManager, prevKey); 7866 success = true; 7867 } finally { 7868 readUnlock(operationName); 7869 logAuditEvent(success, operationName, null, null, null); 7870 } 7871 return results; 7872 } 7873 7874 void modifyAclEntries(final String src, List<AclEntry> aclSpec) 7875 throws IOException { 7876 final String operationName = "modifyAclEntries"; 7877 HdfsFileStatus auditStat = null; 7878 checkOperation(OperationCategory.WRITE); 7879 writeLock(); 7880 try { 7881 checkOperation(OperationCategory.WRITE); 7882 checkNameNodeSafeMode("Cannot modify ACL entries on " + src); 7883 auditStat = FSDirAclOp.modifyAclEntries(dir, src, aclSpec); 7884 } catch (AccessControlException e) { 7885 logAuditEvent(false, operationName, src); 7886 throw e; 7887 } 
finally { 7888 writeUnlock(operationName); 7889 } 7890 getEditLog().logSync(); 7891 logAuditEvent(true, operationName, src, null, auditStat); 7892 } 7893 7894 void removeAclEntries(final String src, List<AclEntry> aclSpec) 7895 throws IOException { 7896 final String operationName = "removeAclEntries"; 7897 checkOperation(OperationCategory.WRITE); 7898 HdfsFileStatus auditStat = null; 7899 writeLock(); 7900 try { 7901 checkOperation(OperationCategory.WRITE); 7902 checkNameNodeSafeMode("Cannot remove ACL entries on " + src); 7903 auditStat = FSDirAclOp.removeAclEntries(dir, src, aclSpec); 7904 } catch (AccessControlException e) { 7905 logAuditEvent(false, operationName, src); 7906 throw e; 7907 } finally { 7908 writeUnlock(operationName); 7909 } 7910 getEditLog().logSync(); 7911 logAuditEvent(true, operationName, src, null, auditStat); 7912 } 7913 7914 void removeDefaultAcl(final String src) throws IOException { 7915 final String operationName = "removeDefaultAcl"; 7916 HdfsFileStatus auditStat = null; 7917 checkOperation(OperationCategory.WRITE); 7918 writeLock(); 7919 try { 7920 checkOperation(OperationCategory.WRITE); 7921 checkNameNodeSafeMode("Cannot remove default ACL entries on " + src); 7922 auditStat = FSDirAclOp.removeDefaultAcl(dir, src); 7923 } catch (AccessControlException e) { 7924 logAuditEvent(false, operationName, src); 7925 throw e; 7926 } finally { 7927 writeUnlock(operationName); 7928 } 7929 getEditLog().logSync(); 7930 logAuditEvent(true, operationName, src, null, auditStat); 7931 } 7932 7933 void removeAcl(final String src) throws IOException { 7934 final String operationName = "removeAcl"; 7935 HdfsFileStatus auditStat = null; 7936 checkOperation(OperationCategory.WRITE); 7937 writeLock(); 7938 try { 7939 checkOperation(OperationCategory.WRITE); 7940 checkNameNodeSafeMode("Cannot remove ACL on " + src); 7941 auditStat = FSDirAclOp.removeAcl(dir, src); 7942 } catch (AccessControlException e) { 7943 logAuditEvent(false, operationName, src); 7944 
throw e; 7945 } finally { 7946 writeUnlock(operationName); 7947 } 7948 getEditLog().logSync(); 7949 logAuditEvent(true, operationName, src, null, auditStat); 7950 } 7951 7952 void setAcl(final String src, List<AclEntry> aclSpec) throws IOException { 7953 final String operationName = "setAcl"; 7954 HdfsFileStatus auditStat = null; 7955 checkOperation(OperationCategory.WRITE); 7956 writeLock(); 7957 try { 7958 checkOperation(OperationCategory.WRITE); 7959 checkNameNodeSafeMode("Cannot set ACL on " + src); 7960 auditStat = FSDirAclOp.setAcl(dir, src, aclSpec); 7961 } catch (AccessControlException e) { 7962 logAuditEvent(false, operationName, src); 7963 throw e; 7964 } finally { 7965 writeUnlock(operationName); 7966 } 7967 getEditLog().logSync(); 7968 logAuditEvent(true, operationName, src, null, auditStat); 7969 } 7970 7971 AclStatus getAclStatus(String src) throws IOException { 7972 final String operationName = "getAclStatus"; 7973 checkOperation(OperationCategory.READ); 7974 boolean success = false; 7975 readLock(); 7976 try { 7977 checkOperation(OperationCategory.READ); 7978 final AclStatus ret = FSDirAclOp.getAclStatus(dir, src); 7979 success = true; 7980 return ret; 7981 } finally { 7982 readUnlock(operationName); 7983 logAuditEvent(success, operationName, src); 7984 } 7985 } 7986 7987 /** 7988 * Create an encryption zone on directory src using the specified key. 7989 * 7990 * @param src the path of a directory which will be the root of the 7991 * encryption zone. The directory must be empty. 7992 * @param keyName name of a key which must be present in the configured 7993 * KeyProvider. 7994 * @throws AccessControlException if the caller is not the superuser. 7995 * @throws UnresolvedLinkException if the path can't be resolved. 7996 * @throws SafeModeException if the Namenode is in safe mode. 
7997 */ 7998 void createEncryptionZone(final String src, final String keyName, 7999 boolean logRetryCache) 8000 throws IOException, UnresolvedLinkException, 8001 SafeModeException, AccessControlException { 8002 try { 8003 if (provider == null) { 8004 throw new IOException( 8005 "Can't create an encryption zone for " + src + 8006 " since no key provider is available."); 8007 } 8008 if (keyName == null || keyName.isEmpty()) { 8009 throw new IOException("Must specify a key name when creating an " + 8010 "encryption zone"); 8011 } 8012 KeyProvider.Metadata metadata = provider.getMetadata(keyName); 8013 if (metadata == null) { 8014 /* 8015 * It would be nice if we threw something more specific than 8016 * IOException when the key is not found, but the KeyProvider API 8017 * doesn't provide for that. If that API is ever changed to throw 8018 * something more specific (e.g. UnknownKeyException) then we can 8019 * update this to match it, or better yet, just rethrow the 8020 * KeyProvider's exception. 
8021 */ 8022 throw new IOException("Key " + keyName + " doesn't exist."); 8023 } 8024 // If the provider supports pool for EDEKs, this will fill in the pool 8025 generateEncryptedDataEncryptionKey(keyName); 8026 createEncryptionZoneInt(src, metadata.getCipher(), 8027 keyName, logRetryCache); 8028 } catch (AccessControlException e) { 8029 logAuditEvent(false, "createEncryptionZone", src); 8030 throw e; 8031 } 8032 } 8033 8034 private void createEncryptionZoneInt(final String srcArg, String cipher, 8035 String keyName, final boolean logRetryCache) throws IOException { 8036 final String operationName = "createEncryptionZone"; 8037 String src = srcArg; 8038 HdfsFileStatus resultingStat = null; 8039 checkSuperuserPrivilege(); 8040 FSPermissionChecker pc = getPermissionChecker(); 8041 writeLock(); 8042 try { 8043 checkSuperuserPrivilege(); 8044 checkOperation(OperationCategory.WRITE); 8045 checkNameNodeSafeMode("Cannot create encryption zone on " + src); 8046 final INodesInPath iip = dir.resolvePathForWrite(pc, src); 8047 src = iip.getPath(); 8048 8049 final CipherSuite suite = CipherSuite.convert(cipher); 8050 // For now this is hardcoded, as we only support one method. 8051 final CryptoProtocolVersion version = 8052 CryptoProtocolVersion.ENCRYPTION_ZONES; 8053 final XAttr ezXAttr = dir.createEncryptionZone(src, suite, 8054 version, keyName); 8055 List<XAttr> xAttrs = Lists.newArrayListWithCapacity(1); 8056 xAttrs.add(ezXAttr); 8057 getEditLog().logSetXAttrs(src, xAttrs, logRetryCache); 8058 resultingStat = dir.getAuditFileInfo(iip); 8059 } finally { 8060 writeUnlock(operationName); 8061 } 8062 getEditLog().logSync(); 8063 logAuditEvent(true, operationName, srcArg, null, resultingStat); 8064 } 8065 8066 /** 8067 * Get the encryption zone for the specified path. 8068 * 8069 * @param srcArg the path of a file or directory to get the EZ for. 8070 * @return the EZ of the of the path or null if none. 8071 * @throws AccessControlException if the caller is not the superuser. 
8072 * @throws UnresolvedLinkException if the path can't be resolved. 8073 */ 8074 EncryptionZone getEZForPath(final String srcArg) 8075 throws AccessControlException, UnresolvedLinkException, IOException { 8076 String src = srcArg; 8077 final String operationName = "getEZForPath"; 8078 HdfsFileStatus resultingStat = null; 8079 boolean success = false; 8080 final FSPermissionChecker pc = getPermissionChecker(); 8081 checkOperation(OperationCategory.READ); 8082 readLock(); 8083 try { 8084 checkOperation(OperationCategory.READ); 8085 INodesInPath iip = dir.resolvePath(pc, src); 8086 if (isPermissionEnabled) { 8087 dir.checkPathAccess(pc, iip, FsAction.READ); 8088 } 8089 final EncryptionZone ret = dir.getEZForPath(iip); 8090 resultingStat = dir.getAuditFileInfo(iip); 8091 success = true; 8092 return ret; 8093 } finally { 8094 readUnlock(operationName); 8095 logAuditEvent(success, operationName, srcArg, null, resultingStat); 8096 } 8097 } 8098 8099 BatchedListEntries<EncryptionZone> listEncryptionZones(long prevId) 8100 throws IOException { 8101 final String operationName = "listEncryptionZones"; 8102 boolean success = false; 8103 checkSuperuserPrivilege(); 8104 checkOperation(OperationCategory.READ); 8105 readLock(); 8106 try { 8107 checkSuperuserPrivilege(); 8108 checkOperation(OperationCategory.READ); 8109 final BatchedListEntries<EncryptionZone> ret = 8110 dir.listEncryptionZones(prevId); 8111 success = true; 8112 return ret; 8113 } finally { 8114 readUnlock(operationName); 8115 logAuditEvent(success, operationName, null); 8116 } 8117 } 8118 8119 void setXAttr(String src, XAttr xAttr, EnumSet<XAttrSetFlag> flag, 8120 boolean logRetryCache) 8121 throws IOException { 8122 final String operationName = "setXAttr"; 8123 HdfsFileStatus auditStat = null; 8124 writeLock(); 8125 try { 8126 checkOperation(OperationCategory.WRITE); 8127 checkNameNodeSafeMode("Cannot set XAttr on " + src); 8128 auditStat = FSDirXAttrOp.setXAttr(dir, src, xAttr, flag, logRetryCache); 8129 } 
catch (AccessControlException e) { 8130 logAuditEvent(false, operationName, src); 8131 throw e; 8132 } finally { 8133 writeUnlock(operationName); 8134 } 8135 getEditLog().logSync(); 8136 logAuditEvent(true, operationName, src, null, auditStat); 8137 } 8138 8139 List<XAttr> getXAttrs(final String src, List<XAttr> xAttrs) 8140 throws IOException { 8141 final String operationName = "getXAttrs"; 8142 checkOperation(OperationCategory.READ); 8143 readLock(); 8144 try { 8145 checkOperation(OperationCategory.READ); 8146 return FSDirXAttrOp.getXAttrs(dir, src, xAttrs); 8147 } catch (AccessControlException e) { 8148 logAuditEvent(false, operationName, src); 8149 throw e; 8150 } finally { 8151 readUnlock(operationName); 8152 } 8153 } 8154 8155 List<XAttr> listXAttrs(String src) throws IOException { 8156 final String operationName = "listXAttrs"; 8157 checkOperation(OperationCategory.READ); 8158 readLock(); 8159 try { 8160 checkOperation(OperationCategory.READ); 8161 return FSDirXAttrOp.listXAttrs(dir, src); 8162 } catch (AccessControlException e) { 8163 logAuditEvent(false, operationName, src); 8164 throw e; 8165 } finally { 8166 readUnlock(operationName); 8167 } 8168 } 8169 8170 void removeXAttr(String src, XAttr xAttr, boolean logRetryCache) 8171 throws IOException { 8172 final String operationName = "removeXAttr"; 8173 HdfsFileStatus auditStat = null; 8174 writeLock(); 8175 try { 8176 checkOperation(OperationCategory.WRITE); 8177 checkNameNodeSafeMode("Cannot remove XAttr entry on " + src); 8178 auditStat = FSDirXAttrOp.removeXAttr(dir, src, xAttr, logRetryCache); 8179 } catch (AccessControlException e) { 8180 logAuditEvent(false, operationName, src); 8181 throw e; 8182 } finally { 8183 writeUnlock(operationName); 8184 } 8185 getEditLog().logSync(); 8186 logAuditEvent(true, operationName, src, null, auditStat); 8187 } 8188 8189 void checkAccess(String src, FsAction mode) throws IOException { 8190 final String operationName = "checkAccess"; 8191 
checkOperation(OperationCategory.READ); 8192 FSPermissionChecker pc = getPermissionChecker(); 8193 readLock(); 8194 try { 8195 checkOperation(OperationCategory.READ); 8196 final INodesInPath iip = dir.resolvePath(pc, src); 8197 src = iip.getPath(); 8198 INode inode = iip.getLastINode(); 8199 if (inode == null) { 8200 throw new FileNotFoundException("Path not found"); 8201 } 8202 if (isPermissionEnabled) { 8203 dir.checkPathAccess(pc, iip, mode); 8204 } 8205 } catch (AccessControlException e) { 8206 logAuditEvent(false, operationName, src); 8207 throw e; 8208 } finally { 8209 readUnlock(operationName); 8210 } 8211 } 8212 8213 /** 8214 * Default AuditLogger implementation; used when no access logger is 8215 * defined in the config file. It can also be explicitly listed in the 8216 * config file. 8217 */ 8218 private static class DefaultAuditLogger extends HdfsAuditLogger { 8219 8220 private boolean logTokenTrackingId; 8221 8222 @Override 8223 public void initialize(Configuration conf) { 8224 logTokenTrackingId = conf.getBoolean( 8225 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY, 8226 DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT); 8227 } 8228 8229 @Override 8230 public void logAuditEvent(boolean succeeded, String userName, 8231 InetAddress addr, String cmd, String src, String dst, 8232 FileStatus status, UserGroupInformation ugi, 8233 DelegationTokenSecretManager dtSecretManager) { 8234 if (auditLog.isInfoEnabled()) { 8235 final StringBuilder sb = auditBuffer.get(); 8236 sb.setLength(0); 8237 sb.append("allowed=").append(succeeded).append("\t"); 8238 sb.append("ugi=").append(userName).append("\t"); 8239 sb.append("ip=").append(addr).append("\t"); 8240 sb.append("cmd=").append(cmd).append("\t"); 8241 sb.append("src=").append(src).append("\t"); 8242 sb.append("dst=").append(dst).append("\t"); 8243 if (null == status) { 8244 sb.append("perm=null"); 8245 } else { 8246 sb.append("perm="); 8247 sb.append(status.getOwner()).append(":"); 8248 
sb.append(status.getGroup()).append(":"); 8249 sb.append(status.getPermission()); 8250 } 8251 if (logTokenTrackingId) { 8252 sb.append("\t").append("trackingId="); 8253 String trackingId = null; 8254 if (ugi != null && dtSecretManager != null 8255 && ugi.getAuthenticationMethod() == AuthenticationMethod.TOKEN) { 8256 for (TokenIdentifier tid: ugi.getTokenIdentifiers()) { 8257 if (tid instanceof DelegationTokenIdentifier) { 8258 DelegationTokenIdentifier dtid = 8259 (DelegationTokenIdentifier)tid; 8260 trackingId = dtSecretManager.getTokenTrackingId(dtid); 8261 break; 8262 } 8263 } 8264 } 8265 sb.append(trackingId); 8266 } 8267 sb.append("\t").append("proto="); 8268 sb.append(NamenodeWebHdfsMethods.isWebHdfsInvocation() ? "webhdfs" : "rpc"); 8269 logAuditMessage(sb.toString()); 8270 } 8271 } 8272 8273 public void logAuditMessage(String message) { 8274 auditLog.info(message); 8275 } 8276 } 8277 8278 private static void enableAsyncAuditLog() { 8279 if (!(auditLog instanceof Log4JLogger)) { 8280 LOG.warn("Log4j is required to enable async auditlog"); 8281 return; 8282 } 8283 Logger logger = ((Log4JLogger)auditLog).getLogger(); 8284 @SuppressWarnings("unchecked") 8285 List<Appender> appenders = Collections.list(logger.getAllAppenders()); 8286 // failsafe against trying to async it more than once 8287 if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) { 8288 AsyncAppender asyncAppender = new AsyncAppender(); 8289 // change logger to have an async appender containing all the 8290 // previously configured appenders 8291 for (Appender appender : appenders) { 8292 logger.removeAppender(appender); 8293 asyncAppender.addAppender(appender); 8294 } 8295 logger.addAppender(asyncAppender); 8296 } 8297 } 8298 8299} 8300