001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.namenode; 019 020import com.google.common.annotations.VisibleForTesting; 021import com.google.common.base.Joiner; 022import com.google.common.base.Preconditions; 023import com.google.common.collect.Lists; 024import org.apache.hadoop.HadoopIllegalArgumentException; 025import org.apache.hadoop.classification.InterfaceAudience; 026import org.apache.hadoop.conf.Configuration; 027import org.apache.hadoop.fs.FileSystem; 028import org.apache.hadoop.fs.Trash; 029import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; 030import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; 031import org.apache.hadoop.ha.HAServiceStatus; 032import org.apache.hadoop.ha.HealthCheckFailedException; 033import org.apache.hadoop.ha.ServiceFailedException; 034import org.apache.hadoop.hdfs.DFSConfigKeys; 035import org.apache.hadoop.hdfs.DFSUtil; 036import org.apache.hadoop.hdfs.HAUtil; 037import org.apache.hadoop.hdfs.HdfsConfiguration; 038import org.apache.hadoop.hdfs.protocol.ClientProtocol; 039import org.apache.hadoop.hdfs.protocol.HdfsConstants; 040import 
org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; 041import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption; 042import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; 043import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; 044import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; 045import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby; 046import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; 047import org.apache.hadoop.hdfs.server.namenode.ha.HAState; 048import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; 049import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; 050import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; 051import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics; 052import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; 053import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; 054import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; 055import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; 056import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; 057import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; 058import org.apache.hadoop.ipc.RefreshCallQueueProtocol; 059import org.apache.hadoop.ipc.Server; 060import org.apache.hadoop.ipc.StandbyException; 061import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; 062import org.apache.hadoop.metrics2.util.MBeans; 063import org.apache.hadoop.net.NetUtils; 064import org.apache.hadoop.security.AccessControlException; 065import org.apache.hadoop.security.RefreshUserMappingsProtocol; 066import org.apache.hadoop.security.SecurityUtil; 067import org.apache.hadoop.security.UserGroupInformation; 068import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; 069import org.apache.hadoop.tools.GetUserMappingsProtocol; 070import 
org.apache.hadoop.tracing.SpanReceiverHost; 071import org.apache.hadoop.tracing.TraceAdminProtocol; 072import org.apache.hadoop.util.ExitUtil.ExitException; 073import org.apache.hadoop.util.GenericOptionsParser; 074import org.apache.hadoop.util.JvmPauseMonitor; 075import org.apache.hadoop.util.ServicePlugin; 076import org.apache.hadoop.util.StringUtils; 077import org.apache.log4j.LogManager; 078import org.slf4j.Logger; 079import org.slf4j.LoggerFactory; 080 081import javax.management.ObjectName; 082 083import java.io.IOException; 084import java.io.PrintStream; 085import java.net.InetSocketAddress; 086import java.net.URI; 087import java.security.PrivilegedExceptionAction; 088import java.util.ArrayList; 089import java.util.Arrays; 090import java.util.Collection; 091import java.util.List; 092import java.util.concurrent.atomic.AtomicBoolean; 093 094import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; 095import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 096import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; 097import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT; 098import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_AUTO_FAILOVER_ENABLED_KEY; 099import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY; 100import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY; 101import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ZKFC_PORT_KEY; 102import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY; 103import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY; 104import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY; 105import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY; 106import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY; 107import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY; 108import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY; 109import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY; 110import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTPS_BIND_HOST_KEY; 111import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_DEFAULT; 112import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY; 113import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_BIND_HOST_KEY; 114import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY; 115import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY; 116import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY; 117import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY; 118import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PLUGINS_KEY; 119import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; 120import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_BIND_HOST_KEY; 121import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY; 122import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY; 123import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY; 124import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY; 125import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; 126import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STARTUP_KEY; 127import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT; 128import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY; 129import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICE_ID; 130import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY; 131import static org.apache.hadoop.hdfs.DFSConfigKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS; 132import static org.apache.hadoop.util.ExitUtil.terminate; 133import static org.apache.hadoop.util.ToolRunner.confirmPrompt; 134 135/********************************************************** 136 * NameNode serves as both directory namespace manager and 137 * "inode table" for the Hadoop DFS. There is a single NameNode 138 * running in any DFS deployment. (Well, except when there 139 * is a second backup/failover NameNode, or when using federated NameNodes.) 140 * 141 * The NameNode controls two critical tables: 142 * 1) filename->blocksequence (namespace) 143 * 2) block->machinelist ("inodes") 144 * 145 * The first table is stored on disk and is very precious. 146 * The second table is rebuilt every time the NameNode comes up. 147 * 148 * 'NameNode' refers to both this class as well as the 'NameNode server'. 149 * The 'FSNamesystem' class actually performs most of the filesystem 150 * management. The majority of the 'NameNode' class itself is concerned 151 * with exposing the IPC interface and the HTTP server to the outside world, 152 * plus some configuration management. 153 * 154 * NameNode implements the 155 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} interface, which 156 * allows clients to ask for DFS services. 157 * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol} is not designed for 158 * direct use by authors of DFS client code. End-users should instead use the 159 * {@link org.apache.hadoop.fs.FileSystem} class. 160 * 161 * NameNode also implements the 162 * {@link org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol} interface, 163 * used by DataNodes that actually store DFS data blocks. 
* These methods are invoked repeatedly and automatically by all the
 * DataNodes in a DFS deployment.
 *
 * NameNode also implements the
 * {@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol} interface,
 * used by secondary namenodes or rebalancing processes to get partial
 * NameNode state, for example partial blocksMap etc.
 **********************************************************/
@InterfaceAudience.Private
public class NameNode implements NameNodeStatusMXBean {
  // Static initializer: calls HdfsConfiguration.init() when this class is
  // initialized.
  static{
    HdfsConfiguration.init();
  }

  /**
   * Categories of operations supported by the namenode.
   */
  public static enum OperationCategory {
    /** Operations that are state agnostic */
    UNCHECKED,
    /** Read operation that does not change the namespace state */
    READ,
    /** Write operation that changes the namespace state */
    WRITE,
    /** Operations related to checkpointing */
    CHECKPOINT,
    /** Operations related to {@link JournalProtocol} */
    JOURNAL
  }

  /**
   * HDFS configuration can have three types of parameters:
   * <ol>
   * <li>Parameters that are common for all the name services in the cluster.</li>
   * <li>Parameters that are specific to a name service. These keys are suffixed
   * with nameserviceId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1".</li>
   * <li>Parameters that are specific to a single name node. These keys are suffixed
   * with nameserviceId and namenodeId in the configuration. For example,
   * "dfs.namenode.rpc-address.nameservice1.namenode1"</li>
   * </ol>
   *
   * In the latter cases, operators may specify the configuration without
   * any suffix, with a nameservice suffix, or with a nameservice and namenode
   * suffix. The more specific suffix will take precedence.
209 * 210 * These keys are specific to a given namenode, and thus may be configured 211 * globally, for a nameservice, or for a specific namenode within a nameservice. 212 */ 213 public static final String[] NAMENODE_SPECIFIC_KEYS = { 214 DFS_NAMENODE_RPC_ADDRESS_KEY, 215 DFS_NAMENODE_RPC_BIND_HOST_KEY, 216 DFS_NAMENODE_NAME_DIR_KEY, 217 DFS_NAMENODE_EDITS_DIR_KEY, 218 DFS_NAMENODE_SHARED_EDITS_DIR_KEY, 219 DFS_NAMENODE_CHECKPOINT_DIR_KEY, 220 DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY, 221 DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, 222 DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY, 223 DFS_NAMENODE_HTTP_ADDRESS_KEY, 224 DFS_NAMENODE_HTTPS_ADDRESS_KEY, 225 DFS_NAMENODE_HTTP_BIND_HOST_KEY, 226 DFS_NAMENODE_HTTPS_BIND_HOST_KEY, 227 DFS_NAMENODE_KEYTAB_FILE_KEY, 228 DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, 229 DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY, 230 DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY, 231 DFS_NAMENODE_BACKUP_ADDRESS_KEY, 232 DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY, 233 DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY, 234 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, 235 DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY, 236 DFS_HA_FENCE_METHODS_KEY, 237 DFS_HA_ZKFC_PORT_KEY, 238 DFS_HA_FENCE_METHODS_KEY 239 }; 240 241 /** 242 * @see #NAMENODE_SPECIFIC_KEYS 243 * These keys are specific to a nameservice, but may not be overridden 244 * for a specific namenode. 
245 */ 246 public static final String[] NAMESERVICE_SPECIFIC_KEYS = { 247 DFS_HA_AUTO_FAILOVER_ENABLED_KEY 248 }; 249 250 private static final String USAGE = "Usage: java NameNode [" 251 + StartupOption.BACKUP.getName() + "] | \n\t[" 252 + StartupOption.CHECKPOINT.getName() + "] | \n\t[" 253 + StartupOption.FORMAT.getName() + " [" 254 + StartupOption.CLUSTERID.getName() + " cid ] [" 255 + StartupOption.FORCE.getName() + "] [" 256 + StartupOption.NONINTERACTIVE.getName() + "] ] | \n\t[" 257 + StartupOption.UPGRADE.getName() + 258 " [" + StartupOption.CLUSTERID.getName() + " cid]" + 259 " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t[" 260 + StartupOption.UPGRADEONLY.getName() + 261 " [" + StartupOption.CLUSTERID.getName() + " cid]" + 262 " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | \n\t[" 263 + StartupOption.ROLLBACK.getName() + "] | \n\t[" 264 + StartupOption.ROLLINGUPGRADE.getName() + " " 265 + RollingUpgradeStartupOption.getAllOptionString() + " ] | \n\t[" 266 + StartupOption.FINALIZE.getName() + "] | \n\t[" 267 + StartupOption.IMPORT.getName() + "] | \n\t[" 268 + StartupOption.INITIALIZESHAREDEDITS.getName() + "] | \n\t[" 269 + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | \n\t[" 270 + StartupOption.RECOVER.getName() + " [ " 271 + StartupOption.FORCE.getName() + "] ] | \n\t[" 272 + StartupOption.METADATAVERSION.getName() + " ] " 273 + " ]"; 274 275 276 public long getProtocolVersion(String protocol, 277 long clientVersion) throws IOException { 278 if (protocol.equals(ClientProtocol.class.getName())) { 279 return ClientProtocol.versionID; 280 } else if (protocol.equals(DatanodeProtocol.class.getName())){ 281 return DatanodeProtocol.versionID; 282 } else if (protocol.equals(NamenodeProtocol.class.getName())){ 283 return NamenodeProtocol.versionID; 284 } else if (protocol.equals(RefreshAuthorizationPolicyProtocol.class.getName())){ 285 return RefreshAuthorizationPolicyProtocol.versionID; 286 } else if 
(protocol.equals(RefreshUserMappingsProtocol.class.getName())){ 287 return RefreshUserMappingsProtocol.versionID; 288 } else if (protocol.equals(RefreshCallQueueProtocol.class.getName())) { 289 return RefreshCallQueueProtocol.versionID; 290 } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){ 291 return GetUserMappingsProtocol.versionID; 292 } else if (protocol.equals(TraceAdminProtocol.class.getName())){ 293 return TraceAdminProtocol.versionID; 294 } else { 295 throw new IOException("Unknown protocol to name node: " + protocol); 296 } 297 } 298 299 public static final int DEFAULT_PORT = 8020; 300 public static final Logger LOG = 301 LoggerFactory.getLogger(NameNode.class.getName()); 302 public static final Logger stateChangeLog = 303 LoggerFactory.getLogger("org.apache.hadoop.hdfs.StateChange"); 304 public static final Logger blockStateChangeLog = 305 LoggerFactory.getLogger("BlockStateChange"); 306 public static final HAState ACTIVE_STATE = new ActiveState(); 307 public static final HAState STANDBY_STATE = new StandbyState(); 308 309 protected FSNamesystem namesystem; 310 protected final Configuration conf; 311 protected final NamenodeRole role; 312 private volatile HAState state; 313 private final boolean haEnabled; 314 private final HAContext haContext; 315 protected final boolean allowStaleStandbyReads; 316 private AtomicBoolean started = new AtomicBoolean(false); 317 318 319 /** httpServer */ 320 protected NameNodeHttpServer httpServer; 321 private Thread emptier; 322 /** only used for testing purposes */ 323 protected boolean stopRequested = false; 324 /** Registration information of this name-node */ 325 protected NamenodeRegistration nodeRegistration; 326 /** Activated plug-ins. 
*/
  private List<ServicePlugin> plugins;

  private NameNodeRpcServer rpcServer;

  private JvmPauseMonitor pauseMonitor;
  private ObjectName nameNodeStatusBeanName;
  SpanReceiverHost spanReceiverHost;
  /**
   * The namenode address that clients will use to access this namenode
   * or the name service. For HA configurations using logical URI, it
   * will be the logical address.
   */
  private String clientNamenodeAddress;

  /**
   * Format a new filesystem.  Destroys any filesystem that may already
   * exist at this location.
   * Delegates to the three-argument {@code format} overload with both
   * boolean arguments set to {@code true}; the meaning of those flags is
   * defined by that overload.
   *
   * @param conf configuration passed through to the overload
   * @throws IOException if the overload throws it
   */
  public static void format(Configuration conf) throws IOException {
    format(conf, true, true);
  }

  /** Metrics instance shared by the class; assigned by {@link #initMetrics}. */
  static NameNodeMetrics metrics;
  /** Single startup-progress tracker returned by {@link #getStartupProgress()}. */
  private static final StartupProgress startupProgress = new StartupProgress();
  /** Return the {@link FSNamesystem} object.
   * @return {@link FSNamesystem} object.
   */
  public FSNamesystem getNamesystem() {
    return namesystem;
  }

  /**
   * @return the RPC server field, exposed as {@link NamenodeProtocols}
   */
  public NamenodeProtocols getRpcServer() {
    return rpcServer;
  }

  /**
   * Creates the metrics object via {@code NameNodeMetrics.create} and stores
   * it in the static {@link #metrics} field.
   *
   * @param conf configuration handed to {@code NameNodeMetrics.create}
   * @param role role handed to {@code NameNodeMetrics.create}
   */
  static void initMetrics(Configuration conf, NamenodeRole role) {
    metrics = NameNodeMetrics.create(conf, role);
  }

  /**
   * @return the static metrics object; {@code null} until
   *         {@link #initMetrics} has been called
   */
  public static NameNodeMetrics getNameNodeMetrics() {
    return metrics;
  }

  /**
   * Returns object used for reporting namenode startup progress.
   *
   * @return StartupProgress for reporting namenode startup progress
   */
  public static StartupProgress getStartupProgress() {
    return startupProgress;
  }

  /**
   * Return the service name of the issued delegation token.
   * This is the value of {@link #getClientNamenodeAddress()}.
   *
   * @return The name service id in HA-mode, or the rpc address in non-HA mode
   */
  public String getTokenServiceName() {
    return getClientNamenodeAddress();
  }

  /**
   * Set the namenode address that will be used by clients to access this
   * namenode or name service. This needs to be called before the config
   * is overridden.
390 */ 391 public void setClientNamenodeAddress(Configuration conf) { 392 String nnAddr = conf.get(FS_DEFAULT_NAME_KEY); 393 if (nnAddr == null) { 394 // default fs is not set. 395 clientNamenodeAddress = null; 396 return; 397 } 398 399 LOG.info("{} is {}", FS_DEFAULT_NAME_KEY, nnAddr); 400 URI nnUri = URI.create(nnAddr); 401 402 String nnHost = nnUri.getHost(); 403 if (nnHost == null) { 404 clientNamenodeAddress = null; 405 return; 406 } 407 408 if (DFSUtil.getNameServiceIds(conf).contains(nnHost)) { 409 // host name is logical 410 clientNamenodeAddress = nnHost; 411 } else if (nnUri.getPort() > 0) { 412 // physical address with a valid port 413 clientNamenodeAddress = nnUri.getAuthority(); 414 } else { 415 // the port is missing or 0. Figure out real bind address later. 416 clientNamenodeAddress = null; 417 return; 418 } 419 LOG.info("Clients are to use {} to access" 420 + " this namenode/service.", clientNamenodeAddress ); 421 } 422 423 /** 424 * Get the namenode address to be used by clients. 425 * @return nn address 426 */ 427 public String getClientNamenodeAddress() { 428 return clientNamenodeAddress; 429 } 430 431 public static InetSocketAddress getAddress(String address) { 432 return NetUtils.createSocketAddr(address, DEFAULT_PORT); 433 } 434 435 /** 436 * Set the configuration property for the service rpc address 437 * to address 438 */ 439 public static void setServiceAddress(Configuration conf, 440 String address) { 441 LOG.info("Setting ADDRESS {}", address); 442 conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, address); 443 } 444 445 /** 446 * Fetches the address for services to use when connecting to namenode 447 * based on the value of fallback returns null if the special 448 * address is not specified or returns the default namenode address 449 * to be used by both clients and services. 
* Services here are datanodes, backup node, any non client connection
   */
  public static InetSocketAddress getServiceAddress(Configuration conf,
                                                    boolean fallback) {
    final String serviceAddr =
        conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
    if (serviceAddr != null && !serviceAddr.isEmpty()) {
      return getAddress(serviceAddr);
    }
    // No dedicated service address configured.
    if (fallback) {
      return getAddress(conf);
    }
    return null;
  }

  /**
   * Resolves the namenode address from the default file system URI of
   * {@code conf}.
   *
   * @param conf configuration whose default URI is read
   * @return address derived from that URI
   */
  public static InetSocketAddress getAddress(Configuration conf) {
    return getAddress(FileSystem.getDefaultUri(conf));
  }


  /**
   * @return address of file system
   * @throws IllegalArgumentException if the URI has no authority or its
   *         scheme is not the HDFS scheme
   */
  public static InetSocketAddress getAddress(URI filesystemURI) {
    final String hostAndPort = filesystemURI.getAuthority();
    if (hostAndPort == null) {
      final String message = String.format(
          "Invalid URI for NameNode address (check %s): %s has no authority.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString());
      throw new IllegalArgumentException(message);
    }

    final boolean isHdfsScheme = HdfsConstants.HDFS_URI_SCHEME
        .equalsIgnoreCase(filesystemURI.getScheme());
    if (!isHdfsScheme) {
      final String message = String.format(
          "Invalid URI for NameNode address (check %s): %s is not of scheme '%s'.",
          FileSystem.FS_DEFAULT_NAME_KEY, filesystemURI.toString(),
          HdfsConstants.HDFS_URI_SCHEME);
      throw new IllegalArgumentException(message);
    }
    return getAddress(hostAndPort);
  }

  /**
   * Builds the HDFS URI for {@code namenode}. The port is left out of the
   * URI when it equals {@link #DEFAULT_PORT}.
   *
   * @param namenode address to convert
   * @return URI of the form scheme://host[:port]
   */
  public static URI getUri(InetSocketAddress namenode) {
    final int port = namenode.getPort();
    final StringBuilder text = new StringBuilder();
    text.append(HdfsConstants.HDFS_URI_SCHEME)
        .append("://")
        .append(namenode.getHostName());
    if (port != DEFAULT_PORT) {
      text.append(":").append(port);
    }
    return URI.create(text.toString());
  }

  //
  // Common NameNode methods implementation for the active name-node role.
496 // 497 public NamenodeRole getRole() { 498 return role; 499 } 500 501 boolean isRole(NamenodeRole that) { 502 return role.equals(that); 503 } 504 505 /** 506 * Given a configuration get the address of the service rpc server 507 * If the service rpc is not configured returns null 508 */ 509 protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) { 510 return NameNode.getServiceAddress(conf, false); 511 } 512 513 protected InetSocketAddress getRpcServerAddress(Configuration conf) { 514 return getAddress(conf); 515 } 516 517 /** Given a configuration get the bind host of the service rpc server 518 * If the bind host is not configured returns null. 519 */ 520 protected String getServiceRpcServerBindHost(Configuration conf) { 521 String addr = conf.getTrimmed(DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY); 522 if (addr == null || addr.isEmpty()) { 523 return null; 524 } 525 return addr; 526 } 527 528 /** Given a configuration get the bind host of the client rpc server 529 * If the bind host is not configured returns null. 530 */ 531 protected String getRpcServerBindHost(Configuration conf) { 532 String addr = conf.getTrimmed(DFS_NAMENODE_RPC_BIND_HOST_KEY); 533 if (addr == null || addr.isEmpty()) { 534 return null; 535 } 536 return addr; 537 } 538 539 /** 540 * Modifies the configuration passed to contain the service rpc address setting 541 */ 542 protected void setRpcServiceServerAddress(Configuration conf, 543 InetSocketAddress serviceRPCAddress) { 544 setServiceAddress(conf, NetUtils.getHostPortString(serviceRPCAddress)); 545 } 546 547 protected void setRpcServerAddress(Configuration conf, 548 InetSocketAddress rpcAddress) { 549 FileSystem.setDefaultUri(conf, getUri(rpcAddress)); 550 } 551 552 protected InetSocketAddress getHttpServerAddress(Configuration conf) { 553 return getHttpAddress(conf); 554 } 555 556 /** 557 * HTTP server address for binding the endpoint. This method is 558 * for use by the NameNode and its derivatives. 
It may return 559 * a different address than the one that should be used by clients to 560 * connect to the NameNode. See 561 * {@link DFSConfigKeys#DFS_NAMENODE_HTTP_BIND_HOST_KEY} 562 * 563 * @param conf 564 * @return 565 */ 566 protected InetSocketAddress getHttpServerBindAddress(Configuration conf) { 567 InetSocketAddress bindAddress = getHttpServerAddress(conf); 568 569 // If DFS_NAMENODE_HTTP_BIND_HOST_KEY exists then it overrides the 570 // host name portion of DFS_NAMENODE_HTTP_ADDRESS_KEY. 571 final String bindHost = conf.getTrimmed(DFS_NAMENODE_HTTP_BIND_HOST_KEY); 572 if (bindHost != null && !bindHost.isEmpty()) { 573 bindAddress = new InetSocketAddress(bindHost, bindAddress.getPort()); 574 } 575 576 return bindAddress; 577 } 578 579 /** @return the NameNode HTTP address. */ 580 public static InetSocketAddress getHttpAddress(Configuration conf) { 581 return NetUtils.createSocketAddr( 582 conf.getTrimmed(DFS_NAMENODE_HTTP_ADDRESS_KEY, DFS_NAMENODE_HTTP_ADDRESS_DEFAULT)); 583 } 584 585 protected void loadNamesystem(Configuration conf) throws IOException { 586 this.namesystem = FSNamesystem.loadFromDisk(conf); 587 } 588 589 NamenodeRegistration getRegistration() { 590 return nodeRegistration; 591 } 592 593 NamenodeRegistration setRegistration() { 594 nodeRegistration = new NamenodeRegistration( 595 NetUtils.getHostPortString(rpcServer.getRpcAddress()), 596 NetUtils.getHostPortString(getHttpAddress()), 597 getFSImage().getStorage(), getRole()); 598 return nodeRegistration; 599 } 600 601 /* optimize ugi lookup for RPC operations to avoid a trip through 602 * UGI.getCurrentUser which is synch'ed 603 */ 604 public static UserGroupInformation getRemoteUser() throws IOException { 605 UserGroupInformation ugi = Server.getRemoteUser(); 606 return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser(); 607 } 608 609 610 /** 611 * Login as the configured user for the NameNode. 
612 */ 613 void loginAsNameNodeUser(Configuration conf) throws IOException { 614 InetSocketAddress socAddr = getRpcServerAddress(conf); 615 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 616 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); 617 } 618 619 /** 620 * Initialize name-node. 621 * 622 * @param conf the configuration 623 */ 624 protected void initialize(Configuration conf) throws IOException { 625 if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) { 626 String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY); 627 if (intervals != null) { 628 conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS, 629 intervals); 630 } 631 } 632 633 UserGroupInformation.setConfiguration(conf); 634 loginAsNameNodeUser(conf); 635 636 NameNode.initMetrics(conf, this.getRole()); 637 StartupProgressMetrics.register(startupProgress); 638 639 if (NamenodeRole.NAMENODE == role) { 640 startHttpServer(conf); 641 } 642 643 this.spanReceiverHost = 644 SpanReceiverHost.get(conf, DFSConfigKeys.DFS_SERVER_HTRACE_PREFIX); 645 646 loadNamesystem(conf); 647 648 rpcServer = createRpcServer(conf); 649 if (clientNamenodeAddress == null) { 650 // This is expected for MiniDFSCluster. Set it now using 651 // the RPC server's bind address. 652 clientNamenodeAddress = 653 NetUtils.getHostPortString(rpcServer.getRpcAddress()); 654 LOG.info("Clients are to use " + clientNamenodeAddress + " to access" 655 + " this namenode/service."); 656 } 657 if (NamenodeRole.NAMENODE == role) { 658 httpServer.setNameNodeAddress(getNameNodeAddress()); 659 httpServer.setFSImage(getFSImage()); 660 } 661 662 pauseMonitor = new JvmPauseMonitor(conf); 663 pauseMonitor.start(); 664 metrics.getJvmMetrics().setPauseMonitor(pauseMonitor); 665 666 startCommonServices(conf); 667 } 668 669 /** 670 * Create the RPC server implementation. Used as an extension point for the 671 * BackupNode. 
672 */ 673 protected NameNodeRpcServer createRpcServer(Configuration conf) 674 throws IOException { 675 return new NameNodeRpcServer(conf, this); 676 } 677 678 /** Start the services common to active and standby states */ 679 private void startCommonServices(Configuration conf) throws IOException { 680 namesystem.startCommonServices(conf, haContext); 681 registerNNSMXBean(); 682 if (NamenodeRole.NAMENODE != role) { 683 startHttpServer(conf); 684 httpServer.setNameNodeAddress(getNameNodeAddress()); 685 httpServer.setFSImage(getFSImage()); 686 } 687 rpcServer.start(); 688 try { 689 plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY, 690 ServicePlugin.class); 691 } catch (RuntimeException e) { 692 String pluginsValue = conf.get(DFS_NAMENODE_PLUGINS_KEY); 693 LOG.error("Unable to load NameNode plugins. Specified list of plugins: " + 694 pluginsValue, e); 695 throw e; 696 } 697 for (ServicePlugin p: plugins) { 698 try { 699 p.start(this); 700 } catch (Throwable t) { 701 LOG.warn("ServicePlugin " + p + " could not be started", t); 702 } 703 } 704 LOG.info(getRole() + " RPC up at: " + rpcServer.getRpcAddress()); 705 if (rpcServer.getServiceRpcAddress() != null) { 706 LOG.info(getRole() + " service RPC up at: " 707 + rpcServer.getServiceRpcAddress()); 708 } 709 } 710 711 private void stopCommonServices() { 712 if(rpcServer != null) rpcServer.stop(); 713 if(namesystem != null) namesystem.close(); 714 if (pauseMonitor != null) pauseMonitor.stop(); 715 if (plugins != null) { 716 for (ServicePlugin p : plugins) { 717 try { 718 p.stop(); 719 } catch (Throwable t) { 720 LOG.warn("ServicePlugin " + p + " could not be stopped", t); 721 } 722 } 723 } 724 stopHttpServer(); 725 } 726 727 private void startTrashEmptier(final Configuration conf) throws IOException { 728 long trashInterval = 729 conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT); 730 if (trashInterval == 0) { 731 return; 732 } else if (trashInterval < 0) { 733 throw new IOException("Cannot start trash 
emptier with negative interval." 734 + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value."); 735 } 736 737 // This may be called from the transitionToActive code path, in which 738 // case the current user is the administrator, not the NN. The trash 739 // emptier needs to run as the NN. See HDFS-3972. 740 FileSystem fs = SecurityUtil.doAsLoginUser( 741 new PrivilegedExceptionAction<FileSystem>() { 742 @Override 743 public FileSystem run() throws IOException { 744 return FileSystem.get(conf); 745 } 746 }); 747 this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier"); 748 this.emptier.setDaemon(true); 749 this.emptier.start(); 750 } 751 752 private void stopTrashEmptier() { 753 if (this.emptier != null) { 754 emptier.interrupt(); 755 emptier = null; 756 } 757 } 758 759 private void startHttpServer(final Configuration conf) throws IOException { 760 httpServer = new NameNodeHttpServer(conf, this, getHttpServerBindAddress(conf)); 761 httpServer.start(); 762 httpServer.setStartupProgress(startupProgress); 763 } 764 765 private void stopHttpServer() { 766 try { 767 if (httpServer != null) httpServer.stop(); 768 } catch (Exception e) { 769 LOG.error("Exception while stopping httpserver", e); 770 } 771 } 772 773 /** 774 * Start NameNode. 
   * <p>
   * The name-node can be started with one of the following startup options:
   * <ul>
   * <li>{@link StartupOption#REGULAR REGULAR} - normal name node startup</li>
   * <li>{@link StartupOption#FORMAT FORMAT} - format name node</li>
   * <li>{@link StartupOption#BACKUP BACKUP} - start backup node</li>
   * <li>{@link StartupOption#CHECKPOINT CHECKPOINT} - start checkpoint node</li>
   * <li>{@link StartupOption#UPGRADE UPGRADE} - start the cluster
   * upgrade and create a snapshot of the current file system state</li>
   * <li>{@link StartupOption#UPGRADEONLY UPGRADEONLY} - upgrade the cluster</li>
   * <li>{@link StartupOption#RECOVER RECOVER} - recover name node
   * metadata</li>
   * <li>{@link StartupOption#ROLLBACK ROLLBACK} - roll the
   * cluster back to the previous state</li>
   * <li>{@link StartupOption#FINALIZE FINALIZE} - finalize
   * previous upgrade</li>
   * <li>{@link StartupOption#IMPORT IMPORT} - import checkpoint</li>
   * </ul>
   * The option is passed via configuration field:
   * <tt>dfs.namenode.startup</tt>
   *
   * The conf will be modified to reflect the actual ports on which
   * the NameNode is up and running if the user passes the port as
   * <code>zero</code> in the conf.
799 * 800 * @param conf confirguration 801 * @throws IOException 802 */ 803 public NameNode(Configuration conf) throws IOException { 804 this(conf, NamenodeRole.NAMENODE); 805 } 806 807 protected NameNode(Configuration conf, NamenodeRole role) 808 throws IOException { 809 this.conf = conf; 810 this.role = role; 811 setClientNamenodeAddress(conf); 812 String nsId = getNameServiceId(conf); 813 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 814 this.haEnabled = HAUtil.isHAEnabled(conf, nsId); 815 state = createHAState(getStartupOption(conf)); 816 this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); 817 this.haContext = createHAContext(); 818 try { 819 initializeGenericKeys(conf, nsId, namenodeId); 820 initialize(conf); 821 try { 822 haContext.writeLock(); 823 state.prepareToEnterState(haContext); 824 state.enterState(haContext); 825 } finally { 826 haContext.writeUnlock(); 827 } 828 } catch (IOException e) { 829 this.stop(); 830 throw e; 831 } catch (HadoopIllegalArgumentException e) { 832 this.stop(); 833 throw e; 834 } 835 this.started.set(true); 836 } 837 838 protected HAState createHAState(StartupOption startOpt) { 839 if (!haEnabled || startOpt == StartupOption.UPGRADE 840 || startOpt == StartupOption.UPGRADEONLY) { 841 return ACTIVE_STATE; 842 } else { 843 return STANDBY_STATE; 844 } 845 } 846 847 protected HAContext createHAContext() { 848 return new NameNodeHAContext(); 849 } 850 851 /** 852 * Wait for service to finish. 853 * (Normally, it runs forever.) 854 */ 855 public void join() { 856 try { 857 rpcServer.join(); 858 } catch (InterruptedException ie) { 859 LOG.info("Caught interrupted exception ", ie); 860 } 861 } 862 863 /** 864 * Stop all NameNode threads and wait for all to finish. 
865 */ 866 public void stop() { 867 synchronized(this) { 868 if (stopRequested) 869 return; 870 stopRequested = true; 871 } 872 try { 873 if (state != null) { 874 state.exitState(haContext); 875 } 876 } catch (ServiceFailedException e) { 877 LOG.warn("Encountered exception while exiting state ", e); 878 } finally { 879 stopCommonServices(); 880 if (metrics != null) { 881 metrics.shutdown(); 882 } 883 if (namesystem != null) { 884 namesystem.shutdown(); 885 } 886 if (nameNodeStatusBeanName != null) { 887 MBeans.unregister(nameNodeStatusBeanName); 888 nameNodeStatusBeanName = null; 889 } 890 if (this.spanReceiverHost != null) { 891 this.spanReceiverHost.closeReceivers(); 892 } 893 } 894 } 895 896 synchronized boolean isStopRequested() { 897 return stopRequested; 898 } 899 900 /** 901 * Is the cluster currently in safe mode? 902 */ 903 public boolean isInSafeMode() { 904 return namesystem.isInSafeMode(); 905 } 906 907 /** get FSImage */ 908 @VisibleForTesting 909 public FSImage getFSImage() { 910 return namesystem.getFSImage(); 911 } 912 913 /** 914 * @return NameNode RPC address 915 */ 916 public InetSocketAddress getNameNodeAddress() { 917 return rpcServer.getRpcAddress(); 918 } 919 920 /** 921 * @return NameNode RPC address in "host:port" string form 922 */ 923 public String getNameNodeAddressHostPortString() { 924 return NetUtils.getHostPortString(rpcServer.getRpcAddress()); 925 } 926 927 /** 928 * @return NameNode service RPC address if configured, the 929 * NameNode RPC address otherwise 930 */ 931 public InetSocketAddress getServiceRpcAddress() { 932 final InetSocketAddress serviceAddr = rpcServer.getServiceRpcAddress(); 933 return serviceAddr == null ? 
rpcServer.getRpcAddress() : serviceAddr; 934 } 935 936 /** 937 * @return NameNode HTTP address, used by the Web UI, image transfer, 938 * and HTTP-based file system clients like Hftp and WebHDFS 939 */ 940 public InetSocketAddress getHttpAddress() { 941 return httpServer.getHttpAddress(); 942 } 943 944 /** 945 * @return NameNode HTTPS address, used by the Web UI, image transfer, 946 * and HTTP-based file system clients like Hftp and WebHDFS 947 */ 948 public InetSocketAddress getHttpsAddress() { 949 return httpServer.getHttpsAddress(); 950 } 951 952 /** 953 * Verify that configured directories exist, then 954 * Interactively confirm that formatting is desired 955 * for each existing directory and format them. 956 * 957 * @param conf configuration to use 958 * @param force if true, format regardless of whether dirs exist 959 * @return true if formatting was aborted, false otherwise 960 * @throws IOException 961 */ 962 private static boolean format(Configuration conf, boolean force, 963 boolean isInteractive) throws IOException { 964 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 965 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 966 initializeGenericKeys(conf, nsId, namenodeId); 967 checkAllowFormat(conf); 968 969 if (UserGroupInformation.isSecurityEnabled()) { 970 InetSocketAddress socAddr = getAddress(conf); 971 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 972 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); 973 } 974 975 Collection<URI> nameDirsToFormat = FSNamesystem.getNamespaceDirs(conf); 976 List<URI> sharedDirs = FSNamesystem.getSharedEditsDirs(conf); 977 List<URI> dirsToPrompt = new ArrayList<URI>(); 978 dirsToPrompt.addAll(nameDirsToFormat); 979 dirsToPrompt.addAll(sharedDirs); 980 List<URI> editDirsToFormat = 981 FSNamesystem.getNamespaceEditsDirs(conf); 982 983 // if clusterID is not provided - see if you can find the current one 984 String clusterId = StartupOption.FORMAT.getClusterId(); 985 if(clusterId == null || 
clusterId.equals("")) { 986 //Generate a new cluster id 987 clusterId = NNStorage.newClusterID(); 988 } 989 System.out.println("Formatting using clusterid: " + clusterId); 990 991 FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat); 992 try { 993 FSNamesystem fsn = new FSNamesystem(conf, fsImage); 994 fsImage.getEditLog().initJournalsForWrite(); 995 996 // Abort NameNode format if reformat is disabled and if 997 // meta-dir already exists 998 if (conf.getBoolean(DFSConfigKeys.DFS_REFORMAT_DISABLED, 999 DFSConfigKeys.DFS_REFORMAT_DISABLED_DEFAULT)) { 1000 force = false; 1001 isInteractive = false; 1002 for (StorageDirectory sd : fsImage.storage.dirIterable(null)) { 1003 if (sd.hasSomeData()) { 1004 throw new NameNodeFormatException( 1005 "NameNode format aborted as reformat is disabled for " 1006 + "this cluster."); 1007 } 1008 } 1009 } 1010 1011 if (!fsImage.confirmFormat(force, isInteractive)) { 1012 return true; // aborted 1013 } 1014 1015 fsImage.format(fsn, clusterId); 1016 } catch (IOException ioe) { 1017 LOG.warn("Encountered exception during format: ", ioe); 1018 fsImage.close(); 1019 throw ioe; 1020 } 1021 return false; 1022 } 1023 1024 public static void checkAllowFormat(Configuration conf) throws IOException { 1025 if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 1026 DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) { 1027 throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY 1028 + " is set to false for this filesystem, so it " 1029 + "cannot be formatted. 
You will need to set " 1030 + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter " 1031 + "to true in order to format this filesystem"); 1032 } 1033 } 1034 1035 @VisibleForTesting 1036 public static boolean initializeSharedEdits(Configuration conf) throws IOException { 1037 return initializeSharedEdits(conf, true); 1038 } 1039 1040 @VisibleForTesting 1041 public static boolean initializeSharedEdits(Configuration conf, 1042 boolean force) throws IOException { 1043 return initializeSharedEdits(conf, force, false); 1044 } 1045 1046 /** 1047 * Clone the supplied configuration but remove the shared edits dirs. 1048 * 1049 * @param conf Supplies the original configuration. 1050 * @return Cloned configuration without the shared edit dirs. 1051 * @throws IOException on failure to generate the configuration. 1052 */ 1053 private static Configuration getConfigurationWithoutSharedEdits( 1054 Configuration conf) 1055 throws IOException { 1056 List<URI> editsDirs = FSNamesystem.getNamespaceEditsDirs(conf, false); 1057 String editsDirsString = Joiner.on(",").join(editsDirs); 1058 1059 Configuration confWithoutShared = new Configuration(conf); 1060 confWithoutShared.unset(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY); 1061 confWithoutShared.setStrings(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, 1062 editsDirsString); 1063 return confWithoutShared; 1064 } 1065 1066 /** 1067 * Format a new shared edits dir and copy in enough edit log segments so that 1068 * the standby NN can start up. 
1069 * 1070 * @param conf configuration 1071 * @param force format regardless of whether or not the shared edits dir exists 1072 * @param interactive prompt the user when a dir exists 1073 * @return true if the command aborts, false otherwise 1074 */ 1075 private static boolean initializeSharedEdits(Configuration conf, 1076 boolean force, boolean interactive) throws IOException { 1077 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1078 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1079 initializeGenericKeys(conf, nsId, namenodeId); 1080 1081 if (conf.get(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY) == null) { 1082 LOG.error("No shared edits directory configured for namespace " + 1083 nsId + " namenode " + namenodeId); 1084 return false; 1085 } 1086 1087 if (UserGroupInformation.isSecurityEnabled()) { 1088 InetSocketAddress socAddr = getAddress(conf); 1089 SecurityUtil.login(conf, DFS_NAMENODE_KEYTAB_FILE_KEY, 1090 DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); 1091 } 1092 1093 NNStorage existingStorage = null; 1094 FSImage sharedEditsImage = null; 1095 try { 1096 FSNamesystem fsns = 1097 FSNamesystem.loadFromDisk(getConfigurationWithoutSharedEdits(conf)); 1098 1099 existingStorage = fsns.getFSImage().getStorage(); 1100 NamespaceInfo nsInfo = existingStorage.getNamespaceInfo(); 1101 1102 List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf); 1103 1104 sharedEditsImage = new FSImage(conf, 1105 Lists.<URI>newArrayList(), 1106 sharedEditsDirs); 1107 sharedEditsImage.getEditLog().initJournalsForWrite(); 1108 1109 if (!sharedEditsImage.confirmFormat(force, interactive)) { 1110 return true; // abort 1111 } 1112 1113 NNStorage newSharedStorage = sharedEditsImage.getStorage(); 1114 // Call Storage.format instead of FSImage.format here, since we don't 1115 // actually want to save a checkpoint - just prime the dirs with 1116 // the existing namespace info 1117 newSharedStorage.format(nsInfo); 1118 
sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo); 1119 1120 // Need to make sure the edit log segments are in good shape to initialize 1121 // the shared edits dir. 1122 fsns.getFSImage().getEditLog().close(); 1123 fsns.getFSImage().getEditLog().initJournalsForWrite(); 1124 fsns.getFSImage().getEditLog().recoverUnclosedStreams(); 1125 1126 copyEditLogSegmentsToSharedDir(fsns, sharedEditsDirs, newSharedStorage, 1127 conf); 1128 } catch (IOException ioe) { 1129 LOG.error("Could not initialize shared edits dir", ioe); 1130 return true; // aborted 1131 } finally { 1132 if (sharedEditsImage != null) { 1133 try { 1134 sharedEditsImage.close(); 1135 } catch (IOException ioe) { 1136 LOG.warn("Could not close sharedEditsImage", ioe); 1137 } 1138 } 1139 // Have to unlock storage explicitly for the case when we're running in a 1140 // unit test, which runs in the same JVM as NNs. 1141 if (existingStorage != null) { 1142 try { 1143 existingStorage.unlockAll(); 1144 } catch (IOException ioe) { 1145 LOG.warn("Could not unlock storage directories", ioe); 1146 return true; // aborted 1147 } 1148 } 1149 } 1150 return false; // did not abort 1151 } 1152 1153 private static void copyEditLogSegmentsToSharedDir(FSNamesystem fsns, 1154 Collection<URI> sharedEditsDirs, NNStorage newSharedStorage, 1155 Configuration conf) throws IOException { 1156 Preconditions.checkArgument(!sharedEditsDirs.isEmpty(), 1157 "No shared edits specified"); 1158 // Copy edit log segments into the new shared edits dir. 
1159 List<URI> sharedEditsUris = new ArrayList<URI>(sharedEditsDirs); 1160 FSEditLog newSharedEditLog = new FSEditLog(conf, newSharedStorage, 1161 sharedEditsUris); 1162 newSharedEditLog.initJournalsForWrite(); 1163 newSharedEditLog.recoverUnclosedStreams(); 1164 1165 FSEditLog sourceEditLog = fsns.getFSImage().editLog; 1166 1167 long fromTxId = fsns.getFSImage().getMostRecentCheckpointTxId(); 1168 1169 Collection<EditLogInputStream> streams = null; 1170 try { 1171 streams = sourceEditLog.selectInputStreams(fromTxId + 1, 0); 1172 1173 // Set the nextTxid to the CheckpointTxId+1 1174 newSharedEditLog.setNextTxId(fromTxId + 1); 1175 1176 // Copy all edits after last CheckpointTxId to shared edits dir 1177 for (EditLogInputStream stream : streams) { 1178 LOG.debug("Beginning to copy stream " + stream + " to shared edits"); 1179 FSEditLogOp op; 1180 boolean segmentOpen = false; 1181 while ((op = stream.readOp()) != null) { 1182 if (LOG.isTraceEnabled()) { 1183 LOG.trace("copying op: " + op); 1184 } 1185 if (!segmentOpen) { 1186 newSharedEditLog.startLogSegment(op.txid, false); 1187 segmentOpen = true; 1188 } 1189 1190 newSharedEditLog.logEdit(op); 1191 1192 if (op.opCode == FSEditLogOpCodes.OP_END_LOG_SEGMENT) { 1193 newSharedEditLog.logSync(); 1194 newSharedEditLog.endCurrentLogSegment(false); 1195 LOG.debug("ending log segment because of END_LOG_SEGMENT op in " 1196 + stream); 1197 segmentOpen = false; 1198 } 1199 } 1200 1201 if (segmentOpen) { 1202 LOG.debug("ending log segment because of end of stream in " + stream); 1203 newSharedEditLog.logSync(); 1204 newSharedEditLog.endCurrentLogSegment(false); 1205 segmentOpen = false; 1206 } 1207 } 1208 } finally { 1209 if (streams != null) { 1210 FSEditLog.closeAllStreams(streams); 1211 } 1212 } 1213 } 1214 1215 @VisibleForTesting 1216 public static boolean doRollback(Configuration conf, 1217 boolean isConfirmationNeeded) throws IOException { 1218 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1219 String namenodeId 
= HAUtil.getNameNodeId(conf, nsId); 1220 initializeGenericKeys(conf, nsId, namenodeId); 1221 1222 FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf)); 1223 System.err.print( 1224 "\"rollBack\" will remove the current state of the file system,\n" 1225 + "returning you to the state prior to initiating your recent.\n" 1226 + "upgrade. This action is permanent and cannot be undone. If you\n" 1227 + "are performing a rollback in an HA environment, you should be\n" 1228 + "certain that no NameNode process is running on any host."); 1229 if (isConfirmationNeeded) { 1230 if (!confirmPrompt("Roll back file system state?")) { 1231 System.err.println("Rollback aborted."); 1232 return true; 1233 } 1234 } 1235 nsys.getFSImage().doRollback(nsys); 1236 return false; 1237 } 1238 1239 private static void printUsage(PrintStream out) { 1240 out.println(USAGE + "\n"); 1241 } 1242 1243 @VisibleForTesting 1244 static StartupOption parseArguments(String args[]) { 1245 int argsLen = (args == null) ? 0 : args.length; 1246 StartupOption startOpt = StartupOption.REGULAR; 1247 for(int i=0; i < argsLen; i++) { 1248 String cmd = args[i]; 1249 if (StartupOption.FORMAT.getName().equalsIgnoreCase(cmd)) { 1250 startOpt = StartupOption.FORMAT; 1251 for (i = i + 1; i < argsLen; i++) { 1252 if (args[i].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1253 i++; 1254 if (i >= argsLen) { 1255 // if no cluster id specified, return null 1256 LOG.error("Must specify a valid cluster ID after the " 1257 + StartupOption.CLUSTERID.getName() + " flag"); 1258 return null; 1259 } 1260 String clusterId = args[i]; 1261 // Make sure an id is specified and not another flag 1262 if (clusterId.isEmpty() || 1263 clusterId.equalsIgnoreCase(StartupOption.FORCE.getName()) || 1264 clusterId.equalsIgnoreCase( 1265 StartupOption.NONINTERACTIVE.getName())) { 1266 LOG.error("Must specify a valid cluster ID after the " 1267 + StartupOption.CLUSTERID.getName() + " flag"); 1268 return null; 1269 } 1270 
startOpt.setClusterId(clusterId); 1271 } 1272 1273 if (args[i].equalsIgnoreCase(StartupOption.FORCE.getName())) { 1274 startOpt.setForceFormat(true); 1275 } 1276 1277 if (args[i].equalsIgnoreCase(StartupOption.NONINTERACTIVE.getName())) { 1278 startOpt.setInteractiveFormat(false); 1279 } 1280 } 1281 } else if (StartupOption.GENCLUSTERID.getName().equalsIgnoreCase(cmd)) { 1282 startOpt = StartupOption.GENCLUSTERID; 1283 } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) { 1284 startOpt = StartupOption.REGULAR; 1285 } else if (StartupOption.BACKUP.getName().equalsIgnoreCase(cmd)) { 1286 startOpt = StartupOption.BACKUP; 1287 } else if (StartupOption.CHECKPOINT.getName().equalsIgnoreCase(cmd)) { 1288 startOpt = StartupOption.CHECKPOINT; 1289 } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd) 1290 || StartupOption.UPGRADEONLY.getName().equalsIgnoreCase(cmd)) { 1291 startOpt = StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd) ? 1292 StartupOption.UPGRADE : StartupOption.UPGRADEONLY; 1293 /* Can be followed by CLUSTERID with a required parameter or 1294 * RENAMERESERVED with an optional parameter 1295 */ 1296 while (i + 1 < argsLen) { 1297 String flag = args[i + 1]; 1298 if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) { 1299 if (i + 2 < argsLen) { 1300 i += 2; 1301 startOpt.setClusterId(args[i]); 1302 } else { 1303 LOG.error("Must specify a valid cluster ID after the " 1304 + StartupOption.CLUSTERID.getName() + " flag"); 1305 return null; 1306 } 1307 } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED 1308 .getName())) { 1309 if (i + 2 < argsLen) { 1310 FSImageFormat.setRenameReservedPairs(args[i + 2]); 1311 i += 2; 1312 } else { 1313 FSImageFormat.useDefaultRenameReservedPairs(); 1314 i += 1; 1315 } 1316 } else { 1317 LOG.error("Unknown upgrade flag " + flag); 1318 return null; 1319 } 1320 } 1321 } else if (StartupOption.ROLLINGUPGRADE.getName().equalsIgnoreCase(cmd)) { 1322 startOpt = StartupOption.ROLLINGUPGRADE; 
1323 ++i; 1324 if (i >= argsLen) { 1325 LOG.error("Must specify a rolling upgrade startup option " 1326 + RollingUpgradeStartupOption.getAllOptionString()); 1327 return null; 1328 } 1329 startOpt.setRollingUpgradeStartupOption(args[i]); 1330 } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) { 1331 startOpt = StartupOption.ROLLBACK; 1332 } else if (StartupOption.FINALIZE.getName().equalsIgnoreCase(cmd)) { 1333 startOpt = StartupOption.FINALIZE; 1334 } else if (StartupOption.IMPORT.getName().equalsIgnoreCase(cmd)) { 1335 startOpt = StartupOption.IMPORT; 1336 } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) { 1337 startOpt = StartupOption.BOOTSTRAPSTANDBY; 1338 return startOpt; 1339 } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) { 1340 startOpt = StartupOption.INITIALIZESHAREDEDITS; 1341 for (i = i + 1 ; i < argsLen; i++) { 1342 if (StartupOption.NONINTERACTIVE.getName().equals(args[i])) { 1343 startOpt.setInteractiveFormat(false); 1344 } else if (StartupOption.FORCE.getName().equals(args[i])) { 1345 startOpt.setForceFormat(true); 1346 } else { 1347 LOG.error("Invalid argument: " + args[i]); 1348 return null; 1349 } 1350 } 1351 return startOpt; 1352 } else if (StartupOption.RECOVER.getName().equalsIgnoreCase(cmd)) { 1353 if (startOpt != StartupOption.REGULAR) { 1354 throw new RuntimeException("Can't combine -recover with " + 1355 "other startup options."); 1356 } 1357 startOpt = StartupOption.RECOVER; 1358 while (++i < argsLen) { 1359 if (args[i].equalsIgnoreCase( 1360 StartupOption.FORCE.getName())) { 1361 startOpt.setForce(MetaRecoveryContext.FORCE_FIRST_CHOICE); 1362 } else { 1363 throw new RuntimeException("Error parsing recovery options: " + 1364 "can't understand option \"" + args[i] + "\""); 1365 } 1366 } 1367 } else if (StartupOption.METADATAVERSION.getName().equalsIgnoreCase(cmd)) { 1368 startOpt = StartupOption.METADATAVERSION; 1369 } else { 1370 return null; 1371 } 1372 } 1373 
return startOpt; 1374 } 1375 1376 private static void setStartupOption(Configuration conf, StartupOption opt) { 1377 conf.set(DFS_NAMENODE_STARTUP_KEY, opt.name()); 1378 } 1379 1380 static StartupOption getStartupOption(Configuration conf) { 1381 return StartupOption.valueOf(conf.get(DFS_NAMENODE_STARTUP_KEY, 1382 StartupOption.REGULAR.toString())); 1383 } 1384 1385 private static void doRecovery(StartupOption startOpt, Configuration conf) 1386 throws IOException { 1387 String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1388 String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1389 initializeGenericKeys(conf, nsId, namenodeId); 1390 if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) { 1391 if (!confirmPrompt("You have selected Metadata Recovery mode. " + 1392 "This mode is intended to recover lost metadata on a corrupt " + 1393 "filesystem. Metadata recovery mode often permanently deletes " + 1394 "data from your HDFS filesystem. Please back up your edit log " + 1395 "and fsimage before trying this!\n\n" + 1396 "Are you ready to proceed? (Y/N)\n")) { 1397 System.err.println("Recovery aborted at user request.\n"); 1398 return; 1399 } 1400 } 1401 MetaRecoveryContext.LOG.info("starting recovery..."); 1402 UserGroupInformation.setConfiguration(conf); 1403 NameNode.initMetrics(conf, startOpt.toNodeRole()); 1404 FSNamesystem fsn = null; 1405 try { 1406 fsn = FSNamesystem.loadFromDisk(conf); 1407 fsn.getFSImage().saveNamespace(fsn); 1408 MetaRecoveryContext.LOG.info("RECOVERY COMPLETE"); 1409 } catch (IOException e) { 1410 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); 1411 throw e; 1412 } catch (RuntimeException e) { 1413 MetaRecoveryContext.LOG.info("RECOVERY FAILED: caught exception", e); 1414 throw e; 1415 } finally { 1416 if (fsn != null) 1417 fsn.close(); 1418 } 1419 } 1420 1421 /** 1422 * Verify that configured directories exist, then print the metadata versions 1423 * of the software and the image. 
1424 * 1425 * @param conf configuration to use 1426 * @throws IOException 1427 */ 1428 private static boolean printMetadataVersion(Configuration conf) 1429 throws IOException { 1430 final String nsId = DFSUtil.getNamenodeNameServiceId(conf); 1431 final String namenodeId = HAUtil.getNameNodeId(conf, nsId); 1432 NameNode.initializeGenericKeys(conf, nsId, namenodeId); 1433 final FSImage fsImage = new FSImage(conf); 1434 final FSNamesystem fs = new FSNamesystem(conf, fsImage, false); 1435 return fsImage.recoverTransitionRead( 1436 StartupOption.METADATAVERSION, fs, null); 1437 } 1438 1439 public static NameNode createNameNode(String argv[], Configuration conf) 1440 throws IOException { 1441 LOG.info("createNameNode " + Arrays.asList(argv)); 1442 if (conf == null) 1443 conf = new HdfsConfiguration(); 1444 // Parse out some generic args into Configuration. 1445 GenericOptionsParser hParser = new GenericOptionsParser(conf, argv); 1446 argv = hParser.getRemainingArgs(); 1447 // Parse the rest, NN specific args. 1448 StartupOption startOpt = parseArguments(argv); 1449 if (startOpt == null) { 1450 printUsage(System.err); 1451 return null; 1452 } 1453 setStartupOption(conf, startOpt); 1454 1455 switch (startOpt) { 1456 case FORMAT: { 1457 boolean aborted = format(conf, startOpt.getForceFormat(), 1458 startOpt.getInteractiveFormat()); 1459 terminate(aborted ? 1 : 0); 1460 return null; // avoid javac warning 1461 } 1462 case GENCLUSTERID: { 1463 System.err.println("Generating new cluster id:"); 1464 System.out.println(NNStorage.newClusterID()); 1465 terminate(0); 1466 return null; 1467 } 1468 case FINALIZE: { 1469 System.err.println("Use of the argument '" + StartupOption.FINALIZE + 1470 "' is no longer supported. To finalize an upgrade, start the NN " + 1471 " and then run `hdfs dfsadmin -finalizeUpgrade'"); 1472 terminate(1); 1473 return null; // avoid javac warning 1474 } 1475 case ROLLBACK: { 1476 boolean aborted = doRollback(conf, true); 1477 terminate(aborted ? 
1 : 0); 1478 return null; // avoid warning 1479 } 1480 case BOOTSTRAPSTANDBY: { 1481 String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length); 1482 int rc = BootstrapStandby.run(toolArgs, conf); 1483 terminate(rc); 1484 return null; // avoid warning 1485 } 1486 case INITIALIZESHAREDEDITS: { 1487 boolean aborted = initializeSharedEdits(conf, 1488 startOpt.getForceFormat(), 1489 startOpt.getInteractiveFormat()); 1490 terminate(aborted ? 1 : 0); 1491 return null; // avoid warning 1492 } 1493 case BACKUP: 1494 case CHECKPOINT: { 1495 NamenodeRole role = startOpt.toNodeRole(); 1496 DefaultMetricsSystem.initialize(role.toString().replace(" ", "")); 1497 return new BackupNode(conf, role); 1498 } 1499 case RECOVER: { 1500 NameNode.doRecovery(startOpt, conf); 1501 return null; 1502 } 1503 case METADATAVERSION: { 1504 printMetadataVersion(conf); 1505 terminate(0); 1506 return null; // avoid javac warning 1507 } 1508 case UPGRADEONLY: { 1509 DefaultMetricsSystem.initialize("NameNode"); 1510 new NameNode(conf); 1511 terminate(0); 1512 return null; 1513 } 1514 default: { 1515 DefaultMetricsSystem.initialize("NameNode"); 1516 return new NameNode(conf); 1517 } 1518 } 1519 } 1520 1521 /** 1522 * In federation configuration is set for a set of 1523 * namenode and secondary namenode/backup/checkpointer, which are 1524 * grouped under a logical nameservice ID. The configuration keys specific 1525 * to them have suffix set to configured nameserviceId. 1526 * 1527 * This method copies the value from specific key of format key.nameserviceId 1528 * to key, to set up the generic configuration. Once this is done, only 1529 * generic version of the configuration is read in rest of the code, for 1530 * backward compatibility and simpler code changes. 1531 * 1532 * @param conf 1533 * Configuration object to lookup specific key and to set the value 1534 * to the key passed. 
Note the conf object is modified 1535 * @param nameserviceId name service Id (to distinguish federated NNs) 1536 * @param namenodeId the namenode ID (to distinguish HA NNs) 1537 * @see DFSUtil#setGenericConf(Configuration, String, String, String...) 1538 */ 1539 public static void initializeGenericKeys(Configuration conf, 1540 String nameserviceId, String namenodeId) { 1541 if ((nameserviceId != null && !nameserviceId.isEmpty()) || 1542 (namenodeId != null && !namenodeId.isEmpty())) { 1543 if (nameserviceId != null) { 1544 conf.set(DFS_NAMESERVICE_ID, nameserviceId); 1545 } 1546 if (namenodeId != null) { 1547 conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId); 1548 } 1549 1550 DFSUtil.setGenericConf(conf, nameserviceId, namenodeId, 1551 NAMENODE_SPECIFIC_KEYS); 1552 DFSUtil.setGenericConf(conf, nameserviceId, null, 1553 NAMESERVICE_SPECIFIC_KEYS); 1554 } 1555 1556 // If the RPC address is set use it to (re-)configure the default FS 1557 if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) { 1558 URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" 1559 + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY)); 1560 conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); 1561 LOG.debug("Setting " + FS_DEFAULT_NAME_KEY + " to " + defaultUri.toString()); 1562 } 1563 } 1564 1565 /** 1566 * Get the name service Id for the node 1567 * @return name service Id or null if federation is not configured 1568 */ 1569 protected String getNameServiceId(Configuration conf) { 1570 return DFSUtil.getNamenodeNameServiceId(conf); 1571 } 1572 1573 /** 1574 */ 1575 public static void main(String argv[]) throws Exception { 1576 if (DFSUtil.parseHelpArgument(argv, NameNode.USAGE, System.out, true)) { 1577 System.exit(0); 1578 } 1579 1580 try { 1581 StringUtils.startupShutdownMessage(NameNode.class, argv, LOG); 1582 NameNode namenode = createNameNode(argv, null); 1583 if (namenode != null) { 1584 namenode.join(); 1585 } 1586 } catch (Throwable e) { 1587 LOG.error("Failed to start namenode.", e); 1588 
terminate(1, e); 1589 } 1590 } 1591 1592 synchronized void monitorHealth() 1593 throws HealthCheckFailedException, AccessControlException { 1594 namesystem.checkSuperuserPrivilege(); 1595 if (!haEnabled) { 1596 return; // no-op, if HA is not enabled 1597 } 1598 getNamesystem().checkAvailableResources(); 1599 if (!getNamesystem().nameNodeHasResourcesAvailable()) { 1600 throw new HealthCheckFailedException( 1601 "The NameNode has no resources available"); 1602 } 1603 } 1604 1605 synchronized void transitionToActive() 1606 throws ServiceFailedException, AccessControlException { 1607 namesystem.checkSuperuserPrivilege(); 1608 if (!haEnabled) { 1609 throw new ServiceFailedException("HA for namenode is not enabled"); 1610 } 1611 state.setState(haContext, ACTIVE_STATE); 1612 } 1613 1614 synchronized void transitionToStandby() 1615 throws ServiceFailedException, AccessControlException { 1616 namesystem.checkSuperuserPrivilege(); 1617 if (!haEnabled) { 1618 throw new ServiceFailedException("HA for namenode is not enabled"); 1619 } 1620 state.setState(haContext, STANDBY_STATE); 1621 } 1622 1623 synchronized HAServiceStatus getServiceStatus() 1624 throws ServiceFailedException, AccessControlException { 1625 namesystem.checkSuperuserPrivilege(); 1626 if (!haEnabled) { 1627 throw new ServiceFailedException("HA for namenode is not enabled"); 1628 } 1629 if (state == null) { 1630 return new HAServiceStatus(HAServiceState.INITIALIZING); 1631 } 1632 HAServiceState retState = state.getServiceState(); 1633 HAServiceStatus ret = new HAServiceStatus(retState); 1634 if (retState == HAServiceState.STANDBY) { 1635 String safemodeTip = namesystem.getSafeModeTip(); 1636 if (!safemodeTip.isEmpty()) { 1637 ret.setNotReadyToBecomeActive( 1638 "The NameNode is in safemode. 
" + 1639 safemodeTip); 1640 } else { 1641 ret.setReadyToBecomeActive(); 1642 } 1643 } else if (retState == HAServiceState.ACTIVE) { 1644 ret.setReadyToBecomeActive(); 1645 } else { 1646 ret.setNotReadyToBecomeActive("State is " + state); 1647 } 1648 return ret; 1649 } 1650 1651 synchronized HAServiceState getServiceState() { 1652 if (state == null) { 1653 return HAServiceState.INITIALIZING; 1654 } 1655 return state.getServiceState(); 1656 } 1657 1658 /** 1659 * Register NameNodeStatusMXBean 1660 */ 1661 private void registerNNSMXBean() { 1662 nameNodeStatusBeanName = MBeans.register("NameNode", "NameNodeStatus", this); 1663 } 1664 1665 @Override // NameNodeStatusMXBean 1666 public String getNNRole() { 1667 String roleStr = ""; 1668 NamenodeRole role = getRole(); 1669 if (null != role) { 1670 roleStr = role.toString(); 1671 } 1672 return roleStr; 1673 } 1674 1675 @Override // NameNodeStatusMXBean 1676 public String getState() { 1677 String servStateStr = ""; 1678 HAServiceState servState = getServiceState(); 1679 if (null != servState) { 1680 servStateStr = servState.toString(); 1681 } 1682 return servStateStr; 1683 } 1684 1685 @Override // NameNodeStatusMXBean 1686 public String getHostAndPort() { 1687 return getNameNodeAddressHostPortString(); 1688 } 1689 1690 @Override // NameNodeStatusMXBean 1691 public boolean isSecurityEnabled() { 1692 return UserGroupInformation.isSecurityEnabled(); 1693 } 1694 1695 @Override // NameNodeStatusMXBean 1696 public long getLastHATransitionTime() { 1697 return state.getLastHATransitionTime(); 1698 } 1699 1700 /** 1701 * Shutdown the NN immediately in an ungraceful way. Used when it would be 1702 * unsafe for the NN to continue operating, e.g. during a failed HA state 1703 * transition. 1704 * 1705 * @param t exception which warrants the shutdown. Printed to the NN log 1706 * before exit. 1707 * @throws ExitException thrown only for testing. 
1708 */ 1709 protected synchronized void doImmediateShutdown(Throwable t) 1710 throws ExitException { 1711 String message = "Error encountered requiring NN shutdown. " + 1712 "Shutting down immediately."; 1713 try { 1714 LOG.error(message, t); 1715 } catch (Throwable ignored) { 1716 // This is unlikely to happen, but there's nothing we can do if it does. 1717 } 1718 terminate(1, t); 1719 } 1720 1721 /** 1722 * Class used to expose {@link NameNode} as context to {@link HAState} 1723 */ 1724 protected class NameNodeHAContext implements HAContext { 1725 @Override 1726 public void setState(HAState s) { 1727 state = s; 1728 } 1729 1730 @Override 1731 public HAState getState() { 1732 return state; 1733 } 1734 1735 @Override 1736 public void startActiveServices() throws IOException { 1737 try { 1738 namesystem.startActiveServices(); 1739 startTrashEmptier(conf); 1740 } catch (Throwable t) { 1741 doImmediateShutdown(t); 1742 } 1743 } 1744 1745 @Override 1746 public void stopActiveServices() throws IOException { 1747 try { 1748 if (namesystem != null) { 1749 namesystem.stopActiveServices(); 1750 } 1751 stopTrashEmptier(); 1752 } catch (Throwable t) { 1753 doImmediateShutdown(t); 1754 } 1755 } 1756 1757 @Override 1758 public void startStandbyServices() throws IOException { 1759 try { 1760 namesystem.startStandbyServices(conf); 1761 } catch (Throwable t) { 1762 doImmediateShutdown(t); 1763 } 1764 } 1765 1766 @Override 1767 public void prepareToStopStandbyServices() throws ServiceFailedException { 1768 try { 1769 namesystem.prepareToStopStandbyServices(); 1770 } catch (Throwable t) { 1771 doImmediateShutdown(t); 1772 } 1773 } 1774 1775 @Override 1776 public void stopStandbyServices() throws IOException { 1777 try { 1778 if (namesystem != null) { 1779 namesystem.stopStandbyServices(); 1780 } 1781 } catch (Throwable t) { 1782 doImmediateShutdown(t); 1783 } 1784 } 1785 1786 @Override 1787 public void writeLock() { 1788 namesystem.writeLock(); 1789 namesystem.lockRetryCache(); 
1790 } 1791 1792 @Override 1793 public void writeUnlock() { 1794 namesystem.unlockRetryCache(); 1795 namesystem.writeUnlock(); 1796 } 1797 1798 /** Check if an operation of given category is allowed */ 1799 @Override 1800 public void checkOperation(final OperationCategory op) 1801 throws StandbyException { 1802 state.checkOperation(haContext, op); 1803 } 1804 1805 @Override 1806 public boolean allowStaleReads() { 1807 return allowStaleStandbyReads; 1808 } 1809 1810 } 1811 1812 public boolean isStandbyState() { 1813 return (state.equals(STANDBY_STATE)); 1814 } 1815 1816 public boolean isActiveState() { 1817 return (state.equals(ACTIVE_STATE)); 1818 } 1819 1820 /** 1821 * Returns whether the NameNode is completely started 1822 */ 1823 boolean isStarted() { 1824 return this.started.get(); 1825 } 1826 1827 /** 1828 * Check that a request to change this node's HA state is valid. 1829 * In particular, verifies that, if auto failover is enabled, non-forced 1830 * requests from the HAAdmin CLI are rejected, and vice versa. 
1831 * 1832 * @param req the request to check 1833 * @throws AccessControlException if the request is disallowed 1834 */ 1835 void checkHaStateChange(StateChangeRequestInfo req) 1836 throws AccessControlException { 1837 boolean autoHaEnabled = conf.getBoolean(DFS_HA_AUTO_FAILOVER_ENABLED_KEY, 1838 DFS_HA_AUTO_FAILOVER_ENABLED_DEFAULT); 1839 switch (req.getSource()) { 1840 case REQUEST_BY_USER: 1841 if (autoHaEnabled) { 1842 throw new AccessControlException( 1843 "Manual HA control for this NameNode is disallowed, because " + 1844 "automatic HA is enabled."); 1845 } 1846 break; 1847 case REQUEST_BY_USER_FORCED: 1848 if (autoHaEnabled) { 1849 LOG.warn("Allowing manual HA control from " + 1850 Server.getRemoteAddress() + 1851 " even though automatic HA is enabled, because the user " + 1852 "specified the force flag"); 1853 } 1854 break; 1855 case REQUEST_BY_ZKFC: 1856 if (!autoHaEnabled) { 1857 throw new AccessControlException( 1858 "Request from ZK failover controller at " + 1859 Server.getRemoteAddress() + " denied since automatic HA " + 1860 "is not enabled"); 1861 } 1862 break; 1863 } 1864 } 1865}