/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.blockmanagement;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
import org.apache.hadoop.hdfs.server.namenode.NameNode;

/**
 * Represents a block that is currently being constructed.<br>
 * This is usually the last block of a file opened for write or append.
 */
public class BlockInfoContiguousUnderConstruction extends BlockInfoContiguous {
  /** Block state. See {@link BlockUCState} */
  private BlockUCState blockUCState;

  /**
   * Block replicas as assigned when the block was allocated.
   * This defines the pipeline order.
   */
  private List<ReplicaUnderConstruction> replicas;

  /**
   * Index of the primary data node doing the recovery. Useful for log
   * messages.
   */
  private int primaryNodeIndex = -1;

  /**
   * The new generation stamp, which this block will have
   * after the recovery succeeds. Also used as a recovery id to identify
   * the right recovery if any of the abandoned recoveries re-appear.
   */
  private long blockRecoveryId = 0;

  /**
   * The block source to use in the event of copy-on-write truncate.
   */
  private Block truncateBlock;

  /**
   * ReplicaUnderConstruction contains information about replicas while
   * they are under construction.
   * The GS, the length and the state of the replica is as reported by
   * the data-node.
   * It is not guaranteed, but expected, that data-nodes actually have
   * corresponding replicas.
   */
  static class ReplicaUnderConstruction extends Block {
    private final DatanodeStorageInfo expectedLocation;
    private ReplicaState state;
    private boolean chosenAsPrimary;

    /**
     * Create a replica record copying the identity of {@code block}.
     * The replica starts out not chosen as recovery primary.
     *
     * @param block  block whose values are passed to the {@link Block}
     *               copy constructor
     * @param target storage the replica is expected to live on
     * @param state  initial replica state
     */
    ReplicaUnderConstruction(Block block,
                             DatanodeStorageInfo target,
                             ReplicaState state) {
      super(block);
      this.expectedLocation = target;
      this.state = state;
      this.chosenAsPrimary = false;
    }

    /**
     * Expected block replica location as assigned when the block was allocated.
     * This defines the pipeline order.
     * It is not guaranteed, but expected, that the data-node actually has
     * the replica.
     */
    public DatanodeStorageInfo getExpectedStorageLocation() {
      return expectedLocation;
    }

    /**
     * Get replica state as reported by the data-node.
     */
    ReplicaState getState() {
      return state;
    }

    /**
     * Whether the replica was chosen for recovery.
     */
    boolean getChosenAsPrimary() {
      return chosenAsPrimary;
    }

    /**
     * Set replica state.
     */
    void setState(ReplicaState s) {
      state = s;
    }

    /**
     * Set whether this replica was chosen for recovery.
     */
    void setChosenAsPrimary(boolean chosenAsPrimary) {
      this.chosenAsPrimary = chosenAsPrimary;
    }

    /**
     * Is data-node the replica belongs to alive.
     * Reads the {@code isAlive} flag of the expected location's
     * datanode descriptor.
     */
    boolean isAlive() {
      return expectedLocation.getDatanodeDescriptor().isAlive;
    }

    @Override // Block
    public int hashCode() {
      return super.hashCode();
    }

    @Override // Block
    public boolean equals(Object obj) {
      // Sufficient to rely on super's implementation
      return (this == obj) || super.equals(obj);
    }

    @Override
    public String toString() {
      final StringBuilder b = new StringBuilder(50);
      appendStringTo(b);
      return b.toString();
    }

    /**
     * Append {@code ReplicaUC[<expectedLocation>|<state>]} to {@code sb}.
     */
    @Override
    public void appendStringTo(StringBuilder sb) {
      sb.append("ReplicaUC[")
        .append(expectedLocation)
        .append("|")
        .append(state)
        .append("]");
    }
  }

  /**
   * Create block and set its state to
   * {@link BlockUCState#UNDER_CONSTRUCTION}.
   */
  public BlockInfoContiguousUnderConstruction(Block blk, short replication) {
    this(blk, replication, BlockUCState.UNDER_CONSTRUCTION, null);
  }

  /**
   * Create a block that is currently being constructed.
   *
   * @param blk         block passed to the superclass constructor
   * @param replication replication passed to the superclass constructor
   * @param state       initial under-construction state; must not be
   *                    {@link BlockUCState#COMPLETE}
   * @param targets     expected replica locations, may be {@code null}
   */
  public BlockInfoContiguousUnderConstruction(Block blk, short replication,
      BlockUCState state, DatanodeStorageInfo[] targets) {
    super(blk, replication);
    // Check the requested state itself: the field is still unset here, so
    // asserting on getBlockUCState() could never fail.
    assert state != BlockUCState.COMPLETE :
      "BlockInfoUnderConstruction cannot be in COMPLETE state";
    this.blockUCState = state;
    setExpectedLocations(targets);
  }

  /**
   * Convert an under construction block to a complete block.
   * The body only asserts that this block is not already COMPLETE and then
   * copies it into a new {@link BlockInfoContiguous}.
   *
   * @return BlockInfo - a complete block.
   * @throws IOException declared by the signature; no check in this method
   * raises it directly (NOTE(review): presumably kept for callers and
   * overriding code - confirm).
   */
  BlockInfoContiguous convertToCompleteBlock() throws IOException {
    assert getBlockUCState() != BlockUCState.COMPLETE :
      "Trying to convert a COMPLETE block";
    return new BlockInfoContiguous(this);
  }

  /**
   * Set expected locations.
   * Replaces the replica list with one {@link ReplicaState#RBW} replica per
   * target, in array order. A {@code null} array yields an empty list.
   */
  public void setExpectedLocations(DatanodeStorageInfo[] targets) {
    int numLocations = targets == null ? 0 : targets.length;
    this.replicas = new ArrayList<ReplicaUnderConstruction>(numLocations);
    for (int i = 0; i < numLocations; i++) {
      replicas.add(
          new ReplicaUnderConstruction(this, targets[i], ReplicaState.RBW));
    }
  }

  /**
   * Create array of expected replica locations
   * (as has been assigned by chooseTargets()).
   *
   * @return a new array, empty when there are no replicas
   */
  public DatanodeStorageInfo[] getExpectedStorageLocations() {
    int numLocations = replicas == null ? 0 : replicas.size();
    DatanodeStorageInfo[] storages = new DatanodeStorageInfo[numLocations];
    for (int i = 0; i < numLocations; i++) {
      storages[i] = replicas.get(i).getExpectedStorageLocation();
    }
    return storages;
  }

  /** Get the number of expected locations */
  public int getNumExpectedLocations() {
    return replicas == null ? 0 : replicas.size();
  }

  /**
   * Return the state of the block under construction.
   * @see BlockUCState
   */
  @Override // BlockInfo
  public BlockUCState getBlockUCState() {
    return blockUCState;
  }

  void setBlockUCState(BlockUCState s) {
    blockUCState = s;
  }

  /** Get block recovery ID */
  public long getBlockRecoveryId() {
    return blockRecoveryId;
  }

  /** Get recover block */
  public Block getTruncateBlock() {
    return truncateBlock;
  }

  /** Set the block returned by {@link #getTruncateBlock()}. */
  public void setTruncateBlock(Block recoveryBlock) {
    this.truncateBlock = recoveryBlock;
  }

  /**
   * Process the recorded replicas. When about to commit or finish the
   * pipeline recovery sort out bad replicas.
   * @param genStamp  The final generation stamp for the block.
   * @return the replicas whose generation stamp differs from
   * {@code genStamp}; an empty list (never {@code null}) when there are none.
   */
  public List<ReplicaUnderConstruction> setGenerationStampAndVerifyReplicas(
      long genStamp) {
    // Set the generation stamp for the block.
    setGenerationStamp(genStamp);

    List<ReplicaUnderConstruction> staleReplicas = new ArrayList<>();
    if (replicas == null) {
      // Nothing recorded: hand back an empty list so callers can iterate
      // the result without a null check.
      return staleReplicas;
    }

    // Collect replicas with a mismatched gen stamp. The replica list itself
    // is left unchanged.
    for (ReplicaUnderConstruction r : replicas) {
      if (genStamp != r.getGenerationStamp()) {
        staleReplicas.add(r);
      }
    }
    return staleReplicas;
  }

  /**
   * Commit block's length and generation stamp as reported by the client.
   * Set block state to {@link BlockUCState#COMMITTED}.
   * @param block - contains client reported block length and generation
   * @return staleReplica's List.
   * @throws IOException if block ids are inconsistent.
   */
  List<ReplicaUnderConstruction> commitBlock(Block block) throws IOException {
    if (getBlockId() != block.getBlockId()) {
      throw new IOException("Trying to commit inconsistent block: id = "
          + block.getBlockId() + ", expected id = " + getBlockId());
    }
    blockUCState = BlockUCState.COMMITTED;
    this.setNumBytes(block.getNumBytes());
    // Sort out invalid replicas.
    return setGenerationStampAndVerifyReplicas(block.getGenerationStamp());
  }

  /**
   * Initialize lease recovery for this block.
   * Among the alive replicas that have not yet been chosen as primary, pick
   * the one whose datanode has the most recent monotonic last-update time,
   * make it primary and queue this block for recovery on that datanode.
   * If every alive replica has already been tried, all "chosen" flags are
   * cleared first so that the selection can start over.
   *
   * @param recoveryId recovery id recorded as the block recovery id
   */
  public void initializeBlockRecovery(long recoveryId) {
    setBlockUCState(BlockUCState.UNDER_RECOVERY);
    blockRecoveryId = recoveryId;
    if (replicas.size() == 0) {
      NameNode.blockStateChangeLog.warn("BLOCK*"
          + " BlockInfoUnderConstruction.initLeaseRecovery:"
          + " No blocks found, lease removed.");
    }
    boolean allLiveReplicasTriedAsPrimary = true;
    for (ReplicaUnderConstruction r : replicas) {
      // Check if all replicas have been tried or not.
      if (r.isAlive()) {
        allLiveReplicasTriedAsPrimary =
            (allLiveReplicasTriedAsPrimary && r.getChosenAsPrimary());
      }
    }
    if (allLiveReplicasTriedAsPrimary) {
      // Every alive replica has had its turn: clear the "chosen" flag on all
      // replicas, alive or not, so that each one is eligible again.
      for (ReplicaUnderConstruction r : replicas) {
        r.setChosenAsPrimary(false);
      }
    }
    long mostRecentLastUpdate = 0;
    ReplicaUnderConstruction primary = null;
    primaryNodeIndex = -1;
    for (int i = 0; i < replicas.size(); i++) {
      final ReplicaUnderConstruction ruc = replicas.get(i);
      // Only alive replicas not yet chosen for recovery are candidates.
      if (!(ruc.isAlive() && !ruc.getChosenAsPrimary())) {
        continue;
      }
      final long lastUpdate = ruc.getExpectedStorageLocation()
          .getDatanodeDescriptor().getLastUpdateMonotonic();
      if (lastUpdate > mostRecentLastUpdate) {
        primaryNodeIndex = i;
        primary = ruc;
        mostRecentLastUpdate = lastUpdate;
      }
    }
    if (primary != null) {
      primary.getExpectedStorageLocation().getDatanodeDescriptor()
          .addBlockToBeRecovered(this);
      primary.setChosenAsPrimary(true);
      NameNode.blockStateChangeLog.info(
          "BLOCK* {} recovery started, primary={}", this, primary);
    }
  }

  /**
   * Record a reported replica.
   * If a replica with exactly this storage is already recorded, only its
   * generation stamp is updated. If a replica is recorded on a different
   * storage of the same datanode, that entry is removed and a new one is
   * appended. Otherwise a new entry is appended.
   *
   * @param storage storage the replica was reported on
   * @param block   reported block, source of the generation stamp
   * @param rState  replica state for a newly added entry
   */
  void addReplicaIfNotPresent(DatanodeStorageInfo storage,
                     Block block,
                     ReplicaState rState) {
    Iterator<ReplicaUnderConstruction> it = replicas.iterator();
    while (it.hasNext()) {
      ReplicaUnderConstruction r = it.next();
      DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation();
      if (expectedLocation == storage) {
        // Record the gen stamp from the report
        r.setGenerationStamp(block.getGenerationStamp());
        return;
      } else if (expectedLocation != null &&
                 expectedLocation.getDatanodeDescriptor() ==
                     storage.getDatanodeDescriptor()) {

        // The Datanode reported that the block is on a different storage
        // than the one chosen by BlockPlacementPolicy. This can occur as
        // we allow Datanodes to choose the target storage. Update our
        // state by removing the stale entry and adding a new one.
        it.remove();
        break;
      }
    }
    replicas.add(new ReplicaUnderConstruction(block, storage, rState));
  }

  @Override // BlockInfo
  // BlockInfoUnderConstruction participates in maps the same way as BlockInfo
  public int hashCode() {
    return super.hashCode();
  }

  @Override // BlockInfo
  public boolean equals(Object obj) {
    // Sufficient to rely on super's implementation
    return (this == obj) || super.equals(obj);
  }

  @Override
  public String toString() {
    final StringBuilder b = new StringBuilder(100);
    appendStringTo(b);
    return b.toString();
  }

  @Override
  public void appendStringTo(StringBuilder sb) {
    super.appendStringTo(sb);
    appendUCParts(sb);
  }

  /**
   * Append the under-construction details (state, truncate block, primary
   * node index and the comma-separated replica list) to {@code sb}.
   */
  private void appendUCParts(StringBuilder sb) {
    sb.append("{UCState=").append(blockUCState)
      .append(", truncateBlock=").append(truncateBlock)
      .append(", primaryNodeIndex=").append(primaryNodeIndex)
      .append(", replicas=[");
    if (replicas != null) {
      Iterator<ReplicaUnderConstruction> iter = replicas.iterator();
      if (iter.hasNext()) {
        iter.next().appendStringTo(sb);
        while (iter.hasNext()) {
          sb.append(", ");
          iter.next().appendStringTo(sb);
        }
      }
    }
    sb.append("]}");
  }
}