001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hdfs.server.blockmanagement;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Iterator;
023import java.util.List;
024
025import org.apache.hadoop.hdfs.protocol.Block;
026import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
027import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
028import org.apache.hadoop.hdfs.server.namenode.NameNode;
029
030/**
031 * Represents a block that is currently being constructed.<br>
032 * This is usually the last block of a file opened for write or append.
033 */
034public class BlockInfoContiguousUnderConstruction extends BlockInfoContiguous {
035  /** Block state. See {@link BlockUCState} */
036  private BlockUCState blockUCState;
037
038  /**
039   * Block replicas as assigned when the block was allocated.
040   * This defines the pipeline order.
041   */
042  private List<ReplicaUnderConstruction> replicas;
043
044  /**
045   * Index of the primary data node doing the recovery. Useful for log
046   * messages.
047   */
048  private int primaryNodeIndex = -1;
049
050  /**
051   * The new generation stamp, which this block will have
052   * after the recovery succeeds. Also used as a recovery id to identify
053   * the right recovery if any of the abandoned recoveries re-appear.
054   */
055  private long blockRecoveryId = 0;
056
057  /**
058   * The block source to use in the event of copy-on-write truncate.
059   */
060  private Block truncateBlock;
061
062  /**
063   * ReplicaUnderConstruction contains information about replicas while
064   * they are under construction.
065   * The GS, the length and the state of the replica is as reported by 
066   * the data-node.
067   * It is not guaranteed, but expected, that data-nodes actually have
068   * corresponding replicas.
069   */
070  static class ReplicaUnderConstruction extends Block {
071    private final DatanodeStorageInfo expectedLocation;
072    private ReplicaState state;
073    private boolean chosenAsPrimary;
074
075    ReplicaUnderConstruction(Block block,
076                             DatanodeStorageInfo target,
077                             ReplicaState state) {
078      super(block);
079      this.expectedLocation = target;
080      this.state = state;
081      this.chosenAsPrimary = false;
082    }
083
084    /**
085     * Expected block replica location as assigned when the block was allocated.
086     * This defines the pipeline order.
087     * It is not guaranteed, but expected, that the data-node actually has
088     * the replica.
089     */
090    public DatanodeStorageInfo getExpectedStorageLocation() {
091      return expectedLocation;
092    }
093
094    /**
095     * Get replica state as reported by the data-node.
096     */
097    ReplicaState getState() {
098      return state;
099    }
100
101    /**
102     * Whether the replica was chosen for recovery.
103     */
104    boolean getChosenAsPrimary() {
105      return chosenAsPrimary;
106    }
107
108    /**
109     * Set replica state.
110     */
111    void setState(ReplicaState s) {
112      state = s;
113    }
114
115    /**
116     * Set whether this replica was chosen for recovery.
117     */
118    void setChosenAsPrimary(boolean chosenAsPrimary) {
119      this.chosenAsPrimary = chosenAsPrimary;
120    }
121
122    /**
123     * Is data-node the replica belongs to alive.
124     */
125    boolean isAlive() {
126      return expectedLocation.getDatanodeDescriptor().isAlive;
127    }
128
129    @Override // Block
130    public int hashCode() {
131      return super.hashCode();
132    }
133
134    @Override // Block
135    public boolean equals(Object obj) {
136      // Sufficient to rely on super's implementation
137      return (this == obj) || super.equals(obj);
138    }
139
140    @Override
141    public String toString() {
142      final StringBuilder b = new StringBuilder(50);
143      appendStringTo(b);
144      return b.toString();
145    }
146    
147    @Override
148    public void appendStringTo(StringBuilder sb) {
149      sb.append("ReplicaUC[")
150        .append(expectedLocation)
151        .append("|")
152        .append(state)
153        .append("]");
154    }
155  }
156
157  /**
158   * Create block and set its state to
159   * {@link BlockUCState#UNDER_CONSTRUCTION}.
160   */
161  public BlockInfoContiguousUnderConstruction(Block blk, short replication) {
162    this(blk, replication, BlockUCState.UNDER_CONSTRUCTION, null);
163  }
164
165  /**
166   * Create a block that is currently being constructed.
167   */
168  public BlockInfoContiguousUnderConstruction(Block blk, short replication, BlockUCState state, DatanodeStorageInfo[] targets) {
169    super(blk, replication);
170    assert getBlockUCState() != BlockUCState.COMPLETE :
171      "BlockInfoUnderConstruction cannot be in COMPLETE state";
172    this.blockUCState = state;
173    setExpectedLocations(targets);
174  }
175
176  /**
177   * Convert an under construction block to a complete block.
178   * 
179   * @return BlockInfo - a complete block.
180   * @throws IOException if the state of the block 
181   * (the generation stamp and the length) has not been committed by 
182   * the client or it does not have at least a minimal number of replicas 
183   * reported from data-nodes. 
184   */
185  BlockInfoContiguous convertToCompleteBlock() throws IOException {
186    assert getBlockUCState() != BlockUCState.COMPLETE :
187      "Trying to convert a COMPLETE block";
188    return new BlockInfoContiguous(this);
189  }
190
191  /** Set expected locations */
192  public void setExpectedLocations(DatanodeStorageInfo[] targets) {
193    int numLocations = targets == null ? 0 : targets.length;
194    this.replicas = new ArrayList<ReplicaUnderConstruction>(numLocations);
195    for(int i = 0; i < numLocations; i++)
196      replicas.add(
197        new ReplicaUnderConstruction(this, targets[i], ReplicaState.RBW));
198  }
199
200  /**
201   * Create array of expected replica locations
202   * (as has been assigned by chooseTargets()).
203   */
204  public DatanodeStorageInfo[] getExpectedStorageLocations() {
205    int numLocations = replicas == null ? 0 : replicas.size();
206    DatanodeStorageInfo[] storages = new DatanodeStorageInfo[numLocations];
207    for(int i = 0; i < numLocations; i++)
208      storages[i] = replicas.get(i).getExpectedStorageLocation();
209    return storages;
210  }
211
212  /** Get the number of expected locations */
213  public int getNumExpectedLocations() {
214    return replicas == null ? 0 : replicas.size();
215  }
216
217  /**
218   * Return the state of the block under construction.
219   * @see BlockUCState
220   */
221  @Override // BlockInfo
222  public BlockUCState getBlockUCState() {
223    return blockUCState;
224  }
225
226  void setBlockUCState(BlockUCState s) {
227    blockUCState = s;
228  }
229
230  /** Get block recovery ID */
231  public long getBlockRecoveryId() {
232    return blockRecoveryId;
233  }
234
235  /** Get recover block */
236  public Block getTruncateBlock() {
237    return truncateBlock;
238  }
239
240  public void setTruncateBlock(Block recoveryBlock) {
241    this.truncateBlock = recoveryBlock;
242  }
243
244  /**
245   * Process the recorded replicas. When about to commit or finish the
246   * pipeline recovery sort out bad replicas.
247   * @param genStamp  The final generation stamp for the block.
248   * @return staleReplica's List.
249   */
250  public List<ReplicaUnderConstruction> setGenerationStampAndVerifyReplicas(
251      long genStamp) {
252    // Set the generation stamp for the block.
253    setGenerationStamp(genStamp);
254    if (replicas == null)
255      return null;
256
257    List<ReplicaUnderConstruction> staleReplicas = new ArrayList<>();
258    // Remove replicas with wrong gen stamp. The replica list is unchanged.
259    for (ReplicaUnderConstruction r : replicas) {
260      if (genStamp != r.getGenerationStamp()) {
261        staleReplicas.add(r);
262      }
263    }
264    return staleReplicas;
265  }
266
267  /**
268   * Commit block's length and generation stamp as reported by the client.
269   * Set block state to {@link BlockUCState#COMMITTED}.
270   * @param block - contains client reported block length and generation 
271   * @return staleReplica's List.
272   * @throws IOException if block ids are inconsistent.
273   */
274  List<ReplicaUnderConstruction> commitBlock(Block block) throws IOException {
275    if(getBlockId() != block.getBlockId())
276      throw new IOException("Trying to commit inconsistent block: id = "
277          + block.getBlockId() + ", expected id = " + getBlockId());
278    blockUCState = BlockUCState.COMMITTED;
279    this.setNumBytes(block.getNumBytes());
280    // Sort out invalid replicas.
281    return setGenerationStampAndVerifyReplicas(block.getGenerationStamp());
282  }
283
284  /**
285   * Initialize lease recovery for this block.
286   * Find the first alive data-node starting from the previous primary and
287   * make it primary.
288   */
289  public void initializeBlockRecovery(long recoveryId) {
290    setBlockUCState(BlockUCState.UNDER_RECOVERY);
291    blockRecoveryId = recoveryId;
292    if (replicas.size() == 0) {
293      NameNode.blockStateChangeLog.warn("BLOCK*"
294        + " BlockInfoUnderConstruction.initLeaseRecovery:"
295        + " No blocks found, lease removed.");
296    }
297    boolean allLiveReplicasTriedAsPrimary = true;
298    for (int i = 0; i < replicas.size(); i++) {
299      // Check if all replicas have been tried or not.
300      if (replicas.get(i).isAlive()) {
301        allLiveReplicasTriedAsPrimary =
302            (allLiveReplicasTriedAsPrimary && replicas.get(i).getChosenAsPrimary());
303      }
304    }
305    if (allLiveReplicasTriedAsPrimary) {
306      // Just set all the replicas to be chosen whether they are alive or not.
307      for (int i = 0; i < replicas.size(); i++) {
308        replicas.get(i).setChosenAsPrimary(false);
309      }
310    }
311    long mostRecentLastUpdate = 0;
312    ReplicaUnderConstruction primary = null;
313    primaryNodeIndex = -1;
314    for(int i = 0; i < replicas.size(); i++) {
315      // Skip alive replicas which have been chosen for recovery.
316      if (!(replicas.get(i).isAlive() && !replicas.get(i).getChosenAsPrimary())) {
317        continue;
318      }
319      final ReplicaUnderConstruction ruc = replicas.get(i);
320      final long lastUpdate = ruc.getExpectedStorageLocation()
321          .getDatanodeDescriptor().getLastUpdateMonotonic();
322      if (lastUpdate > mostRecentLastUpdate) {
323        primaryNodeIndex = i;
324        primary = ruc;
325        mostRecentLastUpdate = lastUpdate;
326      }
327    }
328    if (primary != null) {
329      primary.getExpectedStorageLocation().getDatanodeDescriptor().addBlockToBeRecovered(this);
330      primary.setChosenAsPrimary(true);
331      NameNode.blockStateChangeLog.info(
332          "BLOCK* {} recovery started, primary={}", this, primary);
333    }
334  }
335
336  void addReplicaIfNotPresent(DatanodeStorageInfo storage,
337                     Block block,
338                     ReplicaState rState) {
339    Iterator<ReplicaUnderConstruction> it = replicas.iterator();
340    while (it.hasNext()) {
341      ReplicaUnderConstruction r = it.next();
342      DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation();
343      if(expectedLocation == storage) {
344        // Record the gen stamp from the report
345        r.setGenerationStamp(block.getGenerationStamp());
346        return;
347      } else if (expectedLocation != null &&
348                 expectedLocation.getDatanodeDescriptor() ==
349                     storage.getDatanodeDescriptor()) {
350
351        // The Datanode reported that the block is on a different storage
352        // than the one chosen by BlockPlacementPolicy. This can occur as
353        // we allow Datanodes to choose the target storage. Update our
354        // state by removing the stale entry and adding a new one.
355        it.remove();
356        break;
357      }
358    }
359    replicas.add(new ReplicaUnderConstruction(block, storage, rState));
360  }
361
362  @Override // BlockInfo
363  // BlockInfoUnderConstruction participates in maps the same way as BlockInfo
364  public int hashCode() {
365    return super.hashCode();
366  }
367
368  @Override // BlockInfo
369  public boolean equals(Object obj) {
370    // Sufficient to rely on super's implementation
371    return (this == obj) || super.equals(obj);
372  }
373
374  @Override
375  public String toString() {
376    final StringBuilder b = new StringBuilder(100);
377    appendStringTo(b);
378    return b.toString();
379  }
380
381  @Override
382  public void appendStringTo(StringBuilder sb) {
383    super.appendStringTo(sb);
384    appendUCParts(sb);
385  }
386
387  private void appendUCParts(StringBuilder sb) {
388    sb.append("{UCState=").append(blockUCState)
389      .append(", truncateBlock=" + truncateBlock)
390      .append(", primaryNodeIndex=").append(primaryNodeIndex)
391      .append(", replicas=[");
392    if (replicas != null) {
393      Iterator<ReplicaUnderConstruction> iter = replicas.iterator();
394      if (iter.hasNext()) {
395        iter.next().appendStringTo(sb);
396        while (iter.hasNext()) {
397          sb.append(", ");
398          iter.next().appendStringTo(sb);
399        }
400      }
401    }
402    sb.append("]}");
403  }
404}