/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode.fsdataset;


import java.io.EOFException;
import java.io.File;
import java.io.FileDescriptor;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataStorage;
import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
import org.apache.hadoop.hdfs.server.datanode.Replica;
import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface;
import org.apache.hadoop.hdfs.server.datanode.ReplicaHandler;
import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException;
import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
import org.apache.hadoop.hdfs.server.datanode.UnexpectedReplicaStateException;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl;
import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * This is a service provider interface for the underlying storage that
 * stores replicas for a data node.
 * The default implementation stores replicas on local drives.
 */
@InterfaceAudience.Private
public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
  /**
   * A factory for creating {@link FsDatasetSpi} objects.
   */
  public static abstract class Factory<D extends FsDatasetSpi<?>> {
    /**
     * @return the factory class named by
     * {@link DFSConfigKeys#DFS_DATANODE_FSDATASET_FACTORY_KEY} in the
     * configuration, instantiated reflectively; defaults to
     * {@link FsDatasetFactory}.
     */
    public static Factory<?> getFactory(Configuration conf) {
      @SuppressWarnings("rawtypes")
      final Class<? extends Factory> clazz = conf.getClass(
          DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY,
          FsDatasetFactory.class,
          Factory.class);
      return ReflectionUtils.newInstance(clazz, conf);
    }

    /** Create a new object. */
    public abstract D newInstance(DataNode datanode, DataStorage storage,
        Configuration conf) throws IOException;

    /** Does the factory create simulated objects? */
    public boolean isSimulated() {
      return false;
    }
  }

  /** @return a list of volumes. */
  public List<V> getVolumes();

  /**
   * Add a new volume to the FsDataset.<p/>
   *
   * If the FSDataset supports block scanning, this function registers
   * the new volume with the block scanner.
   *
   * @param location The storage location for the new volume.
   * @param nsInfos Namespace information for the new volume.
   */
  public void addVolume(
      final StorageLocation location,
      final List<NamespaceInfo> nsInfos) throws IOException;

  /**
   * Removes a collection of volumes from FsDataset.
   *
   * If the FSDataset supports block scanning, this function removes
   * the volumes from the block scanner.
   *
   * @param volumes The paths of the volumes to be removed.
   * @param clearFailure set true to clear the failure information about the
   *                     volumes.
   */
  public void removeVolumes(Set<File> volumes, boolean clearFailure);

  /** @return a storage with the given storage ID */
  public DatanodeStorage getStorage(final String storageUuid);

  /** @return one or more storage reports for attached volumes. */
  public StorageReport[] getStorageReports(String bpid)
      throws IOException;

  /** @return the volume that contains a replica of the block. */
  public V getVolume(ExtendedBlock b);

  /** @return a volume information map (name => info). */
  public Map<String, Object> getVolumeInfoMap();

  /**
   * Returns info about volume failures.
   *
   * @return info about volume failures, possibly null
   */
  VolumeFailureSummary getVolumeFailureSummary();

  /** @return a list of finalized blocks for the given block pool. */
  public List<FinalizedReplica> getFinalizedBlocks(String bpid);

  /**
   * @return a list of finalized blocks on persistent (non-transient) storage
   * for the given block pool.
   */
  public List<FinalizedReplica> getFinalizedBlocksOnPersistentStorage(String bpid);

  /**
   * Check whether the in-memory block record matches the block on the disk,
   * and, in case that they are not matched, update the record or mark it
   * as corrupted.
   */
  public void checkAndUpdate(String bpid, long blockId, File diskFile,
      File diskMetaFile, FsVolumeSpi vol) throws IOException;

  /**
   * @param b - the block
   * @return a stream if the meta-data of the block exists;
   *         otherwise, return null.
   * @throws IOException
   */
  public LengthInputStream getMetaDataInputStream(ExtendedBlock b
      ) throws IOException;

  /**
   * Returns the specified block's on-disk length (excluding metadata)
   * @return the specified block's on-disk length (excluding metadata)
   * @throws IOException on error
   */
  public long getLength(ExtendedBlock b) throws IOException;

  /**
   * Get reference to the replica meta info in the replicasMap.
   * To be called from methods that are synchronized on {@link FSDataset}
   * @return replica from the replicas map
   */
  @Deprecated
  public Replica getReplica(String bpid, long blockId);

  /**
   * @return replica meta information
   */
  public String getReplicaString(String bpid, long blockId);

  /**
   * @return the stored block (carrying its on-disk generation stamp) for the
   * given block pool id and block id.
   */
  public Block getStoredBlock(String bpid, long blkid) throws IOException;

  /**
   * Returns an input stream at specified offset of the specified block
   * @param b block
   * @param seekOffset offset within the block to seek to
   * @return an input stream to read the contents of the specified block,
   *         starting at the offset
   * @throws IOException
   */
  public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset)
      throws IOException;

  /**
   * Returns an input stream at specified offset of the specified block
   * The block is still in the tmp directory and is not finalized
   * @return an input stream to read the contents of the specified block,
   *         starting at the offset
   * @throws IOException
   */
  public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkoff,
      long ckoff) throws IOException;

  /**
   * Creates a temporary replica and returns the meta information of the replica
   * @param b block
   * @param isTransfer whether for transfer
   *
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaHandler createTemporary(StorageType storageType,
      ExtendedBlock b, boolean isTransfer) throws IOException;

  /**
   * Creates a RBW replica and returns the meta info of the replica
   *
   * @param b block
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaHandler createRbw(StorageType storageType,
      ExtendedBlock b, boolean allowLazyPersist) throws IOException;

  /**
   * Recovers a RBW replica and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param minBytesRcvd the minimum number of bytes that the replica could have
   * @param maxBytesRcvd the maximum number of bytes that the replica could have
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaHandler recoverRbw(ExtendedBlock b,
      long newGS, long minBytesRcvd, long maxBytesRcvd) throws IOException;

  /**
   * Convert a temporary replica to a RBW.
   * @param temporary the temporary replica being converted
   * @return the result RBW
   */
  public ReplicaInPipelineInterface convertTemporaryToRbw(
      ExtendedBlock temporary) throws IOException;

  /**
   * Append to a finalized replica and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the meta info of the replica which is being written to
   * @throws IOException
   */
  public ReplicaHandler append(ExtendedBlock b, long newGS,
      long expectedBlockLen) throws IOException;

  /**
   * Recover a failed append to a finalized replica
   * and returns the meta info of the replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the meta info of the replica which is being written to
   * @throws IOException
   */
  public ReplicaHandler recoverAppend(
      ExtendedBlock b, long newGS, long expectedBlockLen) throws IOException;

  /**
   * Recover a failed pipeline close
   * It bumps the replica's generation stamp and finalize it if RBW replica
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the recovered replica (note: despite the name, the return type is
   * {@link Replica}, not a storage uuid).
   * @throws IOException
   */
  Replica recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen
      ) throws IOException;

  /**
   * Finalizes the block previously opened for writing using writeToBlock.
   * The block size is what is in the parameter b and it must match the amount
   * of data written
   * @param b Block to be finalized
   * @param fsyncDir whether to sync the directory changes to durable device.
   * @throws IOException
   * @throws ReplicaNotFoundException if the replica can not be found when the
   * block is being finalized. For instance, the block resides on an HDFS volume
   * that has been removed.
   */
  void finalizeBlock(ExtendedBlock b, boolean fsyncDir) throws IOException;

  /**
   * Unfinalizes the block previously opened for writing using writeToBlock.
   * The temporary file associated with this block is deleted.
   * @throws IOException
   */
  public void unfinalizeBlock(ExtendedBlock b) throws IOException;

  /**
   * Returns one block report per volume.
   * @param bpid Block Pool Id
   * @return - a map of DatanodeStorage to block report for the volume.
   */
  public Map<DatanodeStorage, BlockListAsLongs> getBlockReports(String bpid);

  /**
   * Returns the cache report - the full list of cached block IDs of a
   * block pool.
   * @param bpid Block Pool Id
   * @return the cache report - the full list of cached block IDs.
   */
  public List<Long> getCacheReport(String bpid);

  /** Does the dataset contain the block? */
  public boolean contains(ExtendedBlock block);

  /**
   * Check if a block is valid.
   *
   * @param b The block to check.
   * @param minLength The minimum length that the block must have. May be 0.
   * @param state If this is null, it is ignored. If it is non-null, we
   *            will check that the replica has this state.
   *
   * @throws ReplicaNotFoundException If the replica is not found
   *
   * @throws UnexpectedReplicaStateException If the replica is not in the
   *           expected state.
   * @throws FileNotFoundException If the block file is not found or there
   *           was an error locating it.
   * @throws EOFException If the replica length is too short.
   *
   * @throws IOException May be thrown from the methods called.
   */
  public void checkBlock(ExtendedBlock b, long minLength, ReplicaState state)
      throws ReplicaNotFoundException, UnexpectedReplicaStateException,
      FileNotFoundException, EOFException, IOException;


  /**
   * Is the block valid?
   * @return - true if the specified block is valid
   */
  public boolean isValidBlock(ExtendedBlock b);

  /**
   * Is the block a valid RBW?
   * @return - true if the specified block is a valid RBW
   */
  public boolean isValidRbw(ExtendedBlock b);

  /**
   * Invalidates the specified blocks
   * @param bpid Block pool Id
   * @param invalidBlks - the blocks to be invalidated
   * @throws IOException
   */
  public void invalidate(String bpid, Block invalidBlks[]) throws IOException;

  /**
   * Caches the specified blocks
   * @param bpid Block pool id
   * @param blockIds - block ids to cache
   */
  public void cache(String bpid, long[] blockIds);

  /**
   * Uncaches the specified blocks
   * @param bpid Block pool id
   * @param blockIds - block ids to uncache
   */
  public void uncache(String bpid, long[] blockIds);

  /**
   * Determine if the specified block is cached.
   * @param bpid Block pool id
   * @param blockId - block id
   * @return true if the block is cached
   */
  public boolean isCached(String bpid, long blockId);

  /**
   * Check if all the data directories are healthy
   * @return A set of unhealthy data directories.
   */
  public Set<File> checkDataDir();

  /**
   * Shutdown the FSDataset
   */
  public void shutdown();

  /**
   * Sets the file pointer of the checksum stream so that the last checksum
   * will be overwritten
   * @param b block
   * @param outs The streams for the data file and checksum file
   * @param checksumSize number of bytes each checksum has
   * @throws IOException
   */
  public void adjustCrcChannelPosition(ExtendedBlock b,
      ReplicaOutputStreams outs, int checksumSize) throws IOException;

  /**
   * Checks how many valid storage volumes there are in the DataNode.
   * @return true if more than the minimum number of valid volumes are left
   * in the FSDataSet.
   */
  public boolean hasEnoughResource();

  /**
   * Get visible length of the specified replica.
   */
  long getReplicaVisibleLength(final ExtendedBlock block) throws IOException;

  /**
   * Initialize a replica recovery.
   * @return actual state of the replica on this data-node or
   * null if data-node does not have the replica.
   */
  public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock
      ) throws IOException;

  /**
   * Update replica's generation stamp and length and finalize it.
   * @return the updated replica (note: despite the comment history, the
   * return type is {@link Replica}, not a storage ID).
   */
  Replica updateReplicaUnderRecovery(ExtendedBlock oldBlock,
      long recoveryId, long newBlockId, long newLength) throws IOException;

  /**
   * add new block pool ID
   * @param bpid Block pool Id
   * @param conf Configuration
   */
  public void addBlockPool(String bpid, Configuration conf) throws IOException;

  /**
   * Shutdown and remove the block pool from underlying storage.
   * @param bpid Block pool Id to be removed
   */
  public void shutdownBlockPool(String bpid);

  /**
   * Deletes the block pool directories. If force is false, directories are
   * deleted only if no block files exist for the block pool. If force
   * is true entire directory for the blockpool is deleted along with its
   * contents.
   * @param bpid BlockPool Id to be deleted.
   * @param force If force is false, directories are deleted only if no
   *        block files exist for the block pool, otherwise entire
   *        directory for the blockpool is deleted along with its contents.
   * @throws IOException
   */
  public void deleteBlockPool(String bpid, boolean force) throws IOException;

  /**
   * Get {@link BlockLocalPathInfo} for the given block.
   */
  public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b
      ) throws IOException;

  /**
   * Get a {@link HdfsBlocksMetadata} corresponding to the list of blocks in
   * <code>blocks</code>.
   *
   * @param bpid pool to query
   * @param blockIds List of block ids for which to return metadata
   * @return metadata Metadata for the list of blocks
   * @throws IOException
   */
  public HdfsBlocksMetadata getHdfsBlocksMetadata(String bpid,
      long[] blockIds) throws IOException;

  /**
   * Enable 'trash' for the given dataset. When trash is enabled, files are
   * moved to a separate trash directory instead of being deleted immediately.
   * This can be useful for example during rolling upgrades.
   */
  public void enableTrash(String bpid);

  /**
   * Clear trash
   */
  public void clearTrash(String bpid);

  /**
   * @return true when trash is enabled
   */
  public boolean trashEnabled(String bpid);

  /**
   * Create a marker file indicating that a rolling upgrade is in progress.
   */
  public void setRollingUpgradeMarker(String bpid) throws IOException;

  /**
   * Delete the rolling upgrade marker file if it exists.
   * @param bpid Block pool id
   */
  public void clearRollingUpgradeMarker(String bpid) throws IOException;

  /**
   * submit a sync_file_range request to AsyncDiskService
   */
  public void submitBackgroundSyncFileRangeRequest(final ExtendedBlock block,
      final FileDescriptor fd, final long offset, final long nbytes,
      final int flags);

  /**
   * Callback from RamDiskAsyncLazyPersistService upon async lazy persist task end
   */
  public void onCompleteLazyPersist(String bpId, long blockId,
      long creationTime, File[] savedFiles, V targetVolume);

  /**
   * Callback from RamDiskAsyncLazyPersistService upon async lazy persist task fail
   */
  public void onFailLazyPersist(String bpId, long blockId);

  /**
   * Move block from one storage to another storage
   */
  public ReplicaInfo moveBlockAcrossStorage(final ExtendedBlock block,
      StorageType targetStorageType) throws IOException;

  /**
   * Set a block to be pinned on this datanode so that it cannot be moved
   * by Balancer/Mover.
   *
   * It is a no-op when dfs.datanode.block-pinning.enabled is set to false.
   */
  public void setPinning(ExtendedBlock block) throws IOException;

  /**
   * Check whether the block was pinned
   */
  public boolean getPinning(ExtendedBlock block) throws IOException;

  /**
   * Confirm whether the block is deleting
   */
  public boolean isDeletingBlock(String bpid, long blockId);
}