public class DistCpUtils extends Object
| Constructor and Description |
|---|
DistCpUtils() |
| Modifier and Type | Method and Description |
|---|---|
static void |
checkFileSystemAclSupport(org.apache.hadoop.fs.FileSystem fs)
Determines if a file system supports ACLs by running a canary getAclStatus
request on the file system root.
|
static void |
checkFileSystemXAttrSupport(org.apache.hadoop.fs.FileSystem fs)
Determines if a file system supports XAttrs by running a getXAttrs request
on the file system root.
|
static boolean |
checksumsAreEqual(org.apache.hadoop.fs.FileSystem sourceFS,
org.apache.hadoop.fs.Path source,
org.apache.hadoop.fs.FileChecksum sourceChecksum,
org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path target)
Utility to compare checksums for the paths specified.
|
static void |
compareFileLengthsAndChecksums(org.apache.hadoop.fs.FileSystem sourceFS,
org.apache.hadoop.fs.Path source,
org.apache.hadoop.fs.FileChecksum sourceChecksum,
org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path target,
boolean skipCrc)
Utility to compare file lengths and checksums for source and target.
|
static List<org.apache.hadoop.fs.permission.AclEntry> |
getAcl(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.FileStatus fileStatus)
Returns a file's full logical ACL.
|
static long |
getFileSize(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration configuration)
Retrieves size of the file at the specified path.
|
static DecimalFormat |
getFormatter() |
static int |
getInt(org.apache.hadoop.conf.Configuration configuration,
String label)
Utility to retrieve a specified key from a Configuration.
|
static long |
getLong(org.apache.hadoop.conf.Configuration configuration,
String label)
Utility to retrieve a specified key from a Configuration.
|
static String |
getRelativePath(org.apache.hadoop.fs.Path sourceRootPath,
org.apache.hadoop.fs.Path childPath)
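Gets the relative path of a child path with respect to a root path.
For example, if childPath is /tmp/abc/xyz/file and sourceRootPath is /tmp/abc, the relative path is /xyz/file.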
|
static org.apache.hadoop.fs.Path |
getSplitChunkPath(org.apache.hadoop.fs.Path targetFile,
CopyListingFileStatus srcFileStatus) |
static Class<? extends org.apache.hadoop.mapreduce.InputFormat> |
getStrategy(org.apache.hadoop.conf.Configuration conf,
DistCpContext context)
Returns the class that implements a copy strategy.
|
static String |
getStringDescriptionFor(long nBytes) |
static Map<String,byte[]> |
getXAttrs(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.Path path)
Returns all of a file's xAttrs.
|
static String |
packAttributes(EnumSet<DistCpOptions.FileAttribute> attributes)
Packs file preservation attributes into a string containing
just the first character of each preservation attribute.
|
static void |
preserve(org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path path,
CopyListingFileStatus srcFileStatus,
EnumSet<DistCpOptions.FileAttribute> attributes,
boolean preserveRawXattrs)
Preserves attributes on the file so that they match those of the file status
passed as an argument.
|
static <T> void |
publish(org.apache.hadoop.conf.Configuration configuration,
String label,
T value)
Utility to publish a value to a configuration.
|
static org.apache.hadoop.fs.Path |
sortListing(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path sourceListing)
Sort sequence file containing FileStatus and Text as key and value
respectively.
|
static void |
sortListing(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path sourceListing,
org.apache.hadoop.fs.Path output)
Sort sequence file containing FileStatus and Text as key and value
respectively, saving the result to the output path, which will be
deleted first. |
static LinkedList<CopyListingFileStatus> |
toCopyListingFileStatus(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.FileStatus fileStatus,
boolean preserveAcls,
boolean preserveXAttrs,
boolean preserveRawXAttrs,
int blocksPerChunk)
Converts FileStatus to a list of CopyListingFileStatus.
|
static CopyListingFileStatus |
toCopyListingFileStatusHelper(org.apache.hadoop.fs.FileSystem fileSystem,
org.apache.hadoop.fs.FileStatus fileStatus,
boolean preserveAcls,
boolean preserveXAttrs,
boolean preserveRawXAttrs,
long chunkOffset,
long chunkLength)
Converts a FileStatus to a CopyListingFileStatus.
|
static EnumSet<DistCpOptions.FileAttribute> |
unpackAttributes(String attributes)
Unpacks preservation attribute string containing the first character of
each preservation attribute back to a set of attributes to preserve
|
public static long getFileSize(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration configuration)
throws IOException
path - The path of the file whose size is sought.
configuration - Configuration, to retrieve the appropriate FileSystem.
IOException
public static <T> void publish(org.apache.hadoop.conf.Configuration configuration,
String label,
T value)
T - The type of the value.
configuration - The Configuration to which the value must be written.
label - The label for the value being published.
value - The value being published.
public static int getInt(org.apache.hadoop.conf.Configuration configuration,
String label)
configuration - The Configuration in which the key is sought.
label - The key being sought.
public static long getLong(org.apache.hadoop.conf.Configuration configuration,
String label)
configuration - The Configuration in which the key is sought.
label - The key being sought.
public static Class<? extends org.apache.hadoop.mapreduce.InputFormat> getStrategy(org.apache.hadoop.conf.Configuration conf, DistCpContext context)
conf - Configuration object
context - Distcp context with associated input options
public static String getRelativePath(org.apache.hadoop.fs.Path sourceRootPath, org.apache.hadoop.fs.Path childPath)
sourceRootPath - Source root path
childPath - Path for which relative path is required
public static String packAttributes(EnumSet<DistCpOptions.FileAttribute> attributes)
attributes - Attribute set to preserve
public static EnumSet<DistCpOptions.FileAttribute> unpackAttributes(String attributes)
attributes - Attribute string
public static void preserve(org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path path,
CopyListingFileStatus srcFileStatus,
EnumSet<DistCpOptions.FileAttribute> attributes,
boolean preserveRawXattrs)
throws IOException
targetFS - File system
path - Path that needs to preserve original file status
srcFileStatus - Original file status
attributes - Attribute set that needs to be preserved
preserveRawXattrs - if true, raw.* xattrs should be preserved
IOException - Exception if any (particularly relating to group/owner change or any transient error)
public static List<org.apache.hadoop.fs.permission.AclEntry> getAcl(org.apache.hadoop.fs.FileSystem fileSystem, org.apache.hadoop.fs.FileStatus fileStatus) throws IOException
fileSystem - FileSystem containing the file
fileStatus - FileStatus of file
IOException - if there is an I/O error
public static Map<String,byte[]> getXAttrs(org.apache.hadoop.fs.FileSystem fileSystem, org.apache.hadoop.fs.Path path) throws IOException
fileSystem - FileSystem containing the file
path - file path
IOException - if there is an I/O error
public static LinkedList<CopyListingFileStatus> toCopyListingFileStatus(org.apache.hadoop.fs.FileSystem fileSystem, org.apache.hadoop.fs.FileStatus fileStatus, boolean preserveAcls, boolean preserveXAttrs, boolean preserveRawXAttrs, int blocksPerChunk) throws IOException
fileSystem - FileSystem containing the file
fileStatus - FileStatus of file
preserveAcls - boolean true if preserving ACLs
preserveXAttrs - boolean true if preserving XAttrs
preserveRawXAttrs - boolean true if preserving raw.* XAttrs
blocksPerChunk - size of chunks when copying chunks in parallel
IOException - if there is an I/O error
public static CopyListingFileStatus toCopyListingFileStatusHelper(org.apache.hadoop.fs.FileSystem fileSystem, org.apache.hadoop.fs.FileStatus fileStatus, boolean preserveAcls, boolean preserveXAttrs, boolean preserveRawXAttrs, long chunkOffset, long chunkLength) throws IOException
fileSystem - FileSystem containing the file
fileStatus - FileStatus of file
preserveAcls - boolean true if preserving ACLs
preserveXAttrs - boolean true if preserving XAttrs
preserveRawXAttrs - boolean true if preserving raw.* XAttrs
chunkOffset - chunk offset in bytes
chunkLength - chunk length in bytes
IOException - if there is an I/O error
public static org.apache.hadoop.fs.Path sortListing(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path sourceListing)
throws IOException
conf - Configuration
sourceListing - Source listing file
IOException - Any exception during sort.
public static void sortListing(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path sourceListing,
org.apache.hadoop.fs.Path output)
throws IOException
Sort sequence file containing FileStatus and Text as key and value respectively, saving the result to the output path, which will be deleted first.
conf - Configuration
sourceListing - Source listing file
output - output path
IOException - Any exception during sort.
public static void checkFileSystemAclSupport(org.apache.hadoop.fs.FileSystem fs)
throws CopyListing.AclsNotSupportedException
fs - FileSystem to check
CopyListing.AclsNotSupportedException - if fs does not support ACLs
public static void checkFileSystemXAttrSupport(org.apache.hadoop.fs.FileSystem fs)
throws CopyListing.XAttrsNotSupportedException
fs - FileSystem to check
CopyListing.XAttrsNotSupportedException - if fs does not support XAttrs
public static DecimalFormat getFormatter()
public static String getStringDescriptionFor(long nBytes)
public static boolean checksumsAreEqual(org.apache.hadoop.fs.FileSystem sourceFS,
org.apache.hadoop.fs.Path source,
org.apache.hadoop.fs.FileChecksum sourceChecksum,
org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path target)
throws IOException
sourceFS - FileSystem for the source path.
source - The source path.
sourceChecksum - The checksum of the source file. If it is null we still need to retrieve it through sourceFS.
targetFS - FileSystem for the target path.
target - The target path.
IOException - if there's an exception while retrieving checksums.
public static void compareFileLengthsAndChecksums(org.apache.hadoop.fs.FileSystem sourceFS,
org.apache.hadoop.fs.Path source,
org.apache.hadoop.fs.FileChecksum sourceChecksum,
org.apache.hadoop.fs.FileSystem targetFS,
org.apache.hadoop.fs.Path target,
boolean skipCrc)
throws IOException
sourceFS - FileSystem for the source path.
source - The source path.
sourceChecksum - The checksum of the source file. If it is null we still need to retrieve it through sourceFS.
targetFS - FileSystem for the target path.
target - The target path.
skipCrc - The flag to indicate whether to skip checksums.
IOException - if there's a mismatch in file lengths or checksums.
public static org.apache.hadoop.fs.Path getSplitChunkPath(org.apache.hadoop.fs.Path targetFile,
CopyListingFileStatus srcFileStatus)
Copyright © 2008–2020 Apache Software Foundation. All rights reserved.