001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hdfs.server.datanode.fsdataset; 019 020import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT; 021import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY; 022import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT; 023import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY; 024 025import java.io.IOException; 026import java.util.ArrayList; 027import java.util.List; 028import java.util.Random; 029 030import org.apache.commons.logging.Log; 031import org.apache.commons.logging.LogFactory; 032import org.apache.hadoop.conf.Configurable; 033import org.apache.hadoop.conf.Configuration; 034import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; 035 036/** 037 * A DN volume choosing policy which takes into account the amount of free 038 * space on each of the available volumes when considering where to assign a 039 * new replica allocation. By default this policy prefers assigning replicas to 040 * those volumes with more available free space, so as to over time balance the 041 * available space of all the volumes within a DN. 042 */ 043public class AvailableSpaceVolumeChoosingPolicy<V extends FsVolumeSpi> 044 implements VolumeChoosingPolicy<V>, Configurable { 045 046 private static final Log LOG = LogFactory.getLog(AvailableSpaceVolumeChoosingPolicy.class); 047 048 private final Random random; 049 050 private long balancedSpaceThreshold = DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT; 051 private float balancedPreferencePercent = DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT; 052 053 AvailableSpaceVolumeChoosingPolicy(Random random) { 054 this.random = random; 055 } 056 057 public AvailableSpaceVolumeChoosingPolicy() { 058 this(new Random()); 059 } 060 061 @Override 062 public synchronized void setConf(Configuration conf) { 063 balancedSpaceThreshold = conf.getLong( 064 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY, 065 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_DEFAULT); 066 balancedPreferencePercent = conf.getFloat( 067 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY, 068 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_DEFAULT); 069 070 LOG.info("Available space volume choosing policy initialized: " + 071 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_THRESHOLD_KEY + 072 " = " + balancedSpaceThreshold + ", " + 073 DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + 074 " = " + balancedPreferencePercent); 075 076 if (balancedPreferencePercent > 1.0) { 077 LOG.warn("The value of " + DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + 078 " is greater than 1.0 but should be in the range 0.0 - 1.0"); 079 } 080 081 if (balancedPreferencePercent < 0.5) { 082 LOG.warn("The value of " + DFS_DATANODE_AVAILABLE_SPACE_VOLUME_CHOOSING_POLICY_BALANCED_SPACE_PREFERENCE_FRACTION_KEY + 083 " is less than 0.5 so volumes with less available disk space will receive more block allocations"); 084 } 085 } 086 087 @Override 088 public synchronized Configuration getConf() { 089 // Nothing to do. Only added to fulfill the Configurable contract. 090 return null; 091 } 092 093 private final VolumeChoosingPolicy<V> roundRobinPolicyBalanced = 094 new RoundRobinVolumeChoosingPolicy<V>(); 095 private final VolumeChoosingPolicy<V> roundRobinPolicyHighAvailable = 096 new RoundRobinVolumeChoosingPolicy<V>(); 097 private final VolumeChoosingPolicy<V> roundRobinPolicyLowAvailable = 098 new RoundRobinVolumeChoosingPolicy<V>(); 099 100 @Override 101 public synchronized V chooseVolume(List<V> volumes, 102 long replicaSize) throws IOException { 103 if (volumes.size() < 1) { 104 throw new DiskOutOfSpaceException("No more available volumes"); 105 } 106 107 AvailableSpaceVolumeList volumesWithSpaces = 108 new AvailableSpaceVolumeList(volumes); 109 110 if (volumesWithSpaces.areAllVolumesWithinFreeSpaceThreshold()) { 111 // If they're actually not too far out of whack, fall back on pure round 112 // robin. 113 V volume = roundRobinPolicyBalanced.chooseVolume(volumes, replicaSize); 114 if (LOG.isDebugEnabled()) { 115 LOG.debug("All volumes are within the configured free space balance " + 116 "threshold. Selecting " + volume + " for write of block size " + 117 replicaSize); 118 } 119 return volume; 120 } else { 121 V volume = null; 122 // If none of the volumes with low free space have enough space for the 123 // replica, always try to choose a volume with a lot of free space. 124 long mostAvailableAmongLowVolumes = volumesWithSpaces 125 .getMostAvailableSpaceAmongVolumesWithLowAvailableSpace(); 126 127 List<V> highAvailableVolumes = extractVolumesFromPairs( 128 volumesWithSpaces.getVolumesWithHighAvailableSpace()); 129 List<V> lowAvailableVolumes = extractVolumesFromPairs( 130 volumesWithSpaces.getVolumesWithLowAvailableSpace()); 131 132 float preferencePercentScaler = 133 (highAvailableVolumes.size() * balancedPreferencePercent) + 134 (lowAvailableVolumes.size() * (1 - balancedPreferencePercent)); 135 float scaledPreferencePercent = 136 (highAvailableVolumes.size() * balancedPreferencePercent) / 137 preferencePercentScaler; 138 if (mostAvailableAmongLowVolumes < replicaSize || 139 random.nextFloat() < scaledPreferencePercent) { 140 volume = roundRobinPolicyHighAvailable.chooseVolume( 141 highAvailableVolumes, replicaSize); 142 if (LOG.isDebugEnabled()) { 143 LOG.debug("Volumes are imbalanced. Selecting " + volume + 144 " from high available space volumes for write of block size " 145 + replicaSize); 146 } 147 } else { 148 volume = roundRobinPolicyLowAvailable.chooseVolume( 149 lowAvailableVolumes, replicaSize); 150 if (LOG.isDebugEnabled()) { 151 LOG.debug("Volumes are imbalanced. Selecting " + volume + 152 " from low available space volumes for write of block size " 153 + replicaSize); 154 } 155 } 156 return volume; 157 } 158 } 159 160 /** 161 * Used to keep track of the list of volumes we're choosing from. 162 */ 163 private class AvailableSpaceVolumeList { 164 private final List<AvailableSpaceVolumePair> volumes; 165 166 public AvailableSpaceVolumeList(List<V> volumes) throws IOException { 167 this.volumes = new ArrayList<AvailableSpaceVolumePair>(); 168 for (V volume : volumes) { 169 this.volumes.add(new AvailableSpaceVolumePair(volume)); 170 } 171 } 172 173 /** 174 * @return true if all volumes' free space is within the 175 * configured threshold, false otherwise. 176 */ 177 public boolean areAllVolumesWithinFreeSpaceThreshold() { 178 long leastAvailable = Long.MAX_VALUE; 179 long mostAvailable = 0; 180 for (AvailableSpaceVolumePair volume : volumes) { 181 leastAvailable = Math.min(leastAvailable, volume.getAvailable()); 182 mostAvailable = Math.max(mostAvailable, volume.getAvailable()); 183 } 184 return (mostAvailable - leastAvailable) < balancedSpaceThreshold; 185 } 186 187 /** 188 * @return the minimum amount of space available on a single volume, 189 * across all volumes. 190 */ 191 private long getLeastAvailableSpace() { 192 long leastAvailable = Long.MAX_VALUE; 193 for (AvailableSpaceVolumePair volume : volumes) { 194 leastAvailable = Math.min(leastAvailable, volume.getAvailable()); 195 } 196 return leastAvailable; 197 } 198 199 /** 200 * @return the maximum amount of space available across volumes with low space. 201 */ 202 public long getMostAvailableSpaceAmongVolumesWithLowAvailableSpace() { 203 long mostAvailable = Long.MIN_VALUE; 204 for (AvailableSpaceVolumePair volume : getVolumesWithLowAvailableSpace()) { 205 mostAvailable = Math.max(mostAvailable, volume.getAvailable()); 206 } 207 return mostAvailable; 208 } 209 210 /** 211 * @return the list of volumes with relatively low available space. 212 */ 213 public List<AvailableSpaceVolumePair> getVolumesWithLowAvailableSpace() { 214 long leastAvailable = getLeastAvailableSpace(); 215 List<AvailableSpaceVolumePair> ret = new ArrayList<AvailableSpaceVolumePair>(); 216 for (AvailableSpaceVolumePair volume : volumes) { 217 if (volume.getAvailable() <= leastAvailable + balancedSpaceThreshold) { 218 ret.add(volume); 219 } 220 } 221 return ret; 222 } 223 224 /** 225 * @return the list of volumes with a lot of available space. 226 */ 227 public List<AvailableSpaceVolumePair> getVolumesWithHighAvailableSpace() { 228 long leastAvailable = getLeastAvailableSpace(); 229 List<AvailableSpaceVolumePair> ret = new ArrayList<AvailableSpaceVolumePair>(); 230 for (AvailableSpaceVolumePair volume : volumes) { 231 if (volume.getAvailable() > leastAvailable + balancedSpaceThreshold) { 232 ret.add(volume); 233 } 234 } 235 return ret; 236 } 237 238 } 239 240 /** 241 * Used so that we only check the available space on a given volume once, at 242 * the beginning of {@link AvailableSpaceVolumeChoosingPolicy#chooseVolume(List, long)}. 243 */ 244 private class AvailableSpaceVolumePair { 245 private final V volume; 246 private final long availableSpace; 247 248 public AvailableSpaceVolumePair(V volume) throws IOException { 249 this.volume = volume; 250 this.availableSpace = volume.getAvailable(); 251 } 252 253 public long getAvailable() { 254 return availableSpace; 255 } 256 257 public V getVolume() { 258 return volume; 259 } 260 } 261 262 private List<V> extractVolumesFromPairs(List<AvailableSpaceVolumePair> volumes) { 263 List<V> ret = new ArrayList<V>(); 264 for (AvailableSpaceVolumePair volume : volumes) { 265 ret.add(volume.getVolume()); 266 } 267 return ret; 268 } 269 270}