001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.util;
020
021import java.io.DataInputStream;
022import java.io.DataOutputStream;
023import java.io.IOException;
024import java.nio.ByteBuffer;
025import java.util.zip.CRC32;
026import java.util.zip.Checksum;
027
028import org.apache.hadoop.classification.InterfaceAudience;
029import org.apache.hadoop.classification.InterfaceStability;
030import org.apache.hadoop.fs.ChecksumException;
031
032/**
033 * This class provides interface and utilities for processing checksums for
034 * DFS data transfers.
035 */
036@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
037@InterfaceStability.Evolving
038public class DataChecksum implements Checksum {
039  
040  // checksum types
041  public static final int CHECKSUM_NULL    = 0;
042  public static final int CHECKSUM_CRC32   = 1;
043  public static final int CHECKSUM_CRC32C  = 2;
044  public static final int CHECKSUM_DEFAULT = 3; 
045  public static final int CHECKSUM_MIXED   = 4;
046 
047  /** The checksum types */
048  public static enum Type {
049    NULL  (CHECKSUM_NULL, 0),
050    CRC32 (CHECKSUM_CRC32, 4),
051    CRC32C(CHECKSUM_CRC32C, 4),
052    DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
053    MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
054
055    public final int id;
056    public final int size;
057    
058    private Type(int id, int size) {
059      this.id = id;
060      this.size = size;
061    }
062
063    /** @return the type corresponding to the id. */
064    public static Type valueOf(int id) {
065      if (id < 0 || id >= values().length) {
066        throw new IllegalArgumentException("id=" + id
067            + " out of range [0, " + values().length + ")");
068      }
069      return values()[id];
070    }
071  }
072
073  /**
074   * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm
075   * is chosen depending on the platform.
076   */
077  public static Checksum newCrc32() {
078    return Shell.isJava7OrAbove()? new CRC32(): new PureJavaCrc32();
079  }
080
081  public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
082    if ( bytesPerChecksum <= 0 ) {
083      return null;
084    }
085    
086    switch ( type ) {
087    case NULL :
088      return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
089    case CRC32 :
090      return new DataChecksum(type, newCrc32(), bytesPerChecksum );
091    case CRC32C:
092      return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
093    default:
094      return null;  
095    }
096  }
097  
098  /**
099   * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
100   * @return DataChecksum of the type in the array or null in case of an error.
101   */
102  public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
103    if (offset < 0 || bytes.length < offset + getChecksumHeaderSize()) {
104      return null;
105    }
106    
107    // like readInt():
108    int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 
109                           ( (bytes[offset+2] & 0xff) << 16 ) |
110                           ( (bytes[offset+3] & 0xff) << 8 )  |
111                           ( (bytes[offset+4] & 0xff) );
112    return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
113  }
114  
115  /**
116   * This constructs a DataChecksum by reading HEADER_LEN bytes from input
117   * stream <i>in</i>
118   */
119  public static DataChecksum newDataChecksum( DataInputStream in )
120                                 throws IOException {
121    int type = in.readByte();
122    int bpc = in.readInt();
123    DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
124    if ( summer == null ) {
125      throw new InvalidChecksumSizeException("Could not create DataChecksum "
126          + "of type " + type + " with bytesPerChecksum " + bpc);
127    }
128    return summer;
129  }
130  
131  /**
132   * Writes the checksum header to the output stream <i>out</i>.
133   */
134  public void writeHeader( DataOutputStream out ) 
135                           throws IOException { 
136    out.writeByte( type.id );
137    out.writeInt( bytesPerChecksum );
138  }
139
140  public byte[] getHeader() {
141    byte[] header = new byte[getChecksumHeaderSize()];
142    header[0] = (byte) (type.id & 0xff);
143    // Writing in buffer just like DataOutput.WriteInt()
144    header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
145    header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
146    header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
147    header[1+3] = (byte) (bytesPerChecksum & 0xff);
148    return header;
149  }
150  
151  /**
152   * Writes the current checksum to the stream.
153   * If <i>reset</i> is true, then resets the checksum.
154   * @return number of bytes written. Will be equal to getChecksumSize();
155   */
156   public int writeValue( DataOutputStream out, boolean reset )
157                          throws IOException {
158     if ( type.size <= 0 ) {
159       return 0;
160     }
161
162     if ( type.size == 4 ) {
163       out.writeInt( (int) summer.getValue() );
164     } else {
165       throw new IOException( "Unknown Checksum " + type );
166     }
167     
168     if ( reset ) {
169       reset();
170     }
171     
172     return type.size;
173   }
174   
175   /**
176    * Writes the current checksum to a buffer.
177    * If <i>reset</i> is true, then resets the checksum.
178    * @return number of bytes written. Will be equal to getChecksumSize();
179    */
180    public int writeValue( byte[] buf, int offset, boolean reset )
181                           throws IOException {
182      if ( type.size <= 0 ) {
183        return 0;
184      }
185
186      if ( type.size == 4 ) {
187        int checksum = (int) summer.getValue();
188        buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
189        buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
190        buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
191        buf[offset+3] = (byte) (checksum & 0xff);
192      } else {
193        throw new IOException( "Unknown Checksum " + type );
194      }
195      
196      if ( reset ) {
197        reset();
198      }
199      
200      return type.size;
201    }
202   
203   /**
204    * Compares the checksum located at buf[offset] with the current checksum.
205    * @return true if the checksum matches and false otherwise.
206    */
207   public boolean compare( byte buf[], int offset ) {
208     if ( type.size == 4 ) {
209       int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 
210                      ( (buf[offset+1] & 0xff) << 16 ) |
211                      ( (buf[offset+2] & 0xff) << 8 )  |
212                      ( (buf[offset+3] & 0xff) );
213       return checksum == (int) summer.getValue();
214     }
215     return type.size == 0;
216   }
217   
218  private final Type type;
219  private final Checksum summer;
220  private final int bytesPerChecksum;
221  private int inSum = 0;
222  
223  private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
224    this.type = type;
225    summer = checksum;
226    bytesPerChecksum = chunkSize;
227  }
228  
229  /** @return the checksum algorithm type. */
230  public Type getChecksumType() {
231    return type;
232  }
233  /** @return the size for a checksum. */
234  public int getChecksumSize() {
235    return type.size;
236  }
237  /** @return the required checksum size given the data length. */
238  public int getChecksumSize(int dataSize) {
239    return ((dataSize - 1)/getBytesPerChecksum() + 1) * getChecksumSize(); 
240  }
241  public int getBytesPerChecksum() {
242    return bytesPerChecksum;
243  }
244  public int getNumBytesInSum() {
245    return inSum;
246  }
247  
248  public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
249  static public int getChecksumHeaderSize() {
250    return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
251  }
252  //Checksum Interface. Just a wrapper around member summer.
253  @Override
254  public long getValue() {
255    return summer.getValue();
256  }
257  @Override
258  public void reset() {
259    summer.reset();
260    inSum = 0;
261  }
262  @Override
263  public void update( byte[] b, int off, int len ) {
264    if ( len > 0 ) {
265      summer.update( b, off, len );
266      inSum += len;
267    }
268  }
269  @Override
270  public void update( int b ) {
271    summer.update( b );
272    inSum += 1;
273  }
274  
275  /**
276   * Verify that the given checksums match the given data.
277   * 
278   * The 'mark' of the ByteBuffer parameters may be modified by this function,.
279   * but the position is maintained.
280   *  
281   * @param data the DirectByteBuffer pointing to the data to verify.
282   * @param checksums the DirectByteBuffer pointing to a series of stored
283   *                  checksums
284   * @param fileName the name of the file being read, for error-reporting
285   * @param basePos the file position to which the start of 'data' corresponds
286   * @throws ChecksumException if the checksums do not match
287   */
288  public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
289      String fileName, long basePos)
290  throws ChecksumException {
291    if (type.size == 0) return;
292    
293    if (data.hasArray() && checksums.hasArray()) {
294      verifyChunkedSums(
295          data.array(), data.arrayOffset() + data.position(), data.remaining(),
296          checksums.array(), checksums.arrayOffset() + checksums.position(),
297          fileName, basePos);
298      return;
299    }
300    if (NativeCrc32.isAvailable()) {
301      NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
302          fileName, basePos);
303      return;
304    }
305    
306    int startDataPos = data.position();
307    data.mark();
308    checksums.mark();
309    try {
310      byte[] buf = new byte[bytesPerChecksum];
311      byte[] sum = new byte[type.size];
312      while (data.remaining() > 0) {
313        int n = Math.min(data.remaining(), bytesPerChecksum);
314        checksums.get(sum);
315        data.get(buf, 0, n);
316        summer.reset();
317        summer.update(buf, 0, n);
318        int calculated = (int)summer.getValue();
319        int stored = (sum[0] << 24 & 0xff000000) |
320          (sum[1] << 16 & 0xff0000) |
321          (sum[2] << 8 & 0xff00) |
322          sum[3] & 0xff;
323        if (calculated != stored) {
324          long errPos = basePos + data.position() - startDataPos - n;
325          throw new ChecksumException(
326              "Checksum error: "+ fileName + " at "+ errPos +
327              " exp: " + stored + " got: " + calculated, errPos);
328        }
329      }
330    } finally {
331      data.reset();
332      checksums.reset();
333    }
334  }
335  
336  /**
337   * Implementation of chunked verification specifically on byte arrays. This
338   * is to avoid the copy when dealing with ByteBuffers that have array backing.
339   */
340  private void verifyChunkedSums(
341      byte[] data, int dataOff, int dataLen,
342      byte[] checksums, int checksumsOff, String fileName,
343      long basePos) throws ChecksumException {
344    if (type.size == 0) return;
345
346    if (NativeCrc32.isAvailable()) {
347      NativeCrc32.verifyChunkedSumsByteArray(bytesPerChecksum, type.id,
348          checksums, checksumsOff, data, dataOff, dataLen, fileName, basePos);
349      return;
350    }
351    
352    int remaining = dataLen;
353    int dataPos = 0;
354    while (remaining > 0) {
355      int n = Math.min(remaining, bytesPerChecksum);
356      
357      summer.reset();
358      summer.update(data, dataOff + dataPos, n);
359      dataPos += n;
360      remaining -= n;
361      
362      int calculated = (int)summer.getValue();
363      int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
364        (checksums[checksumsOff + 1] << 16 & 0xff0000) |
365        (checksums[checksumsOff + 2] << 8 & 0xff00) |
366        checksums[checksumsOff + 3] & 0xff;
367      checksumsOff += 4;
368      if (calculated != stored) {
369        long errPos = basePos + dataPos - n;
370        throw new ChecksumException(
371            "Checksum error: "+ fileName + " at "+ errPos +
372            " exp: " + stored + " got: " + calculated, errPos);
373      }
374    }
375  }
376
377  /**
378   * Calculate checksums for the given data.
379   * 
380   * The 'mark' of the ByteBuffer parameters may be modified by this function,
381   * but the position is maintained.
382   * 
383   * @param data the DirectByteBuffer pointing to the data to checksum.
384   * @param checksums the DirectByteBuffer into which checksums will be
385   *                  stored. Enough space must be available in this
386   *                  buffer to put the checksums.
387   */
388  public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
389    if (type.size == 0) return;
390    
391    if (data.hasArray() && checksums.hasArray()) {
392      calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
393          checksums.array(), checksums.arrayOffset() + checksums.position());
394      return;
395    }
396
397    if (NativeCrc32.isAvailable()) {
398      NativeCrc32.calculateChunkedSums(bytesPerChecksum, type.id,
399          checksums, data);
400      return;
401    }
402    
403    data.mark();
404    checksums.mark();
405    try {
406      byte[] buf = new byte[bytesPerChecksum];
407      while (data.remaining() > 0) {
408        int n = Math.min(data.remaining(), bytesPerChecksum);
409        data.get(buf, 0, n);
410        summer.reset();
411        summer.update(buf, 0, n);
412        checksums.putInt((int)summer.getValue());
413      }
414    } finally {
415      data.reset();
416      checksums.reset();
417    }
418  }
419
420  /**
421   * Implementation of chunked calculation specifically on byte arrays. This
422   * is to avoid the copy when dealing with ByteBuffers that have array backing.
423   */
424  public void calculateChunkedSums(
425      byte[] data, int dataOffset, int dataLength,
426      byte[] sums, int sumsOffset) {
427    if (type.size == 0) return;
428
429    if (NativeCrc32.isAvailable()) {
430      NativeCrc32.calculateChunkedSumsByteArray(bytesPerChecksum, type.id,
431          sums, sumsOffset, data, dataOffset, dataLength);
432      return;
433    }
434
435    int remaining = dataLength;
436    while (remaining > 0) {
437      int n = Math.min(remaining, bytesPerChecksum);
438      summer.reset();
439      summer.update(data, dataOffset, n);
440      dataOffset += n;
441      remaining -= n;
442      long calculated = summer.getValue();
443      sums[sumsOffset++] = (byte) (calculated >> 24);
444      sums[sumsOffset++] = (byte) (calculated >> 16);
445      sums[sumsOffset++] = (byte) (calculated >> 8);
446      sums[sumsOffset++] = (byte) (calculated);
447    }
448  }
449
450  @Override
451  public boolean equals(Object other) {
452    if (!(other instanceof DataChecksum)) {
453      return false;
454    }
455    DataChecksum o = (DataChecksum)other;
456    return o.bytesPerChecksum == this.bytesPerChecksum &&
457      o.type == this.type;
458  }
459  
460  @Override
461  public int hashCode() {
462    return (this.type.id + 31) * this.bytesPerChecksum;
463  }
464  
465  @Override
466  public String toString() {
467    return "DataChecksum(type=" + type +
468      ", chunkSize=" + bytesPerChecksum + ")";
469  }
470  
471  /**
472   * This just provides a dummy implimentation for Checksum class
473   * This is used when there is no checksum available or required for 
474   * data
475   */
476  static class ChecksumNull implements Checksum {
477    
478    public ChecksumNull() {}
479    
480    //Dummy interface
481    @Override
482    public long getValue() { return 0; }
483    @Override
484    public void reset() {}
485    @Override
486    public void update(byte[] b, int off, int len) {}
487    @Override
488    public void update(int b) {}
489  };
490}