/*
 * Decompiled with CFR 0.152.
 */
package boofcv.alg.filter.convolve.noborder;

import boofcv.struct.convolve.Kernel1D_F32;
import boofcv.struct.convolve.Kernel1D_F64;
import boofcv.struct.convolve.Kernel1D_I32;
import boofcv.struct.convolve.Kernel2D_F32;
import boofcv.struct.convolve.Kernel2D_F64;
import boofcv.struct.convolve.Kernel2D_I32;
import boofcv.struct.image.ImageFloat32;
import boofcv.struct.image.ImageFloat64;
import boofcv.struct.image.ImageInt16;
import boofcv.struct.image.ImageInt8;
import boofcv.struct.image.ImageSInt16;
import boofcv.struct.image.ImageSInt32;
import boofcv.struct.image.ImageUInt8;

/**
 * Straightforward (unoptimized) convolution routines that only produce output where the
 * kernel fits entirely inside the source image.
 *
 * <p>Three operations are provided for each supported combination of pixel types:</p>
 * <ul>
 *   <li>{@code horizontal} - slides a 1D kernel along each row,</li>
 *   <li>{@code vertical} - slides a 1D kernel along each column,</li>
 *   <li>{@code convolve} - slides a 2D kernel over the image.</li>
 * </ul>
 *
 * <p>In every routine the kernel weights are consumed in storage order while the window is
 * scanned in increasing index order, i.e. the kernel is applied without being flipped.
 * Destination pixels outside the processed region are never written.</p>
 */
public class ConvolveImageStandard {
    /**
     * Convolves a 1D kernel along the rows of a {@code float} image.
     *
     * <p>In each processed row, columns {@code radius} through {@code width - radius - 1}
     * are written, where {@code width} is taken from {@code image}.</p>
     *
     * @param kernel        1D kernel; radius and width come from {@code getRadius()} / {@code getWidth()}
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every row; {@code false} to skip the
     *                      first and last {@code radius} rows
     */
    public static void horizontal(Kernel1D_F32 kernel, ImageFloat32 image, ImageFloat32 dest, boolean includeBorder) {
        final float[] input = image.data;
        final float[] output = dest.data;
        final float[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstRow = includeBorder ? 0 : radius;
        final int outputsPerRow = image.getWidth() - 2 * radius;
        final int rowLimit = image.getHeight() - firstRow;

        for (int row = firstRow; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + radius;
            // Index of the left-most source pixel under the first window of this row.
            final int rowBase = image.startIndex + row * image.stride;
            for (int n = 0; n < outputsPerRow; n++) {
                float sum = 0.0f;
                for (int k = 0; k < taps; k++) {
                    sum += input[rowBase + n + k] * weights[k];
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Convolves a 1D kernel along the columns of a {@code float} image.
     *
     * <p>Rows {@code radius} through {@code height - radius - 1} are written, where the
     * width and height are taken from {@code dest}.</p>
     *
     * @param kernel        1D kernel; radius and width come from {@code getRadius()} / {@code getWidth()}
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every column; {@code false} to skip the
     *                      first and last {@code radius} columns
     */
    public static void vertical(Kernel1D_F32 kernel, ImageFloat32 image, ImageFloat32 dest, boolean includeBorder) {
        final float[] input = image.data;
        final float[] output = dest.data;
        final float[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstCol = includeBorder ? 0 : radius;
        final int colLimit = dest.getWidth() - firstCol;
        final int rowLimit = dest.getHeight() - radius;

        for (int row = radius; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + firstCol;
            // Start of the source row that lies under the top of the kernel.
            final int topRow = image.startIndex + (row - radius) * image.stride;
            for (int col = firstCol; col < colLimit; col++) {
                float sum = 0.0f;
                for (int k = 0, readAt = topRow + col; k < taps; k++, readAt += image.stride) {
                    sum += input[readAt] * weights[k];
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Applies a 2D kernel to the interior of a {@code float} image.
     *
     * <p>The radius is {@code kernel.width / 2} and the window spans
     * {@code 2 * radius + 1} pixels in each direction. Only pixels at least {@code radius}
     * away from every edge of {@code src} are written.</p>
     *
     * @param kernel 2D kernel whose weights are read row by row from {@code kernel.data}
     * @param src    source image
     * @param dest   image receiving the result
     */
    public static void convolve(Kernel2D_F32 kernel, ImageFloat32 src, ImageFloat32 dest) {
        final float[] weights = kernel.data;
        final float[] input = src.data;
        final float[] output = dest.data;
        final int xSize = src.getWidth();
        final int ySize = src.getHeight();
        final int radius = kernel.width / 2;
        final int xLimit = xSize - radius;
        final int yLimit = ySize - radius;

        for (int y = radius; y < yLimit; y++) {
            int writeAt = dest.startIndex + y * dest.stride + radius;
            for (int x = radius; x < xLimit; x++) {
                float sum = 0.0f;
                int w = 0;
                for (int dy = -radius; dy <= radius; dy++) {
                    // Walk one window row from its left-most to its right-most pixel.
                    int readAt = src.startIndex + (y + dy) * src.stride + x - radius;
                    final int lastRead = readAt + 2 * radius;
                    while (readAt <= lastRead) {
                        sum += input[readAt++] * weights[w++];
                    }
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Convolves a 1D kernel along the rows of a {@code double} image.
     *
     * <p>In each processed row, columns {@code radius} through {@code width - radius - 1}
     * are written, where {@code width} is taken from {@code image}.</p>
     *
     * @param kernel        1D kernel; radius and width come from {@code getRadius()} / {@code getWidth()}
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every row; {@code false} to skip the
     *                      first and last {@code radius} rows
     */
    public static void horizontal(Kernel1D_F64 kernel, ImageFloat64 image, ImageFloat64 dest, boolean includeBorder) {
        final double[] input = image.data;
        final double[] output = dest.data;
        final double[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstRow = includeBorder ? 0 : radius;
        final int outputsPerRow = image.getWidth() - 2 * radius;
        final int rowLimit = image.getHeight() - firstRow;

        for (int row = firstRow; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + radius;
            final int rowBase = image.startIndex + row * image.stride;
            for (int n = 0; n < outputsPerRow; n++) {
                double sum = 0.0;
                for (int k = 0; k < taps; k++) {
                    sum += input[rowBase + n + k] * weights[k];
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Convolves a 1D kernel along the columns of a {@code double} image.
     *
     * <p>Rows {@code radius} through {@code height - radius - 1} are written, where the
     * width and height are taken from {@code dest}.</p>
     *
     * @param kernel        1D kernel; radius and width come from {@code getRadius()} / {@code getWidth()}
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every column; {@code false} to skip the
     *                      first and last {@code radius} columns
     */
    public static void vertical(Kernel1D_F64 kernel, ImageFloat64 image, ImageFloat64 dest, boolean includeBorder) {
        final double[] input = image.data;
        final double[] output = dest.data;
        final double[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstCol = includeBorder ? 0 : radius;
        final int colLimit = dest.getWidth() - firstCol;
        final int rowLimit = dest.getHeight() - radius;

        for (int row = radius; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + firstCol;
            final int topRow = image.startIndex + (row - radius) * image.stride;
            for (int col = firstCol; col < colLimit; col++) {
                double sum = 0.0;
                for (int k = 0, readAt = topRow + col; k < taps; k++, readAt += image.stride) {
                    sum += input[readAt] * weights[k];
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Applies a 2D kernel to the interior of a {@code double} image.
     *
     * <p>The radius is {@code kernel.width / 2}; only pixels at least {@code radius} away
     * from every edge of {@code src} are written.</p>
     *
     * @param kernel 2D kernel whose weights are read row by row from {@code kernel.data}
     * @param src    source image
     * @param dest   image receiving the result
     */
    public static void convolve(Kernel2D_F64 kernel, ImageFloat64 src, ImageFloat64 dest) {
        final double[] weights = kernel.data;
        final double[] input = src.data;
        final double[] output = dest.data;
        final int xSize = src.getWidth();
        final int ySize = src.getHeight();
        final int radius = kernel.width / 2;
        final int xLimit = xSize - radius;
        final int yLimit = ySize - radius;

        for (int y = radius; y < yLimit; y++) {
            int writeAt = dest.startIndex + y * dest.stride + radius;
            for (int x = radius; x < xLimit; x++) {
                double sum = 0.0;
                int w = 0;
                for (int dy = -radius; dy <= radius; dy++) {
                    int readAt = src.startIndex + (y + dy) * src.stride + x - radius;
                    final int lastRead = readAt + 2 * radius;
                    while (readAt <= lastRead) {
                        sum += input[readAt++] * weights[w++];
                    }
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the rows of an unsigned 8-bit image, storing
     * 16-bit results.
     *
     * <p>Source bytes are masked with {@code 0xFF} before use. The {@code int} sum is
     * narrowed with a plain {@code (short)} cast, so out-of-range values wrap.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every row; {@code false} to skip the
     *                      first and last {@code radius} rows
     */
    public static void horizontal(Kernel1D_I32 kernel, ImageUInt8 image, ImageInt16 dest, boolean includeBorder) {
        final byte[] input = image.data;
        final short[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstRow = includeBorder ? 0 : radius;
        final int outputsPerRow = image.getWidth() - 2 * radius;
        final int rowLimit = image.getHeight() - firstRow;

        for (int row = firstRow; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + radius;
            final int rowBase = image.startIndex + row * image.stride;
            for (int n = 0; n < outputsPerRow; n++) {
                int sum = 0;
                for (int k = 0; k < taps; k++) {
                    sum += (input[rowBase + n + k] & 0xFF) * weights[k];
                }
                output[writeAt++] = (short) sum;
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the columns of an unsigned 8-bit image, storing
     * 16-bit results.
     *
     * <p>Source bytes are masked with {@code 0xFF} before use. The {@code int} sum is
     * narrowed with a plain {@code (short)} cast, so out-of-range values wrap. Width and
     * height are taken from {@code dest}.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every column; {@code false} to skip the
     *                      first and last {@code radius} columns
     */
    public static void vertical(Kernel1D_I32 kernel, ImageUInt8 image, ImageInt16 dest, boolean includeBorder) {
        final byte[] input = image.data;
        final short[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstCol = includeBorder ? 0 : radius;
        final int colLimit = dest.getWidth() - firstCol;
        final int rowLimit = dest.getHeight() - radius;

        for (int row = radius; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + firstCol;
            final int topRow = image.startIndex + (row - radius) * image.stride;
            for (int col = firstCol; col < colLimit; col++) {
                int sum = 0;
                for (int k = 0, readAt = topRow + col; k < taps; k++, readAt += image.stride) {
                    sum += (input[readAt] & 0xFF) * weights[k];
                }
                output[writeAt++] = (short) sum;
            }
        }
    }

    /**
     * Applies an integer 2D kernel to the interior of an unsigned 8-bit image, storing
     * 16-bit results.
     *
     * <p>Source bytes are masked with {@code 0xFF}; the sum is narrowed with a plain
     * {@code (short)} cast. The radius is {@code kernel.width / 2}.</p>
     *
     * @param kernel 2D integer kernel whose weights are read row by row from {@code kernel.data}
     * @param src    source image
     * @param dest   image receiving the result
     */
    public static void convolve(Kernel2D_I32 kernel, ImageUInt8 src, ImageInt16 dest) {
        final int[] weights = kernel.data;
        final byte[] input = src.data;
        final short[] output = dest.data;
        final int xSize = src.getWidth();
        final int ySize = src.getHeight();
        final int radius = kernel.width / 2;
        final int xLimit = xSize - radius;
        final int yLimit = ySize - radius;

        for (int y = radius; y < yLimit; y++) {
            int writeAt = dest.startIndex + y * dest.stride + radius;
            for (int x = radius; x < xLimit; x++) {
                int sum = 0;
                int w = 0;
                for (int dy = -radius; dy <= radius; dy++) {
                    int readAt = src.startIndex + (y + dy) * src.stride + x - radius;
                    final int lastRead = readAt + 2 * radius;
                    while (readAt <= lastRead) {
                        sum += (input[readAt++] & 0xFF) * weights[w++];
                    }
                }
                output[writeAt++] = (short) sum;
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the rows of an unsigned 8-bit image, storing
     * 32-bit results.
     *
     * <p>Source bytes are masked with {@code 0xFF} before use.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every row; {@code false} to skip the
     *                      first and last {@code radius} rows
     */
    public static void horizontal(Kernel1D_I32 kernel, ImageUInt8 image, ImageSInt32 dest, boolean includeBorder) {
        final byte[] input = image.data;
        final int[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstRow = includeBorder ? 0 : radius;
        final int outputsPerRow = image.getWidth() - 2 * radius;
        final int rowLimit = image.getHeight() - firstRow;

        for (int row = firstRow; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + radius;
            final int rowBase = image.startIndex + row * image.stride;
            for (int n = 0; n < outputsPerRow; n++) {
                int sum = 0;
                for (int k = 0; k < taps; k++) {
                    sum += (input[rowBase + n + k] & 0xFF) * weights[k];
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the columns of an unsigned 8-bit image, storing
     * 32-bit results.
     *
     * <p>Source bytes are masked with {@code 0xFF} before use. Width and height are taken
     * from {@code dest}.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every column; {@code false} to skip the
     *                      first and last {@code radius} columns
     */
    public static void vertical(Kernel1D_I32 kernel, ImageUInt8 image, ImageSInt32 dest, boolean includeBorder) {
        final byte[] input = image.data;
        final int[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstCol = includeBorder ? 0 : radius;
        final int colLimit = dest.getWidth() - firstCol;
        final int rowLimit = dest.getHeight() - radius;

        for (int row = radius; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + firstCol;
            final int topRow = image.startIndex + (row - radius) * image.stride;
            for (int col = firstCol; col < colLimit; col++) {
                int sum = 0;
                for (int k = 0, readAt = topRow + col; k < taps; k++, readAt += image.stride) {
                    sum += (input[readAt] & 0xFF) * weights[k];
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Applies an integer 2D kernel to the interior of an unsigned 8-bit image, storing
     * 32-bit results.
     *
     * <p>Source bytes are masked with {@code 0xFF}. The radius is {@code kernel.width / 2}.</p>
     *
     * @param kernel 2D integer kernel whose weights are read row by row from {@code kernel.data}
     * @param src    source image
     * @param dest   image receiving the result
     */
    public static void convolve(Kernel2D_I32 kernel, ImageUInt8 src, ImageSInt32 dest) {
        final int[] weights = kernel.data;
        final byte[] input = src.data;
        final int[] output = dest.data;
        final int xSize = src.getWidth();
        final int ySize = src.getHeight();
        final int radius = kernel.width / 2;
        final int xLimit = xSize - radius;
        final int yLimit = ySize - radius;

        for (int y = radius; y < yLimit; y++) {
            int writeAt = dest.startIndex + y * dest.stride + radius;
            for (int x = radius; x < xLimit; x++) {
                int sum = 0;
                int w = 0;
                for (int dy = -radius; dy <= radius; dy++) {
                    int readAt = src.startIndex + (y + dy) * src.stride + x - radius;
                    final int lastRead = readAt + 2 * radius;
                    while (readAt <= lastRead) {
                        sum += (input[readAt++] & 0xFF) * weights[w++];
                    }
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the rows of a signed 16-bit image, storing
     * 16-bit results.
     *
     * <p>The {@code int} sum is narrowed with a plain {@code (short)} cast, so out-of-range
     * values wrap.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every row; {@code false} to skip the
     *                      first and last {@code radius} rows
     */
    public static void horizontal(Kernel1D_I32 kernel, ImageSInt16 image, ImageInt16 dest, boolean includeBorder) {
        final short[] input = image.data;
        final short[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstRow = includeBorder ? 0 : radius;
        final int outputsPerRow = image.getWidth() - 2 * radius;
        final int rowLimit = image.getHeight() - firstRow;

        for (int row = firstRow; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + radius;
            final int rowBase = image.startIndex + row * image.stride;
            for (int n = 0; n < outputsPerRow; n++) {
                int sum = 0;
                for (int k = 0; k < taps; k++) {
                    sum += input[rowBase + n + k] * weights[k];
                }
                output[writeAt++] = (short) sum;
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the columns of a signed 16-bit image, storing
     * 16-bit results.
     *
     * <p>The {@code int} sum is narrowed with a plain {@code (short)} cast, so out-of-range
     * values wrap. Width and height are taken from {@code dest}.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every column; {@code false} to skip the
     *                      first and last {@code radius} columns
     */
    public static void vertical(Kernel1D_I32 kernel, ImageSInt16 image, ImageInt16 dest, boolean includeBorder) {
        final short[] input = image.data;
        final short[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstCol = includeBorder ? 0 : radius;
        final int colLimit = dest.getWidth() - firstCol;
        final int rowLimit = dest.getHeight() - radius;

        for (int row = radius; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + firstCol;
            final int topRow = image.startIndex + (row - radius) * image.stride;
            for (int col = firstCol; col < colLimit; col++) {
                int sum = 0;
                for (int k = 0, readAt = topRow + col; k < taps; k++, readAt += image.stride) {
                    sum += input[readAt] * weights[k];
                }
                output[writeAt++] = (short) sum;
            }
        }
    }

    /**
     * Applies an integer 2D kernel to the interior of a signed 16-bit image, storing
     * 16-bit results.
     *
     * <p>The sum is narrowed with a plain {@code (short)} cast. The radius is
     * {@code kernel.width / 2}.</p>
     *
     * @param kernel 2D integer kernel whose weights are read row by row from {@code kernel.data}
     * @param src    source image
     * @param dest   image receiving the result
     */
    public static void convolve(Kernel2D_I32 kernel, ImageSInt16 src, ImageInt16 dest) {
        final int[] weights = kernel.data;
        final short[] input = src.data;
        final short[] output = dest.data;
        final int xSize = src.getWidth();
        final int ySize = src.getHeight();
        final int radius = kernel.width / 2;
        final int xLimit = xSize - radius;
        final int yLimit = ySize - radius;

        for (int y = radius; y < yLimit; y++) {
            int writeAt = dest.startIndex + y * dest.stride + radius;
            for (int x = radius; x < xLimit; x++) {
                int sum = 0;
                int w = 0;
                for (int dy = -radius; dy <= radius; dy++) {
                    int readAt = src.startIndex + (y + dy) * src.stride + x - radius;
                    final int lastRead = readAt + 2 * radius;
                    while (readAt <= lastRead) {
                        sum += input[readAt++] * weights[w++];
                    }
                }
                output[writeAt++] = (short) sum;
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the rows of an unsigned 8-bit image and divides
     * each sum before storing it as 8 bits.
     *
     * <p>Source bytes are masked with {@code 0xFF}. The stored value is
     * {@code (sum + divisor / 2) / divisor} in integer arithmetic, narrowed with a plain
     * {@code (byte)} cast.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param divisor       value each sum is divided by
     * @param includeBorder {@code true} to process every row; {@code false} to skip the
     *                      first and last {@code radius} rows
     */
    public static void horizontal(Kernel1D_I32 kernel, ImageUInt8 image, ImageInt8 dest, int divisor, boolean includeBorder) {
        final byte[] input = image.data;
        final byte[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int rounding = divisor / 2;
        final int firstRow = includeBorder ? 0 : radius;
        final int outputsPerRow = image.getWidth() - 2 * radius;
        final int rowLimit = image.getHeight() - firstRow;

        for (int row = firstRow; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + radius;
            final int rowBase = image.startIndex + row * image.stride;
            for (int n = 0; n < outputsPerRow; n++) {
                int sum = 0;
                for (int k = 0; k < taps; k++) {
                    sum += (input[rowBase + n + k] & 0xFF) * weights[k];
                }
                output[writeAt++] = (byte) ((sum + rounding) / divisor);
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the columns of an unsigned 8-bit image and
     * divides each sum before storing it as 8 bits.
     *
     * <p>Source bytes are masked with {@code 0xFF}. The stored value is
     * {@code (sum + divisor / 2) / divisor} in integer arithmetic, narrowed with a plain
     * {@code (byte)} cast. Width and height are taken from {@code dest}.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param divisor       value each sum is divided by
     * @param includeBorder {@code true} to process every column; {@code false} to skip the
     *                      first and last {@code radius} columns
     */
    public static void vertical(Kernel1D_I32 kernel, ImageUInt8 image, ImageInt8 dest, int divisor, boolean includeBorder) {
        final byte[] input = image.data;
        final byte[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int rounding = divisor / 2;
        final int firstCol = includeBorder ? 0 : radius;
        final int colLimit = dest.getWidth() - firstCol;
        final int rowLimit = dest.getHeight() - radius;

        for (int row = radius; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + firstCol;
            final int topRow = image.startIndex + (row - radius) * image.stride;
            for (int col = firstCol; col < colLimit; col++) {
                int sum = 0;
                for (int k = 0, readAt = topRow + col; k < taps; k++, readAt += image.stride) {
                    sum += (input[readAt] & 0xFF) * weights[k];
                }
                output[writeAt++] = (byte) ((sum + rounding) / divisor);
            }
        }
    }

    /**
     * Applies an integer 2D kernel to the interior of an unsigned 8-bit image and divides
     * each sum before storing it as 8 bits.
     *
     * <p>Source bytes are masked with {@code 0xFF}. The stored value is
     * {@code (sum + divisor / 2) / divisor} in integer arithmetic, narrowed with a plain
     * {@code (byte)} cast. The radius is {@code kernel.width / 2}.</p>
     *
     * @param kernel  2D integer kernel whose weights are read row by row from {@code kernel.data}
     * @param src     source image
     * @param dest    image receiving the result
     * @param divisor value each sum is divided by
     */
    public static void convolve(Kernel2D_I32 kernel, ImageUInt8 src, ImageInt8 dest, int divisor) {
        final int[] weights = kernel.data;
        final byte[] input = src.data;
        final byte[] output = dest.data;
        final int xSize = src.getWidth();
        final int ySize = src.getHeight();
        final int rounding = divisor / 2;
        final int radius = kernel.width / 2;
        final int xLimit = xSize - radius;
        final int yLimit = ySize - radius;

        for (int y = radius; y < yLimit; y++) {
            int writeAt = dest.startIndex + y * dest.stride + radius;
            for (int x = radius; x < xLimit; x++) {
                int sum = 0;
                int w = 0;
                for (int dy = -radius; dy <= radius; dy++) {
                    int readAt = src.startIndex + (y + dy) * src.stride + x - radius;
                    final int lastRead = readAt + 2 * radius;
                    while (readAt <= lastRead) {
                        sum += (input[readAt++] & 0xFF) * weights[w++];
                    }
                }
                output[writeAt++] = (byte) ((sum + rounding) / divisor);
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the rows of a signed 16-bit image and divides
     * each sum before storing it as 16 bits.
     *
     * <p>The stored value is {@code (sum + divisor / 2) / divisor} in integer arithmetic,
     * narrowed with a plain {@code (short)} cast.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param divisor       value each sum is divided by
     * @param includeBorder {@code true} to process every row; {@code false} to skip the
     *                      first and last {@code radius} rows
     */
    public static void horizontal(Kernel1D_I32 kernel, ImageSInt16 image, ImageInt16 dest, int divisor, boolean includeBorder) {
        final short[] input = image.data;
        final short[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int rounding = divisor / 2;
        final int firstRow = includeBorder ? 0 : radius;
        final int outputsPerRow = image.getWidth() - 2 * radius;
        final int rowLimit = image.getHeight() - firstRow;

        for (int row = firstRow; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + radius;
            final int rowBase = image.startIndex + row * image.stride;
            for (int n = 0; n < outputsPerRow; n++) {
                int sum = 0;
                for (int k = 0; k < taps; k++) {
                    sum += input[rowBase + n + k] * weights[k];
                }
                output[writeAt++] = (short) ((sum + rounding) / divisor);
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the columns of a signed 16-bit image and
     * divides each sum before storing it as 16 bits.
     *
     * <p>The stored value is {@code (sum + divisor / 2) / divisor} in integer arithmetic,
     * narrowed with a plain {@code (short)} cast. Width and height are taken from
     * {@code dest}.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param divisor       value each sum is divided by
     * @param includeBorder {@code true} to process every column; {@code false} to skip the
     *                      first and last {@code radius} columns
     */
    public static void vertical(Kernel1D_I32 kernel, ImageSInt16 image, ImageInt16 dest, int divisor, boolean includeBorder) {
        final short[] input = image.data;
        final short[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int rounding = divisor / 2;
        final int firstCol = includeBorder ? 0 : radius;
        final int colLimit = dest.getWidth() - firstCol;
        final int rowLimit = dest.getHeight() - radius;

        for (int row = radius; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + firstCol;
            final int topRow = image.startIndex + (row - radius) * image.stride;
            for (int col = firstCol; col < colLimit; col++) {
                int sum = 0;
                for (int k = 0, readAt = topRow + col; k < taps; k++, readAt += image.stride) {
                    sum += input[readAt] * weights[k];
                }
                output[writeAt++] = (short) ((sum + rounding) / divisor);
            }
        }
    }

    /**
     * Applies an integer 2D kernel to the interior of a signed 16-bit image and divides
     * each sum before storing it as 16 bits.
     *
     * <p>The stored value is {@code (sum + divisor / 2) / divisor} in integer arithmetic,
     * narrowed with a plain {@code (short)} cast. The radius is {@code kernel.width / 2}.</p>
     *
     * @param kernel  2D integer kernel whose weights are read row by row from {@code kernel.data}
     * @param src     source image
     * @param dest    image receiving the result
     * @param divisor value each sum is divided by
     */
    public static void convolve(Kernel2D_I32 kernel, ImageSInt16 src, ImageInt16 dest, int divisor) {
        final int[] weights = kernel.data;
        final short[] input = src.data;
        final short[] output = dest.data;
        final int xSize = src.getWidth();
        final int ySize = src.getHeight();
        final int rounding = divisor / 2;
        final int radius = kernel.width / 2;
        final int xLimit = xSize - radius;
        final int yLimit = ySize - radius;

        for (int y = radius; y < yLimit; y++) {
            int writeAt = dest.startIndex + y * dest.stride + radius;
            for (int x = radius; x < xLimit; x++) {
                int sum = 0;
                int w = 0;
                for (int dy = -radius; dy <= radius; dy++) {
                    int readAt = src.startIndex + (y + dy) * src.stride + x - radius;
                    final int lastRead = readAt + 2 * radius;
                    while (readAt <= lastRead) {
                        sum += input[readAt++] * weights[w++];
                    }
                }
                output[writeAt++] = (short) ((sum + rounding) / divisor);
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the rows of a signed 32-bit image.
     *
     * <p>In each processed row, columns {@code radius} through {@code width - radius - 1}
     * are written, where {@code width} is taken from {@code image}.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every row; {@code false} to skip the
     *                      first and last {@code radius} rows
     */
    public static void horizontal(Kernel1D_I32 kernel, ImageSInt32 image, ImageSInt32 dest, boolean includeBorder) {
        final int[] input = image.data;
        final int[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstRow = includeBorder ? 0 : radius;
        final int outputsPerRow = image.getWidth() - 2 * radius;
        final int rowLimit = image.getHeight() - firstRow;

        for (int row = firstRow; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + radius;
            final int rowBase = image.startIndex + row * image.stride;
            for (int n = 0; n < outputsPerRow; n++) {
                int sum = 0;
                for (int k = 0; k < taps; k++) {
                    sum += input[rowBase + n + k] * weights[k];
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the columns of a signed 32-bit image.
     *
     * <p>Rows {@code radius} through {@code height - radius - 1} are written, where the
     * width and height are taken from {@code dest}.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param includeBorder {@code true} to process every column; {@code false} to skip the
     *                      first and last {@code radius} columns
     */
    public static void vertical(Kernel1D_I32 kernel, ImageSInt32 image, ImageSInt32 dest, boolean includeBorder) {
        final int[] input = image.data;
        final int[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int firstCol = includeBorder ? 0 : radius;
        final int colLimit = dest.getWidth() - firstCol;
        final int rowLimit = dest.getHeight() - radius;

        for (int row = radius; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + firstCol;
            final int topRow = image.startIndex + (row - radius) * image.stride;
            for (int col = firstCol; col < colLimit; col++) {
                int sum = 0;
                for (int k = 0, readAt = topRow + col; k < taps; k++, readAt += image.stride) {
                    sum += input[readAt] * weights[k];
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Applies an integer 2D kernel to the interior of a signed 32-bit image.
     *
     * <p>The radius is {@code kernel.width / 2}; only pixels at least {@code radius} away
     * from every edge of {@code src} are written.</p>
     *
     * @param kernel 2D integer kernel whose weights are read row by row from {@code kernel.data}
     * @param src    source image
     * @param dest   image receiving the result
     */
    public static void convolve(Kernel2D_I32 kernel, ImageSInt32 src, ImageSInt32 dest) {
        final int[] weights = kernel.data;
        final int[] input = src.data;
        final int[] output = dest.data;
        final int xSize = src.getWidth();
        final int ySize = src.getHeight();
        final int radius = kernel.width / 2;
        final int xLimit = xSize - radius;
        final int yLimit = ySize - radius;

        for (int y = radius; y < yLimit; y++) {
            int writeAt = dest.startIndex + y * dest.stride + radius;
            for (int x = radius; x < xLimit; x++) {
                int sum = 0;
                int w = 0;
                for (int dy = -radius; dy <= radius; dy++) {
                    int readAt = src.startIndex + (y + dy) * src.stride + x - radius;
                    final int lastRead = readAt + 2 * radius;
                    while (readAt <= lastRead) {
                        sum += input[readAt++] * weights[w++];
                    }
                }
                output[writeAt++] = sum;
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the rows of a signed 32-bit image and divides
     * each sum before storing it.
     *
     * <p>The stored value is {@code (sum + divisor / 2) / divisor} in integer arithmetic.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param divisor       value each sum is divided by
     * @param includeBorder {@code true} to process every row; {@code false} to skip the
     *                      first and last {@code radius} rows
     */
    public static void horizontal(Kernel1D_I32 kernel, ImageSInt32 image, ImageSInt32 dest, int divisor, boolean includeBorder) {
        final int[] input = image.data;
        final int[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int rounding = divisor / 2;
        final int firstRow = includeBorder ? 0 : radius;
        final int outputsPerRow = image.getWidth() - 2 * radius;
        final int rowLimit = image.getHeight() - firstRow;

        for (int row = firstRow; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + radius;
            final int rowBase = image.startIndex + row * image.stride;
            for (int n = 0; n < outputsPerRow; n++) {
                int sum = 0;
                for (int k = 0; k < taps; k++) {
                    sum += input[rowBase + n + k] * weights[k];
                }
                output[writeAt++] = (sum + rounding) / divisor;
            }
        }
    }

    /**
     * Convolves an integer 1D kernel along the columns of a signed 32-bit image and
     * divides each sum before storing it.
     *
     * <p>The stored value is {@code (sum + divisor / 2) / divisor} in integer arithmetic.
     * Width and height are taken from {@code dest}.</p>
     *
     * @param kernel        1D integer kernel
     * @param image         source image
     * @param dest          image receiving the result
     * @param divisor       value each sum is divided by
     * @param includeBorder {@code true} to process every column; {@code false} to skip the
     *                      first and last {@code radius} columns
     */
    public static void vertical(Kernel1D_I32 kernel, ImageSInt32 image, ImageSInt32 dest, int divisor, boolean includeBorder) {
        final int[] input = image.data;
        final int[] output = dest.data;
        final int[] weights = kernel.data;
        final int radius = kernel.getRadius();
        final int taps = kernel.getWidth();
        final int rounding = divisor / 2;
        final int firstCol = includeBorder ? 0 : radius;
        final int colLimit = dest.getWidth() - firstCol;
        final int rowLimit = dest.getHeight() - radius;

        for (int row = radius; row < rowLimit; row++) {
            int writeAt = dest.startIndex + row * dest.stride + firstCol;
            final int topRow = image.startIndex + (row - radius) * image.stride;
            for (int col = firstCol; col < colLimit; col++) {
                int sum = 0;
                for (int k = 0, readAt = topRow + col; k < taps; k++, readAt += image.stride) {
                    sum += input[readAt] * weights[k];
                }
                output[writeAt++] = (sum + rounding) / divisor;
            }
        }
    }

    /**
     * Applies an integer 2D kernel to the interior of a signed 32-bit image and divides
     * each sum before storing it.
     *
     * <p>The stored value is {@code (sum + divisor / 2) / divisor} in integer arithmetic.
     * The radius is {@code kernel.width / 2}.</p>
     *
     * @param kernel  2D integer kernel whose weights are read row by row from {@code kernel.data}
     * @param src     source image
     * @param dest    image receiving the result
     * @param divisor value each sum is divided by
     */
    public static void convolve(Kernel2D_I32 kernel, ImageSInt32 src, ImageSInt32 dest, int divisor) {
        final int[] weights = kernel.data;
        final int[] input = src.data;
        final int[] output = dest.data;
        final int xSize = src.getWidth();
        final int ySize = src.getHeight();
        final int rounding = divisor / 2;
        final int radius = kernel.width / 2;
        final int xLimit = xSize - radius;
        final int yLimit = ySize - radius;

        for (int y = radius; y < yLimit; y++) {
            int writeAt = dest.startIndex + y * dest.stride + radius;
            for (int x = radius; x < xLimit; x++) {
                int sum = 0;
                int w = 0;
                for (int dy = -radius; dy <= radius; dy++) {
                    int readAt = src.startIndex + (y + dy) * src.stride + x - radius;
                    final int lastRead = readAt + 2 * radius;
                    while (readAt <= lastRead) {
                        sum += input[readAt++] * weights[w++];
                    }
                }
                output[writeAt++] = (sum + rounding) / divisor;
            }
        }
    }
}

