package org.nasdanika.ai;

import java.util.Arrays;

/**
 * A simple implementation which treats a character as a token.
 * Can be used for testing and when an encoder is unknown or not available.
 * <p>
 * A "character" here is a Java {@code char}, i.e. a single UTF-16 code unit.
 * Characters outside the Basic Multilingual Plane occupy two {@code char}s,
 * so a slice boundary may fall between the two halves of a surrogate pair.
 */
public class CharChunkingEmbeddings extends ChunkingEmbeddings<char[]> {

    /**
     * Creates an instance by passing all arguments through to
     * {@link ChunkingEmbeddings}.
     *
     * @param target    embeddings to delegate to
     * @param chunkSize chunk size; presumably counted in {@code char}s for this
     *                  implementation - confirm against {@link ChunkingEmbeddings}
     * @param overlap   overlap between chunks; presumably counted in {@code char}s
     *                  as well - confirm against {@link ChunkingEmbeddings}
     */
    public CharChunkingEmbeddings(Embeddings target, int chunkSize, int overlap) {
        super(target, chunkSize, overlap);
    }

    /**
     * Converts the input into tokens, one token per {@code char}.
     *
     * @param input text to tokenize, must not be {@code null}
     * @return a newly allocated array holding the characters of {@code input}
     */
    @Override
    protected char[] encode(String input) {
        return input.toCharArray();
    }

    /**
     * Converts tokens back into text.
     *
     * @param tokens characters to join, must not be {@code null}
     * @return a string consisting of the given characters in order
     */
    @Override
    protected String decode(char[] tokens) {
        return String.valueOf(tokens);
    }

    /**
     * Reports the number of tokens.
     *
     * @param tokens token array, must not be {@code null}
     * @return the array length
     */
    @Override
    protected int size(char[] tokens) {
        return tokens.length;
    }

    /**
     * Copies up to {@code length} tokens starting at {@code offset}. The result
     * is shorter than {@code length} when fewer tokens remain in the array.
     *
     * @param tokens source tokens, must not be {@code null}
     * @param offset index of the first token to copy
     * @param length maximum number of tokens to copy
     * @return a new array with the selected tokens
     * @throws IllegalArgumentException       if the clamped end index precedes
     *                                        {@code offset}, as reported by
     *                                        {@link Arrays#copyOfRange(char[], int, int)}
     * @throws ArrayIndexOutOfBoundsException if {@code offset} is outside the array,
     *                                        as reported by
     *                                        {@link Arrays#copyOfRange(char[], int, int)}
     */
    @Override
    protected char[] slice(char[] tokens, int offset, int length) {
        // Add in long: an int sum above Integer.MAX_VALUE would wrap negative
        // and make copyOfRange reject a request that should simply be clamped.
        long requestedEnd = (long) offset + length;
        // The narrowing cast yields the same value as the int sum whenever that
        // sum is selected, so only the positive-overflow case changes behavior.
        int end = (int) Math.min(tokens.length, requestedEnd);
        return Arrays.copyOfRange(tokens, offset, end);
    }

}