001package org.nasdanika.ai;
002
003import java.util.Arrays;
004
005/**
006 * A simple implementation which treats a character as a token.
007 * Can be used for testing and when an encoder is unknown or not available.  
008 */
009public class CharChunkingEmbeddings extends ChunkingEmbeddings<char[]> {
010
011        public CharChunkingEmbeddings(Embeddings target, int chunkSize, int overlap) {
012                super(target, chunkSize, overlap);
013        }
014
015        @Override
016        protected char[] encode(String input) {
017                return input.toCharArray();
018        }
019
020        @Override
021        protected String decode(char[] tokens) {
022                return String.valueOf(tokens);
023        }
024
025        @Override
026        protected int size(char[] tokens) {
027                return tokens.length;
028        }
029
030        @Override
031        protected char[] slice(char[] tokens, int offset, int length) {
032                return Arrays.copyOfRange(tokens, offset, Math.min(tokens.length, offset + length));
033        }
034
035}