001package org.nasdanika.ai.cli; 002 003import org.nasdanika.ai.Embeddings; 004import org.nasdanika.ai.EncodingChunkingEmbeddings; 005 006import com.knuddels.jtokkit.api.EncodingType; 007import com.knuddels.jtokkit.api.IntArrayList; 008 009import io.opentelemetry.api.trace.Span; 010import picocli.CommandLine.Option; 011 012public class EncodingChunkingEmbeddingsArgGroup extends ChunkingEmbeddingsArgGroup<IntArrayList> { 013 014 @Option( 015 names = "--chunk-encoding-type", 016 description = { 017 "Chunk encoding type", 018 "Valid values: ${COMPLETION-CANDIDATES}", 019 "Default value: CL100K_BASE" 020 }) 021 protected EncodingType encodingType = EncodingType.CL100K_BASE; 022 023 @Override 024 public EncodingChunkingEmbeddings createChunkingEmbeddings(Embeddings target) { 025 return new EncodingChunkingEmbeddings(target, chunkSize, chunksOverlap, encodingType); 026 } 027 028 @Override 029 public void setSpanAttributes(Span span) { 030 span.setAttribute("chunk.encoding", encodingType.name()); 031 } 032 033}