package org.nasdanika.ai;

import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import reactor.core.publisher.Mono;

/**
 * Splits input by whitespace, lowercases each token and then computes the
 * frequency of each resulting word.
 * <p>
 * Lowercasing uses {@link Locale#ROOT}, so the produced keys do not depend on
 * the default locale of the JVM.
 */
public class BagOfWordsGenerator implements TextEmbeddingGenerator<Map<String,Integer>> {

    /**
     * Computes a word-frequency map for the given text.
     *
     * @param source text to tokenize; may be {@code null}
     * @return a {@link Mono} holding a map from lowercased word to its number of
     *         occurrences. For {@code null} or blank input the map is the immutable
     *         {@link Collections#emptyMap()}; otherwise it is a {@link TreeMap},
     *         i.e. keys are in natural {@link String} order.
     */
    @Override
    public Mono<Map<String, Integer>> generateAsync(String source) {
        if (source == null || source.trim().isEmpty()) {
            return Mono.just(Collections.emptyMap());
        }

        Map<String, Integer> frequencies = Stream
            .of(source.split("\\s+"))
            // split() yields a leading "" when the input starts with whitespace;
            // that is not a word and must not be counted.
            .filter(token -> !token.isEmpty())
            .map(token -> token.toLowerCase(Locale.ROOT))
            .collect(Collectors.groupingBy(
                Function.identity(),
                TreeMap::new,
                Collectors.summingInt(token -> 1)));

        return Mono.just(frequencies);
    }

}