package rocks.imsofa.ai.puppychatter.rag;

import java.net.URI;
import java.net.URL;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.LoggerFactory;

import com.github.kevinsawicki.http.HttpRequest;

import rocks.imsofa.ai.puppychatter.BarkException;
import rocks.imsofa.ai.puppychatter.Conversation;
import rocks.imsofa.ai.puppychatter.PuppyChatter;
import rocks.imsofa.ai.puppychatter.Response;
import rocks.imsofa.ai.puppychatter.VerificationResult;
import rocks.imsofa.ai.puppychatter.openai.OpenAICompatiblePromptParameters;

/**
 * Content extractor for URLs that are expected to point to an HTML page.
 *
 * <p>The page is fetched and parsed with jsoup, reduced to its text (markup
 * stripped), and that text is then handed to the String-based
 * {@code extractTextContent(messages, content)} overload, which is not defined
 * in this class (presumably inherited from
 * {@link PuppyChatterTextContentExtractor}).
 */
@SuppressWarnings("all")
public class PuppyChatterHtmlContentExtractor extends PuppyChatterTextContentExtractor {

    /**
     * Timeout passed to jsoup when fetching a page. jsoup documents a value of
     * zero as "no timeout", so a slow server can block the caller indefinitely.
     */
    private static final int FETCH_TIMEOUT_MILLIS = 0;

    /**
     * Creates an extractor backed by the given chatter instance.
     *
     * @param puppyChatter passed through unchanged to the superclass constructor
     */
    public PuppyChatterHtmlContentExtractor(PuppyChatter puppyChatter) {
        super(puppyChatter);
    }

    /**
     * Fetches the HTML page at {@code url}, decodes it with {@code charset} and
     * extracts content from the page text.
     *
     * @param messages conversation passed through to the String-based overload
     * @param url      location of the HTML page
     * @param charset  character set used to decode the response body; when
     *                 {@code null} or blank, jsoup's own charset detection is used
     * @return the result of the String-based {@code extractTextContent} overload
     * @throws Exception if the page cannot be fetched or parsed, if the charset
     *                   name is not usable, or if the delegated extraction fails
     */
    @Override
    public String extractTextContent(List<Conversation> messages, URL url, String charset) throws Exception {
        return extractTextContent(messages, visibleText(fetch(url, charset)));
    }

    /**
     * Fetches the HTML page at {@code url}, letting jsoup detect the character
     * set, and extracts content from the page text.
     *
     * @param messages conversation passed through to the String-based overload
     * @param url      location of the HTML page
     * @return the result of the String-based {@code extractTextContent} overload
     * @throws Exception if the page cannot be fetched or parsed, or if the
     *                   delegated extraction fails
     */
    @Override
    public String extractTextContent(List<Conversation> messages, URL url) throws Exception {
        return extractTextContent(messages, visibleText(fetch(url, null)));
    }

    /**
     * Downloads and parses the page, honouring an explicit charset when one is given.
     */
    private static Document fetch(URL url, String charset) throws java.io.IOException {
        if (charset == null || charset.trim().isEmpty()) {
            // No usable charset supplied: keep jsoup's automatic detection.
            return Jsoup.parse(url, FETCH_TIMEOUT_MILLIS);
        }
        // Overriding the charset on the response makes jsoup decode the body
        // with the caller's charset instead of the auto-detected one.
        return Jsoup.connect(url.toExternalForm())
                .timeout(FETCH_TIMEOUT_MILLIS)
                .execute()
                .charset(charset)
                .parse();
    }

    /**
     * Returns the text of the document body, or of the whole document when it has no body.
     */
    private static String visibleText(Document doc) {
        return doc.body() != null ? doc.body().text() : doc.text();
    }

}
