/*
 * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license
 * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template
 */
package rocks.imsofa.ai.puppychatter.gemini;

import com.google.gson.Gson;
// import com.hankcs.hanlp.HanLP;
import com.hankcs.lucene.HanLPAnalyzer;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.LoggerFactory;
import rocks.imsofa.ai.puppychatter.Conversation;
import rocks.imsofa.ai.puppychatter.PuppyChatter;
import rocks.imsofa.ai.puppychatter.Response;
import rocks.imsofa.ai.puppychatter.cache.FileSystemCacheService;
import rocks.imsofa.ai.puppychatter.openai.OpenAICompatiblePromptParameters;
import rocks.imsofa.ai.puppychatter.openrouter.OpenrouterPuppyChatter;

/**
 * Fact source that fills the inherited passage list from a Lucene index.
 *
 * <p>When no passages are present yet, {@link #toJsonPayload(List)} asks the
 * configured {@link PuppyChatter} to extract keywords from the latest
 * conversation message, searches the {@code title} and {@code text} fields of
 * the index for those keywords, and stores an LLM-generated summary of every
 * hit as a passage before delegating to the superclass.</p>
 *
 * @author lendle
 */
@SuppressWarnings("all")
public class LuceneFactSource extends InlinePassages {

    /** Index fields that are searched. */
    private static final String[] SEARCH_FIELDS = {"title", "text"};
    /** Maximum number of index hits that are summarized into passages. */
    private static final int MAX_PASSAGE_HITS = 5;
    /** Maximum number of documents read by {@link #getSummary()}. */
    private static final int MAX_SUMMARY_DOCS = 100;
    /**
     * Second argument handed to {@code Summarizer.getSummary}; presumably a
     * size limit for the summary -- confirm against Summarizer.
     */
    private static final int SUMMARY_LIMIT = 20000;

    /** Directory that holds the Lucene index. */
    private final File indexDirectory;
    /**
     * Chat client obtained from {@code InternalPuppyChatterInitializer}.
     * Original author's note: it uses a limited key that only has access to
     * free models.
     */
    private final PuppyChatter puppyChatter;

    /**
     * Creates a fact source backed by the given index directory.
     *
     * <p>As a side effect a {@code .cache} folder is created in the working
     * directory when it does not exist yet.</p>
     *
     * @param indexDirectory directory containing the Lucene index
     */
    public LuceneFactSource(File indexDirectory) {
        this.indexDirectory = indexDirectory;
        File cacheFolder = new File(".cache");
        if (!cacheFolder.exists()) {
            cacheFolder.mkdirs();
        }
        this.puppyChatter = InternalPuppyChatterInitializer.getPuppyChatter();
    }

    /**
     * Builds the JSON payload, populating the passages from the index first
     * when none are present.
     *
     * <p>Any failure during keyword extraction, searching or summarizing is
     * logged and the payload is still produced by the superclass with whatever
     * passages were collected so far. The chat session is always closed.</p>
     *
     * @param messages the conversation so far; the last entry drives the search
     * @return the payload produced by the superclass
     */
    @SuppressWarnings("rawtypes")
    @Override
    public Map toJsonPayload(List<Conversation> messages) {
        boolean alreadyPopulated = super.passages != null && !super.passages.isEmpty();
        if (alreadyPopulated || messages == null || messages.isEmpty()) {
            // Nothing to retrieve (or nothing to retrieve with): no chat session needed.
            return super.toJsonPayload(messages);
        }

        // NOTE(review): if super.passages is null the add() in collectPassages fails
        // and is only logged, exactly as before; confirm InlinePassages initializes it.
        String sessionId = puppyChatter.createSession();
        try {
            String conversation = messages.get(messages.size() - 1).getContent();
            String queryString = buildQueryString(extractKeywords(sessionId, conversation));
            if (queryString == null) {
                LoggerFactory.getLogger(this.getClass().getName())
                        .warn("no usable keywords extracted, skipping lucene lookup");
            } else {
                LoggerFactory.getLogger(this.getClass().getName()).info("use lucene query: " + queryString);
                collectPassages(sessionId, conversation, queryString);
            }
        } catch (Exception ex) {
            Logger.getLogger(LuceneFactSource.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            // Previously skipped on the success path, which leaked the session.
            puppyChatter.closeSession(sessionId);
        }
        return super.toJsonPayload(messages);
    }

    /**
     * Asks the chat model for keywords describing the given text.
     *
     * @param sessionId chat session to use
     * @param conversation text to extract keywords from
     * @return the decoded JSON array, possibly {@code null} or containing
     *         non-string elements since the model output is not validated
     * @throws Exception if the chat call or JSON decoding fails
     */
    private List<?> extractKeywords(String sessionId, String conversation) throws Exception {
        String prompt = "請從以下的文字內容，擷取出兩個關鍵字，每個關鍵字至少包含兩個字,並以json陣列的方式回傳，格式爲 [關鍵字1,關鍵字2]：\r\n" + conversation;
        Response response = puppyChatter.bark(sessionId, prompt, new OpenAICompatiblePromptParameters("user"));
        return new Gson().fromJson(response.getMessage("application/json"), List.class);
    }

    /**
     * Builds the Lucene query string matching any keyword in either field.
     *
     * <p>Keywords come from an LLM, so they are escaped before being embedded
     * in query syntax; null and blank entries are dropped.</p>
     *
     * @param keywords decoded keyword list, may be {@code null}
     * @return the query string, or {@code null} when no usable keyword exists
     */
    private static String buildQueryString(List<?> keywords) {
        if (keywords == null) {
            return null;
        }
        StringBuilder titleClauses = new StringBuilder();
        StringBuilder textClauses = new StringBuilder();
        for (Object rawKeyword : keywords) {
            if (rawKeyword == null) {
                continue;
            }
            // toString() instead of a cast: Gson may decode numbers as Double.
            String keyword = rawKeyword.toString().trim();
            if (keyword.isEmpty()) {
                continue;
            }
            String escaped = MultiFieldQueryParser.escape(keyword);
            if (titleClauses.length() != 0) {
                titleClauses.append(" OR ");
                textClauses.append(" OR ");
            }
            titleClauses.append("(title:").append(escaped).append(')');
            textClauses.append("(text:").append(escaped).append(')');
        }
        if (titleClauses.length() == 0) {
            return null;
        }
        return "(" + textClauses + ") OR (" + titleClauses + ")";
    }

    /**
     * Runs the query against the index and stores one summarized passage per hit.
     *
     * @param sessionId chat session used for summarizing
     * @param conversation the user message the summaries should help answer
     * @param queryString Lucene query to execute
     * @throws Exception if the index cannot be read, the query cannot be
     *         parsed, or a chat call fails
     */
    private void collectPassages(String sessionId, String conversation, String queryString) throws Exception {
        Path indexPath = Files.createDirectories(indexDirectory.toPath());
        // try-with-resources: the reader and directory used to leak on any exception.
        try (Directory directory = FSDirectory.open(indexPath);
                DirectoryReader ireader = DirectoryReader.open(directory);
                Analyzer analyzer = new HanLPAnalyzer()) {
            IndexSearcher isearcher = new IndexSearcher(ireader);
            Query query = new MultiFieldQueryParser(SEARCH_FIELDS, analyzer).parse(queryString);
            TopDocs topDocs = isearcher.search(query, MAX_PASSAGE_HITS);

            for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                Document hitDoc = isearcher.doc(topDocs.scoreDocs[i].doc);
                String text = hitDoc.get("text");
                if (text == null) {
                    // No stored text: there is nothing to summarize for this hit.
                    continue;
                }
                String prompt = "請從以下的文字內容，摘要出最重要的內容，以能回答\"" + conversation + "\"爲目標，並以文字1000字以內表達：\r\n" + text;
                Response response = puppyChatter.bark(sessionId, prompt, new OpenAICompatiblePromptParameters("user"));
                super.passages.add(response.getMessage());
            }
        }
    }

    /**
     * Summarizes the {@code text} field of up to {@value #MAX_SUMMARY_DOCS}
     * indexed documents.
     *
     * @return the result of {@code Summarizer.getSummary} over the
     *         concatenated document texts
     * @throws Exception if the index cannot be read or summarizing fails
     */
    @Override
    public String getSummary() throws Exception {
        StringBuilder buffer = new StringBuilder();
        Path indexPath = Files.createDirectories(indexDirectory.toPath());
        try (Directory directory = FSDirectory.open(indexPath);
                DirectoryReader ireader = DirectoryReader.open(directory);
                Analyzer analyzer = new HanLPAnalyzer()) {
            IndexSearcher isearcher = new IndexSearcher(ireader);
            Query query = new MultiFieldQueryParser(SEARCH_FIELDS, analyzer).parse("*:*");
            TopDocs topDocs = isearcher.search(query, MAX_SUMMARY_DOCS);
            for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                String text = isearcher.doc(topDocs.scoreDocs[i].doc).get("text");
                if (text != null) {
                    // Skip documents without stored text instead of appending "null".
                    buffer.append(text).append("\r\n");
                }
            }
        }
        return Summarizer.getSummary(buffer.toString(), SUMMARY_LIMIT);
    }

    /**
     * Manual smoke test: prints the payload produced for a sample question.
     *
     * @param args optional first argument: path of the Lucene index directory;
     *        the original developer path is used when absent
     * @throws Exception if the lookup fails unexpectedly
     */
    public static void main(String[] args) throws Exception {
        String indexPath = args.length > 0
                ? args[0]
                : "/home/lendle/dev/projects/1112/AdvisoryCenterKeywords/lucene/index/";
        LuceneFactSource factSource = new LuceneFactSource(new File(indexPath));

        System.out.println(factSource.toJsonPayload(List.of(new Conversation("user", "台灣經濟情況"))));
    }

}
