/*
 * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license
 * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template
 */
package rocks.imsofa.ai.puppychatter.rag.drive;

import com.google.api.services.drive.Drive;
import com.google.api.services.drive.model.File;
import com.google.gson.Gson;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import rocks.imsofa.utils.text.extractors.TextExtractors;

/**
 * Retrieves the text content of Google Drive files, backed by a simple on-disk cache.
 *
 * <p>The cache lives in {@code .cache/.driveContent} (resolved against the process working
 * directory). It holds one {@code <fileId>.txt} file per cached Drive file plus a JSON
 * {@code .index} file of the form
 * {@code {fileId: {lastModified: epochMillis, cachedFileName: name}}}.
 *
 * @author USER
 */
public class GoogleDriveFileContentGetter {

    /** Charset name used for every cache read and write. */
    private static final String ENCODING = "utf-8";

    /**
     * Returns the text content of {@code file}, serving it from the local cache when the cached
     * copy is at least as recent as the file's Drive timestamp.
     *
     * <p>On a cache miss the content is fetched (exported as {@code text/plain} when the MIME type
     * contains {@code "google"}, otherwise downloaded and passed through
     * {@link TextExtractors}) and the cache entry is refreshed. When the file carries neither a
     * modified nor a created time the cache is bypassed entirely, because freshness cannot be
     * verified.
     *
     * <p>The method is {@code static synchronized}, so calls are serialized on the class lock.
     *
     * @param drive the Drive client used to export or download the file
     * @param file  the Drive file metadata; its id must be set
     * @return the text content of the file
     * @throws Exception if fetching, text extraction or cache I/O fails
     */
    @SuppressWarnings("unchecked")
    public synchronized static String getTextContent(Drive drive, File file) throws Exception {
        java.io.File cacheRoot = new java.io.File(".cache");
        java.io.File contentCacheRoot = new java.io.File(cacheRoot, ".driveContent");
        java.io.File indexFile = new java.io.File(contentCacheRoot, ".index");
        contentCacheRoot.mkdirs();

        Gson gson = new Gson();
        Map<String, Object> indexMap = null;
        if (indexFile.exists()) {
            indexMap = gson.fromJson(FileUtils.readFileToString(indexFile, ENCODING), Map.class);
        }
        if (indexMap == null) {
            // No index yet, or the index file was empty (Gson yields null for empty input).
            indexMap = new HashMap<>();
        }

        // Resolve the timestamp once so the freshness check and the stored entry always agree.
        Long remoteTimestamp = timestampOf(file);

        String cachedText = readFromCache(indexMap.get(file.getId()), contentCacheRoot, remoteTimestamp);
        if (cachedText != null) {
            return cachedText;
        }

        // Cache miss, stale entry, or unreadable cache file: fetch from Drive.
        String textContent = fetchText(drive, file);
        if (remoteTimestamp == null) {
            // Without a timestamp a cache entry could never be validated, so do not create one.
            return textContent;
        }

        java.io.File cachedFile = new java.io.File(contentCacheRoot, file.getId() + ".txt");
        FileUtils.write(cachedFile, textContent, ENCODING);
        Map<String, Object> newEntry =
                Map.of("lastModified", remoteTimestamp, "cachedFileName", cachedFile.getName());
        indexMap.put(file.getId(), newEntry);
        FileUtils.write(indexFile, gson.toJson(indexMap), ENCODING);
        return textContent;
    }

    /** Returns the file's modified time, else its created time, in epoch millis; null if neither is set. */
    private static Long timestampOf(File file) {
        if (file.getModifiedTime() != null) {
            return file.getModifiedTime().getValue();
        }
        if (file.getCreatedTime() != null) {
            return file.getCreatedTime().getValue();
        }
        return null;
    }

    /** Returns the cached text when the index entry is well-formed, fresh and readable; otherwise null. */
    private static String readFromCache(Object indexEntry, java.io.File contentCacheRoot, Long remoteTimestamp)
            throws Exception {
        if (remoteTimestamp == null || !(indexEntry instanceof Map)) {
            return null;
        }
        Map<?, ?> fileInfo = (Map<?, ?>) indexEntry;
        Object lastModified = fileInfo.get("lastModified");
        Object cachedFileName = fileInfo.get("cachedFileName");
        // Gson reads JSON numbers into Double; accept any Number rather than casting to Double.
        if (!(lastModified instanceof Number) || !(cachedFileName instanceof String)) {
            return null;
        }
        if (((Number) lastModified).longValue() < remoteTimestamp) {
            return null; // cached copy is older than the Drive file
        }
        java.io.File cachedFile = new java.io.File(contentCacheRoot, (String) cachedFileName);
        return cachedFile.exists() ? FileUtils.readFileToString(cachedFile, ENCODING) : null;
    }

    /** Fetches the file's text from Drive: export for Google-native types, download plus extraction otherwise. */
    private static String fetchText(Drive drive, File file) throws Exception {
        String mimeType = file.getMimeType();
        if (mimeType != null && mimeType.contains("google")) {
            try (InputStream input = drive.files().export(file.getId(), "text/plain").executeAsInputStream()) {
                return IOUtils.toString(input, ENCODING);
            }
        }

        // The Drive name is kept as the suffix so the temp file retains the original extension;
        // characters that are invalid in file names (e.g. '/') are replaced so creation cannot fail on them.
        String suffix = file.getName() == null ? null : file.getName().replaceAll("[\\\\/:*?\"<>|]", "_");
        java.io.File tempFile = java.io.File.createTempFile(".temp_", suffix);
        try {
            try (InputStream input = drive.files().get(file.getId()).executeMediaAsInputStream()) {
                FileUtils.copyToFile(input, tempFile);
            }
            return TextExtractors.getDefaultInstance().extractText(tempFile);
        } finally {
            // Always remove the temp file, even when download or extraction fails;
            // a quiet delete avoids masking the original exception.
            FileUtils.deleteQuietly(tempFile);
        }
    }
}
