/*
 * Decompiled with CFR 0.152.
 */
package top.aoyudi.rag.impl;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
import org.commonmark.node.Node;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.text.TextContentRenderer;
import top.aoyudi.rag.DocumentLoader;
import top.aoyudi.rag.properties.Document;

/**
 * {@link DocumentLoader} implementation that reads documents from the local file system.
 *
 * <p>Plain text ({@code .txt}), Markdown ({@code .md}) and Word ({@code .docx}) files are
 * supported; the file type is decided solely by the lower-cased file name extension.
 * Every loaded {@link Document} carries the metadata keys {@code source}, {@code filename},
 * {@code size}, {@code lastModified} and {@code extension}.
 */
public class FileSystemDocumentLoader implements DocumentLoader {
    private static final Logger log = LogManager.getLogger(FileSystemDocumentLoader.class);

    /** Lower-case file extensions (including the leading dot) this loader can read. */
    private static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<>(Arrays.asList(".txt", ".md", ".docx"));

    /**
     * Loads a single file as a {@link Document}.
     *
     * @param source path of the file to load
     * @return the document built from the file's text content and its metadata
     * @throws IllegalArgumentException if the file does not exist, is not a regular file,
     *         or has an unsupported extension
     * @throws RuntimeException wrapping the {@link IOException} if reading the file fails
     */
    @Override
    public Document loadDocument(String source) {
        Path filePath = Paths.get(source);
        validateFile(filePath);
        try {
            String content = readFileContent(filePath);
            Map<String, Object> metadata = extractMetadata(filePath);
            return Document.of(content, metadata);
        } catch (IOException e) {
            log.error("Failed to load document from {}", source, e);
            throw new RuntimeException("Error loading document: " + e.getMessage(), e);
        }
    }

    /**
     * Loads every given file, in order, via {@link #loadDocument(String)}.
     *
     * @param sources paths of the files to load
     * @return one document per source, in the same order
     */
    @Override
    public List<Document> loadDocuments(List<String> sources) {
        return sources.stream().map(this::loadDocument).collect(Collectors.toList());
    }

    /**
     * Recursively loads all supported files found below a directory.
     *
     * @param path directory to scan
     * @return the documents loaded from every supported file under {@code path}
     * @throws IllegalArgumentException if {@code path} is not a directory
     * @throws IOException if walking the directory tree fails
     */
    @Override
    public List<Document> loadDocumentsFromPath(String path) throws IOException {
        if (!Files.isDirectory(Paths.get(path))) {
            throw new IllegalArgumentException("Path is not a directory: " + path);
        }
        return loadDocuments(findFiles(path));
    }

    /**
     * Walks {@code rootPath} recursively and collects the absolute paths of all regular
     * files whose extension is supported. Entries that cannot be accessed are logged and
     * skipped rather than aborting the walk.
     */
    private List<String> findFiles(String rootPath) throws IOException {
        final List<String> resultPaths = new ArrayList<>();
        Files.walkFileTree(Paths.get(rootPath), new SimpleFileVisitor<Path>() {

            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                // Apply the same "regular file" rule as validateFile so that an entry which
                // would be rejected later (e.g. a link to a directory named "x.md") does not
                // abort loading of the whole directory.
                if (SUPPORTED_EXTENSIONS.contains(getFileExtension(file)) && Files.isRegularFile(file)) {
                    resultPaths.add(file.toAbsolutePath().toString());
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFileFailed(Path file, IOException exc) {
                // Report through the class logger (not System.err) and keep walking.
                log.warn("\u65e0\u6cd5\u8bbf\u95ee\u6587\u4ef6: {}\uff0c\u9519\u8bef: {}", file, exc.getMessage());
                return FileVisitResult.CONTINUE;
            }
        });
        return resultPaths;
    }

    /**
     * Checks that {@code filePath} exists, is a regular file and has a supported extension.
     *
     * @throws IllegalArgumentException if any of those conditions is not met
     */
    private void validateFile(Path filePath) {
        if (!Files.exists(filePath)) {
            throw new IllegalArgumentException("File does not exist: " + filePath);
        }
        if (!Files.isRegularFile(filePath)) {
            throw new IllegalArgumentException("Not a regular file: " + filePath);
        }
        String extension = getFileExtension(filePath);
        if (extension.isEmpty()) {
            // No dot in the file name at all.
            throw new IllegalArgumentException("Unsupported file type: " + filePath.getFileName());
        }
        if (!SUPPORTED_EXTENSIONS.contains(extension)) {
            throw new IllegalArgumentException("Unsupported file extension: " + extension + ", supported types: " + SUPPORTED_EXTENSIONS);
        }
    }

    /**
     * Dispatches to the reader matching the file's extension.
     *
     * @throws IllegalArgumentException if the extension has no reader
     * @throws IOException if the selected reader fails
     */
    private String readFileContent(Path filePath) throws IOException {
        String extension = getFileExtension(filePath);
        switch (extension) {
            case ".txt":
                return readTextFile(filePath);
            case ".md":
                return readMarkdownFile(filePath);
            case ".docx":
                return readDocxContent(filePath);
            default:
                throw new IllegalArgumentException("Unsupported file extension: " + extension);
        }
    }

    /** Builds the metadata map (source, filename, size, lastModified, extension) for a file. */
    private Map<String, Object> extractMetadata(Path filePath) throws IOException {
        Map<String, Object> metadata = new HashMap<>();
        metadata.put("source", filePath.toString());
        metadata.put("filename", filePath.getFileName().toString());
        metadata.put("size", Files.size(filePath));
        metadata.put("lastModified", new Date(Files.getLastModifiedTime(filePath).toMillis()));
        metadata.put("extension", getFileExtension(filePath));
        return metadata;
    }

    /**
     * Returns the lower-cased extension of the file name, including the leading dot,
     * or an empty string when the name contains no dot.
     */
    private String getFileExtension(Path filePath) {
        String fileName = filePath.getFileName().toString();
        int lastDotIndex = fileName.lastIndexOf('.');
        return lastDotIndex == -1 ? "" : fileName.substring(lastDotIndex).toLowerCase();
    }

    /**
     * Reads a text file as UTF-8, falling back to the JVM default charset when the bytes
     * are not valid UTF-8.
     *
     * <p>{@code Files.readString(Path)} always decodes as UTF-8, so the fallback must name
     * the default charset explicitly; retrying is pointless when that default is UTF-8 too.
     */
    private String readTextFile(Path filePath) throws IOException {
        try {
            return Files.readString(filePath, StandardCharsets.UTF_8);
        } catch (CharacterCodingException e) {
            Charset fallback = Charset.defaultCharset();
            if (StandardCharsets.UTF_8.equals(fallback)) {
                throw e;
            }
            log.warn("Failed to read file with UTF-8 encoding, trying default system encoding: {}", filePath);
            return Files.readString(filePath, fallback);
        }
    }

    /** Parses a UTF-8 Markdown file and renders it to plain text. */
    private String readMarkdownFile(Path filePath) throws IOException {
        Parser parser = Parser.builder().build();
        Node document;
        // Close the reader (and the underlying file stream) once parsing is done.
        try (Reader reader = new InputStreamReader(Files.newInputStream(filePath), StandardCharsets.UTF_8)) {
            document = parser.parseReader(reader);
        }
        return TextContentRenderer.builder().build().render(document);
    }

    /**
     * Extracts the text of a DOCX file: all body paragraphs (one per line) followed by all
     * tables, each row rendered as {@code | cell | cell |}. The result is trimmed.
     *
     * @throws IOException if the file cannot be opened or parsed; the failure is logged first
     */
    private String readDocxContent(Path filePath) throws IOException {
        // Both the file stream and the document are closed on every path, including a
        // failure inside the XWPFDocument constructor.
        try (InputStream in = Files.newInputStream(filePath);
             XWPFDocument doc = new XWPFDocument(in)) {
            StringBuilder content = new StringBuilder();
            for (XWPFParagraph paragraph : doc.getParagraphs()) {
                content.append(paragraph.getText()).append("\n");
            }
            for (XWPFTable table : doc.getTables()) {
                for (XWPFTableRow row : table.getRows()) {
                    for (XWPFTableCell cell : row.getTableCells()) {
                        content.append("| ").append(cell.getText()).append(" ");
                    }
                    content.append("|\n");
                }
                content.append("\n");
            }
            log.info("Successfully read DOCX file: {}", filePath);
            return content.toString().trim();
        } catch (IOException e) {
            log.error("Error reading DOCX file: {}", filePath, e);
            throw e;
        }
    }
}

