/*
 * Decompiled with CFR 0.152.
 */
package org.dromara.pdf.pdfbox.core.ext.extractor;

import java.awt.Rectangle;
import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.dromara.pdf.pdfbox.core.base.Document;
import org.dromara.pdf.pdfbox.core.ext.extractor.AbstractExtractor;

public abstract class AbstractTextExtractor
extends AbstractExtractor {
    protected static final Pattern TABLE_PATTERN = Pattern.compile("(\\S[^\\n\\r]+)");

    public AbstractTextExtractor(Document document) {
        super(document);
    }

    public abstract Map<Integer, List<String>> extractByRegex(String var1, int ... var2);

    public abstract Map<Integer, Map<String, String>> extractByRegionArea(String var1, Map<String, Rectangle> var2, int ... var3);

    public abstract Map<Integer, Map<String, List<List<String>>>> extractByTable(String var1, Map<String, Rectangle> var2, int ... var3);

    protected List<String> processTextByRegex(String regex, DefaultTextStripper stripper) {
        ArrayList<String> list = new ArrayList<String>(32);
        String text = stripper.getText(this.getDocument());
        if (Objects.nonNull(regex) && !regex.trim().isEmpty()) {
            Matcher matcher = Pattern.compile(regex).matcher(text);
            while (matcher.find()) {
                list.add(matcher.group());
            }
        } else {
            list.add(text);
        }
        return list;
    }

    protected Map<String, String> processTextByRegionArea(String wordSeparator, Map<String, Rectangle> regionArea, PDPage page) {
        HashMap<String, String> data;
        DefaultTextStripper stripper = new DefaultTextStripper(regionArea);
        if (regionArea.isEmpty()) {
            data = new HashMap<String, String>(0);
        } else {
            Set<String> keySet = regionArea.keySet();
            data = new HashMap(keySet.size());
            for (String region : keySet) {
                stripper.setStartPage(stripper.getCurrentPageNo());
                stripper.setEndPage(stripper.getCurrentPageNo());
                stripper.setWordSeparator(wordSeparator);
                ArrayList regionCharactersByArticle = new ArrayList(256);
                regionCharactersByArticle.add(new ArrayList(256));
                stripper.getRegionCharacterList().put(region, regionCharactersByArticle);
                stripper.getRegionText().put(region, new StringWriter());
            }
            if (page.hasContents()) {
                stripper.processPage(page);
            }
            for (String region : keySet) {
                data.put(region, stripper.getRegionText().get(region).toString());
            }
        }
        return data;
    }

    protected Map<String, List<List<String>>> processTextByTable(String wordSeparator, Map<String, Rectangle> regionArea, PDPage page) {
        Map<String, String> sourceMap = this.processTextByRegionArea(wordSeparator, regionArea, page);
        if (sourceMap.isEmpty()) {
            return new HashMap<String, List<List<String>>>(0);
        }
        HashMap<String, List<List<String>>> dataMap = new HashMap<String, List<List<String>>>(sourceMap.size());
        sourceMap.forEach((key, value) -> {
            ArrayList rows = new ArrayList(16);
            ArrayList<String> columns = new ArrayList<String>(16);
            Matcher matcher = TABLE_PATTERN.matcher((CharSequence)value);
            while (matcher.find()) {
                columns.add(matcher.group());
            }
            for (String rowText : columns) {
                rows.add(Arrays.stream(rowText.split(wordSeparator)).collect(Collectors.toList()));
            }
            dataMap.put((String)key, rows);
        });
        return dataMap;
    }

    protected <R> Map<Integer, R> extractText(Function<R> function, String wordSeparator, Map<String, Rectangle> regionArea, int ... pageIndexes) {
        HashMap<Integer, R> data = new HashMap<Integer, R>(32);
        PDPageTree pageTree = this.getDocument().getPages();
        if (Objects.nonNull(pageIndexes) && pageIndexes.length > 0) {
            for (int index : pageIndexes) {
                try {
                    data.put(index, function.apply(wordSeparator, regionArea, pageTree.get(index)));
                }
                catch (Exception e) {
                    log.warn((Object)("the index['" + index + "'] is invalid, will be ignored"));
                }
            }
        } else {
            int index = 0;
            for (PDPage page : pageTree) {
                data.put(index, function.apply(wordSeparator, regionArea, page));
                ++index;
            }
        }
        return data;
    }

    protected static class DefaultTextStripper
    extends PDFTextStripper {
        protected final Map<String, ArrayList<List<TextPosition>>> regionCharacterList = new HashMap<String, ArrayList<List<TextPosition>>>(32);
        protected final Map<String, StringWriter> regionText = new HashMap<String, StringWriter>(32);
        protected Map<String, Rectangle> regionArea;

        public DefaultTextStripper(Map<String, Rectangle> regionArea) {
            this.regionArea = regionArea;
            super.setSortByPosition(true);
        }

        protected int getCurrentPageNo() {
            return super.getCurrentPageNo();
        }

        protected void processTextPosition(TextPosition text) {
            if (Objects.nonNull(this.regionArea)) {
                Set<Map.Entry<String, Rectangle>> entrySet = this.regionArea.entrySet();
                for (Map.Entry<String, Rectangle> regionAreaEntry : entrySet) {
                    Rectangle2D rect = regionAreaEntry.getValue();
                    if (!rect.contains(text.getX(), text.getY())) continue;
                    this.charactersByArticle = this.regionCharacterList.get(regionAreaEntry.getKey());
                }
            }
            super.processTextPosition(text);
        }

        protected void writePage() throws IOException {
            if (Objects.nonNull(this.regionArea)) {
                Set<String> keySet = this.regionArea.keySet();
                for (String region : keySet) {
                    this.charactersByArticle = this.regionCharacterList.get(region);
                    this.output = this.regionText.get(region);
                    super.writePage();
                }
            } else {
                super.writePage();
            }
        }

        public Map<String, ArrayList<List<TextPosition>>> getRegionCharacterList() {
            return this.regionCharacterList;
        }

        public Map<String, StringWriter> getRegionText() {
            return this.regionText;
        }

        public Map<String, Rectangle> getRegionArea() {
            return this.regionArea;
        }
    }

    @FunctionalInterface
    protected static interface Function<R> {
        public R apply(String var1, Map<String, Rectangle> var2, PDPage var3);
    }
}

