package org.wikidata.query.rdf.tool;

import static org.wikidata.query.rdf.tool.OptionsUtils.handleOptions;
import static org.wikidata.query.rdf.tool.StreamUtils.utf8;

import java.io.IOException;
import java.io.InputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.openrdf.model.Literal;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.Rio;
import org.openrdf.rio.WriterConfig;
import org.openrdf.rio.helpers.BasicWriterSettings;
import org.openrdf.rio.turtle.TurtleParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikidata.query.rdf.common.uri.WikibaseUris;
import org.wikidata.query.rdf.tool.OptionsUtils.BasicOptions;
import org.wikidata.query.rdf.tool.OptionsUtils.WikibaseOptions;
import org.wikidata.query.rdf.tool.exception.ContainedException;
import org.wikidata.query.rdf.tool.rdf.PrefixRecordingRdfHandler;

import com.codahale.metrics.Meter;
import com.lexicalscope.jewel.cli.Option;

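/**
 * Scans a Wikidata RDF dump and writes out only the statements whose object
 * is a literal with a language tag that is not made up solely of lowercase
 * ASCII letters, digits and hyphens.
 */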
@SuppressWarnings("checkstyle:classfanoutcomplexity")
public class MungeLang implements Runnable {
    /**
     * Logger for this tool. It is named after this class so that log output is
     * attributed to MungeLang rather than to another tool.
     */
    private static final Logger log = LoggerFactory.getLogger(MungeLang.class);

    /**
     * Command line options understood by this tool. Parsed with JewelCli, on
     * top of the shared basic and wikibase options.
     */
    @SuppressWarnings("checkstyle:javadocmethod")
    public interface Options extends BasicOptions, WikibaseOptions {
        @Option(
                shortName = "f",
                defaultValue = "-",
                description = "Source file (or uri) to munge. Default is - aka stdin.")
        String from();

        @Option(
                shortName = "t",
                defaultValue = "-",
                description = "Destination of munge. Use port:<port_number> to start an http server on that port. "
                        + "Default is - aka stdout. "
                        + "If the file's parent directories don't exist then they will be created ala mkdir -p.")
        String to();

        @Option(
                defaultValue = "0",
                description = "Chunk size in entities. "
                        + "If specified then the \"to\" option must be a java format string containing a single "
                        + "format identifier which is replaced with the chunk number port:<port_numer>. "
                        + "%08d.ttl is a pretty good choice for format string. "
                        + "If \"to\" is in port form then every http request will get the next chunk. "
                        + "Must be greater than 0 and less than " + Integer.MAX_VALUE + ".")
        int chunkSize();
    }

    /**
     * Command line entry point. Parses the options, chooses the output (chunked
     * files when a positive chunk size is given, a single writer otherwise),
     * opens the input and runs the munge. Any failure is logged and the JVM
     * exits with status 1.
     *
     * @param args raw command line arguments
     */
    @SuppressWarnings("checkstyle:illegalcatch")
    public static void main(String[] args) {
        final Options options = handleOptions(Options.class, args);
        final WikibaseUris uris = new WikibaseUris(options.wikibaseHost());

        OutputPicker<Writer> destination;
        try {
            final int chunkSize = options.chunkSize();
            if (chunkSize <= 0) {
                // No chunking requested: everything goes to one writer.
                destination = new AlwaysOutputPicker<>(CliUtils.writer(options.to()));
            } else {
                destination = new ChunkedFileWriterOutputPicker(options.to(), chunkSize);
            }
        } catch (IOException e) {
            log.error("Error finding output", e);
            System.exit(1);
            return;
        }

        try {
            new MungeLang(uris, openInput(options.from()), destination).run();
        } catch (RuntimeException e) {
            log.error("Fatal error munging RDF", e);
            System.exit(1);
        }
    }

    /**
     * Open a reader for the "from" option. If the input can't be opened the
     * error is logged and the JVM is asked to exit with status 1.
     *
     * @param from the value of the "from" option
     * @return the opened reader, or null on the failure path after the exit
     *         call
     */
    private static Reader openInput(String from) {
        Reader input = null;
        try {
            input = CliUtils.reader(from);
        } catch (IOException e) {
            log.error("Error finding input", e);
            System.exit(1);
        }
        return input;
    }

    /**
     * Uris of the wikibase instance. Its entity uri is passed to the parser
     * and the whole object is handed to the statement handler.
     */
    private final WikibaseUris uris;
    /**
     * Reader the RDF is parsed from. Closed when {@link #run()} finishes.
     */
    private final Reader from;
    /**
     * Picker that supplies the writer the output is sent to.
     */
    private final OutputPicker<Writer> to;

    /**
     * Build a munger that reads from one reader and writes to whatever the
     * output picker provides.
     *
     * @param uris uris of the wikibase instance the dump came from
     * @param from source of the RDF
     * @param to picker for the writer(s) receiving the output
     */
    public MungeLang(WikibaseUris uris, Reader from, OutputPicker<Writer> to) {
        this.to = to;
        this.from = from;
        this.uris = uris;
    }

    /**
     * Parse the input and push every statement through the filtering handler
     * into the output. Parse, handler and IO failures are rethrown wrapped in
     * a RuntimeException. The input and the current output writer are closed
     * in every case; failures to close are only logged.
     */
    @Override
    public void run() {
        try {
            // TODO this is a temporary hack. Without it the parser would be
            // built with Rio.createParser(RDFFormat.TURTLE).
            final RDFParser turtle = new ForbiddenOk.HackedTurtleParser();
            final OutputPicker<RDFHandler> sink = new WriterToRDFWriterChunkPicker(to);
            turtle.setRDFHandler(new EntityMungingRdfHandler(uris, sink));
            turtle.parse(from, uris.entity());
        } catch (RDFParseException | RDFHandlerException | IOException e) {
            throw new RuntimeException(e);
        } finally {
            try {
                from.close();
            } catch (IOException e) {
                log.error("Error closing input", e);
            }
            try {
                to.output().close();
            } catch (IOException e) {
                log.error("Error closing output", e);
            }
        }
    }

    /**
     * RDF handler that remembers the most recent statement it was given and
     * forwards to the next handler only those statements whose object is a
     * literal with a language tag that does not match the language pattern.
     * Namespaces and comments are passed straight through. Every forwarded
     * statement, plus the end of the document, counts as one "entity" for the
     * meter and for the chunking done by the output picker.
     */
    private static class EntityMungingRdfHandler implements RDFHandler {
        /**
         * What an unremarkable language tag looks like. Statements with tags
         * matching this are dropped.
         */
        private static final Pattern LANGUAGE_PATTERN = Pattern.compile("^[a-z0-9-]+$");

        /**
         * Uris for this instance of wikibase.
         */
        private final WikibaseUris uris;
        /**
         * Where the selected statements are sent.
         */
        private final OutputPicker<RDFHandler> next;
        /**
         * Statements waiting to be flushed by the next call to munge.
         */
        private final List<Statement> statements = new ArrayList<>();
        /**
         * Counts calls to munge and provides the rates that are logged.
         */
        private final Meter entitiesMeter = new Meter();
        /**
         * Entity id reported when munging fails. Nothing in this class assigns
         * it, so it is logged as null.
         */
        private String entityId;
        /**
         * The statement most recently passed to handleStatement.
         */
        private Statement lastStatement;

        public EntityMungingRdfHandler(WikibaseUris uris, OutputPicker<RDFHandler> next) {
            this.next = next;
            this.uris = uris;
        }

        /**
         * The statement most recently passed to handleStatement.
         *
         * @return that statement, or null if none has been handled yet
         */
        public Statement getLast() {
            return lastStatement;
        }

        @Override
        public void startRDF() throws RDFHandlerException {
            next.output().startRDF();
        }

        @Override
        public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
            // Forwarded untouched.
            next.output().handleNamespace(prefix, uri);
        }

        @Override
        public void handleComment(String comment) throws RDFHandlerException {
            // Forwarded untouched.
            next.output().handleComment(comment);
        }

        @Override
        public void handleStatement(Statement statement) throws RDFHandlerException {
            lastStatement = statement;
            final Value value = statement.getObject();
            final String tag = value instanceof Literal ? ((Literal) value).getLanguage() : null;
            if (tag == null) {
                // Not a literal, or a literal without a language.
                return;
            }
            final Matcher plain = LANGUAGE_PATTERN.matcher(tag);
            if (!plain.matches()) {
                statements.add(statement);
                munge();
            }
        }

        @Override
        public void endRDF() throws RDFHandlerException {
            munge();
            next.output().endRDF();
        }

        /**
         * Send the buffered statements to the output, mark the meter, log
         * progress on every 10000th mark, tell the output picker how many
         * marks there have been, and then empty the buffer.
         *
         * @throws RDFHandlerException if the output rejects a statement
         */
        private void munge() throws RDFHandlerException {
            try {
                for (int i = 0; i < statements.size(); i++) {
                    next.output().handleStatement(statements.get(i));
                }
                entitiesMeter.mark();
                final long munged = entitiesMeter.getCount();
                if (munged % 10000 == 0) {
                    log.info("Processed {} entities at ({}, {}, {})", munged,
                            (long) entitiesMeter.getOneMinuteRate(), (long) entitiesMeter.getFiveMinuteRate(),
                            (long) entitiesMeter.getFifteenMinuteRate());
                }
                next.entitiesMunged((int) munged);
            } catch (ContainedException e) {
                log.warn("Error munging {}", entityId, e);
            }
            statements.clear();
        }
    }

    /**
     * Chooses the output that should currently be written to.
     *
     * @param <T> the type of output being picked
     */
    public interface OutputPicker<T> {
        /**
         * The output that should be written to right now.
         *
         * @return the current output
         */
        T output();

        /**
         * Report how many entities have been handled so far.
         *
         * @param entitiesMunged running total of handled entities
         */
        void entitiesMunged(int entitiesMunged);
    }

    /**
     * Output picker with exactly one output that never changes.
     *
     * @param <T> the type of output being picked
     */
    public static class AlwaysOutputPicker<T> implements OutputPicker<T> {
        /**
         * The single output handed back by every call to output.
         */
        private final T fixed;

        public AlwaysOutputPicker(T next) {
            fixed = next;
        }

        @Override
        public T output() {
            return fixed;
        }

        @Override
        public void entitiesMunged(int entitiesMunged) {
            // The count is ignored because the output never changes.
        }
    }

    /**
     * Output picker that moves on to a freshly built writer each time the
     * reported entity count crosses into a new chunk.
     */
    private abstract static class ChunkedWriterOutputPicker implements OutputPicker<Writer> {
        /**
         * How many entities go into each chunk.
         */
        private final int chunkSize;
        /**
         * The writer handed out by output(). Built lazily on the first call.
         */
        private Writer current;
        /**
         * Number of the chunk the current writer belongs to. Chunks are
         * numbered from 1.
         */
        private int lastChunk = 1;

        public ChunkedWriterOutputPicker(int chunkSize) {
            this.chunkSize = chunkSize;
        }

        @Override
        public Writer output() {
            if (current != null) {
                return current;
            }
            current = buildWriter(lastChunk);
            return current;
        }

        @Override
        public void entitiesMunged(int entitiesMunged) {
            final int chunk = entitiesMunged / chunkSize + 1;
            if (chunk == lastChunk) {
                // Still inside the same chunk.
                return;
            }
            lastChunk = chunk;
            current = buildWriter(chunk);
        }

        /**
         * Build the writer for a chunk.
         *
         * @param chunk number of the chunk to build the writer for
         * @return the writer for that chunk
         */
        protected abstract Writer buildWriter(long chunk);
    }

    /**
     * Chunked output picker whose chunks are files named by formatting the
     * chunk number into a pattern.
     */
    public static class ChunkedFileWriterOutputPicker extends ChunkedWriterOutputPicker {
        /**
         * Format string that is given the chunk number to produce a file name.
         */
        private final String pattern;

        public ChunkedFileWriterOutputPicker(String pattern, int chunkSize) {
            super(chunkSize);
            this.pattern = pattern;
        }

        @Override
        protected Writer buildWriter(long chunk) {
            final String target = String.format(Locale.ROOT, pattern, chunk);
            log.info("Switching to {}", target);
            final Writer opened;
            try {
                opened = CliUtils.writer(target);
            } catch (IOException e) {
                throw new RuntimeException("Error switching chunks", e);
            }
            return opened;
        }
    }

    /**
     * OutputPicker that writes each chunk to a PipedOutputStream and puts the
     * matching PipedInputStream on a BlockingQueue for a consumer to read.
     */
    public static class ChunkedPipedWriterOutputPicker extends ChunkedWriterOutputPicker {
        /**
         * Queue to hold readable results streams.
         */
        private final BlockingQueue<InputStream> queue;

        public ChunkedPipedWriterOutputPicker(BlockingQueue<InputStream> queue, int chunkSize) {
            super(chunkSize);
            this.queue = queue;
        }

        /**
         * Build a pipe for the chunk and publish its reading end on the queue.
         *
         * @param chunk number of the chunk (not used by this implementation)
         * @return writer feeding the pipe whose reading end was queued
         * @throws RuntimeException if the pipe can't be set up or the thread
         *             is interrupted while waiting for room on the queue
         */
        @Override
        protected Writer buildWriter(long chunk) {
            PipedInputStream toQueue = new PipedInputStream();
            try {
                /*
                 * Connect the pipe before publishing the reading end so a
                 * consumer can never take a stream that is not connected yet.
                 */
                Writer writer = utf8(new PipedOutputStream(toQueue));
                queue.put(toQueue);
                return writer;
            } catch (InterruptedException e) {
                // Keep the interrupt visible to whoever called us.
                Thread.currentThread().interrupt();
                throw new RuntimeException("Error switching chunks", e);
            } catch (IOException e) {
                throw new RuntimeException("Error switching chunks", e);
            }
        }
    }

    /**
     * Turns an OutputPicker of writers into an OutputPicker of RDFHandlers.
     * Whenever the underlying writer changes, the old handler is ended and its
     * writer closed, and a new Turtle handler is started on the new writer
     * with all the prefixes recorded so far.
     */
    private static class WriterToRDFWriterChunkPicker implements OutputPicker<RDFHandler> {
        /**
         * Prefixes shared with every handler we build. They are replayed into
         * each new handler.
         */
        private final Map<String, String> prefixes = new LinkedHashMap<>();
        /**
         * The picker that decides which writer is current.
         */
        private final OutputPicker<Writer> next;
        /**
         * The writer the current handler was built on. A different writer
         * coming back from next means a new chunk has begun.
         */
        private Writer lastWriter;
        /**
         * The handler currently being written to.
         */
        private RDFHandler handler;

        public WriterToRDFWriterChunkPicker(OutputPicker<Writer> next) {
            this.next = next;
            lastWriter = next.output();
            try {
                setHandlerFromLastWriter();
            } catch (RDFHandlerException e) {
                throw new RuntimeException("Error setting up first rdf writer", e);
            }
        }

        @Override
        public RDFHandler output() {
            final Writer current = next.output();
            if (current != lastWriter) {
                // New chunk: finish the old document, then begin the new one.
                try {
                    handler.endRDF();
                    lastWriter.close();
                    lastWriter = current;
                    setHandlerFromLastWriter();
                    handler.startRDF();
                } catch (RDFHandlerException | IOException e) {
                    throw new RuntimeException("Error switching chunks", e);
                }
            }
            return handler;
        }

        @Override
        public void entitiesMunged(int entitiesMunged) {
            next.entitiesMunged(entitiesMunged);
        }

        /**
         * Build a Turtle handler with pretty printing off on top of
         * lastWriter, make it the current handler and replay the recorded
         * prefixes into it.
         *
         * @throws RDFHandlerException if the handler rejects a prefix
         */
        private void setHandlerFromLastWriter() throws RDFHandlerException {
            final RDFWriter turtle = Rio.createWriter(RDFFormat.TURTLE, lastWriter);
            final WriterConfig settings = turtle.getWriterConfig();
            settings.set(BasicWriterSettings.PRETTY_PRINT, false);
            handler = new PrefixRecordingRdfHandler(turtle, prefixes);
            for (Map.Entry<String, String> known : prefixes.entrySet()) {
                handler.handleNamespace(known.getKey(), known.getValue());
            }
        }
    }

    /**
     * We need access to getMessage from exceptions. This is brittle but
     * (hopefully) temporary.
     */
    private static class ForbiddenOk {
        /**
         * TurtleParser that tries to recover from errors we see in wikibase.
         */
        private static class HackedTurtleParser extends TurtleParser {
            /**
             * Parse a uri, replacing one that fails with a known "string
             * escapes" or "unencoded space" error by a placeholder uri after
             * skipping the rest of the broken one.
             *
             * @return the parsed uri, or http://example.com/error resolved as
             *         a uri when recovering
             * @throws RDFParseException for any other parse error, or when the
             *             input ends before the broken uri is closed
             */
            @Override
            protected URI parseURI() throws IOException, RDFParseException {
                try {
                    return super.parseURI();
                } catch (RDFParseException e) {
                    final String message = e.getMessage();
                    final boolean recoverable = message != null
                            && (message.startsWith("IRI includes string escapes: ")
                                    || message.startsWith("IRI included an unencoded space: '32'"));
                    if (!recoverable) {
                        throw e;
                    }
                    log.warn("Attempting to recover from", e);
                    if (!message.startsWith("IRI includes string escapes: '\\62'") && !discardThrough('>')) {
                        /*
                         * The input ended inside the uri so there is nothing
                         * left to recover.
                         */
                        throw e;
                    }
                    return super.resolveURI("http://example.com/error");
                }
            }

            /**
             * Parse a statement. On a parse error log it along with the last
             * statement seen by the handler and skip the rest of the line,
             * hoping that is enough to recover.
             */
            @Override
            protected void parseStatement() throws IOException, RDFParseException, RDFHandlerException {
                try {
                    super.parseStatement();
                } catch (RDFParseException e) {
                    // Only our own handler can tell us the last statement.
                    final Statement last = rdfHandler instanceof EntityMungingRdfHandler
                            ? ((EntityMungingRdfHandler) rdfHandler).getLast() : null;
                    log.warn("Exception after statement: {} {}", last, e);
                    discardThrough('\n');
                }
            }

            /**
             * Read and drop input up to and including the terminator, stopping
             * early at the end of the input instead of reading forever.
             *
             * @param terminator code point that ends the skipped region
             * @return true if the terminator was found, false if the input
             *         ended first
             * @throws IOException if reading fails
             */
            private boolean discardThrough(int terminator) throws IOException {
                int codePoint = readCodePoint();
                while (codePoint != terminator && codePoint != -1) {
                    codePoint = readCodePoint();
                }
                return codePoint == terminator;
            }
        }
    }
}
