package morfologik.fsa;

import static morfologik.fsa.FSAFlags.*;
import static morfologik.util.FileUtils.readFully;

import java.io.IOException;
import java.io.InputStream;
import java.util.*;

/**
 * FSA binary format implementation for version 5.
 * 
 * <p>
 * Version 5 indicates the dictionary was built with these flags:
 * {@link FSAFlags#FLEXIBLE}, {@link FSAFlags#STOPBIT} and
 * {@link FSAFlags#NEXTBIT}. The internal representation of the FSA must
 * therefore follow this description (please note this format describes only a
 * single transition (arc), not the entire dictionary file).
 * 
 * <pre>
 * ---- this node header present only if automaton was compiled with NUMBERS option.
 * Byte
 *        +-+-+-+-+-+-+-+-+\
 *      0 | | | | | | | | | \  LSB
 *        +-+-+-+-+-+-+-+-+  +
 *      1 | | | | | | | | |  |      number of strings recognized
 *        +-+-+-+-+-+-+-+-+  +----- by the automaton starting
 *        : : : : : : : : :  |      from this node.
 *        +-+-+-+-+-+-+-+-+  +
 *  ctl-1 | | | | | | | | | /  MSB
 *        +-+-+-+-+-+-+-+-+/
 *        
 * ---- remaining part of the node
 * 
 * Byte
 *       +-+-+-+-+-+-+-+-+\
 *     0 | | | | | | | | | +------ label
 *       +-+-+-+-+-+-+-+-+/
 * 
 *                  +------------- node pointed to is next
 *                  | +----------- the last arc of the node
 *                  | | +--------- the arc is final
 *                  | | |
 *             +-----------+
 *             |    | | |  |
 *         ___+___  | | |  |
 *        /       \ | | |  |
 *       MSB           LSB |
 *        7 6 5 4 3 2 1 0  |
 *       +-+-+-+-+-+-+-+-+ |
 *     1 | | | | | | | | | \ \
 *       +-+-+-+-+-+-+-+-+  \ \  LSB
 *       +-+-+-+-+-+-+-+-+     +
 *     2 | | | | | | | | |     |
 *       +-+-+-+-+-+-+-+-+     |
 *     3 | | | | | | | | |     +----- target node address (in bytes)
 *       +-+-+-+-+-+-+-+-+     |      (not present except for the byte
 *       : : : : : : : : :     |       with flags if the node pointed to
 *       +-+-+-+-+-+-+-+-+     +       is next)
 *   gtl | | | | | | | | |    /  MSB
 *       +-+-+-+-+-+-+-+-+   /
 * gtl+1                           (gtl = gotoLength)
 * </pre>
 */
public final class FSA5 extends FSA {
	/**
	 * Version number a file header must carry for this class to accept it.
	 */
	public static final byte VERSION = 5;

	/**
	 * Flag bit (bit 0 of the flags byte): the arc is final, i.e. it
	 * corresponds to the last character of a sequence that was available
	 * when the automaton was built.
	 */
	public static final int BIT_FINAL_ARC = 1 << 0;

	/**
	 * Flag bit (bit 1 of the flags byte): the arc closes its node's arc
	 * list; whatever follows belongs to a different node.
	 */
	public static final int BIT_LAST_ARC = 1 << 1;

	/**
	 * Flag bit (bit 2 of the flags byte): the arc's target node is stored
	 * directly after the arc, so the arc carries no goto field.
	 */
	public static final int BIT_TARGET_NEXT = 1 << 2;

	/**
	 * Distance from the start of an arc to its flags/address field. In this
	 * format it is always 1, because only the label byte precedes the field.
	 */
	public final static int ADDRESS_OFFSET = 1;

	/**
	 * Raw bytes of the automaton, i.e. everything read from the stream after
	 * the header. The class-level documentation describes the arc layout.
	 */
	public final byte[] arcs;

	/**
	 * Size of the per-node header, taken from the upper four bits of the
	 * header's gtl value. Zero when those bits are zero (automaton without
	 * the <code>NUMBERS</code> option).
	 */
	public final int nodeDataLength;

	/**
	 * Flags describing this automaton; always FLEXIBLE, STOPBIT and NEXTBIT,
	 * plus NUMBERS when a node header is present.
	 */
	private final Set<FSAFlags> flags;

	/**
	 * Goto length: how many bytes an arc's combined flags/address field
	 * occupies when the address is stored explicitly.
	 */
	public final int gtl;

	/** Filler character, copied from the file header. */
	public final byte filler;

	/** Annotation character, copied from the file header. */
	public final byte annotation;

	/**
	 * Reads a version 5 automaton from the given stream: first the header,
	 * then all remaining bytes as the arc array.
	 *
	 * @param fsaStream stream positioned at the start of the automaton header
	 * @throws IOException if the header declares a version other than
	 *         {@link #VERSION}; may also propagate from the underlying reads
	 */
	public FSA5(InputStream fsaStream) throws IOException {
		final FSAHeader header = FSAHeader.read(fsaStream);

		// Refuse anything that is not a version 5 automaton.
		if (header.version != VERSION) {
			throw new IOException("This class can read FSA version 5 only: " + header.version);
		}

		/*
		 * The header's gtl value packs two lengths: the goto length in the
		 * low four bits and the node header length in the high four bits.
		 * A non-zero node header length means NUMBERS was used.
		 */
		final int packedLengths = header.gtl;
		this.gtl = packedLengths & 0x0f;
		this.nodeDataLength = (packedLengths >>> 4) & 0x0f;

		final Set<FSAFlags> detected = EnumSet.of(FLEXIBLE, STOPBIT, NEXTBIT);
		if (this.nodeDataLength != 0) {
			detected.add(NUMBERS);
		}
		this.flags = detected;

		this.filler = header.filler;
		this.annotation = header.annotation;

		// Everything left in the stream is the automaton itself.
		this.arcs = readFully(fsaStream);
	}

	/**
	 * Tests a single bit of the flags byte of the given arc.
	 */
	private boolean hasFlag(int arc, int mask) {
		return (arcs[arc + ADDRESS_OFFSET] & mask) != 0;
	}

	/**
	 * Computes the offset just past the arc starting at <code>offset</code>.
	 * An arc whose target is next takes two bytes (label and flags);
	 * otherwise it takes the label byte plus the full goto field.
	 */
	private int skipArc(int offset) {
		final int arcLength;
		if (isNextSet(offset)) {
			arcLength = 1 + 1; /* label + flags */
		} else {
			arcLength = 1 + gtl; /* label + flags/address */
		}
		return offset + arcLength;
	}

	/**
	 * Returns the start node of this automaton.
	 *
	 * <p>The arc stored at the very beginning of the array (right after its
	 * node header) is skipped; the first arc of the node that follows it is
	 * then followed to its destination.</p>
	 */
	public int getRootNode() {
		final int leadingArc = getFirstArc(0);
		final int followingNode = skipArc(leadingArc);
		return getDestinationNodeOffset(getFirstArc(followingNode));
	}

	/**
	 * {@inheritDoc}
	 */
	public final int getFirstArc(int node) {
		// Arcs begin right after the (possibly empty) node header.
		return nodeDataLength + node;
	}

	/**
	 * {@inheritDoc}
	 */
	public final int getNextArc(int arc) {
		// Zero signals that the node has no further arcs.
		return isArcLast(arc) ? 0 : skipArc(arc);
	}

	/**
	 * {@inheritDoc}
	 */
	public int getArc(int node, byte label) {
		int candidate = getFirstArc(node);
		while (candidate != 0) {
			if (getArcLabel(candidate) == label) {
				return candidate;
			}
			candidate = getNextArc(candidate);
		}

		// No arc of this node carries the requested label.
		return 0;
	}

	/**
	 * {@inheritDoc}
	 */
	public int getEndNode(int arc) {
		final int target = getDestinationNodeOffset(arc);
		assert target != 0 : "No target node for terminal arcs.";
		return target;
	}

	/**
	 * {@inheritDoc}
	 */
	public byte getArcLabel(int arc) {
		// The label is the very first byte of an arc.
		return arcs[arc];
	}

	/**
	 * {@inheritDoc}
	 */
	public boolean isArcFinal(int arc) {
		return hasFlag(arc, BIT_FINAL_ARC);
	}

	/**
	 * {@inheritDoc}
	 */
	public boolean isArcTerminal(int arc) {
		// A destination offset of zero means the arc leads nowhere.
		return getDestinationNodeOffset(arc) == 0;
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>In this automaton version the returned set may additionally contain
	 * {@link FSAFlags#NUMBERS}, which signals extra fields stored with each
	 * node. The returned view cannot be modified.</p>
	 */
	public Set<FSAFlags> getFlags() {
		return Collections.unmodifiableSet(flags);
	}

	/**
	 * Tells whether the <code>LAST</code> bit is set on the given arc.
	 *
	 * @see #BIT_LAST_ARC
	 */
	public boolean isArcLast(int arc) {
		return hasFlag(arc, BIT_LAST_ARC);
	}

	/**
	 * Tells whether the <code>NEXT</code> bit is set on the given arc.
	 *
	 * @see #BIT_TARGET_NEXT
	 */
	public boolean isNextSet(int arc) {
		return hasFlag(arc, BIT_TARGET_NEXT);
	}

	/**
	 * Decodes an <code>n</code>-byte integer stored least significant byte
	 * first, beginning at index <code>start</code>.
	 */
	static final int decodeFromBytes(
			final byte[] arcs, final int start, final int n)
	{
		int value = 0;
		// Walk from the most significant byte (highest index) downwards.
		for (int i = n - 1; i >= 0; i--) {
			value = (value << 8) | (arcs[start + i] & 0xff);
		}
		return value;
	}

	/**
	 * Returns the offset of the node this arc points to.
	 */
	protected final int getDestinationNodeOffset(int arc) {
		if (isNextSet(arc)) {
			/* The target node is laid out immediately after this arc. */
			return skipArc(arc);
		}

		/*
		 * Otherwise the address shares the goto field with the flag bits:
		 * decode the whole field and drop the three lowest (flag) bits.
		 */
		final int gotoField = decodeFromBytes(arcs, arc + ADDRESS_OFFSET, gtl);
		return gotoField >>> 3;
	}
}