/* LanguageTool, a natural language style checker 
 * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */

package org.languagetool.rules.de;

import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
import org.languagetool.UserConfig;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;

import java.util.*;

/**
 * Experimental helper "rule" that never reports a match from {@link #match(AnalyzedSentence)}.
 * Instead it counts, over all sentences it is given, how often a token whose POS tag starts
 * with one known tag prefix directly follows a token whose POS tag starts with another, and
 * periodically prints the tag pairs that were seen rarely or never to {@code System.out}.
 *
 * <p>The counters are plain mutable instance fields without any synchronization.
 */
public class UncommonTagSequenceFakeRule extends Rule {

  /** The statistics are printed every time this many sentences have been processed. */
  private static final int REPORT_INTERVAL = 1000;

  /** Tag pairs counted fewer times than this (or never) are listed in the report. */
  private static final int MIN_COUNT = 5;

  /**
   * POS tag prefixes whose pairwise sequences are counted. Built once instead of per sentence;
   * the {@link LinkedHashSet} keeps the order but drops accidental duplicates, which would
   * otherwise cause the affected pairs to be counted (and reported) more than once.
   */
  private static final List<String> TAGS = Collections.unmodifiableList(new ArrayList<>(new LinkedHashSet<>(Arrays.asList(
      "ADJ:AKK",
      "ADJ:DAT",
      "ADJ:GEN",
      "ADJ:NOM",
      "ADJ:PRD",
      "ADV:",
      "ADV:CAU",
      "ADV:INR",
      "ADV:INR+PRO",
      "ADV:LOK",
      "ADV:LOK+CAU",
      "ADV:LOK+MOD",
      "ADV:LOK+PRO",
      "ADV:MOD",
      "ADV:MOD+INR",
      "ADV:MOD+PRO",
      "ADV:MOD+TMP",
      "ADV:MOD+TMP+LOK",
      "ADV:PRO",
      "ADV:PRO+CAU",
      "ADV:PRO+TMP",
      "ADV:TMP",
      "ADV:TMP+LOK+MOD",
      "ADV:TMP+MOD",
      "ART:DEF",
      "ART:IND",
      "EIG:AKK",
      "EIG:DAT",
      "EIG:GEN",
      "EIG:NOM",
      "KON:INF",
      "KON:NEB",
      "KON:PRI",
      "KON:UNT",
      "KON:VGL",
      "NEG:",
      "PA1:AKK",
      "PA1:DAT",
      "PA1:GEN",
      "PA1:NOM",
      "PA1:PRD",
      "PA2:AKK",
      "PA2:DAT",
      "PA2:GEN",
      "PA2:NOM",
      "PA2:PRD",
      "PRO:DEM",
      "PRO:IND",
      "PRO:PER",
      "PRO:POS",
      "PRO:REF",
      "PRO:RIN",
      "PRP:CAU",
      "PRP:CAU+MOD",
      "PRP:DAT",
      "PRP:LOK",
      "PRP:LOK+CAU",
      "PRP:LOK+MOD+CAU+TMP",
      "PRP:LOK+MOD",
      "PRP:LOK+TMP+CAU",
      "PRP:LOK+TMP+CAU+MOD",
      "PRP:LOK+TMP",
      "PRP:LOK+TMP+MOD",
      "PRP:LOK+TMP+MOD+CAU",
      "PRP:MOD",
      "PRP:NOM+AKK",
      "PRP:NOM+DAT+AKK",
      "PRP:PRO",
      "PRP:TMP",
      "PRP:TMP+LOK",
      "PRP:TMP+MOD+CAU",
      "SUB:AKK",
      "SUB:DAR",  // NOTE(review): possibly a typo (compare "SUB:DAT" below) - confirm against the tagset
      "SUB:DAT",
      "SUB:GEN",
      "SUB:NOM",
      "VER:1",
      "VER:2",
      "VER:3",
      "VER:AUX",
      "VER:EIZ",
      "VER:IMP",
      "VER:INF",
      "VER:MOD",
      "VER:PA1",
      "VER:PA2",
      "ZAL:",
      "ZUS:"
  ))));

  /**
   * @param messages passed on to the {@link Rule} constructor
   * @param lang not used by this class
   * @param userConfig not used by this class
   */
  public UncommonTagSequenceFakeRule(ResourceBundle messages, Language lang, UserConfig userConfig) {
    super(messages);
  }

  @Override
  public String getId() {
    return "DE_UNCOMMON_TAG_SEQ";
  }

  @Override
  public String getDescription() {
    return "FIXME";
  }

  /** Maps a tag pair key of the form {@code "[TAG1, TAG2]"} to the number of times it was seen. */
  Map<String, Integer> tagSeq2MatchCount = new HashMap<>();

  /** Number of sentences passed to {@link #match(AnalyzedSentence)} so far. */
  int sentCount = 0;

  /**
   * Updates the tag pair statistics with the given sentence and prints a report of rare
   * pairs after every {@value #REPORT_INTERVAL} sentences.
   *
   * @param sentence the sentence whose non-whitespace tokens are inspected
   * @return always an empty array, this method only collects statistics
   */
  @Override
  public RuleMatch[] match(AnalyzedSentence sentence) {
    AnalyzedTokenReadings[] tokens = getSentenceWithImmunization(sentence).getTokensWithoutWhitespace();
    // TODO: check ngrams -> if present, probably not an error?!
    // TODO: check all combinations (sequences of three tags are not counted yet)
    List<String> prevTags = Collections.emptyList();  // nothing precedes the first token
    for (AnalyzedTokenReadings token : tokens) {
      List<String> currentTags = matchingTags(token);
      // Count every (previous tag, current tag) pair that actually occurred, rather than
      // testing all possible pairs against each token.
      for (String prevTag : prevTags) {
        for (String currentTag : currentTags) {
          tagSeq2MatchCount.merge(sequenceKey(prevTag, currentTag), 1, Integer::sum);
        }
      }
      prevTags = currentTags;
    }
    sentCount++;
    if (sentCount % REPORT_INTERVAL == 0) {
      printRareSequences();
    }
    return toRuleMatchArray(Collections.<RuleMatch>emptyList());
  }

  /** Returns the entries of {@link #TAGS} for which the token has a POS tag starting with them. */
  private static List<String> matchingTags(AnalyzedTokenReadings token) {
    List<String> result = new ArrayList<>();
    for (String tag : TAGS) {
      if (token.hasPosTagStartingWith(tag)) {
        result.add(tag);
      }
    }
    return result;
  }

  /** Builds the map key for a tag pair; same text as {@code Arrays.asList(first, second).toString()}. */
  private static String sequenceKey(String first, String second) {
    return "[" + first + ", " + second + "]";
  }

  /** Prints all tag pairs that were never seen or seen fewer than {@value #MIN_COUNT} times. */
  private void printRareSequences() {
    System.out.println("----------- (" + sentCount + ", start)");
    int rareCount = 0;
    for (String first : TAGS) {
      for (String second : TAGS) {
        String key = sequenceKey(first, second);
        Integer count = tagSeq2MatchCount.get(key);
        if (count == null || count < MIN_COUNT) {
          System.out.println(key + " => " + count);
          rareCount++;
        }
      }
    }
    System.out.println("----------- (" + sentCount + ", end, " + rareCount + " sequences)");
  }

  /**
   * Older experiment, not wired in as an override: creates a match for every two directly
   * adjacent tokens that both have a POS tag starting with the hard-coded prefix below.
   *
   * <p>An earlier revision printed debug output and then called {@code System.exit(1)} before
   * the loop, which terminated the JVM and made the matching code unreachable; that leftover
   * has been removed.
   *
   * @param sentence the sentence whose non-whitespace tokens are inspected
   * @return one match per adjacent token pair sharing the tag prefix, spanning both tokens
   */
  //@Override
  public RuleMatch[] matchOLD(AnalyzedSentence sentence) {
    AnalyzedTokenReadings[] tokens = getSentenceWithImmunization(sentence).getTokensWithoutWhitespace();
    List<RuleMatch> matches = new ArrayList<>();

    // TODO: check ngrams -> if present, probably not an error?!
    // TODO: check all combinations

    // Notes recorded when trying the individual prefixes ("FAs" presumably means false alarms;
    // the "/", "+" and "***" markers are kept as originally written).
    //
    // Sequence of 2 identical tags:
    //   ABK:  2 FAs per 100K sentences ***??
    //   ADJ:  / FAs per 100K sentences
    //   ADV:  / FAs per 100K sentences
    //   ART:  + FAs per 100K sentences
    //   EIG:  / FAs per 100K sentences
    //   INJ:  0 FAs per 100K sentences ***
    //   KON:  / FAs per 100K sentences
    //   NEG:  0 FAs per 100K sentences ***
    //   PA1:  7 FAs per 100K sentences
    //   PA2:  137 FAs per 100K sentences
    //   PRO:  + FAs per 100K sentences
    //   PRP:  / FAs per 100K sentences
    //   SUB:  / FAs per 100K sentences
    //   VER:  / FAs per 100K sentences
    //   ZAL:  3 FAs per 100K sentences
    //   ZUS:  / FAs per 100K sentences
    //
    // Sequence of 3 identical tags:
    //   ABK:  0 FAs per 100K sentences ***
    //   ADJ:  + FAs per 100K sentences
    //   ADV:  + FAs per 100K sentences
    //   ART:  4 FAs per 100K sentences
    //   EIG:  + FAs per 100K sentences
    //   INJ:  0 FAs per 100K sentences ***
    //   KON:  113 FAs per 100K sentences
    //   NEG:  0 FAs per 100K sentences ***
    //   PA1:  0 FAs per 100K sentences ***
    //   PA2:  0 FAs per 100K sentences ***
    //   PRO:  627 FAs per 100K sentences
    //   PRP:  55 FAs per 100K sentences
    //   SUB:  ... FAs per 100K sentences
    //   VER:  + FAs per 100K sentences
    //   ZAL:  0 FAs per 100K sentences ***
    //   ZUS:  62 FAs per 100K sentences
    String tag = "ZAL";

    AnalyzedTokenReadings prevToken = null;
    for (AnalyzedTokenReadings token : tokens) {
      // Only the 2-sequence variant is implemented; the 3-sequence variant would additionally
      // require the token before prevToken to carry the same tag prefix.
      if (prevToken != null && token.hasPosTagStartingWith(tag) && prevToken.hasPosTagStartingWith(tag)) {
        matches.add(new RuleMatch(this, sentence, prevToken.getStartPos(), token.getEndPos(), "FIXME-MESSAGE"));
      }
      prevToken = token;
    }
    return toRuleMatchArray(matches);
  }

}
