/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.pipe;

import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SimpleTokenizer;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureCounter;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.Instance;
import gnu.trove.TIntIntHashMap;

/**
 * A pass-through pipe that records, per feature, in how many instances the
 * feature occurred, together with the total number of instances seen.
 *
 * <p>Instances are returned unmodified by {@link #pipe(Instance)}; only the
 * internal {@link #counter} and {@link #numInstances} are updated. The
 * collected statistics can then be used by
 * {@link #addPrunedWordsToStoplist(SimpleTokenizer, double)} to stop features
 * that occur in too large a share of the instances.
 */
public class FeatureDocFreqPipe extends Pipe {
    /** Per-feature tally; incremented once per distinct feature of each piped instance. */
    FeatureCounter counter;

    /** Number of instances that have passed through {@link #pipe(Instance)}. */
    int numInstances;

    static final long serialVersionUID = 1L;

    /**
     * Creates a pipe with a fresh data alphabet and no target alphabet.
     */
    public FeatureDocFreqPipe() {
        super(new Alphabet(), null);
        this.counter = new FeatureCounter(this.getDataAlphabet());
        this.numInstances = 0;
    }

    /**
     * Creates a pipe over the supplied alphabets.
     *
     * @param dataAlphabet   alphabet handed to the superclass and to the feature counter
     * @param targetAlphabet alphabet handed to the superclass
     */
    public FeatureDocFreqPipe(Alphabet dataAlphabet, Alphabet targetAlphabet) {
        super(dataAlphabet, targetAlphabet);
        this.counter = new FeatureCounter(dataAlphabet);
        this.numInstances = 0;
    }

    /**
     * Updates the document-frequency statistics with one instance.
     *
     * <p>The distinct feature indices of the instance's {@link FeatureSequence}
     * are collected first, so that each feature is counted at most once per
     * instance regardless of how often it repeats within the sequence.
     *
     * @param instance the instance to inspect; its data must be a {@link FeatureSequence}
     * @return the same {@code instance}, unmodified
     * @throws IllegalArgumentException if the instance data is {@code null} or
     *         is not a {@link FeatureSequence}
     */
    @Override
    public Instance pipe(Instance instance) {
        Object data = instance.getData();
        if (!(data instanceof FeatureSequence)) {
            // Build the description null-safely: calling getClass() on null data
            // would otherwise surface as a NullPointerException instead of the
            // intended IllegalArgumentException.
            String found = data == null ? "null" : data.getClass().toString();
            throw new IllegalArgumentException("Looking for a FeatureSequence, found a " + found);
        }
        FeatureSequence features = (FeatureSequence)data;

        // The map's key set is what matters here: it de-duplicates the
        // feature indices that occur in this instance.
        TIntIntHashMap localCounter = new TIntIntHashMap();
        for (int position = 0; position < features.size(); ++position) {
            localCounter.adjustOrPutValue(features.getIndexAtPosition(position), 1, 1);
        }

        for (int feature : localCounter.keys()) {
            this.counter.increment(feature);
        }
        ++this.numInstances;
        return instance;
    }

    /**
     * Adds to the tokenizer's stoplist every feature whose share of instances
     * exceeds the given cutoff.
     *
     * <p>For each index in the data alphabet, the counter value divided by
     * {@link #numInstances} is compared against {@code docFrequencyCutoff};
     * features strictly above the cutoff are passed to
     * {@link SimpleTokenizer#stop(String)}.
     *
     * @param tokenizer          tokenizer that receives the stop words
     * @param docFrequencyCutoff proportion of instances above which a feature is stopped
     */
    public void addPrunedWordsToStoplist(SimpleTokenizer tokenizer, double docFrequencyCutoff) {
        Alphabet currentAlphabet = this.getDataAlphabet();
        for (int feature = 0; feature < currentAlphabet.size(); ++feature) {
            double docFrequency = (double)this.counter.get(feature) / (double)this.numInstances;
            if (docFrequency > docFrequencyCutoff) {
                tokenizer.stop((String)currentAlphabet.lookupObject(feature));
            }
        }
    }
}

