package org.fnlp.nlp.pipe;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.fnlp.ml.types.Instance;
import org.fnlp.util.exception.UnsupportedDataTypeException;

/* loaded from: input_file:org/fnlp/nlp/pipe/NGram.class */
/**
 * Pipe that replaces an instance's data with the list of n-grams extracted from it.
 *
 * <p>Supported input data:
 * <ul>
 *   <li>{@code String} - character n-grams, i.e. every substring of each requested length;</li>
 *   <li>{@code List} or {@code String[]} - token n-grams, where the tokens of each window are
 *       joined with a single space.</li>
 * </ul>
 *
 * <p>N-grams are produced size by size, in the order the sizes were given, and from left to
 * right within each size. Sizes that are not positive or that exceed the input length are
 * skipped.
 */
public class NGram extends Pipe {
    private static final long serialVersionUID = -2329969202592736092L;

    /** Requested n-gram sizes; the array passed to the constructor is stored without copying. */
    int[] gramSizes;

    /**
     * Creates a pipe producing n-grams of the given sizes.
     *
     * @param iArr the n-gram sizes to extract; stored by reference
     */
    public NGram(int[] iArr) {
        this.gramSizes = iArr;
    }

    /**
     * Replaces the data of {@code instance} with the list of its n-grams.
     *
     * @param instance the instance whose data is read and then overwritten
     * @throws UnsupportedDataTypeException if the data is {@code null} or is not a
     *         {@code String}, a {@code List} or a {@code String[]}
     */
    @Override // org.fnlp.nlp.pipe.Pipe
    public void addThruPipe(Instance instance) throws UnsupportedDataTypeException {
        Object data = instance.getData();
        ArrayList<String> grams;
        if (data instanceof String) {
            grams = ngram((String) data, this.gramSizes);
        } else if (data instanceof List) {
            grams = ngram((List<?>) data, this.gramSizes);
        } else if (data instanceof String[]) {
            grams = ngram((String[]) data, this.gramSizes);
        } else {
            // Null data must surface as the declared exception, not as a NullPointerException
            // raised while building the error message.
            String typeName = data == null ? "null" : data.getClass().toString();
            throw new UnsupportedDataTypeException("不支持处理数据类型：" + typeName);
        }
        instance.setData(grams);
    }

    /**
     * Builds space-joined token n-grams from an array of tokens.
     *
     * @param strArr the tokens
     * @param iArr the n-gram sizes to extract
     * @return the interned n-grams, grouped by size in the order given
     */
    private static ArrayList<String> ngram(String[] strArr, int[] iArr) {
        ArrayList<String> grams = new ArrayList<>();
        StringBuilder window = new StringBuilder();
        for (int size : iArr) {
            if (size <= 0 || size > strArr.length) {
                continue;
            }
            for (int start = 0; start + size <= strArr.length; start++) {
                window.setLength(0);
                window.append(strArr[start]);
                for (int offset = 1; offset < size; offset++) {
                    window.append(' ').append(strArr[start + offset]);
                }
                grams.add(window.toString().intern());
            }
        }
        return grams;
    }

    /**
     * Builds space-joined token n-grams from a list of tokens. Elements are converted to text
     * the same way {@link StringBuilder#append(Object)} does.
     *
     * @param list the tokens
     * @param iArr the n-gram sizes to extract
     * @return the interned n-grams, grouped by size in the order given
     */
    private static ArrayList<String> ngram(List<?> list, int[] iArr) {
        ArrayList<String> grams = new ArrayList<>();
        StringBuilder window = new StringBuilder();
        int tokenCount = list.size();
        for (int size : iArr) {
            if (size <= 0 || size > tokenCount) {
                continue;
            }
            for (int start = 0; start + size <= tokenCount; start++) {
                window.setLength(0);
                window.append(list.get(start));
                for (int offset = 1; offset < size; offset++) {
                    window.append(' ').append(list.get(start + offset));
                }
                grams.add(window.toString().intern());
            }
        }
        return grams;
    }

    /**
     * Extracts the character n-grams of a string, keeping duplicates and order.
     *
     * @param str the text to split
     * @param iArr the n-gram sizes to extract
     * @return every substring of each requested size, grouped by size in the order given
     */
    public static ArrayList<String> ngram(String str, int[] iArr) {
        ArrayList<String> grams = new ArrayList<>();
        ngram(str, iArr, grams);
        return grams;
    }

    /**
     * Extracts the distinct character n-grams of a string.
     *
     * @param str the text to split
     * @param iArr the n-gram sizes to extract
     * @return the set of substrings of each requested size
     */
    public static Set<String> ngramSet(String str, int[] iArr) {
        HashSet<String> grams = new HashSet<>();
        ngram(str, iArr, grams);
        return grams;
    }

    /**
     * Adds every substring of {@code str} of each requested size to {@code collection}.
     *
     * @param str the text to split
     * @param iArr the n-gram sizes to extract; non-positive or too-large sizes are skipped
     * @param collection the sink receiving the n-grams
     */
    private static void ngram(String str, int[] iArr, Collection<String> collection) {
        int length = str.length();
        for (int size : iArr) {
            if (size <= 0 || size > length) {
                continue;
            }
            // Inclusive bound: the last window starts at length - size. The previous exclusive
            // bound dropped the final n-gram of every size.
            for (int start = 0; start + size <= length; start++) {
                collection.add(str.substring(start, start + size));
            }
        }
    }
}
