package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.ByteArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.ShortArrayList;
import com.carrotsearch.hppc.cursors.IntCursor;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.stream.Stream;
import org.carrot2.clustering.Document;
import org.carrot2.language.Tokenizer;
import org.carrot2.util.CharArrayUtils;
import org.carrot2.util.MutableCharArray;
import org.carrot2.util.StringUtils;

/* loaded from: input_file:org/carrot2/text/preprocessing/InputTokenizer.class */
/**
 * Tokenizes the text fields of a stream of documents and publishes the result on a
 * {@link PreprocessingContext}.
 *
 * <p>Each token, separator and the final terminator is recorded as one entry spread over four
 * parallel buffers: document index, field index, token image and token type. The buffers only
 * exist for the duration of a single {@link #tokenize} call.
 */
final class InputTokenizer {
    /**
     * Value of {@code Tokenizer.nextToken()} at which reading tokens of a field stops.
     * NOTE(review): presumably mirrors an end-of-input constant declared on Tokenizer - confirm.
     */
    private static final short END_OF_INPUT = -1;

    /** Token type stored for the entry placed between two consecutive documents. */
    private static final short TYPE_DOCUMENT_SEPARATOR = 0x0200;

    /** Token type stored for the entry placed between two tokenized fields of one document. */
    private static final short TYPE_FIELD_SEPARATOR = 0x0400;

    /** Token type stored for the single entry that closes the token sequence. */
    private static final short TYPE_TERMINATOR = 0x0800;

    /** Document index stored for entries that do not belong to any document. */
    private static final int NO_DOCUMENT = -1;

    /** Field index stored for entries that do not belong to any field. */
    private static final byte NO_FIELD = -1;

    /** Token images; {@code null} for separator and terminator entries. */
    private ArrayList<char[]> images;

    /** Token types, parallel to {@link #images}. */
    private ShortArrayList tokenTypes;

    /** Index of the owning document for each entry, parallel to {@link #images}. */
    private IntArrayList documentIndices;

    /** Index of the owning field for each entry, parallel to {@link #images}. */
    private ByteArrayList fieldIndices;

    /** A non-empty field of the document currently being tokenized. */
    private static final class FieldValue {
        final String field;
        final String value;

        FieldValue(String field, String value) {
            this.field = field;
            this.value = value;
        }
    }

    /**
     * Tokenizes all documents of the stream, in encounter order, and stores the outcome in the
     * given context: {@code documentCount}, the four {@code allTokens} arrays and
     * {@code allFields.name}.
     *
     * <p>A document separator entry precedes every document except the first. Within a document,
     * a field separator entry precedes the tokens of a field whenever an earlier field of the same
     * document has already produced tokens. A terminator entry is appended after the last
     * document. Field indices are assigned to field names in order of first appearance across all
     * documents; fields whose value is null or empty are ignored entirely.
     *
     * @param preprocessingContext context providing the {@link Tokenizer} and receiving the results
     * @param stream documents to tokenize
     * @throws RuntimeException if a field index above {@link Byte#MAX_VALUE} would be required, or
     *     wrapping an {@link IOException} raised while tokenizing
     */
    public void tokenize(PreprocessingContext preprocessingContext, Stream<? extends Document> stream) {
        this.images = new ArrayList<>();
        this.tokenTypes = new ShortArrayList();
        this.documentIndices = new IntArrayList();
        this.fieldIndices = new ByteArrayList();

        try {
            final Tokenizer tokenizer = (Tokenizer) preprocessingContext.languageComponents.get(Tokenizer.class);
            final MutableCharArray termBuffer = new MutableCharArray(CharArrayUtils.EMPTY_ARRAY);
            final HashMap<String, Integer> fieldIndexByName = new HashMap<>();
            final ArrayList<FieldValue> fields = new ArrayList<>();
            // Mutable holder so that the lambda below can count the documents it has seen.
            final IntCursor documentCounter = new IntCursor();

            stream.forEachOrdered(document -> {
                final int documentIndex = documentCounter.value;
                if (documentIndex > 0) {
                    addDocumentSeparator();
                }

                // Collect the non-empty fields first; the list is reused across documents.
                fields.clear();
                document.visitFields((fieldName, fieldValue) -> {
                    if (!StringUtils.isNullOrEmpty(fieldValue)) {
                        fields.add(new FieldValue(fieldName, fieldValue));
                    }
                });

                boolean hadTokens = false;
                for (FieldValue field : fields) {
                    // A field name seen for the first time gets the next free index.
                    final int fieldIndex =
                            fieldIndexByName.computeIfAbsent(field.field, unseen -> fieldIndexByName.size());
                    // Field indices are stored as bytes, so they must not exceed Byte.MAX_VALUE.
                    if (fieldIndex > Byte.MAX_VALUE) {
                        throw new RuntimeException("Too many fields (>" + fieldIndex + ")");
                    }

                    try {
                        tokenizer.reset(new StringReader(field.value));
                        short tokenType = tokenizer.nextToken();
                        if (tokenType != END_OF_INPUT) {
                            // Separate from the previous field only if both produced tokens.
                            if (hadTokens) {
                                addFieldSeparator(documentIndex);
                            }
                            do {
                                tokenizer.setTermBuffer(termBuffer);
                                add(documentIndex, (byte) fieldIndex,
                                        preprocessingContext.intern(termBuffer), tokenType);
                                tokenType = tokenizer.nextToken();
                            } while (tokenType != END_OF_INPUT);
                            hadTokens = true;
                        }
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
                documentCounter.value++;
            });

            addTerminator();

            // Invert the name -> index map into an index -> name array.
            final String[] fieldNames = new String[fieldIndexByName.size()];
            fieldIndexByName.forEach((fieldName, index) -> {
                fieldNames[index] = fieldName;
            });

            preprocessingContext.documentCount = documentCounter.value;
            preprocessingContext.allTokens.documentIndex = this.documentIndices.toArray();
            preprocessingContext.allTokens.fieldIndex = this.fieldIndices.toArray();
            preprocessingContext.allTokens.image = this.images.toArray(new char[this.images.size()][]);
            preprocessingContext.allTokens.type = this.tokenTypes.toArray();
            preprocessingContext.allFields.name = fieldNames;
        } finally {
            // Release the working buffers even when tokenization fails part-way.
            this.images = null;
            this.fieldIndices = null;
            this.tokenTypes = null;
            this.documentIndices = null;
        }
    }

    /** Appends the terminator entry: no document, no field, no image. */
    void addTerminator() {
        add(NO_DOCUMENT, NO_FIELD, null, TYPE_TERMINATOR);
    }

    /** Appends a document separator entry: no document, no field, no image. */
    void addDocumentSeparator() {
        add(NO_DOCUMENT, NO_FIELD, null, TYPE_DOCUMENT_SEPARATOR);
    }

    /**
     * Appends a field separator entry belonging to the given document: no field, no image.
     *
     * @param i index of the document the separator belongs to
     */
    void addFieldSeparator(int i) {
        add(i, NO_FIELD, null, TYPE_FIELD_SEPARATOR);
    }

    /**
     * Appends one entry to all four parallel buffers.
     *
     * @param i document index of the entry
     * @param b field index of the entry
     * @param cArr token image, or {@code null} for separators and the terminator
     * @param s token type of the entry
     */
    void add(int i, byte b, char[] cArr, short s) {
        this.documentIndices.add(i);
        this.fieldIndices.add(b);
        this.images.add(cArr);
        this.tokenTypes.add(s);
    }
}
