package de.jplag.text;

import de.jplag.AbstractParser;
import de.jplag.ParsingException;
import de.jplag.Token;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Set;

/* loaded from: input_file:de/jplag/text/ParserAdapter.class */
/**
 * Turns plain-text files into a flat list of {@link Token}s, one token per word.
 *
 * <p>The text is split by a Stanford CoreNLP pipeline configured with only the
 * {@code tokenize} annotator. Every CoreNLP token that contains at least one alphabetic
 * character or digit becomes a {@link Token}; all other tokens (e.g. pure punctuation) are
 * skipped. After the tokens of each file a {@link Token#fileEnd(File)} marker is appended.
 *
 * <p>The adapter keeps per-parse state (token list, current file, line bookkeeping) in
 * instance fields without any synchronization, so a single instance must not be used for
 * concurrent {@link #parse(Set)} calls.
 */
public class ParserAdapter extends AbstractParser {
    private static final char LF = '\n';
    private static final char CR = '\r';
    private static final String ANNOTATORS_KEY = "annotators";
    private static final String ANNOTATORS_VALUE = "tokenize";

    /**
     * Start value of {@link #currentLineBreakIndex} for a fresh file: the position of an
     * imaginary line break directly in front of the first character. With this value the
     * column computation in {@link #addToken(CoreLabel)} yields 1-based columns on the first
     * line, exactly as it does on every later line.
     */
    private static final int INDEX_BEFORE_FIRST_CHARACTER = -1;

    private final StanfordCoreNLP pipeline;
    private List<Token> tokens;
    private File currentFile;
    /** Line number (starting at 1) of the position reached so far in the current file. */
    private int currentLine;
    /** Index in the file content of the last character of the most recent line break. */
    private int currentLineBreakIndex;

    /**
     * Creates the adapter and builds the CoreNLP pipeline with only the tokenizer enabled.
     */
    public ParserAdapter() {
        Properties properties = new Properties();
        properties.setProperty(ANNOTATORS_KEY, ANNOTATORS_VALUE);
        this.pipeline = new StanfordCoreNLP(properties);
    }

    /**
     * Tokenizes all given files, in the iteration order of the set.
     *
     * @param set the files to tokenize
     * @return the word tokens of all files; the tokens of each file are followed by a
     *         file-end token for that file
     * @throws ParsingException if one of the files cannot be read
     */
    public List<Token> parse(Set<File> set) throws ParsingException {
        this.tokens = new ArrayList<>();
        for (File file : set) {
            this.logger.trace("Parsing file {}", file);
            parseFile(file);
            this.tokens.add(Token.fileEnd(file));
        }
        return this.tokens;
    }

    /**
     * Tokenizes a single file and appends its word tokens to {@link #tokens}.
     *
     * @param file the file to tokenize
     * @throws ParsingException if the file cannot be read
     */
    private void parseFile(File file) throws ParsingException {
        this.currentFile = file;
        this.currentLine = 1;
        this.currentLineBreakIndex = INDEX_BEFORE_FIRST_CHARACTER;
        String content = readFile(file);
        int previousTokenEnd = 0;
        for (CoreLabel label : this.pipeline.processToCoreDocument(content).tokens()) {
            // Only the gap between two tokens is scanned for line breaks.
            advanceLineBreaks(content, previousTokenEnd, label.beginPosition());
            previousTokenEnd = label.endPosition();
            if (isWord(label)) {
                addToken(label);
            }
        }
    }

    /**
     * Updates the line bookkeeping for all line breaks in {@code content[from, to)}.
     * LF, CR and CR LF each count as exactly one line break.
     *
     * @param content the complete file content
     * @param from index of the first character to inspect (inclusive)
     * @param to index at which to stop (exclusive)
     */
    private void advanceLineBreaks(String content, int from, int to) {
        int index = from;
        while (index < to) {
            char character = content.charAt(index);
            if (character == LF) {
                this.currentLine++;
                this.currentLineBreakIndex = index;
            } else if (character == CR) {
                // Treat CR LF as a single break and remember the LF as its last character.
                if (index + 1 < content.length() && content.charAt(index + 1) == LF) {
                    index++;
                }
                this.currentLine++;
                this.currentLineBreakIndex = index;
            }
            index++;
        }
    }

    /**
     * Checks whether a CoreNLP token should be kept as a word.
     *
     * @param coreLabel the token to check
     * @return {@code true} if the original text contains at least one alphabetic character or
     *         digit
     */
    private boolean isWord(CoreLabel coreLabel) {
        // Iterate code points, not UTF-16 units, so letters outside the BMP are recognized.
        return coreLabel.originalText().codePoints()
                .anyMatch(codePoint -> Character.isAlphabetic(codePoint) || Character.isDigit(codePoint));
    }

    /**
     * Appends a {@link Token} for the given CoreNLP token, positioned at the current line.
     *
     * @param coreLabel the CoreNLP token to convert
     */
    private void addToken(CoreLabel coreLabel) {
        String originalText = coreLabel.originalText();
        // Distance to the last line-break character, i.e. 1 for the first character of a line.
        int column = coreLabel.beginPosition() - this.currentLineBreakIndex;
        int length = coreLabel.endPosition() - coreLabel.beginPosition();
        this.tokens.add(new Token(new TextTokenType(originalText), this.currentFile, this.currentLine, column, length));
    }

    /**
     * Reads the whole file into a string via {@link Files#readString(java.nio.file.Path)}.
     *
     * @param file the file to read
     * @return the file content
     * @throws ParsingException wrapping the {@link IOException} if reading fails
     */
    private String readFile(File file) throws ParsingException {
        try {
            return Files.readString(file.toPath());
        } catch (IOException e) {
            throw new ParsingException(file, e.getMessage(), e);
        }
    }
}
