package eu.crydee.stanfordcorenlp;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Properties;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.annotation.Arg;
import net.sourceforge.argparse4j.inf.ArgumentParser;
import net.sourceforge.argparse4j.inf.ArgumentParserException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.tuple.Pair;

/* loaded from: input_file:eu/crydee/stanfordcorenlp/Tokenizer.class */
/**
 * Thin wrapper around Stanford CoreNLP that turns raw text into
 * whitespace-separated tokens, optionally with one sentence per line.
 *
 * <p>Two pipelines are built at construction time: one running the
 * {@code tokenize, ssplit} annotators and one running {@code tokenize} only.
 * {@link #main(String[])} applies the sentence-splitting variant to every
 * regular file of an input directory.
 */
public class Tokenizer {
    /** Pipeline configured with the {@code tokenize, ssplit} annotators. */
    protected StanfordCoreNLP pipelineWithSS;

    /** Pipeline configured with the {@code tokenize} annotator only. */
    protected StanfordCoreNLP pipelineWithoutSS;

    /**
     * Command-line parameters; the fields are filled in by argparse4j from
     * the destinations named in the {@link Arg} annotations.
     */
    public static class Params {

        /** Value of {@code -i/--input-dir}. */
        @Arg(dest = "input_dir")
        public String inDirPath;

        /** Value of {@code -o/--output-dir}. */
        @Arg(dest = "output_dir")
        public String outDirpath;

        private Params() {
        }
    }

    /** Builds both CoreNLP pipelines (with and without sentence splitting). */
    public Tokenizer() {
        this.pipelineWithSS = newPipeline("tokenize, ssplit");
        this.pipelineWithoutSS = newPipeline("tokenize");
    }

    /** Creates a CoreNLP pipeline running exactly the given annotators. */
    private static StanfordCoreNLP newPipeline(String annotators) {
        Properties properties = new Properties();
        properties.setProperty("annotators", annotators);
        return new StanfordCoreNLP(properties);
    }

    /**
     * Tokenizes and sentence-splits the given text.
     *
     * @param str the raw text to process
     * @return the token texts of each sentence joined by a single space, with
     *         sentences joined by {@code "\n"}
     */
    public String tokenizeAndSentenceSplit(String str) {
        Annotation annotation = new Annotation(str);
        this.pipelineWithSS.annotate(annotation);
        // The annotation keys are typed, so no raw-type casts are needed here.
        return annotation.get(CoreAnnotations.SentencesAnnotation.class).stream()
                .map(sentence -> sentence.get(CoreAnnotations.TokensAnnotation.class).stream()
                        .map(token -> token.get(CoreAnnotations.TextAnnotation.class))
                        .collect(Collectors.joining(" ")))
                .collect(Collectors.joining("\n"));
    }

    /**
     * Tokenizes the given text without sentence splitting.
     *
     * @param str the raw text to process
     * @return the token texts joined by a single space
     */
    public String tokenize(String str) {
        Annotation annotation = new Annotation(str);
        this.pipelineWithoutSS.annotate(annotation);
        return annotation.get(CoreAnnotations.TokensAnnotation.class).stream()
                .map(token -> token.get(CoreAnnotations.TextAnnotation.class))
                .collect(Collectors.joining(" "));
    }

    /**
     * Command-line entry point: tokenizes and sentence-splits every regular
     * file directly inside the input directory and writes the result under
     * the same file name in the output directory (default {@code out}).
     *
     * <p>Exits with status 1 when the arguments cannot be parsed. A file that
     * cannot be read aborts the run with an {@link UncheckedIOException}; a
     * file that cannot be written is reported on stderr and skipped.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        ArgumentParser parser = ArgumentParsers
                .newArgumentParser("stanford-corenlp-tokenizer-wrapper")
                .description("Tokenizes and sentence-splits text files with Stanford CoreNLP.");
        parser.addArgument("-i", "--input-dir")
                .required(true)
                .help("Path of the input text files directory.");
        parser.addArgument("-o", "--output-dir")
                .help("Path of the output text files directory.")
                .setDefault("out");
        Params params = new Params();
        try {
            parser.parseArgs(strArr, params);
        } catch (ArgumentParserException e) {
            System.err.println("Could not parse arguments: " + e.getMessage());
            System.exit(1);
        }
        Tokenizer tokenizer = new Tokenizer();
        // Files.list keeps a directory handle open until the stream is closed,
        // hence the try-with-resources.
        try (Stream<Path> entries = Files.list(Paths.get(params.inDirPath))) {
            entries.filter(Files::isRegularFile)
                    .forEach(input -> tokenizeFile(tokenizer, input, params.outDirpath));
        } catch (IOException e) {
            System.err.println("Could not read from input directory: " + e.getLocalizedMessage());
        }
    }

    /** Reads one UTF-8 input file, tokenizes it and writes the result under the same name in outDirPath. */
    private static void tokenizeFile(Tokenizer tokenizer, Path input, String outDirPath) {
        String text;
        try {
            text = FileUtils.readFileToString(input.toFile(), StandardCharsets.UTF_8);
        } catch (IOException e) {
            System.err.println("Could not read input text file: " + e.getLocalizedMessage());
            // An unreadable input is fatal for the whole run.
            throw new UncheckedIOException(e);
        }
        try {
            FileUtils.writeStringToFile(
                    Paths.get(outDirPath, input.getFileName().toString()).toFile(),
                    tokenizer.tokenizeAndSentenceSplit(text),
                    StandardCharsets.UTF_8);
        } catch (IOException e) {
            // Best effort: report the failed output and carry on with the next file.
            System.err.println("Could not write output text file: " + e.getLocalizedMessage());
        }
    }
}
