package org.embulk.guess.csv;

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;
import org.embulk.config.ConfigDiff;
import org.embulk.config.ConfigSource;
import org.embulk.config.DataSource;
import org.embulk.parser.csv.CsvParserPlugin;
import org.embulk.spi.Buffer;
import org.embulk.spi.BufferAllocator;
import org.embulk.spi.Exec;
import org.embulk.spi.GuessPlugin;
import org.embulk.util.config.ConfigMapperFactory;
import org.embulk.util.csv.CsvTokenizer;
import org.embulk.util.csv.InvalidCsvQuotationException;
import org.embulk.util.csv.RecordDoesNotHaveExpectedColumnException;
import org.embulk.util.file.ListFileInput;
import org.embulk.util.guess.CharsetGuess;
import org.embulk.util.guess.GuesstimatedType;
import org.embulk.util.guess.LineGuessHelper;
import org.embulk.util.guess.NewlineGuess;
import org.embulk.util.guess.SchemaGuess;
import org.embulk.util.text.LineDecoder;
import org.embulk.util.text.LineDelimiter;
import org.embulk.util.text.Newline;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/embulk/guess/csv/CsvGuessPlugin.class */
public class CsvGuessPlugin implements GuessPlugin {
    // NOTE(review): MAX_SKIP_LINES and NO_SKIP_DETECT_LINES are not referenced by name anywhere in
    // this file; guessSkipHeaderLines() uses the literal 10 twice. Presumably the compiler inlined
    // these constants there -- confirm against the original source before changing either value.
    private static final int MAX_SKIP_LINES = 10;
    private static final int NO_SKIP_DETECT_LINES = 10;
    // Factory used to create every ConfigDiff / ConfigSource / ConfigMapper in this class.
    private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build();
    // Delimiters tried by guessDelimiter(); the first entry (',') is its fallback.
    private static final List<Character> DELIMITER_CANDIDATES = Collections.unmodifiableList(Arrays.asList(',', '\t', '|', ';'));
    // Quote characters tried by guessQuote().
    private static final List<Character> QUOTE_CANDIDATES = Collections.unmodifiableList(Arrays.asList('\"', '\''));
    // Escape strings tried by guessEscape(): a backslash and a double quote.
    private static final List<String> ESCAPE_CANDIDATES = Collections.unmodifiableList(Arrays.asList("\\", "\""));
    // Null-value spellings tried by guessNullString().
    private static final List<String> NULL_STRING_CANDIDATES = Collections.unmodifiableList(Arrays.asList("null", "NULL", "#N/A", "\\N"));
    // Line prefixes tried by guessCommentLineMarker().
    private static final List<String> COMMENT_LINE_MARKER_CANDIDATES = Collections.unmodifiableList(Arrays.asList("#", "//"));
    // Column-type guesser that typesFromListRecords() delegates to.
    private static final SchemaGuess SCHEMA_GUESS = SchemaGuess.of(CONFIG_MAPPER_FACTORY);
    private static final Logger logger = LoggerFactory.getLogger(CsvGuessPlugin.class);

    /**
     * Guesses parser settings for the sample, one step per call.
     *
     * <p>If the "parser" section has no "charset", the charset guess is returned. If it has a
     * charset but no "newline", the newline guess is returned. Only when both are present is the
     * sample split into lines and handed to {@code guessLines}.
     *
     * @param configSource the configuration; only its nested "parser" section is inspected here
     * @param buffer the sample data to guess from
     * @return the guessed configuration difference for the step that applies
     */
    public ConfigDiff guess(ConfigSource configSource, Buffer buffer) {
        final ConfigSource parserConfig = configSource.getNestedOrGetEmpty("parser");
        if (parserConfig.has("charset")) {
            if (parserConfig.has("newline")) {
                final List<String> sampleLines = LineGuessHelper.of(CONFIG_MAPPER_FACTORY).toLines(configSource, buffer);
                return guessLines(configSource, sampleLines, Exec.getBufferAllocator());
            }
            return NewlineGuess.of(CONFIG_MAPPER_FACTORY).guess(configSource, buffer);
        }
        return CharsetGuess.of(CONFIG_MAPPER_FACTORY).guess(buffer);
    }

    /**
     * Guesses the CSV parser configuration from already-decoded sample lines.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Return an empty diff unless the parser type is "csv" (the default when unset).</li>
     *   <li>Take the configured delimiter, or guess one.</li>
     *   <li>Guess "quote", "escape" and "null_string" when they are not configured.</li>
     *   <li>Guess how many leading lines to skip, then guess a comment line marker (unless one is
     *       configured) and drop the lines it marks.</li>
     *   <li>Guess "trim_if_not_quoted" (unless configured) by checking whether trimming changes
     *       the guessed column types.</li>
     *   <li>Decide whether the first remaining record is a header and emit "skip_header_lines",
     *       "allow_extra_columns", "allow_optional_columns" and "columns".</li>
     * </ol>
     *
     * @param configSource the configuration whose nested "parser" section seeds the guess
     * @param list the sample lines
     * @param bufferAllocator allocator used when re-tokenizing the sample lines
     * @return a diff with a nested "parser" section, or an empty diff when the parser type is not
     *     "csv", no record could be parsed, or no column type could be guessed
     */
    ConfigDiff guessLines(ConfigSource configSource, List<String> list, BufferAllocator bufferAllocator) {
        final ConfigDiff result = newConfigDiff();
        final ConfigSource parserConfig = configSource.getNestedOrGetEmpty("parser");
        if (!"csv".equals(parserConfig.get(String.class, "type", "csv"))) {
            return result;
        }

        final String delimiter = parserConfig.has("delimiter")
                ? parserConfig.get(String.class, "delimiter")
                : guessDelimiter(list);

        final ConfigDiff parserGuessed = newConfigDiff();
        parserGuessed.merge(parserConfig);
        parserGuessed.set("type", "csv");
        parserGuessed.set("delimiter", delimiter);

        if (!parserGuessed.has("quote")) {
            final String guessedQuote = guessQuote(list, delimiter);
            if (guessedQuote == null) {
                parserGuessed.setNested("quote", (DataSource) null);
            } else {
                parserGuessed.set("quote", guessedQuote);
            }
        }
        // An empty quote string is replaced with a double quote.
        if ("".equals(parserGuessed.get(String.class, "quote"))) {
            parserGuessed.set("quote", "\"");
        }

        if (!parserGuessed.has("escape")) {
            final String quote = parserGuessed.get(String.class, "quote");
            if (quote != null) {
                final String guessedEscape = guessEscape(list, delimiter, quote);
                if (guessedEscape == null) {
                    parserGuessed.setNested("escape", (DataSource) null);
                } else {
                    parserGuessed.set("escape", guessedEscape);
                }
            }
        }

        if (!parserGuessed.has("null_string")) {
            final String guessedNullString = guessNullString(list, delimiter);
            if (guessedNullString != null) {
                parserGuessed.set("null_string", guessedNullString);
            }
        }

        // First pass over all lines, only to find how many leading lines to skip.
        final List<List<String>> rawRecords = splitLines(parserGuessed, false, list, delimiter, null, bufferAllocator);
        final int skipHeaderLines = guessSkipHeaderLines(rawRecords);
        final List<String> skippedLines = list.subList(skipHeaderLines, list.size());

        // guessCommentLineMarker() also records the marker it picks into parserGuessed.
        final List<String> sampleLines;
        if (parserGuessed.has("comment_line_marker")) {
            sampleLines = skippedLines;
        } else {
            sampleLines = guessCommentLineMarker(
                    skippedLines,
                    delimiter,
                    parserGuessed.get(String.class, "quote"),
                    parserGuessed.get(String.class, "null_string", null),
                    parserGuessed);
        }

        final List<List<String>> records = splitLines(parserGuessed, true, sampleLines, delimiter, null, bufferAllocator);
        if (records.isEmpty()) {
            return result;
        }

        final boolean headerLine;
        final List<GuesstimatedType> columnTypes;
        if (sampleLines.size() == 1) {
            // A single line cannot be told apart from a header, so it is treated as data.
            headerLine = false;
            if (parserGuessed.has("trim_if_not_quoted")) {
                columnTypes = typesFromListRecords(records.subList(0, 1));
            } else {
                final List<GuesstimatedType> trimmedTypes =
                        typesFromListRecords(splitLines(parserGuessed, true, sampleLines, delimiter, true, bufferAllocator));
                final List<GuesstimatedType> untrimmedTypes = typesFromListRecords(records.subList(0, 1));
                if (untrimmedTypes.equals(trimmedTypes)) {
                    parserGuessed.set("trim_if_not_quoted", false);
                    columnTypes = untrimmedTypes;
                } else {
                    parserGuessed.set("trim_if_not_quoted", true);
                    columnTypes = trimmedTypes;
                }
            }
        } else {
            final List<GuesstimatedType> firstTypes = typesFromListRecords(records.subList(0, 1));
            final List<GuesstimatedType> untrimmedOtherTypes = typesFromListRecords(records.subList(1, records.size()));
            logger.debug("Types of the first line : {}", firstTypes);
            logger.debug("Types of the other lines (untrimmed): {}", untrimmedOtherTypes);

            final List<GuesstimatedType> otherTypes;
            if (parserGuessed.has("trim_if_not_quoted")) {
                otherTypes = untrimmedOtherTypes;
            } else {
                final List<List<String>> trimmedRecords =
                        splitLines(parserGuessed, true, sampleLines, delimiter, true, bufferAllocator);
                // Clamp the start index so that an empty trimmed result does not make subList throw.
                final List<GuesstimatedType> trimmedOtherTypes = typesFromListRecords(
                        trimmedRecords.subList(Math.min(1, trimmedRecords.size()), trimmedRecords.size()));
                if (untrimmedOtherTypes.equals(trimmedOtherTypes)) {
                    parserGuessed.set("trim_if_not_quoted", false);
                    otherTypes = untrimmedOtherTypes;
                } else {
                    parserGuessed.set("trim_if_not_quoted", true);
                    otherTypes = trimmedOtherTypes;
                }
            }
            logger.debug("Types of the other lines: {}", otherTypes);

            // Header if the first line's types differ from the rest and are all string/boolean,
            // or if the string-length heuristic says so.
            final boolean firstLineLooksLikeNames = !firstTypes.equals(otherTypes)
                    && firstTypes.stream().allMatch(
                            type -> GuesstimatedType.STRING.equals(type) || GuesstimatedType.BOOLEAN.equals(type));
            headerLine = firstLineLooksLikeNames || guessStringHeaderLine(records);
            logger.debug("headerLine: {}", Boolean.valueOf(headerLine));
            columnTypes = otherTypes;
        }

        if (columnTypes.isEmpty()) {
            return result;
        }

        parserGuessed.set("skip_header_lines", Integer.valueOf(headerLine ? skipHeaderLines + 1 : skipHeaderLines));
        if (!parserGuessed.has("allow_extra_columns")) {
            parserGuessed.set("allow_extra_columns", false);
        }
        if (!parserGuessed.has("allow_optional_columns")) {
            parserGuessed.set("allow_optional_columns", false);
        }

        final List<String> columnNames;
        if (headerLine) {
            // NOTE(review): a header cell that was parsed as null (it matched null_string) would
            // make trim() throw here -- confirm whether that can happen in practice.
            columnNames = records.get(0).stream().map(String::trim).collect(Collectors.toList());
        } else {
            columnNames = IntStream.range(0, columnTypes.size()).mapToObj(i -> "c" + i).collect(Collectors.toList());
        }

        // Only as many columns as have both a name and a type are emitted.
        final List<ConfigDiff> columns = IntStream.range(0, Math.min(columnNames.size(), columnTypes.size()))
                .mapToObj(i -> newColumn(columnNames.get(i), columnTypes.get(i)))
                .collect(Collectors.toList());
        parserGuessed.set("columns", columns);

        result.setNested("parser", parserGuessed);
        return result;
    }

    /**
     * Builds the configuration entry for one guessed column.
     *
     * @param str the column name, stored under "name"
     * @param guesstimatedType the guessed type; its {@code toString()} is stored under "type"
     * @return a new diff with "name" and "type", plus "format" when the type is a timestamp
     */
    protected ConfigDiff newColumn(String str, GuesstimatedType guesstimatedType) {
        final ConfigDiff column = newConfigDiff();
        column.set("name", str);
        column.set("type", guesstimatedType.toString());
        if (!guesstimatedType.isTimestamp()) {
            return column;
        }
        column.set("format", guesstimatedType.getFormatOrTimeValue());
        return column;
    }

    /**
     * Creates a new {@link ConfigDiff} through the shared config mapper factory.
     *
     * @return the newly created diff
     */
    protected static ConfigDiff newConfigDiff() {
        final ConfigDiff created = CONFIG_MAPPER_FACTORY.newConfigDiff();
        return created;
    }

    /**
     * Tokenizes sample lines into records using the parser options guessed so far.
     *
     * <p>The lines are re-joined with the configured newline, encoded as UTF-8 and fed to a
     * {@link CsvTokenizer}. A column that is unquoted and equal to the configured "null_string"
     * is stored as {@code null}. A record with invalid quotation is skipped.
     *
     * <p>If anything throws a {@link RuntimeException} (for example the configuration cannot be
     * mapped to a parser task), the method falls back to splitting every line on the literal
     * delimiter.
     *
     * @param configDiff parser options guessed so far
     * @param z forwarded to {@code CsvTokenizer.nextRecord}; presumably "skip empty lines" --
     *     TODO confirm against the tokenizer's documentation
     * @param list the sample lines
     * @param str the delimiter, used only by the fallback split
     * @param bool when non-null, overrides "trim_if_not_quoted" for this tokenization
     * @param bufferAllocator allocator for the buffer holding the re-joined sample
     * @return an unmodifiable list of unmodifiable records; never {@code null}
     */
    private static List<List<String>> splitLines(ConfigDiff configDiff, boolean z, List<String> list, String str, Boolean bool, BufferAllocator bufferAllocator) {
        try {
            final String nullString = configDiff.get(String.class, "null_string", null);
            final ConfigSource parserSource = CONFIG_MAPPER_FACTORY.newConfigSource();
            parserSource.merge(configDiff);
            if (bool != null) {
                parserSource.set("trim_if_not_quoted", bool);
            }
            // The sample is re-encoded below as UTF-8, and no column schema is known yet.
            parserSource.set("charset", "UTF-8");
            parserSource.set("columns", new ArrayList<Object>());
            final CsvParserPlugin.PluginTask task =
                    CONFIG_MAPPER_FACTORY.createConfigMapper().map(parserSource, CsvParserPlugin.PluginTask.class);

            final byte[] data = joinBytes(list, task.getNewline());
            final Buffer sample = bufferAllocator.allocate(data.length);
            sample.setBytes(0, data, 0, data.length);
            sample.limit(data.length);

            final CsvTokenizer tokenizer = buildCsvTokenizer(task, sample);
            final List<List<String>> records = new ArrayList<>();
            while (tokenizer.nextRecord(z)) {
                try {
                    final List<String> record = new ArrayList<>();
                    while (true) {
                        try {
                            final String column = tokenizer.nextColumn();
                            final boolean quoted = tokenizer.wasQuotedColumn();
                            if (nullString != null && !quoted && nullString.equals(column)) {
                                record.add(null);
                            } else {
                                record.add(column);
                            }
                        } catch (RecordDoesNotHaveExpectedColumnException endOfRecord) {
                            // The tokenizer signals "no more columns" by throwing; the record is
                            // complete. Without this break the loop would never terminate and
                            // would keep appending the same record.
                            records.add(Collections.unmodifiableList(record));
                            break;
                        }
                    }
                } catch (InvalidCsvQuotationException invalidQuotation) {
                    tokenizer.skipCurrentLine();
                }
            }
            return Collections.unmodifiableList(records);
        } catch (RuntimeException tokenizationFailed) {
            // Deliberate best-effort fallback: split each line on the literal delimiter.
            final List<List<String>> fallback = new ArrayList<>();
            final String delimiterRegex = Pattern.quote(str);
            for (final String line : list) {
                fallback.add(Collections.unmodifiableList(Arrays.asList(line.split(delimiterRegex))));
            }
            return Collections.unmodifiableList(fallback);
        }
    }

    /**
     * Creates a {@link CsvTokenizer} over a single buffer, configured from a parser task.
     *
     * <p>Delimiter, quote, escape, newline, trimming, stray-quote handling, maximum quoted field
     * length, comment line marker and null string are copied from the task onto the builder. The
     * buffer is wrapped as a one-buffer, one-file input and decoded with the task's charset and
     * recognized line delimiter.
     *
     * @param pluginTask the parser task supplying tokenizer options
     * @param buffer the data to tokenize
     * @return a tokenizer positioned on the only file of the input
     */
    private static CsvTokenizer buildCsvTokenizer(CsvParserPlugin.PluginTask pluginTask, Buffer buffer) {
        final CsvTokenizer.Builder tokenizerBuilder = CsvTokenizer.builder(pluginTask.getDelimiter());
        pluginTask.getQuoteChar().ifPresent(quote -> tokenizerBuilder.setQuote(quote.getCharacter()));
        pluginTask.getEscapeChar().ifPresent(escape -> tokenizerBuilder.setEscape(escape.getCharacter()));
        tokenizerBuilder.setNewline(pluginTask.getNewline().getString());
        if (pluginTask.getTrimIfNotQuoted()) {
            tokenizerBuilder.enableTrimIfNotQuoted();
        }
        final boolean acceptStrayQuotes = pluginTask.getQuotesInQuotedFields()
                == CsvParserPlugin.QuotesInQuotedFields.ACCEPT_STRAY_QUOTES_ASSUMING_NO_DELIMITERS_IN_FIELDS;
        if (acceptStrayQuotes) {
            tokenizerBuilder.acceptStrayQuotesAssumingNoDelimitersInFields();
        }
        tokenizerBuilder.setMaxQuotedFieldLength(pluginTask.getMaxQuotedSizeLimit());
        pluginTask.getCommentLineMarker().ifPresent(marker -> tokenizerBuilder.setCommentLineMarker(marker));
        pluginTask.getNullString().ifPresent(nullString -> tokenizerBuilder.setNullString(nullString));

        // One file that consists of exactly one buffer.
        final List<Buffer> singleFile = new ArrayList<>();
        singleFile.add(buffer);
        final List<List<Buffer>> files = new ArrayList<>();
        files.add(singleFile);

        final LineDecoder decoder = LineDecoder.of(
                new ListFileInput(files),
                pluginTask.getCharset(),
                (LineDelimiter) pluginTask.getLineDelimiterRecognized().orElse(null));
        decoder.nextFile();
        return tokenizerBuilder.build(decoder.iterator());
    }

    /**
     * Picks the delimiter candidate that occurs most consistently across the sample lines.
     *
     * <p>Each candidate is scored as (total occurrences) / (standard deviation of the per-line
     * occurrence counts); candidates that never occur are ignored. The highest-scoring candidate
     * wins if its score exceeds 1.0; otherwise the first candidate is returned.
     *
     * @param list the sample lines
     * @return the chosen delimiter as a one-character string
     */
    private String guessDelimiter(List<String> list) {
        String bestDelimiter = null;
        double bestScore = 0.0d;
        for (final Character boxedCandidate : DELIMITER_CANDIDATES) {
            final char candidate = boxedCandidate.charValue();
            final List<Integer> countsPerLine = new ArrayList<>();
            for (final String line : list) {
                countsPerLine.add((int) line.chars().filter(c -> c == candidate).count());
            }
            final int total = sumOfList(countsPerLine);
            if (total <= 0) {
                continue;
            }
            final double score = total / standardDeviationOfList(countsPerLine);
            if (score > bestScore) {
                bestDelimiter = String.valueOf(candidate);
                bestScore = score;
            }
        }
        if (bestDelimiter == null || bestScore <= 1.0d) {
            return String.valueOf(DELIMITER_CANDIDATES.get(0));
        }
        return bestDelimiter;
    }

    /**
     * Guesses the quote character from the sample lines.
     *
     * <p>For each candidate, every line containing it contributes (occurrence count + the
     * {@code weighQuote} bonus); the candidate's weight is the average over those lines. The
     * best candidate is returned when its weight is at least 10. Otherwise the result is
     * {@code null} if {@code guessForceNoQuote} finds a stray double quote, and a double quote
     * if it does not.
     *
     * @param list the sample lines
     * @param str the delimiter
     * @return the guessed quote string, or {@code null} for "no quoting"
     */
    private static String guessQuote(List<String> list, String str) {
        final String delimiterRegex = Pattern.quote(str);
        String bestQuote = null;
        double bestWeight = 0.0d;
        for (final Character boxedCandidate : QUOTE_CANDIDATES) {
            final char candidate = boxedCandidate.charValue();
            final String candidateRegex = Pattern.quote(String.valueOf(candidate));
            final List<Integer> weights = new ArrayList<>();
            for (final String line : list) {
                final long occurrences = line.chars().filter(c -> c == candidate).count();
                if (occurrences <= 0) {
                    continue;
                }
                weights.add(((int) occurrences) + weighQuote(line, delimiterRegex, candidateRegex));
            }
            final double weight;
            if (weights.isEmpty()) {
                weight = 0.0d;
            } else {
                weight = averageOfList(weights);
            }
            if (weight > bestWeight) {
                bestQuote = String.valueOf(candidate);
                bestWeight = weight;
            }
        }
        if (bestWeight >= 10.0d) {
            return bestQuote;
        }
        return guessForceNoQuote(list, str, "\"") ? null : "\"";
    }

    /**
     * Reports whether any line has a quote character appearing after other text inside a field.
     *
     * <p>A line matches when, at the start of the input or right after a delimiter, optional
     * whitespace is followed by one or more non-quote characters and then a quote character.
     *
     * @param list the sample lines
     * @param str the delimiter
     * @param str2 the quote string to look for
     * @return {@code true} if at least one line matches
     */
    private static boolean guessForceNoQuote(List<String> list, String str, String str2) {
        final String delimiterRegex = Pattern.quote(str);
        final String quoteRegex = Pattern.quote(str2);
        final Pattern strayQuote =
                Pattern.compile(String.format("(?:\\A|%s)\\s*[^%s]+%s", delimiterRegex, quoteRegex, quoteRegex));
        return list.stream().anyMatch(line -> strayQuote.matcher(line).find());
    }

    /**
     * Guesses the escape string.
     *
     * <p>For each candidate, counts how often it is immediately followed by the delimiter or the
     * quote across all lines; the candidate with the highest non-zero count wins. When no
     * candidate is found, a double quote is returned if the quote itself is a double quote, and
     * {@code null} otherwise.
     *
     * @param list the sample lines
     * @param str the delimiter
     * @param str2 the quote string
     * @return the guessed escape string, or {@code null}
     */
    private static String guessEscape(List<String> list, String str, String str2) {
        String bestEscape = null;
        int bestCount = 0;
        for (final String candidate : ESCAPE_CANDIDATES) {
            final Pattern escapedSpecial = Pattern.compile(
                    String.format("%s(?:%s|%s)", Pattern.quote(candidate), Pattern.quote(str), Pattern.quote(str2)));
            int count = 0;
            for (final String line : list) {
                count += countPattern(line, escapedSpecial);
            }
            if (count > bestCount) {
                bestEscape = candidate;
                bestCount = count;
            }
        }
        if (bestEscape != null) {
            return bestEscape;
        }
        return "\"".equals(str2) ? "\"" : null;
    }

    /**
     * Guesses the string that represents a null value.
     *
     * <p>For each candidate, counts occurrences where it fills a whole field, i.e. is preceded by
     * the start of the line or a delimiter and followed by the end of the line or a delimiter.
     * The candidate with the highest non-zero count wins.
     *
     * @param list the sample lines
     * @param str the delimiter
     * @return the guessed null string, or {@code null} when no candidate occurs
     */
    private static String guessNullString(List<String> list, String str) {
        String bestNullString = null;
        int bestCount = 0;
        for (final String candidate : NULL_STRING_CANDIDATES) {
            final Pattern wholeField = Pattern.compile(
                    String.format("(?:^|%s)%s(?:$|%s)", Pattern.quote(str), Pattern.quote(candidate), Pattern.quote(str)));
            int count = 0;
            for (final String line : list) {
                count += countPattern(line, wholeField);
            }
            if (count > bestCount) {
                bestNullString = candidate;
                bestCount = count;
            }
        }
        return bestNullString;
    }

    /**
     * Guesses how many leading records to skip, based on column counts.
     *
     * <p>Scans the records in order and returns the index of the first record whose column count
     * is not exceeded by any later record inside the window ending at index 10 (exclusive). At
     * most the first 10 records are considered as candidates, and the last record is never one.
     *
     * @param list the tokenized sample records
     * @return the number of leading records to skip; 0 when no candidate qualifies
     */
    private static int guessSkipHeaderLines(List<List<String>> list) {
        final List<Integer> columnCounts = list.stream().map(List::size).collect(Collectors.toList());
        final int lastCandidate = Math.min(10, columnCounts.size() - 1);
        final int windowEnd = Math.min(columnCounts.size(), 10);
        for (int next = 1; next <= lastCandidate; next++) {
            final int reference = columnCounts.get(next - 1);
            boolean noneWider = true;
            for (final int count : columnCounts.subList(next, windowEnd)) {
                if (count > reference) {
                    noneWider = false;
                    break;
                }
            }
            if (noneWider) {
                return next - 1;
            }
        }
        return 0;
    }

    /**
     * Guesses the comment line marker and returns the lines with the comment lines removed.
     *
     * <p>A line counts as a comment for a candidate marker when it starts with that marker,
     * unless it starts with the quote string, or starts with the null string followed by the
     * delimiter or the end of the line. The candidate that removes the most lines (at least one)
     * wins and is stored in {@code configDiff} under "comment_line_marker".
     *
     * @param list the sample lines
     * @param str the delimiter
     * @param str2 the quote string; ignored when {@code null} or empty
     * @param str3 the null string; ignored when {@code null}
     * @param configDiff receives "comment_line_marker" when a marker is chosen
     * @return the lines without comment lines, or {@code list} itself when no marker is chosen
     */
    private static List<String> guessCommentLineMarker(List<String> list, String str, String str2, String str3, ConfigDiff configDiff) {
        // Lines matching any of these are never treated as comments.
        final List<Pattern> keepPatterns = new ArrayList<>();
        if (str2 != null && !str2.isEmpty()) {
            keepPatterns.add(Pattern.compile("^" + Pattern.quote(str2)));
        }
        if (str3 != null) {
            keepPatterns.add(Pattern.compile(String.format("^%s(?:%s|$)", Pattern.quote(str3), Pattern.quote(str))));
        }

        String bestMarker = null;
        List<String> bestRemaining = null;
        int bestRemoved = 0;
        for (final String marker : COMMENT_LINE_MARKER_CANDIDATES) {
            final Pattern startsWithMarker = Pattern.compile("^" + Pattern.quote(marker));
            final List<String> remaining = new ArrayList<>();
            for (final String line : list) {
                boolean keep = false;
                for (final Pattern keepPattern : keepPatterns) {
                    if (keepPattern.matcher(line).find()) {
                        keep = true;
                        break;
                    }
                }
                if (keep || !startsWithMarker.matcher(line).find()) {
                    remaining.add(line);
                }
            }
            final int removed = list.size() - remaining.size();
            if (removed > bestRemoved) {
                bestMarker = marker;
                bestRemaining = remaining;
                bestRemoved = removed;
            }
        }

        if (bestMarker == null) {
            return list;
        }
        configDiff.set("comment_line_marker", bestMarker);
        return bestRemaining;
    }

    /**
     * Guesses whether the first record is a header by comparing string lengths per column.
     *
     * <p>For each column of the first record, the lengths of that column's non-null values are
     * collected from all records. If the lengths after the first one are nearly constant
     * (variance at most 0.2) and the first length deviates strongly from their average (by more
     * than 70% of the average, or is greater than 1 when the average is 0), the first record is
     * considered a header.
     *
     * <p>Records that are shorter than the first record simply contribute nothing to the
     * columns they lack, instead of failing with an {@link IndexOutOfBoundsException}.
     *
     * @param list the tokenized sample records; must contain at least one record
     * @return {@code true} if some column suggests the first record is a header
     */
    private static boolean guessStringHeaderLine(List<List<String>> list) {
        final List<String> firstRecord = list.get(0);
        for (int column = 0; column < firstRecord.size(); column++) {
            final int index = column;
            final List<Integer> lengths = list.stream()
                    // Ragged samples are possible: skip records that do not have this column.
                    .filter(record -> index < record.size())
                    .map(record -> record.get(index))
                    .filter(value -> value != null)
                    .map(String::length)
                    .collect(Collectors.toList());
            if (lengths.size() <= 1) {
                continue;
            }
            final List<Integer> bodyLengths = lengths.subList(1, lengths.size());
            if (varianceOfList(bodyLengths) <= 0.2d) {
                final double bodyAverage = averageOfList(bodyLengths);
                final int firstLength = lengths.get(0).intValue();
                if (bodyAverage == 0.0d) {
                    if (firstLength > 1) {
                        return true;
                    }
                } else if (Math.abs(bodyAverage - firstLength) / bodyAverage > 0.7d) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Adds up all integers in the list.
     *
     * @param list the values to add
     * @return the sum as an {@code int}; 0 for an empty list
     */
    private static int sumOfList(List<Integer> list) {
        int total = 0;
        for (final Integer value : list) {
            total += value.intValue();
        }
        return total;
    }

    /**
     * Computes the arithmetic mean of the integers in the list.
     *
     * @param list the values to average
     * @return the mean, or 0.0 for an empty list
     */
    private static double averageOfList(List<Integer> list) {
        long total = 0L;
        long count = 0L;
        for (final Integer value : list) {
            total += value.intValue();
            count++;
        }
        if (count == 0L) {
            return 0.0d;
        }
        return (double) total / count;
    }

    /**
     * Computes the population variance of the integers in the list, i.e. the mean of the squared
     * deviations from the list's average.
     *
     * @param list the values
     * @return the variance, or 0.0 for an empty list
     */
    private static double varianceOfList(List<Integer> list) {
        final double mean = averageOfList(list);
        return list.stream()
                .mapToDouble(value -> (value.intValue() - mean) * (value.intValue() - mean))
                .average()
                .orElse(0.0d);
    }

    /**
     * Computes the standard deviation of the list, with a floor for use as a divisor.
     *
     * @param list the values
     * @return the square root of the variance, or 1.0E-9 when that root is below 1.0E-11
     */
    private static double standardDeviationOfList(List<Integer> list) {
        final double deviation = Math.sqrt(varianceOfList(list));
        return deviation < 1.0E-11d ? 1.0E-9d : deviation;
    }

    /**
     * Scores how strongly a line suggests that a given quote character really quotes fields.
     *
     * <p>Two patterns are counted, both describing a field that starts (after optional
     * whitespace) and ends with the quote: one whose content contains no quote, worth 20 per
     * match, and one whose content contains no delimiter, worth 40 per match.
     *
     * @param str the line to inspect
     * @param str2 the delimiter, already regex-quoted
     * @param str3 the quote, already regex-quoted
     * @return the weighted match count
     */
    private static int weighQuote(String str, String str2, String str3) {
        final Pattern quotedWithoutInnerQuote = Pattern.compile(
                String.format("(?:\\A|%s)\\s*%s(?:(?!%s).)*\\s*%s(?:$|%s)", str2, str3, str3, str3, str2));
        final Pattern quotedWithoutInnerDelimiter = Pattern.compile(
                String.format("(?:\\A|%s)\\s*%s(?:(?!%s).)*\\s*%s(?:$|%s)", str2, str3, str2, str3, str2));
        final int withoutInnerQuote = countPattern(str, quotedWithoutInnerQuote);
        final int withoutInnerDelimiter = countPattern(str, quotedWithoutInnerDelimiter);
        return (withoutInnerQuote * 20) + (withoutInnerDelimiter * 40);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Counts the non-overlapping matches of a pattern in a string.
     *
     * <p>A single {@link Matcher} is reused so that each {@code find()} continues after the
     * previous match. Creating a new matcher per iteration would restart from the beginning
     * every time and never terminate once the pattern matches.
     *
     * @param str the text to search
     * @param pattern the compiled pattern to look for
     * @return the number of matches; 0 when the pattern does not occur
     */
    public static int countPattern(String str, Pattern pattern) {
        final Matcher matcher = pattern.matcher(str);
        int count = 0;
        while (matcher.find()) {
            count++;
        }
        return count;
    }

    /**
     * Concatenates the lines as UTF-8 bytes, with the newline's string between consecutive lines.
     *
     * <p>No newline is written before the first line or after the last one.
     *
     * @param list the lines to join
     * @param newline supplies the separator; only consulted when there are at least two lines
     * @return the joined bytes
     */
    private static byte[] joinBytes(List<String> list, Newline newline) {
        final ByteArrayOutputStream joined = new ByteArrayOutputStream();
        int written = 0;
        for (final String line : list) {
            if (written > 0) {
                final byte[] separator = newline.getString().getBytes(StandardCharsets.UTF_8);
                joined.write(separator, 0, separator.length);
            }
            final byte[] encoded = line.getBytes(StandardCharsets.UTF_8);
            joined.write(encoded, 0, encoded.length);
            written++;
        }
        return joined.toByteArray();
    }

    /**
     * Guesses one type per column by delegating to the shared {@code SchemaGuess} instance.
     *
     * @param list the tokenized records to inspect
     * @return the guessed column types as returned by {@code SchemaGuess.typesFromListRecords}
     */
    private static List<GuesstimatedType> typesFromListRecords(List<List<String>> list) {
        final List<GuesstimatedType> guessedTypes = SCHEMA_GUESS.typesFromListRecords(list);
        return guessedTypes;
    }
}
