package io.projectglow.vcf;

import htsjdk.variant.vcf.VCFCompoundHeaderLine;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import io.projectglow.common.GenotypeFields$;
import io.projectglow.common.VariantSchemas$;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.ArrayType$;
import org.apache.spark.sql.types.AtomicType;
import org.apache.spark.sql.types.BooleanType$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DoubleType$;
import org.apache.spark.sql.types.IntegerType$;
import org.apache.spark.sql.types.MapType$;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.MetadataBuilder;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.StructType$;
import scala.Array$;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Some;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.IterableLike;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Iterable$;
import scala.collection.immutable.Map;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;

/* compiled from: VCFSchemaInferrer.scala */
/* loaded from: input_file:io/projectglow/vcf/VCFSchemaInferrer$.class */
public final class VCFSchemaInferrer$ {
    /** Singleton instance; assigned by the private constructor, which the static initializer below runs. */
    public static VCFSchemaInferrer$ MODULE$;
    /**
     * Schema overrides keyed by header line ID (the constructor registers "GT", "CSQ" and "ANN").
     * Each value receives the header line's description and returns the Spark types for that line.
     */
    private final Map<String, Function1<String, Seq<DataType>>> particularSchemas;
    /** Field-metadata key under which a header line's count is stored. */
    private final String VCF_HEADER_COUNT_KEY;
    /** Field-metadata key under which a header line's description is stored. */
    private final String VCF_HEADER_DESCRIPTION_KEY;

    static {
        // The instance is not kept here: the constructor publishes itself through MODULE$.
        new VCFSchemaInferrer$();
    }

    /**
     * Builds the schema field that represents one INFO header line.
     *
     * <p>The field name is the INFO prefix followed by the header ID, the type is the first type
     * reported by {@code typesForHeader}, and the header's count and description are carried as
     * field metadata.
     *
     * @param vCFInfoHeaderLine the INFO header line to convert
     * @return the corresponding schema field
     */
    public StructField getInfoFieldStruct(VCFInfoHeaderLine vCFInfoHeaderLine) {
        String fieldName = VariantSchemas$.MODULE$.infoFieldPrefix() + vCFInfoHeaderLine.getID();
        DataType fieldType = (DataType) typesForHeader(vCFInfoHeaderLine).head();
        boolean nullable = StructField$.MODULE$.apply$default$3();
        Metadata fieldMetadata = metadataForLine(vCFInfoHeaderLine);
        return new StructField(fieldName, fieldType, nullable, fieldMetadata);
    }

    /**
     * Assembles the row schema from the base VCF schema, the INFO data and a {@code genotypes} array.
     *
     * @param z    forwarded to {@link #inferGenotypeSchema(boolean, Seq)}; when true the genotype
     *             struct starts with the sample ID field
     * @param z2   when true, each validated INFO header line becomes its own column; when false, a
     *             single string-to-string {@code attributes} map column is added instead
     * @param seq  INFO header lines (only consulted when {@code z2} is true)
     * @param seq2 FORMAT header lines that define the genotype struct
     * @return the inferred schema
     * @throws IllegalArgumentException if header lines sharing an ID are incompatible
     */
    public StructType inferSchema(boolean z, boolean z2, Seq<VCFInfoHeaderLine> seq, Seq<VCFFormatHeaderLine> seq2) {
        StructType schemaWithInfo;
        if (z2) {
            // One column per INFO header, appended to the base schema in fold order.
            schemaWithInfo = (StructType) validateHeaders(seq).foldLeft(
                    VariantSchemas$.MODULE$.vcfBaseSchema(),
                    (schemaSoFar, infoLine) ->
                            ((StructType) schemaSoFar).add(MODULE$.getInfoFieldStruct((VCFInfoHeaderLine) infoLine)));
        } else {
            StructType baseSchema = VariantSchemas$.MODULE$.vcfBaseSchema();
            StructField attributesField = new StructField(
                    "attributes",
                    MapType$.MODULE$.apply(StringType$.MODULE$, StringType$.MODULE$),
                    StructField$.MODULE$.apply$default$3(),
                    StructField$.MODULE$.apply$default$4());
            schemaWithInfo = baseSchema.add(attributesField);
        }

        StructField genotypesField = new StructField(
                "genotypes",
                ArrayType$.MODULE$.apply(inferGenotypeSchema(z, seq2)),
                StructField$.MODULE$.apply$default$3(),
                StructField$.MODULE$.apply$default$4());
        return schemaWithInfo.add(genotypesField);
    }

    /**
     * Builds the struct type describing a single genotype entry.
     *
     * @param z   when true, the struct begins with the sample ID field
     * @param seq FORMAT header lines; each validated line contributes one or more fields
     * @return the genotype struct type
     * @throws IllegalArgumentException if header lines sharing an ID are incompatible
     */
    public StructType inferGenotypeSchema(boolean z, Seq<VCFFormatHeaderLine> seq) {
        StructType initialSchema = StructType$.MODULE$.apply(Seq$.MODULE$.empty());
        if (z) {
            initialSchema = initialSchema.add(VariantSchemas$.MODULE$.sampleIdField());
        }
        // The per-line helper appends fields by reassigning the struct held in this reference cell.
        ObjectRef schemaRef = ObjectRef.create(initialSchema);
        validateHeaders(seq).foreach(formatLine -> {
            $anonfun$inferGenotypeSchema$1(schemaRef, formatLine);
            return BoxedUnit.UNIT;
        });
        return (StructType) schemaRef.elem;
    }

    /**
     * Convenience overload that pulls the INFO and FORMAT header lines out of a parsed VCF header
     * and delegates to {@link #inferSchema(boolean, boolean, Seq, Seq)}.
     *
     * @param z         forwarded unchanged to the four-argument overload
     * @param z2        forwarded unchanged to the four-argument overload
     * @param vCFHeader header supplying the INFO and FORMAT lines
     * @return the inferred schema
     */
    public StructType inferSchema(boolean z, boolean z2, VCFHeader vCFHeader) {
        Seq<VCFInfoHeaderLine> infoLines =
                ((TraversableOnce) JavaConverters$.MODULE$.collectionAsScalaIterableConverter(vCFHeader.getInfoHeaderLines()).asScala()).toSeq();
        Seq<VCFFormatHeaderLine> formatLines =
                ((TraversableOnce) JavaConverters$.MODULE$.collectionAsScalaIterableConverter(vCFHeader.getFormatHeaderLines()).asScala()).toSeq();
        return inferSchema(z, z2, infoLines, formatLines);
    }

    /**
     * Derives VCF header lines from a schema (the inverse direction of schema inference).
     *
     * <p>INFO lines are produced for every top-level field whose name starts with the INFO prefix.
     * If the schema contains the genotypes field, FORMAT lines are produced from the fields of its
     * element struct, de-duplicated, and appended after the INFO lines.
     *
     * @param structType schema to convert
     * @return INFO header lines followed by the distinct FORMAT header lines
     * @throws MatchError if a field's data type has no VCF header type (raised by vcfDataType)
     */
    public Seq<VCFHeaderLine> headerLinesFromSchema(StructType structType) {
        return (Seq) ((Seq) ((TraversableLike) structType.filter(structField -> {
            // Keep only fields carrying the INFO prefix.
            return BoxesRunTime.boxToBoolean($anonfun$headerLinesFromSchema$1(structField));
        })).map(structField2 -> {
            // The header ID is the field name without the INFO prefix.
            String stripPrefix = new StringOps(Predef$.MODULE$.augmentString(structField2.name())).stripPrefix(VariantSchemas$.MODULE$.infoFieldPrefix());
            VCFHeaderLineType vcfDataType = MODULE$.vcfDataType(structField2.dataType());
            Tuple2<VCFHeaderLineCount, Option<Object>> vcfHeaderLineCount = MODULE$.vcfHeaderLineCount(structField2);
            if (vcfHeaderLineCount == null) {
                throw new MatchError(vcfHeaderLineCount);
            }
            Tuple2 tuple2 = new Tuple2((VCFHeaderLineCount) vcfHeaderLineCount._1(), (Option) vcfHeaderLineCount._2());
            // Final argument false selects an INFO (not FORMAT) header line.
            return MODULE$.makeHeaderLine(stripPrefix, vcfDataType, (VCFHeaderLineCount) tuple2._1(), (Option) tuple2._2(), MODULE$.vcfHeaderLineDescription(structField2), false);
        }, Seq$.MODULE$.canBuildFrom())).$plus$plus(structType.exists(structField3 -> {
            // FORMAT lines are only generated when the schema has the genotypes field.
            return BoxesRunTime.boxToBoolean($anonfun$headerLinesFromSchema$3(structField3));
        }) ? (Seq) ((SeqLike) ((StructField) structType.find(structField4 -> {
            return BoxesRunTime.boxToBoolean($anonfun$headerLinesFromSchema$4(structField4));
        }).get()).dataType().elementType().flatMap(structField5 -> {
            Iterable option2Iterable;
            if (Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new String[]{VariantSchemas$.MODULE$.otherFieldsField().name(), VariantSchemas$.MODULE$.sampleIdField().name()})).contains(structField5.name())) {
                // The "other fields" and sample ID fields produce no header line.
                option2Iterable = Option$.MODULE$.option2Iterable(None$.MODULE$);
            } else if (MODULE$.getSpecialHeaderLine(structField5.name()).isDefined()) {
                // Fields with a predefined header line (see getSpecialHeaderLine) reuse it as-is.
                option2Iterable = Option$.MODULE$.option2Iterable(MODULE$.getSpecialHeaderLine(structField5.name()));
            } else {
                // Resolve the header ID through the reverse alias table, falling back to the field name.
                String str = (String) GenotypeFields$.MODULE$.reverseAliases().getOrElse(structField5.name(), () -> {
                    return structField5.name();
                });
                VCFHeaderLineType vcfDataType = MODULE$.vcfDataType(structField5.dataType());
                Tuple2<VCFHeaderLineCount, Option<Object>> vcfHeaderLineCount = MODULE$.vcfHeaderLineCount(structField5);
                if (vcfHeaderLineCount == null) {
                    throw new MatchError(vcfHeaderLineCount);
                }
                Tuple2 tuple2 = new Tuple2((VCFHeaderLineCount) vcfHeaderLineCount._1(), (Option) vcfHeaderLineCount._2());
                // Final argument true selects a FORMAT header line.
                option2Iterable = Option$.MODULE$.option2Iterable(new Some(MODULE$.makeHeaderLine(str, vcfDataType, (VCFHeaderLineCount) tuple2._1(), (Option) tuple2._2(), MODULE$.vcfHeaderLineDescription(structField5), true)));
            }
            return option2Iterable;
        }, Seq$.MODULE$.canBuildFrom())).distinct() : Seq$.MODULE$.empty(), Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Returns the Spark type(s) that a header line contributes to the schema.
     *
     * <p>Header IDs registered in {@code particularSchemas} use their dedicated schema function,
     * which is applied to the header's description. Every other header maps its VCF type to a Spark
     * type; that type is wrapped in an array unless the header has a fixed count of at most one.
     *
     * @param vCFCompoundHeaderLine the INFO or FORMAT header line
     * @return the types for the line; a single-element sequence unless a dedicated schema applies
     * @throws MatchError if the header's type is not one of the handled VCF header types
     */
    public Seq<DataType> typesForHeader(VCFCompoundHeaderLine vCFCompoundHeaderLine) {
        String headerId = vCFCompoundHeaderLine.getID();
        if (particularSchemas().contains(headerId)) {
            return (Seq) ((Function1) particularSchemas().apply(headerId)).apply(vCFCompoundHeaderLine.getDescription());
        }

        // Typed as DataType: the mapped value may be a string, double, integer or boolean type.
        DataType elementType = sparkTypeFor(vCFCompoundHeaderLine.getType());

        // getCount() is only consulted for fixed-count headers, matching the short-circuit below.
        boolean multiValued = !vCFCompoundHeaderLine.isFixedCount() || vCFCompoundHeaderLine.getCount() > 1;
        DataType columnType = multiValued ? ArrayType$.MODULE$.apply(elementType) : elementType;
        return Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new DataType[]{columnType}));
    }

    /** Maps a VCF header value type to the Spark type used for a single value of that type. */
    private DataType sparkTypeFor(VCFHeaderLineType type) {
        if (VCFHeaderLineType.Character.equals(type) || VCFHeaderLineType.String.equals(type)) {
            return StringType$.MODULE$;
        }
        if (VCFHeaderLineType.Float.equals(type)) {
            return DoubleType$.MODULE$;
        }
        if (VCFHeaderLineType.Integer.equals(type)) {
            return IntegerType$.MODULE$;
        }
        if (VCFHeaderLineType.Flag.equals(type)) {
            return BooleanType$.MODULE$;
        }
        throw new MatchError(type);
    }

    /**
     * Looks up the predefined header line for a genotype field name.
     *
     * @param str genotype field name
     * @return the genotype header line when {@code str} is the name of the calls or phased field,
     *         otherwise {@code None}
     */
    private Option<VCFHeaderLine> getSpecialHeaderLine(String str) {
        String[] genotypeFieldNames = {
            VariantSchemas$.MODULE$.callsField().name(),
            VariantSchemas$.MODULE$.phasedField().name()
        };
        Option headerLine = None$.MODULE$;
        if (Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(genotypeFieldNames)).contains(str)) {
            headerLine = new Some(VCFRowHeaderLines$.MODULE$.genotype());
        }
        return headerLine;
    }

    /**
     * Maps a Spark data type to the VCF header value type used to describe it.
     *
     * <p>Arrays are unwrapped to their element type, however deeply nested. An array whose elements
     * are structs is reported as {@code String}.
     *
     * @param dataType Spark type of the schema field
     * @return the matching VCF header value type
     * @throws MatchError if the (unwrapped) type is not string, double, integer or boolean
     */
    private VCFHeaderLineType vcfDataType(DataType dataType) {
        DataType current = dataType;
        while (current instanceof ArrayType) {
            DataType element = ((ArrayType) current).elementType();
            if (element instanceof StructType) {
                return VCFHeaderLineType.String;
            }
            current = element;
        }

        if (StringType$.MODULE$.equals(current)) {
            return VCFHeaderLineType.String;
        }
        if (DoubleType$.MODULE$.equals(current)) {
            return VCFHeaderLineType.Float;
        }
        if (IntegerType$.MODULE$.equals(current)) {
            return VCFHeaderLineType.Integer;
        }
        if (BooleanType$.MODULE$.equals(current)) {
            return VCFHeaderLineType.Flag;
        }
        throw new MatchError(current);
    }

    /**
     * Determines the header count for a schema field.
     *
     * <p>A count stored in the field's metadata wins: an all-digit value becomes a fixed integer
     * count, anything else is parsed as a {@code VCFHeaderLineCount} constant name. Without
     * metadata, arrays are unbounded, booleans have count 0 and every other type has count 1.
     *
     * @param structField schema field to inspect
     * @return the count kind, paired with the fixed count when the kind is {@code INTEGER}
     * @throws IllegalArgumentException if the metadata value is neither all digits nor a
     *         {@code VCFHeaderLineCount} constant name
     */
    private Tuple2<VCFHeaderLineCount, Option<Object>> vcfHeaderLineCount(StructField structField) {
        Metadata fieldMetadata = structField.metadata();
        if (fieldMetadata.contains(VCF_HEADER_COUNT_KEY())) {
            String storedCount = fieldMetadata.getString(VCF_HEADER_COUNT_KEY());
            if (NumberUtils.isDigits(storedCount)) {
                int fixedCount = new StringOps(Predef$.MODULE$.augmentString(storedCount)).toInt();
                return new Tuple2<>(VCFHeaderLineCount.INTEGER, new Some(BoxesRunTime.boxToInteger(fixedCount)));
            }
            return new Tuple2<>(VCFHeaderLineCount.valueOf(storedCount), None$.MODULE$);
        }

        DataType fieldType = structField.dataType();
        if (fieldType instanceof ArrayType) {
            return new Tuple2<>(VCFHeaderLineCount.UNBOUNDED, None$.MODULE$);
        }
        boolean isFlag = fieldType != null && fieldType.equals(BooleanType$.MODULE$);
        int scalarCount = isFlag ? 0 : 1;
        return new Tuple2<>(VCFHeaderLineCount.INTEGER, new Some(BoxesRunTime.boxToInteger(scalarCount)));
    }

    /**
     * Reads the header description stored in a field's metadata.
     *
     * @param structField schema field to inspect
     * @return the stored description, or the empty string when the metadata has none
     */
    private String vcfHeaderLineDescription(StructField structField) {
        Metadata fieldMetadata = structField.metadata();
        if (!fieldMetadata.contains(VCF_HEADER_DESCRIPTION_KEY())) {
            return "";
        }
        return fieldMetadata.getString(VCF_HEADER_DESCRIPTION_KEY());
    }

    /**
     * Captures a header line's count and description as schema field metadata.
     *
     * <p>A fixed integer count is stored as its decimal string; any other count kind is stored as
     * the {@code VCFHeaderLineCount} constant name.
     *
     * @param vCFCompoundHeaderLine header line to describe
     * @return metadata holding the count and description entries
     */
    private Metadata metadataForLine(VCFCompoundHeaderLine vCFCompoundHeaderLine) {
        VCFHeaderLineCount countType = vCFCompoundHeaderLine.getCountType();
        String countValue;
        if (countType == VCFHeaderLineCount.INTEGER) {
            countValue = String.valueOf(vCFCompoundHeaderLine.getCount());
        } else {
            countValue = countType.name();
        }

        MetadataBuilder builder = new MetadataBuilder();
        builder.putString(VCF_HEADER_COUNT_KEY(), countValue);
        builder.putString(VCF_HEADER_DESCRIPTION_KEY(), vCFCompoundHeaderLine.getDescription());
        return builder.build();
    }

    /**
     * Creates an INFO or FORMAT header line.
     *
     * @param str                header ID
     * @param vCFHeaderLineType  value type of the header
     * @param vCFHeaderLineCount count kind; {@code INTEGER} selects the fixed-count constructors
     * @param option             the fixed count; must be defined when the count kind is
     *                           {@code INTEGER} and is ignored otherwise
     * @param str2               header description
     * @param z                  true for a FORMAT header line, false for an INFO header line
     * @return the new header line
     * @throws java.util.NoSuchElementException if the count kind is {@code INTEGER} but
     *         {@code option} is empty
     */
    private VCFCompoundHeaderLine makeHeaderLine(String str, VCFHeaderLineType vCFHeaderLineType, VCFHeaderLineCount vCFHeaderLineCount, Option<Object> option, String str2, boolean z) {
        if (VCFHeaderLineCount.INTEGER.equals(vCFHeaderLineCount)) {
            // Only a fixed-count header reads the numeric count.
            int fixedCount = BoxesRunTime.unboxToInt(option.get());
            if (z) {
                return new VCFFormatHeaderLine(str, fixedCount, vCFHeaderLineType, str2);
            }
            return new VCFInfoHeaderLine(str, fixedCount, vCFHeaderLineType, str2);
        }

        if (z) {
            return new VCFFormatHeaderLine(str, vCFHeaderLineCount, vCFHeaderLineType, str2);
        }
        return new VCFInfoHeaderLine(str, vCFHeaderLineCount, vCFHeaderLineType, str2);
    }

    /**
     * Collapses header lines that share an ID into one representative per ID.
     *
     * <p>Lines are grouped by ID. Within a group every line after the first must equal the first
     * when descriptions are ignored; the first line of each group is returned.
     * NOTE(review): the result follows the grouped map's iteration order, which presumably is not
     * the input order - confirm before relying on column order.
     *
     * @param seq header lines, possibly containing repeated IDs
     * @return one header line per distinct ID
     * @throws IllegalArgumentException if two lines with the same ID differ in more than description
     */
    private <A extends VCFCompoundHeaderLine> Seq<A> validateHeaders(Seq<A> seq) {
        return ((TraversableOnce) seq.groupBy(headerLine -> {
            return headerLine.getID();
        }).map(idAndLines -> {
            if (idAndLines == null) {
                throw new MatchError(idAndLines);
            }
            String headerId = (String) idAndLines._1();
            Seq sameIdLines = (Seq) idAndLines._2();
            boolean allCompatible = ((IterableLike) sameIdLines.tail()).forall(otherLine -> {
                return BoxesRunTime.boxToBoolean($anonfun$validateHeaders$3(sameIdLines, otherLine));
            });
            if (!allCompatible) {
                throw new IllegalArgumentException("VCF headers with id " + headerId + " have incompatible schemas");
            }
            return (VCFCompoundHeaderLine) sameIdLines.head();
        }, Iterable$.MODULE$.canBuildFrom())).toSeq();
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the types contributed by the "GT" header: the phased field's type followed by the
     * calls field's type.
     *
     * @param str header description; not used
     * @return the two genotype types, phased first
     */
    public Seq<DataType> getGtSchema(String str) {
        DataType phasedType = VariantSchemas$.MODULE$.phasedField().dataType();
        DataType callsType = VariantSchemas$.MODULE$.callsField().dataType();
        DataType[] genotypeTypes = {phasedType, callsType};
        return Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(genotypeTypes));
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Derives the array-of-struct type for an annotation header from its description.
     *
     * <p>The sub-field list is the text after the last ':' in the description with single quotes
     * removed, split on the annotation delimiter regex. Each sub-field name has spaces removed and
     * '.' replaced by '_', and its type is looked up in {@code AnnotationUtils.allFieldsToSchema}.
     *
     * @param str header description listing the annotation sub-fields
     * @return a single-element sequence holding the array-of-struct type
     */
    public Seq<DataType> getAnnotationSchema(String str) {
        String[] colonSections = str.split(":");
        String subFieldList = ((String) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(colonSections)).last()).replace("'", "");
        String[] rawNames = subFieldList.split(AnnotationUtils$.MODULE$.annotationDelimiterRegex());

        StructField[] subFields = new StructField[rawNames.length];
        for (int i = 0; i < rawNames.length; i++) {
            String fieldName = rawNames[i].replace(" ", "").replace(".", "_");
            subFields[i] = new StructField(
                    fieldName,
                    (DataType) AnnotationUtils$.MODULE$.allFieldsToSchema().apply(fieldName),
                    StructField$.MODULE$.apply$default$3(),
                    StructField$.MODULE$.apply$default$4());
        }

        ArrayType annotationType = ArrayType$.MODULE$.apply(new StructType(subFields));
        return Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new ArrayType[]{annotationType}));
    }

    /** Returns the schema overrides keyed by header line ID. */
    private Map<String, Function1<String, Seq<DataType>>> particularSchemas() {
        return particularSchemas;
    }

    /** Returns the field-metadata key under which a header line's count is stored. */
    public String VCF_HEADER_COUNT_KEY() {
        return VCF_HEADER_COUNT_KEY;
    }

    /** Returns the field-metadata key under which a header line's description is stored. */
    public String VCF_HEADER_DESCRIPTION_KEY() {
        return VCF_HEADER_DESCRIPTION_KEY;
    }

    /**
     * Appends one (name, type) pair to the genotype struct held in {@code objectRef}.
     *
     * @param objectRef reference cell holding the struct built so far; updated in place
     * @param metadata  metadata attached to the new field
     * @param tuple2    field name paired with its data type
     */
    public static final /* synthetic */ void $anonfun$inferGenotypeSchema$4(ObjectRef objectRef, Metadata metadata, Tuple2 tuple2) {
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        String fieldName = (String) tuple2._1();
        DataType fieldType = (DataType) tuple2._2();
        StructField newField = new StructField(fieldName, fieldType, StructField$.MODULE$.apply$default$3(), metadata);
        objectRef.elem = ((StructType) objectRef.elem).add(newField);
    }

    /**
     * Adds the field(s) for one FORMAT header line to the genotype struct held in {@code objectRef}.
     *
     * <p>Field names come from the alias table entry for the header ID, or the ID itself when there
     * is no entry. The names are paired positionally with the header's types, and every resulting
     * field carries the header's metadata.
     *
     * @param objectRef           reference cell holding the struct built so far; updated in place
     * @param vCFFormatHeaderLine FORMAT header line to add
     * @throws IllegalArgumentException if the number of names differs from the number of types
     */
    public static final /* synthetic */ void $anonfun$inferGenotypeSchema$1(ObjectRef objectRef, VCFFormatHeaderLine vCFFormatHeaderLine) {
        String formatId = vCFFormatHeaderLine.getID();
        Seq fieldNames = (Seq) GenotypeFields$.MODULE$.aliases().getOrElse(formatId, () -> {
            return Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{formatId}));
        });
        Seq<DataType> fieldTypes = MODULE$.typesForHeader(vCFFormatHeaderLine);

        boolean sameLength = fieldNames.size() == fieldTypes.size();
        Predef$.MODULE$.require(sameLength, () -> {
            return "Must have same number of header line struct names and types";
        });

        Metadata lineMetadata = MODULE$.metadataForLine(vCFFormatHeaderLine);
        ((IterableLike) fieldNames.zip(fieldTypes, Seq$.MODULE$.canBuildFrom())).foreach(nameAndType -> {
            $anonfun$inferGenotypeSchema$4(objectRef, lineMetadata, nameAndType);
            return BoxedUnit.UNIT;
        });
    }

    /** Returns true when the field's name starts with the INFO field prefix. */
    public static final /* synthetic */ boolean $anonfun$headerLinesFromSchema$1(StructField structField) {
        String fieldName = structField.name();
        String infoPrefix = VariantSchemas$.MODULE$.infoFieldPrefix();
        return fieldName.startsWith(infoPrefix);
    }

    /** Returns true when the field's name equals the genotypes field name (null-safe comparison). */
    public static final /* synthetic */ boolean $anonfun$headerLinesFromSchema$3(StructField structField) {
        String fieldName = structField.name();
        String genotypesName = VariantSchemas$.MODULE$.genotypesFieldName();
        if (fieldName == null) {
            return genotypesName == null;
        }
        return fieldName.equals(genotypesName);
    }

    /** Returns true when the field's name equals the genotypes field name (null-safe comparison). */
    public static final /* synthetic */ boolean $anonfun$headerLinesFromSchema$4(StructField structField) {
        String fieldName = structField.name();
        String genotypesName = VariantSchemas$.MODULE$.genotypesFieldName();
        if (fieldName == null) {
            return genotypesName == null;
        }
        return fieldName.equals(genotypesName);
    }

    /**
     * Returns true when {@code vCFCompoundHeaderLine} equals the first line of {@code seq},
     * ignoring descriptions.
     */
    public static final /* synthetic */ boolean $anonfun$validateHeaders$3(Seq seq, VCFCompoundHeaderLine vCFCompoundHeaderLine) {
        VCFCompoundHeaderLine firstLine = (VCFCompoundHeaderLine) seq.head();
        return vCFCompoundHeaderLine.equalsExcludingDescription(firstLine);
    }

    private VCFSchemaInferrer$() {
        MODULE$ = this;
        this.particularSchemas = Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("GT"), str -> {
            return MODULE$.getGtSchema(str);
        }), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("CSQ"), str2 -> {
            return MODULE$.getAnnotationSchema(str2);
        }), Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc("ANN"), str3 -> {
            return MODULE$.getAnnotationSchema(str3);
        })}));
        this.VCF_HEADER_COUNT_KEY = "vcf_header_count";
        this.VCF_HEADER_DESCRIPTION_KEY = "vcf_header_description";
    }
}
