package org.culturegraph.mf.mediawiki.analyzers;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.culturegraph.mf.framework.StreamReceiver;
import org.culturegraph.mf.framework.annotations.Description;
import org.culturegraph.mf.framework.annotations.In;
import org.culturegraph.mf.framework.annotations.Out;
import org.culturegraph.mf.framework.helpers.DefaultObjectPipe;
import org.culturegraph.mf.mediawiki.WikiTextParser;
import org.culturegraph.mf.mediawiki.objects.WikiPage;

/**
 * Extracts authority-file identifiers from the wikitext of a {@link WikiPage}
 * and emits them as literals on the downstream {@link StreamReceiver}.
 *
 * <p>Two template shapes are recognised (both case-insensitively):
 * <ul>
 *   <li><code>{{Normdaten|KEY=value|...}}</code> - every {@code KEY=value} entry of the
 *       first such template is emitted as a literal whose name is the lower-cased
 *       key and whose value is the text after the first {@code =}.</li>
 *   <li><code>{{IMDb Name|id=...}}</code> (the {@code id=} prefix is optional) - the first
 *       parameter of the first such template is emitted as the literal
 *       {@code "imdb"}.</li>
 * </ul>
 *
 * <p>Only literals are emitted here; no record or entity events are produced by
 * this class.
 */
@Description("Authority link extraction for wikitext (GND, LOC, IMDB, VIAF).")
@In(WikiPage.class)
@Out(StreamReceiver.class)
public final class AuthorityLinkExtractor extends DefaultObjectPipe<WikiPage, StreamReceiver> implements Analyzer {

    /** Flags shared by the template patterns (numerically identical to the former literal {@code 10}). */
    private static final int TEMPLATE_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE;

    /** Matches an IMDb name template; group 1 is its first parameter without an optional {@code id=} prefix. */
    private static final Pattern IMDB_PATTERN =
            Pattern.compile("\\{\\{\\s*imdb\\s*name\\s*\\|(?:id=)?([^|}]*)[^}]*\\}\\}", TEMPLATE_FLAGS);

    /** Matches a Normdaten template; group 1 is everything between the first pipe and the closing braces. */
    private static final Pattern AUTHORITY_PATTERN =
            Pattern.compile("\\{\\{\\s*normdaten\\s*\\|([^}]*)\\}\\}", TEMPLATE_FLAGS);

    /** Splits a template parameter list on the literal pipe character. */
    private static final Pattern ENTRY_PATTERN = Pattern.compile("|", Pattern.LITERAL);

    /**
     * Splits a single {@code key = value} entry. The key group is reluctant so that
     * it ends at the FIRST {@code =} and so that whitespace in front of the
     * {@code =} is consumed by {@code \s*} instead of ending up inside the key.
     */
    private static final Pattern KEYVALUE_PATTERN = Pattern.compile("(.*?)\\s*=\\s*(.*)");

    /**
     * Reports that this analyzer works on wikitext only.
     *
     * @return always {@code true}
     */
    @Override // org.culturegraph.mf.mediawiki.analyzers.Analyzer
    public boolean wikiTextOnly() {
        return true;
    }

    /**
     * Returns the parse level this analyzer needs.
     *
     * @return always {@code null}
     */
    @Override // org.culturegraph.mf.mediawiki.analyzers.Analyzer
    public WikiTextParser.ParseLevel requiredParseLevel() {
        return null;
    }

    /**
     * Scans the page's wikitext and emits one literal per authority entry found.
     *
     * @param wikiPage the page whose wikitext is scanned
     */
    public void process(WikiPage wikiPage) {
        final String wikiText = wikiPage.getWikiText();

        // Only the first Normdaten template on the page is evaluated.
        final Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(wikiText);
        if (authorityMatcher.find()) {
            for (final String entry : ENTRY_PATTERN.split(authorityMatcher.group(1))) {
                final Matcher keyValueMatcher = KEYVALUE_PATTERN.matcher(entry.trim());
                if (keyValueMatcher.find()) {
                    // Locale.ROOT keeps key names stable regardless of the JVM's
                    // default locale (e.g. "VIAF" must not become "vıaf" under tr_TR).
                    final String key = keyValueMatcher.group(1).toLowerCase(Locale.ROOT);
                    getReceiver().literal(key, keyValueMatcher.group(2));
                }
            }
        }

        // Likewise, only the first IMDb name template is evaluated.
        final Matcher imdbMatcher = IMDB_PATTERN.matcher(wikiText);
        if (imdbMatcher.find()) {
            getReceiver().literal("imdb", imdbMatcher.group(1));
        }
    }
}
