package nlp4j.wiki.converter;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;
import nlp4j.util.DateUtils;
import nlp4j.wiki.BreakException;
import nlp4j.wiki.WikiDumpReader;
import nlp4j.wiki.WikiPage;
import nlp4j.wiki.WikiPageHandler;
import nlp4j.wiki.util.WikiUtils;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:nlp4j/wiki/converter/MediaWikiHtmlConverter.class */
/**
 * Converts the pages read from a wiki dump file into a line-oriented text file of HTML.
 *
 * <p>Every page whose title does not contain {@code ':'} is appended to the output as a single
 * line of the form {@code id,title,html}, where {@code html} is the result of
 * {@code WikiUtils.toHtml(page.getText())} with all line feeds removed. Fields are neither quoted
 * nor escaped, so a title or HTML fragment containing a comma is written as-is.
 *
 * <p>Progress and a final summary are printed to {@code System.err}.
 */
public class MediaWikiHtmlConverter {

    /**
     * Converts {@code file}, writing the result next to it as {@code <absolute path>.html.txt}.
     *
     * @param file the dump file to read
     * @throws FileNotFoundException if {@code file} does not exist
     * @throws IOException if the derived output file already exists, or reading/closing the dump
     *     fails
     */
    public void convert(File file) throws IOException {
        convert(file, new File(file.getAbsolutePath() + ".html.txt"));
    }

    /**
     * Converts {@code file} and appends one line per accepted page to {@code file2}.
     *
     * <p>If a page cannot be converted or written, the stack trace is printed to
     * {@code System.err} and the conversion stops at that page; rows written so far are kept and
     * this method still returns normally after printing the summary.
     *
     * @param file the dump file to read
     * @param file2 the output file; must not exist yet
     * @throws FileNotFoundException if {@code file} does not exist
     * @throws IOException if {@code file2} already exists, or reading/closing the dump fails
     */
    public void convert(File file, final File file2) throws IOException {
        if (!file.exists()) {
            throw new FileNotFoundException("NOT_FOUND: " + file.getAbsolutePath());
        }
        if (file2.exists()) {
            throw new IOException("FILE_ALREADY_EXISTS: " + file2.getAbsolutePath());
        }
        System.err.println("CONVERTING_FROM: " + file.getAbsolutePath());
        System.err.println("CONVERTING_TO: " + file2.getAbsolutePath());

        long startMillis = System.currentTimeMillis();
        String startIso8601 = DateUtils.toISO8601(new Date());

        WikiPageHandler wikiPageHandler = new WikiPageHandler() {
            /** Number of rows written so far; used only for progress output. */
            private int count = 0;

            @Override
            public void read(WikiPage wikiPage) throws BreakException {
                // Guard before any dereference: a missing page cannot produce a row.
                if (wikiPage == null) {
                    return;
                }
                String id = wikiPage.getId();
                String title = wikiPage.getTitle();
                // Titles containing ':' are excluded; an untitled page cannot produce a
                // meaningful row either, so it is skipped instead of failing with an NPE.
                if (title == null || title.contains(":")) {
                    return;
                }
                try {
                    // Strip line feeds so that each page occupies exactly one output line.
                    String html = WikiUtils.toHtml(wikiPage.getText()).replace("\n", "");
                    String row = id + "," + title + "," + html + "\n";
                    FileUtils.write(file2, row, "UTF-8", true);
                } catch (Exception e) {
                    // Any conversion or write failure aborts the whole run.
                    e.printStackTrace();
                    throw new BreakException();
                }
                count++;
                if (count % 1000 == 0) {
                    System.err.println(count);
                }
            }
        };

        try (WikiDumpReader wikiDumpReader = new WikiDumpReader(file)) {
            try {
                wikiDumpReader.read(wikiPageHandler);
            } catch (BreakException ignored) {
                // The handler already printed the cause; BreakException only signals that
                // reading should stop early, so fall through to close the reader and report.
            }
        }

        long endMillis = System.currentTimeMillis();
        String endIso8601 = DateUtils.toISO8601(new Date());
        System.err.println("FINISHED");
        System.err.println("time: " + (endMillis - startMillis));
        System.err.println("input: " + file.getAbsolutePath());
        System.err.println("output: " + file2.getAbsolutePath());
        System.err.println("start: " + startIso8601);
        System.err.println("end: " + endIso8601);
    }
}
