package org.opencms.util;

import com.opencms.template.A_CmsXmlContent;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.nodes.TextNode;
import org.opencms.test.OpenCmsTestCase;

/* loaded from: input_file:org/opencms/util/TestCmsHtmlExtractor.class */
/**
 * Test cases for plain text extraction from HTML using {@link CmsHtmlExtractor}.
 *
 * <p>Besides the extractor under test, the class contains two alternative
 * extraction helpers built directly on the HTML parser library; their output
 * is printed next to the extractor output so the three can be compared by eye.
 */
public class TestCmsHtmlExtractor extends OpenCmsTestCase {

    /** Encoding name handed to {@link CmsHtmlExtractor#extractText} in every test case. */
    private static final String ENCODING = "ISO-8859-1";

    /** Sample HTML document containing headlines, links, nested and unclosed paragraphs and very long lines. */
    private static final String HTML_PAGE_1 = "<html><title>This is the title</title><body><h1>A headline</h1>This is a test.<br>This  is&nbsp;a <a href=\"http://www.opencms.org\">link</a> in a    paragraph.<p>Some more text here. This is a very long line, because this is long line, because this is long line, because this is long line, because this is long line. This is a very long line, because this is long line, because this is long line, because this is long line, because this is long line. <p>This is a paragraph.</p>This is a very long line, because this is long line, because this is long line, because this is long line, because this is long line. <div><p>This is a p in a div<p>This is another p in a div<p></div><h2>Another headline <b>with some tag content</b></h2><p>This is a paragraph.</p>This is a very long line, because this is long line, because this is long line, because this is long line, because this is long line. <div><p>This is a p in a div<p>This is another p in a div<p></div></body></html>";

    /**
     * Extracts text from the given HTML by visiting all parser nodes with a
     * {@code StringBean} that has its "links" and "collapse" options switched on.
     *
     * @param str the HTML input to extract the text from
     * @return the strings collected by the bean
     * @throws Exception if parsing the input fails
     */
    public static String extractFromHtml2(String str) throws Exception {
        Parser parser = new Parser();
        parser.setInputHTML(str);

        StringBean collector = new StringBean();
        collector.setLinks(true);
        collector.setCollapse(true);

        parser.visitAllNodesWith(collector);
        return collector.getStrings();
    }

    /**
     * Smoke test: runs the sample page through {@link CmsHtmlExtractor} and the two
     * local helper extractors and prints each result to standard out.
     *
     * <p>No assertions are made on the extracted text; the test fails only if one
     * of the extraction calls throws.
     *
     * @throws Exception if any of the extractions fails
     */
    public void testHtmlExtractor() throws Exception {
        System.out.println(CmsHtmlExtractor.extractText(HTML_PAGE_1, ENCODING) + "\n\n");
        System.out.println(extractFromHtml(HTML_PAGE_1) + "\n\n");
        System.out.println(extractFromHtml2(HTML_PAGE_1) + "\n\n");
    }

    /**
     * Checks the extractor results for empty, {@code null} and whitespace-only input.
     *
     * @throws Exception if the extraction fails
     */
    public void testHtmlExtractorWithEmptyInput() throws Exception {
        assertEquals(
            "Empty input should generate empty output",
            "",
            CmsHtmlExtractor.extractText("", ENCODING));

        // The cast selects the String overload; a bare null may be ambiguous between overloads.
        assertNull(
            "null input should generate null output",
            CmsHtmlExtractor.extractText((String) null, ENCODING));

        assertEquals(
            "Whitespace only input should generate empty String output",
            "",
            CmsHtmlExtractor.extractText("   \t\r\n  ", ENCODING));
    }

    /**
     * Extracts text from the given HTML by collecting all {@link TextNode}s and
     * concatenating their trimmed plain text without any separator.
     *
     * @param str the HTML input to extract the text from
     * @return the concatenated, trimmed text of all text nodes
     * @throws Exception if parsing the input fails
     */
    private String extractFromHtml(String str) throws Exception {
        Parser parser = Parser.createParser(str, (String) null);
        // toNodeArray() is declared to return Node[], so the elements are handled through
        // the Node interface even though the filter only lets TextNode instances through.
        Node[] textNodes = parser.extractAllNodesThatMatch(new NodeClassFilter(TextNode.class)).toNodeArray();

        StringBuilder result = new StringBuilder();
        for (Node node : textNodes) {
            result.append(node.toPlainTextString().trim());
        }
        return result.toString();
    }
}
