/*
 * Decompiled with CFR 0.152.
 */
package org.xwiki.annotation.internal.content;

import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import javax.inject.Inject;
import javax.inject.Singleton;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.slf4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSInput;
import org.xwiki.annotation.content.TextExtractor;
import org.xwiki.component.annotation.Component;
import org.xwiki.component.phase.Initializable;
import org.xwiki.component.phase.InitializationException;
import org.xwiki.rendering.syntax.Syntax;
import org.xwiki.xml.XMLUtils;
import org.xwiki.xml.html.HTMLCleaner;
import org.xwiki.xml.html.HTMLCleanerConfiguration;
import org.xwiki.xml.html.HTMLUtils;

/**
 * {@link TextExtractor} registered under the {@code html}, {@code xhtml}, {@code annotatedhtml} and
 * {@code annotatedxhtml} hints. It wraps the received fragment in a full XHTML document, runs it through the
 * injected {@link HTMLCleaner}, re-parses the serialized result and concatenates the content of every text node
 * in document order.
 */
@Component(hints={"html", "xhtml", "annotatedhtml", "annotatedxhtml"})
@Singleton
public class HTMLTextExtractor
implements TextExtractor,
Initializable {
    /** DOM Load and Save implementation used to create the {@link LSInput} handed to the XML parser. */
    private DOMImplementationLS lsImpl;

    /** Parameters applied to the HTML cleaner configuration on every cleaning pass; filled in {@link #initialize()}. */
    private Map<String, String> htmlCleanerParametersMap;

    /** Cleaner used to turn the wrapped HTML fragment into a DOM document. */
    @Inject
    private HTMLCleaner htmlCleaner;

    /** Logger used to report text extraction failures. */
    @Inject
    private Logger logger;

    /**
     * Looks up the DOM "LS 3.0" implementation and prepares the HTML cleaner parameters.
     *
     * @throws InitializationException if the DOM Load and Save implementation cannot be obtained
     */
    @Override
    public void initialize() throws InitializationException {
        try {
            this.lsImpl = (DOMImplementationLS) DOMImplementationRegistry.newInstance().getDOMImplementation("LS 3.0");
        }
        catch (Exception exception) {
            throw new InitializationException("Failed to initialize DOM Level 3 Load and Save APIs.", exception);
        }
        this.htmlCleanerParametersMap = new HashMap<String, String>();
        // NOTE(review): presumably makes the cleaner emit character references instead of named entities, so
        // that the cleaned output can be re-parsed as plain XML - confirm against the HTMLCleaner documentation.
        this.htmlCleanerParametersMap.put("useCharacterReferences", Boolean.toString(true));
    }

    /**
     * Extracts the plain text of an HTML fragment by concatenating all its text nodes.
     *
     * @param content the HTML fragment to extract the text from; {@code null} is treated as having no text
     * @param syntax the syntax of the content; not used by this implementation
     * @return the concatenated text nodes of the cleaned document. If the XPath evaluation fails, a warning is
     *         logged and whatever was collected before the failure is returned.
     */
    @Override
    public String extractText(String content, Syntax syntax) {
        if (content == null) {
            // Without this guard the null fragment would be concatenated into the wrapper markup as the literal
            // "null" and reported as the text of the document.
            return "";
        }

        StringBuilder fullContent = new StringBuilder();
        Document htmlDoc = this.parseHTML(content);
        // A new XPath object is created on each call rather than being shared across calls.
        XPath xPath = XPathFactory.newInstance().newXPath();
        try {
            NodeList textNodes = (NodeList) xPath.compile("//text()").evaluate(htmlDoc, XPathConstants.NODESET);
            for (int i = 0; i < textNodes.getLength(); ++i) {
                fullContent.append(textNodes.item(i).getTextContent());
            }
        }
        catch (XPathExpressionException e) {
            // Best effort: log the root cause and return what has been gathered so far.
            this.logger.warn("Failed to extract the text content of an HTML document. Root cause: [{}]", ExceptionUtils.getRootCauseMessage(e));
        }
        return fullContent.toString();
    }

    /**
     * Cleans the passed HTML fragment and parses the cleaned markup as XML.
     *
     * @param html the HTML fragment to parse
     * @return the document obtained by parsing the cleaned markup
     */
    private Document parseHTML(String html) {
        return this.parseXML(this.cleanHTML(html));
    }

    /**
     * Wraps the fragment in a full document, cleans it with the configured parameters and serializes the result.
     *
     * @param html the HTML fragment to clean
     * @return the cleaned document serialized as a string
     */
    private String cleanHTML(String html) {
        HTMLCleanerConfiguration config = this.htmlCleaner.getDefaultConfiguration();
        config.setParameters(this.htmlCleanerParametersMap);
        Reader wrappedHtml = new StringReader(this.wrap(html));
        Document htmlDoc = this.htmlCleaner.clean(wrappedHtml, config);
        return HTMLUtils.toString(htmlDoc);
    }

    /**
     * Parses the passed XML string using the DOM Load and Save API.
     *
     * @param xml the XML markup to parse
     * @return the result of {@link XMLUtils#parse(LSInput)} for the given markup
     */
    private Document parseXML(String xml) {
        LSInput input = this.lsImpl.createLSInput();
        input.setStringData(xml);
        return XMLUtils.parse(input);
    }

    /**
     * Surrounds an HTML fragment with an XML declaration, a doctype and the html/head/body elements.
     *
     * @param fragment the markup to place inside the body element
     * @return the full document markup containing the fragment
     */
    private String wrap(String fragment) {
        return "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE html><html xmlns=\"http://www.w3.org/1999/xhtml\"><head></head><body>" + fragment + "</body></html>";
    }
}

