package de.jetwick.snacktory;

import com.google.android.exoplayer2.text.ttml.TtmlNode;
import com.mopub.common.Constants;
import gd.c;
import gd.d;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: classes2.dex */
public class ArticleTextExtractor {

    /* renamed from: a, reason: collision with root package name */
    // Presumably a logger created by the obfuscated gd.d factory; it is not
    // referenced anywhere else in the visible code. TODO confirm.
    private static final c f28095a = d.a((Class<?>) ArticleTextExtractor.class);

    /* renamed from: b, reason: collision with root package name */
    // Tag names that getNodes(Document) accepts as content candidates
    // (the whole tag name must match).
    private static final Pattern f28096b = Pattern.compile("p|div|td|h1|h2|article|section");

    /* renamed from: i, reason: collision with root package name */
    // Inline-style fragments that cost an element 50 points in c(Element).
    private static final Pattern f28097i = Pattern.compile("hidden|display: ?none|font-size: ?small");

    /* renamed from: j, reason: collision with root package name */
    // Lower-cased, trimmed title segments that cleanTitle(String) drops.
    private static final Set<String> f28098j = new LinkedHashSet<String>() { // from class: de.jetwick.snacktory.ArticleTextExtractor.1
        {
            add("hacker news");
            add("facebook");
        }
    };

    /* renamed from: k, reason: collision with root package name */
    // Shared default formatter; used until setOutputFormatter replaces it.
    private static final OutputFormatter f28099k = new OutputFormatter();

    /* renamed from: c, reason: collision with root package name */
    // Source regex of the "unlikely" pattern (written by setUnlikely).
    private String f28100c;

    /* renamed from: d, reason: collision with root package name */
    // Compiled "unlikely" pattern; a class or id match costs 20 points in c(Element).
    private Pattern f28101d;

    /* renamed from: e, reason: collision with root package name */
    // Source regex of the "positive" pattern (written by setPositive).
    private String f28102e;

    /* renamed from: f, reason: collision with root package name */
    // Compiled "positive" pattern; a class match adds 35 and an id match adds 40 in c(Element).
    private Pattern f28103f;

    /* renamed from: g, reason: collision with root package name */
    // Source regex of the "negative" pattern (written by setNegative).
    private String f28104g;

    /* renamed from: h, reason: collision with root package name */
    // Compiled "negative" pattern; a class or id match costs 50 points in c(Element).
    private Pattern f28105h;

    /* renamed from: l, reason: collision with root package name */
    // Formatter used by extractContent(JResult, String); defaults to the shared instance.
    private OutputFormatter f28106l = f28099k;

    /* loaded from: classes2.dex */
    /**
     * Comparator for {@link ImageResult}s that delegates to the
     * {@code compareTo} of their {@code weight} with the operands swapped,
     * i.e. it sorts in the reverse of the weights' own ordering.
     */
    public class ImageComparator implements Comparator<ImageResult> {
        public ImageComparator() {
        }

        /**
         * Compares the weight of {@code second} against the weight of
         * {@code first}.
         */
        @Override // java.util.Comparator
        public int compare(ImageResult first, ImageResult second) {
            return second.weight.compareTo(first.weight);
        }
    }

    public ArticleTextExtractor() {
        setUnlikely("com(bx|ment|munity)|dis(qus|cuss)|e(xtra|[-]?mail)|foot|header|menu|re(mark|ply)|rss|sh(are|outbox)|sponsora(d|ll|gegate|rchive|ttachment)|(pag(er|ination))|popup|print|login|si(debar|gn|ngle)");
        setPositive("(^(body|content|h?entry|main|page|post|text|blog|story|haupt))|arti(cle|kel)|instapaper_body");
        setNegative("nav($|igation)|user|com(ment|bx)|(^com-)|contact|foot|masthead|(me(dia|ta))|outbrain|promo|related|scroll|(sho(utbox|pping))|sidebar|sponsor|tags|tool|widget|player|disclaimer|toc|infobox|vcard");
    }

    /**
     * Scores a child element from its own text and adds that score to the
     * child's stored score.
     *
     * @param element the child whose score is adjusted
     * @param str     the child's own text
     * @return -30 when the text contains more than five occurrences (in total)
     *         of {@code &quot;}, {@code &lt;}, {@code &gt;} and {@code px};
     *         otherwise the text length divided by 25, rounded
     */
    private int a(Element element, String str) {
        final boolean looksLikeMarkup =
                SHelper.count(str, "&quot;")
                        + SHelper.count(str, "&lt;")
                        + SHelper.count(str, "&gt;")
                        + SHelper.count(str, "px") > 5;

        final int score;
        if (looksLikeMarkup) {
            score = -30;
        } else {
            score = (int) Math.round(str.length() / 25.0d);
        }
        addScore(element, score);
        return score;
    }

    /**
     * Tells whether an image URL is treated as an advertisement.
     *
     * @param str the image source URL
     * @return {@code true} when {@code SHelper.count} finds "ad" at least
     *         twice in the URL
     */
    private boolean a(String str) {
        return 2 <= SHelper.count(str, "ad");
    }

    /**
     * Computes the base weight of an element from its class name, id and
     * inline style.
     *
     * @param element the element to weigh
     * @return the sum of all bonuses and penalties that apply
     */
    private int c(Element element) {
        final String classes = element.className();
        final String id = element.id();
        int weight = 0;

        // Positive pattern: class match is worth 35, id match 40.
        if (this.f28103f.matcher(classes).find()) {
            weight += 35;
        }
        if (this.f28103f.matcher(id).find()) {
            weight += 40;
        }

        // Unlikely pattern: 20 off for each of class and id.
        if (this.f28101d.matcher(classes).find()) {
            weight -= 20;
        }
        if (this.f28101d.matcher(id).find()) {
            weight -= 20;
        }

        // Negative pattern: 50 off for each of class and id.
        if (this.f28105h.matcher(classes).find()) {
            weight -= 50;
        }
        if (this.f28105h.matcher(id).find()) {
            weight -= 50;
        }

        // Inline style matching the hidden/small-font pattern: another 50 off.
        final String style = element.attr(TtmlNode.TAG_STYLE);
        if (style != null && !style.isEmpty() && f28097i.matcher(style).find()) {
            weight -= 50;
        }
        return weight;
    }

    /**
     * Removes every {@code script}, {@code noscript} and
     * {@code TtmlNode.TAG_STYLE} element from the document, in that order.
     *
     * @param document the document to strip (modified in place)
     * @return the same document instance
     */
    private Document j(Document document) {
        final String[] tags = {"script", "noscript", TtmlNode.TAG_STYLE};
        for (String tag : tags) {
            for (Element match : document.getElementsByTag(tag)) {
                match.remove();
            }
        }
        return document;
    }

    /**
     * Computes the total weight of a candidate element: its class/id/style
     * weight, plus one point per ten characters of own text (rounded), plus
     * the weight contributed by its children.
     *
     * @param element the candidate element
     * @return the combined weight
     */
    protected int a(Element element) {
        int weight = c(element);
        weight += (int) Math.round((element.ownText().length() / 100.0d) * 10.0d);
        // Evaluated last: b(Element) also adjusts the children's stored scores.
        weight += b(element);
        return weight;
    }

    /**
     * Determines the document title, trying several sources in order and
     * returning the first non-empty one: the cleaned {@code document.title()},
     * the {@code head title} text, then the {@code title}, {@code og:title}
     * and {@code twitter:title} meta tags.
     *
     * @param document the parsed document
     * @return the title, or the (possibly empty) twitter:title value when
     *         every earlier source is empty
     */
    protected String a(Document document) {
        String title = cleanTitle(document.title());
        if (title.isEmpty()) {
            title = SHelper.innerTrim(document.select("head title").text());
        }
        if (title.isEmpty()) {
            title = SHelper.innerTrim(document.select("head meta[name=title]").attr(Constants.VAST_TRACKER_CONTENT));
        }
        if (title.isEmpty()) {
            title = SHelper.innerTrim(document.select("head meta[property=og:title]").attr(Constants.VAST_TRACKER_CONTENT));
        }
        if (title.isEmpty()) {
            title = SHelper.innerTrim(document.select("head meta[name=twitter:title]").attr(Constants.VAST_TRACKER_CONTENT));
        }
        return title;
    }

    /**
     * Appends an alternative to the current negative pattern and recompiles it.
     *
     * @param str regex fragment to add as a further {@code |} alternative
     * @return this extractor
     */
    public ArticleTextExtractor addNegative(String str) {
        final String combined = this.f28104g + "|" + str;
        setNegative(combined);
        return this;
    }

    /**
     * Appends an alternative to the current positive pattern and recompiles it.
     *
     * @param str regex fragment to add as a further {@code |} alternative
     * @return whatever {@link #setPositive(String)} returns
     */
    public ArticleTextExtractor addPositive(String str) {
        final String combined = this.f28102e + "|" + str;
        return setPositive(combined);
    }

    /**
     * Adds a delta to the score currently stored on the element.
     *
     * @param element the element whose score is updated
     * @param i2      the amount to add (may be negative)
     */
    public void addScore(Element element, int i2) {
        final int updated = getScore(element) + i2;
        setScore(element, updated);
    }

    /**
     * Appends an alternative to the current unlikely pattern and recompiles it.
     *
     * @param str regex fragment to add as a further {@code |} alternative
     * @return whatever {@link #setUnlikely(String)} returns
     */
    public ArticleTextExtractor addUnlikely(String str) {
        final String combined = this.f28100c + "|" + str;
        return setUnlikely(combined);
    }

    /**
     * Computes the weight that an element's direct children contribute and,
     * when the element holds at least two long paragraphs, adjusts the stored
     * scores of those children.
     *
     * <p>Children whose own text is shorter than 20 characters are ignored in
     * the first pass. Tag names are compared exactly; the earlier substring
     * test against {@code "table;li;td;th"} also matched unrelated tags such
     * as {@code a}, {@code b} or {@code i}.
     *
     * @param element the parent element whose children are inspected
     * @return the weight contributed by the children
     */
    protected int b(Element element) {
        final List<String> headingTags = Arrays.asList("h1", "h2", "h3", "h4", "h5", "h6");
        final List<String> tabularTags = Arrays.asList("table", "li", "td", "th");

        int weight = 0;
        int longParagraphs = 0;
        boolean hasCaption = false;

        for (Element child : element.children()) {
            final String ownText = child.ownText();
            final int length = ownText.length();
            if (length < 20) {
                continue;
            }
            if (length > 200) {
                weight += Math.max(50, length / 10);
            }

            final String tag = child.tagName();
            if (tag.equals("h1") || tag.equals("h2")) {
                weight += 30;
            } else if (tag.equals(TtmlNode.TAG_DIV) || tag.equals(TtmlNode.TAG_P)) {
                weight += a(child, ownText);
                if (tag.equals(TtmlNode.TAG_P) && length > 50) {
                    longParagraphs++;
                }
                // equalsIgnoreCase is locale-independent, unlike toLowerCase().
                if ("caption".equalsIgnoreCase(child.className())) {
                    hasCaption = true;
                }
            }
        }

        if (hasCaption) {
            weight += 30;
        }

        if (longParagraphs >= 2) {
            for (Element child : element.children()) {
                final String tag = child.tagName();
                if (headingTags.contains(tag)) {
                    weight += 20;
                } else if (tabularTags.contains(tag)) {
                    addScore(child, -30);
                }
                if (TtmlNode.TAG_P.equals(tag)) {
                    addScore(child, 30);
                }
            }
        }
        return weight;
    }

    /**
     * Determines the canonical URL, returning the first non-empty value of:
     * the {@code rel=canonical} link, the {@code og:url} meta tag, the
     * {@code twitter:url} meta tag.
     *
     * @param document the parsed document
     * @return the URL after {@code SHelper.replaceSpaces}, possibly empty
     */
    protected String b(Document document) {
        String url = SHelper.replaceSpaces(document.select("head link[rel=canonical]").attr("href"));
        if (url.isEmpty()) {
            url = SHelper.replaceSpaces(document.select("head meta[property=og:url]").attr(Constants.VAST_TRACKER_CONTENT));
        }
        if (url.isEmpty()) {
            url = SHelper.replaceSpaces(document.select("head meta[name=twitter:url]").attr(Constants.VAST_TRACKER_CONTENT));
        }
        return url;
    }

    /**
     * Determines the description, returning the first non-empty value of the
     * {@code description}, {@code og:description} and
     * {@code twitter:description} meta tags.
     *
     * @param document the parsed document
     * @return the description after {@code SHelper.innerTrim}, possibly empty
     */
    protected String c(Document document) {
        String description = SHelper.innerTrim(document.select("head meta[name=description]").attr(Constants.VAST_TRACKER_CONTENT));
        if (description.isEmpty()) {
            description = SHelper.innerTrim(document.select("head meta[property=og:description]").attr(Constants.VAST_TRACKER_CONTENT));
        }
        if (description.isEmpty()) {
            description = SHelper.innerTrim(document.select("head meta[name=twitter:description]").attr(Constants.VAST_TRACKER_CONTENT));
        }
        return description;
    }

    /**
     * Cleans a page title by splitting it on {@code |} and dropping unwanted
     * segments.
     *
     * <p>A segment is dropped when its lower-cased, trimmed form is in the
     * ignored-parts set. A segment is also dropped when the number of segments
     * kept so far equals the segment count minus one and the text built so far
     * is longer than that segment. The remaining segments are re-joined with
     * {@code |} and passed through {@code SHelper.innerTrim}.
     *
     * @param str the raw title
     * @return the cleaned title
     */
    public String cleanTitle(String str) {
        // The split result needs a name: the length check below reads it.
        final String[] parts = str.split("\\|");
        final StringBuilder cleaned = new StringBuilder();
        int kept = 0;
        for (String part : parts) {
            if (f28098j.contains(part.toLowerCase().trim())) {
                continue;
            }
            if (kept == parts.length - 1 && cleaned.length() > part.length()) {
                continue;
            }
            if (kept > 0) {
                cleaned.append("|");
            }
            cleaned.append(part);
            kept++;
        }
        return SHelper.innerTrim(cleaned.toString());
    }

    /**
     * Reads the {@code keywords} meta tag and splits it on commas (with
     * surrounding whitespace). A single pair of enclosing square brackets is
     * stripped first.
     *
     * @param document the parsed document
     * @return the keywords, or an empty list when the tag yields nothing
     */
    protected Collection<String> d(Document document) {
        String content = SHelper.innerTrim(document.select("head meta[name=keywords]").attr(Constants.VAST_TRACKER_CONTENT));
        if (content == null) {
            return Collections.emptyList();
        }
        if (content.startsWith("[") && content.endsWith("]")) {
            content = content.substring(1, content.length() - 1);
        }

        final String[] keywords = content.split("\\s*,\\s*");
        final boolean onlyOneBlank = keywords.length == 1 && "".equals(keywords[0]);
        if (keywords.length == 0 || onlyOneBlank) {
            return Collections.emptyList();
        }
        return Arrays.asList(keywords);
    }

    /**
     * Picks the most relevant {@code img} beneath an element and records every
     * considered image in {@code list}.
     *
     * <p>When the element has no images, its parent's images are used
     * instead; an element without a parent yields no candidates rather than a
     * {@link NullPointerException}. Images with an empty {@code src}, or whose
     * {@code src} is classified as an ad, are skipped. Each remaining image is
     * weighted by its {@code height}/{@code width} attributes (plus or minus 20
     * around a threshold of 50), long {@code alt}/{@code title} texts (+20
     * each) and a {@code nofollow} {@code rel} on its parent (-40). The weight
     * is scaled by a factor that halves each time a new best image is found.
     * {@code list} is sorted with {@link ImageComparator} before returning.
     *
     * @param element the content element to search
     * @param list    receives one {@link ImageResult} per considered image
     * @return the best image element, or {@code null} when no image scored
     *         above zero
     */
    public Element determineImageSource(Element element, List<ImageResult> list) {
        Elements candidates = element.select("img");
        if (candidates.isEmpty() && element.parent() != null) {
            candidates = element.parent().select("img");
        }

        double damping = 1.0d;
        int bestWeight = 0;
        Element best = null;

        for (Element img : candidates) {
            final String src = img.attr("src");
            if (src.isEmpty() || a(src)) {
                continue;
            }

            int weight = 0;

            int height = 0;
            try {
                height = Integer.parseInt(img.attr("height"));
                weight += height >= 50 ? 20 : -20;
            } catch (NumberFormatException ignored) {
                // Missing or non-numeric height: no contribution, height stays 0.
            }

            int width = 0;
            try {
                width = Integer.parseInt(img.attr("width"));
                weight += width >= 50 ? 20 : -20;
            } catch (NumberFormatException ignored) {
                // Missing or non-numeric width: no contribution, width stays 0.
            }

            final String alt = img.attr("alt");
            if (alt.length() > 35) {
                weight += 20;
            }

            final String title = img.attr("title");
            if (title.length() > 35) {
                weight += 20;
            }

            boolean noFollow = false;
            final Element parent = img.parent();
            if (parent != null) {
                final String rel = parent.attr("rel");
                if (rel != null && rel.contains("nofollow")) {
                    weight -= 40;
                    noFollow = true;
                }
            }

            final int scaled = (int) (weight * damping);
            if (scaled > bestWeight) {
                bestWeight = scaled;
                best = img;
                // Later images must score proportionally higher to win.
                damping /= 2.0d;
            }

            list.add(new ImageResult(src, Integer.valueOf(scaled), title, height, width, alt, noFollow));
        }

        Collections.sort(list, new ImageComparator());
        return best;
    }

    /**
     * Determines a fallback image URL, returning the first non-empty value of:
     * the {@code og:image} meta tag, the {@code twitter:image} meta tag, the
     * {@code rel=image_src} link, the {@code thumbnail} meta tag.
     *
     * @param document the parsed document
     * @return the URL after {@code SHelper.replaceSpaces}, possibly empty
     */
    protected String e(Document document) {
        String imageUrl = SHelper.replaceSpaces(document.select("head meta[property=og:image]").attr(Constants.VAST_TRACKER_CONTENT));
        if (imageUrl.isEmpty()) {
            imageUrl = SHelper.replaceSpaces(document.select("head meta[name=twitter:image]").attr(Constants.VAST_TRACKER_CONTENT));
        }
        if (imageUrl.isEmpty()) {
            imageUrl = SHelper.replaceSpaces(document.select("link[rel=image_src]").attr("href"));
        }
        if (imageUrl.isEmpty()) {
            imageUrl = SHelper.replaceSpaces(document.select("head meta[name=thumbnail]").attr(Constants.VAST_TRACKER_CONTENT));
        }
        return imageUrl;
    }

    /**
     * Extracts content from an HTML string using this extractor's configured
     * output formatter.
     *
     * @param jResult the result object to fill
     * @param str     the HTML source
     * @return the filled result
     * @throws Exception propagated from the delegated overload
     */
    public JResult extractContent(JResult jResult, String str) throws Exception {
        final OutputFormatter formatter = this.f28106l;
        return extractContent(jResult, str, formatter);
    }

    /**
     * Parses an HTML string with Jsoup and extracts content from the result.
     *
     * @param jResult         the result object to fill
     * @param str             the HTML source; must not be empty
     * @param outputFormatter the formatter used to render the article text
     * @return the filled result
     * @throws IllegalArgumentException when {@code str} is empty
     * @throws Exception                propagated from the delegated overload
     */
    public JResult extractContent(JResult jResult, String str, OutputFormatter outputFormatter) throws Exception {
        if (!str.isEmpty()) {
            final Document parsed = Jsoup.parse(str);
            return extractContent(jResult, parsed, outputFormatter);
        }
        throw new IllegalArgumentException("html string is empty!?");
    }

    /**
     * Fills a result from a parsed document: title, description, canonical
     * URL, main text, images, RSS/video/favicon URLs and keywords.
     *
     * <p>The document is stripped of script/style content first. The best
     * candidate node is then chosen by weight; the scan stops early at the
     * first node weighing more than 200.
     *
     * @param jResult         the result object to fill
     * @param document        the parsed document; must not be {@code null}
     * @param outputFormatter the formatter used to render the article text
     * @return {@code jResult}
     * @throws NullPointerException when {@code document} is {@code null}
     * @throws Exception            declared by the signature
     */
    public JResult extractContent(JResult jResult, Document document, OutputFormatter outputFormatter) throws Exception {
        if (document == null) {
            throw new NullPointerException("missing document");
        }

        jResult.setTitle(a(document));
        jResult.setDescription(c(document));
        jResult.setCanonicalUrl(b(document));
        i(document);

        Element best = null;
        int bestWeight = 0;
        for (Element candidate : getNodes(document)) {
            final int weight = a(candidate);
            if (weight <= bestWeight) {
                continue;
            }
            best = candidate;
            if (weight > 200) {
                // Good enough: stop looking.
                break;
            }
            bestWeight = weight;
        }

        if (best != null) {
            final ArrayList<ImageResult> images = new ArrayList<ImageResult>();
            final Element topImage = determineImageSource(best, images);
            if (topImage != null) {
                jResult.setImageUrl(SHelper.replaceSpaces(topImage.attr("src")));
                jResult.setImages(images);
            }

            final String text = removeTitleFromText(outputFormatter.getFormattedText(best), jResult.getTitle());
            // Only keep text that is longer than the title itself.
            if (text.length() > jResult.getTitle().length()) {
                jResult.setText(text);
            }
        }

        if (jResult.getImageUrl().isEmpty()) {
            jResult.setImageUrl(e(document));
        }
        jResult.setRssUrl(f(document));
        jResult.setVideoUrl(g(document));
        jResult.setFaviconUrl(h(document));
        jResult.setKeywords(d(document));
        return jResult;
    }

    /**
     * Extracts content from an HTML string into a freshly created result.
     *
     * @param str the HTML source
     * @return a new, filled result
     * @throws Exception propagated from the delegated overload
     */
    public JResult extractContent(String str) throws Exception {
        final JResult result = new JResult();
        return extractContent(result, str);
    }

    /**
     * Reads the {@code href} of the {@code rel=alternate} link whose type is
     * {@code application/rss+xml}.
     *
     * @param document the parsed document
     * @return the URL after {@code SHelper.replaceSpaces}
     */
    protected String f(Document document) {
        final Elements alternates = document.select("link[rel=alternate]");
        final Elements feeds = alternates.select("link[type=application/rss+xml]");
        return SHelper.replaceSpaces(feeds.attr("href"));
    }

    /**
     * Reads the {@code og:video} meta tag.
     *
     * @param document the parsed document
     * @return the URL after {@code SHelper.replaceSpaces}
     */
    protected String g(Document document) {
        final Elements videoMeta = document.select("head meta[property=og:video]");
        return SHelper.replaceSpaces(videoMeta.attr(Constants.VAST_TRACKER_CONTENT));
    }

    /**
     * Collects the candidate content nodes in document order and seeds each
     * with an initial score.
     *
     * <p>Every element selected under {@code TtmlNode.TAG_BODY} whose tag name
     * fully matches the node pattern is a candidate. The first candidate gets
     * a score of 100 and each subsequent one gets half the previous value
     * (integer division).
     *
     * @param document the parsed document
     * @return the candidates, in encounter order
     */
    public Collection<Element> getNodes(Document document) {
        final LinkedHashMap<Element, Object> candidates = new LinkedHashMap<Element, Object>(64);
        int seedScore = 100;
        for (Element node : document.select(TtmlNode.TAG_BODY).select("*")) {
            if (!f28096b.matcher(node.tagName()).matches()) {
                continue;
            }
            // Register the node before touching its attributes.
            candidates.put(node, null);
            setScore(node, seedScore);
            seedScore /= 2;
        }
        return candidates.keySet();
    }

    /**
     * Reads the score stored in the element's {@code gravityScore} attribute.
     *
     * @param element the element to inspect
     * @return the parsed score, or 0 when reading or parsing fails for any
     *         reason
     */
    public int getScore(Element element) {
        int score = 0;
        try {
            score = Integer.parseInt(element.attr("gravityScore"));
        } catch (Exception ignored) {
            // Any failure (for example a missing or non-numeric attribute)
            // leaves the default of 0.
        }
        return score;
    }

    /**
     * Determines the favicon URL from the {@code rel=icon} link, falling back
     * to links whose {@code rel} starts with {@code shortcut} or ends with
     * {@code icon}.
     *
     * @param document the parsed document
     * @return the URL after {@code SHelper.replaceSpaces}, possibly empty
     */
    protected String h(Document document) {
        final String icon = SHelper.replaceSpaces(document.select("head link[rel=icon]").attr("href"));
        if (!icon.isEmpty()) {
            return icon;
        }
        return SHelper.replaceSpaces(document.select("head link[rel^=shortcut],link[rel$=icon]").attr("href"));
    }

    /**
     * Prepares the document for scoring. This implementation only strips
     * script, noscript and style elements; the returned document is ignored
     * because the stripping happens in place.
     *
     * @param document the document to prepare (modified in place)
     */
    protected void i(Document document) {
        j(document);
    }

    /**
     * Hook for removing the title from the extracted text. This
     * implementation performs no removal and ignores {@code str2}.
     *
     * @param str  the extracted text
     * @param str2 the title (unused here)
     * @return {@code str}, unchanged
     */
    public String removeTitleFromText(String str, String str2) {
        final String unchanged = str;
        return unchanged;
    }

    /**
     * Replaces the negative pattern.
     *
     * @param str the regex source; stored before it is compiled
     * @return this extractor
     */
    public ArticleTextExtractor setNegative(String str) {
        this.f28104g = str;
        final Pattern compiled = Pattern.compile(str);
        this.f28105h = compiled;
        return this;
    }

    /**
     * Replaces the formatter used by {@code extractContent(JResult, String)}.
     *
     * @param outputFormatter the formatter to use from now on
     */
    public void setOutputFormatter(OutputFormatter outputFormatter) {
        final OutputFormatter replacement = outputFormatter;
        this.f28106l = replacement;
    }

    /**
     * Replaces the positive pattern.
     *
     * @param str the regex source; stored before it is compiled
     * @return this extractor
     */
    public ArticleTextExtractor setPositive(String str) {
        this.f28102e = str;
        final Pattern compiled = Pattern.compile(str);
        this.f28103f = compiled;
        return this;
    }

    /**
     * Stores a score on the element as its {@code gravityScore} attribute.
     *
     * @param element the element to annotate
     * @param i2      the score to store
     */
    public void setScore(Element element, int i2) {
        final String encoded = String.valueOf(i2);
        element.attr("gravityScore", encoded);
    }

    /**
     * Replaces the unlikely pattern.
     *
     * @param str the regex source; stored before it is compiled
     * @return this extractor
     */
    public ArticleTextExtractor setUnlikely(String str) {
        this.f28100c = str;
        final Pattern compiled = Pattern.compile(str);
        this.f28101d = compiled;
        return this;
    }
}
