package org.apache.tika.parser.microsoft.xml;

import com.healthmarketscience.jackcess.PropertyMap;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.fontbox.afm.AFMParser;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.WriteLimitReachedException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.xml.ElementMetadataHandler;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.TaggedContentHandler;
import org.apache.tika.sax.TeeContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.utils.XMLReaderUtils;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: input_file:BOOT-INF/lib/tika-parser-microsoft-module-2.9.0.jar:org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.class */
/**
 * Abstract SAX-based parser that reads an XML stream and routes the SAX events into a set of
 * {@link ElementMetadataHandler}s bound to the {@code urn:schemas-microsoft-com:office:office}
 * namespace, one per document-property element ("Title", "Author", "Pages", ...).
 *
 * <p>Subclasses supply the content type through {@link #setContentType(Metadata)} and may
 * override {@link #getContentHandler(ContentHandler, Metadata, ParseContext)} to change the
 * handler chain that receives the SAX events.
 *
 * <p>The package-private constants below are not all referenced inside this class;
 * presumably they are shared with sibling handlers in this package (not visible here).
 */
public abstract class AbstractXML2003Parser extends AbstractParser {
    // XML namespace identifiers. The first two intentionally carry the same value.
    static final String MS_OFFICE_PROPERTIES_URN = "urn:schemas-microsoft-com:office:office";
    static final String MS_DOC_PROPERTIES_URN = "urn:schemas-microsoft-com:office:office";
    static final String MS_SPREADSHEET_URN = "urn:schemas-microsoft-com:office:spreadsheet";
    static final String MS_VML_URN = "urn:schemas-microsoft-com:vml";
    static final String WORD_ML_URL = "http://schemas.microsoft.com/office/word/2003/wordml";

    // Element / attribute names of the source XML (presumably; they are not used in this class).
    static final String DOCUMENT_PROPERTIES = "DocumentProperties";
    static final String PICT = "pict";
    static final String BIN_DATA = "binData";

    // Lower-case names that look like XHTML output element/attribute names.
    static final String A = "a";
    static final String BODY = "body";
    static final String BR = "br";
    static final String CDATA = "cdata";
    static final String DIV = "div";
    static final String HREF = "href";
    static final String IMG = "img";
    static final String P = "p";
    static final String TD = "td";
    static final String TR = "tr";
    static final String TABLE = "table";
    static final String TBODY = "tbody";

    static final String HLINK = "hlink";
    static final String HLINK_DEST = "dest";
    static final String NAME_ATTR = "name";

    // Shared empty attribute list and a single-newline character buffer.
    static final Attributes EMPTY_ATTRS = new AttributesImpl();
    static final char[] NEWLINE = {'\n'};

    /**
     * Builds a handler bound to one element of the document-properties namespace.
     *
     * @param metadata metadata object handed to the handler
     * @param property metadata property associated with the element
     * @param element  local name of the element inside {@link #MS_DOC_PROPERTIES_URN}
     * @return a new {@link ElementMetadataHandler} for that element/property pair
     */
    private static ContentHandler getMSPropertiesHandler(Metadata metadata, Property property, String element) {
        return new ElementMetadataHandler(MS_DOC_PROPERTIES_URN, element, metadata, property);
    }

    /**
     * Parses the XML stream, feeding its SAX events to the handler returned by
     * {@link #getContentHandler(ContentHandler, Metadata, ParseContext)}.
     *
     * <p>{@code startDocument()} is called on the XHTML handler before parsing and
     * {@code endDocument()} is called exactly once afterwards, whether or not parsing succeeded.
     *
     * @param inputStream    XML input; it is wrapped in a close shield before being given to
     *                       the SAX parser, so the caller keeps responsibility for closing it
     * @param contentHandler receiver of the XHTML output events
     * @param metadata       metadata to populate; also passed to {@link #setContentType(Metadata)}
     * @param parseContext   parse context forwarded to {@code getContentHandler}
     * @throws IOException   if reading the stream fails
     * @throws SAXException  if {@code startDocument()}/{@code endDocument()} fails, or if the
     *                       write-limit check rethrows the parse failure
     * @throws TikaException if the SAX parse fails with a {@link SAXException} that the
     *                       write-limit check does not rethrow ("XML parse error"), or if no
     *                       SAX parser can be obtained
     */
    @Override
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        setContentType(metadata);

        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();
        try {
            // A SAX parser is requested per call rather than cached on the instance.
            XMLReaderUtils.getSAXParser().parse(
                    CloseShieldInputStream.wrap(inputStream),
                    new EmbeddedContentHandler(
                            getContentHandler(new TaggedContentHandler(xhtml), metadata, parseContext)));
        } catch (SAXException e) {
            // Give the write-limit check the first chance to rethrow; anything else is
            // reported as a generic parse error with the original exception as cause.
            WriteLimitReachedException.throwIfWriteLimitReached(e);
            throw new TikaException("XML parse error", e);
        } finally {
            // Kept outside the catch above on purpose: a SAXException raised while closing
            // the document must not be mislabelled as an XML parse error, and the document
            // must not be closed a second time.
            xhtml.endDocument();
        }
    }

    /**
     * Creates the handler that receives the SAX events of the parsed document.
     *
     * <p>This base implementation returns a {@link TeeContentHandler} holding one properties
     * handler per supported document property. It does not use {@code contentHandler} or
     * {@code parseContext}; they are available to overriding subclasses.
     *
     * <p>Declared {@code public} in this (decompiled) source; the decompiler note on the
     * original indicates the method was {@code protected}.
     *
     * @param contentHandler handler wrapping the XHTML output (unused here)
     * @param metadata       metadata object given to every properties handler
     * @param parseContext   parse context (unused here)
     * @return the combined handler
     */
    public ContentHandler getContentHandler(ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
        return new TeeContentHandler(
                getMSPropertiesHandler(metadata, TikaCoreProperties.TITLE, "Title"),
                getMSPropertiesHandler(metadata, TikaCoreProperties.CREATOR, "Author"),
                getMSPropertiesHandler(metadata, Office.LAST_AUTHOR, "LastAuthor"),
                getMSPropertiesHandler(metadata, OfficeOpenXMLCore.REVISION, "Revision"),
                getMSPropertiesHandler(metadata, OfficeOpenXMLExtended.TOTAL_TIME, "TotalTime"),
                getMSPropertiesHandler(metadata, TikaCoreProperties.CREATED, "Created"),
                getMSPropertiesHandler(metadata, Office.SAVE_DATE, "LastSaved"),
                getMSPropertiesHandler(metadata, Office.PAGE_COUNT, "Pages"),
                getMSPropertiesHandler(metadata, Office.WORD_COUNT, "Words"),
                getMSPropertiesHandler(metadata, Office.CHARACTER_COUNT, "Characters"),
                getMSPropertiesHandler(metadata, Office.CHARACTER_COUNT_WITH_SPACES, "CharactersWithSpaces"),
                getMSPropertiesHandler(metadata, OfficeOpenXMLExtended.COMPANY, "Company"),
                getMSPropertiesHandler(metadata, Office.LINE_COUNT, "Lines"),
                getMSPropertiesHandler(metadata, Office.PARAGRAPH_COUNT, "Paragraphs"),
                getMSPropertiesHandler(metadata, OfficeOpenXMLCore.VERSION, "Version"));
    }

    /**
     * Sets the content type of the document being parsed on the given metadata.
     * Called at the very start of {@code parse}, before any handler is created.
     *
     * @param metadata metadata object to update
     */
    protected abstract void setContentType(Metadata metadata);
}
