/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.csv;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.config.Field;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractEncodingDetectorParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.csv.CSVParams;
import org.apache.tika.parser.csv.CSVResult;
import org.apache.tika.parser.csv.CSVSniffer;
import org.apache.tika.parser.csv.TextAndCSVConfig;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * Parser for {@code text/plain}, {@code text/csv} and {@code text/tsv} content.
 *
 * <p>Charset, media type and delimiter are taken from the user override in the metadata
 * ({@link TikaCoreProperties#CONTENT_TYPE_USER_OVERRIDE}) when present; whatever is missing
 * is filled in by {@link #detect}. Delimited content is emitted as an XHTML table, anything
 * else as a single paragraph of text.
 */
public class TextAndCSVParser
extends AbstractEncodingDetectorParser {
    static final MediaType CSV = MediaType.text("csv");
    static final MediaType TSV = MediaType.text("tsv");
    private static final String CSV_PREFIX = "csv";
    /** Name of the media type parameter that carries the charset. */
    private static final String CHARSET = "charset";
    /** Name of the media type parameter that carries the delimiter. */
    private static final String DELIMITER = "delimiter";
    /** Metadata property holding the configured name of the delimiter that was used. */
    public static final Property DELIMITER_PROPERTY = Property.externalText("csv:delimiter");
    /** Metadata property holding the number of cells in the first row. */
    public static final Property NUM_COLUMNS = Property.externalInteger("csv:num_columns");
    /** Metadata property holding the number of rows that were written. */
    public static final Property NUM_ROWS = Property.externalInteger("csv:num_rows");
    private static final String TD = "td";
    private static final String TR = "tr";
    private static final String TABLE = "table";
    private static final int DEFAULT_MARK_LIMIT = 20000;
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(CSV, TSV, MediaType.TEXT_PLAIN)));
    /** Passed to {@link CSVSniffer} as its mark limit. */
    @Field
    private int markLimit = DEFAULT_MARK_LIMIT;
    /** Passed to {@link CSVSniffer} as its minimum confidence. */
    @Field
    private double minConfidence = 0.5;
    /** Configuration used when the {@link ParseContext} does not supply one. */
    private final TextAndCSVConfig defaultTextAndCSVConfig = new TextAndCSVConfig();

    public TextAndCSVParser() {
    }

    public TextAndCSVParser(EncodingDetector encodingDetector) {
        super(encodingDetector);
    }

    /**
     * Copies everything that remains in {@code reader} into a single {@code <p>} element.
     *
     * @param reader source of the text; read until end of stream, not closed here
     * @param xhtml  handler receiving the paragraph
     */
    private static void handleText(Reader reader, XHTMLContentHandler xhtml) throws SAXException, IOException {
        xhtml.startElement("p");
        char[] buffer = new char[4096];
        int n;
        while ((n = reader.read(buffer)) != -1) {
            xhtml.characters(buffer, 0, n);
        }
        xhtml.endElement("p");
    }

    /**
     * @param mediaType media type to test, may be {@code null}
     * @return {@code true} if the base type of {@code mediaType} is CSV or TSV
     */
    static boolean isCSVOrTSV(MediaType mediaType) {
        if (mediaType == null) {
            return false;
        }
        MediaType baseType = mediaType.getBaseType();
        return TSV.equals(baseType) || CSV.equals(baseType);
    }

    @Override
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses the stream as delimited data or as plain text.
     *
     * <p>The stream is always wrapped in a {@link CloseShieldInputStream} before a reader is
     * built on it, so closing the CSV parser never closes the caller's stream.
     *
     * @throws TikaException if the CSV parser fails while iterating the records
     */
    @Override
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
        TextAndCSVConfig textAndCSVConfig = context.get(TextAndCSVConfig.class, this.defaultTextAndCSVConfig);
        CSVParams params = this.getOverride(metadata, textAndCSVConfig);
        Reader reader;
        Charset charset;
        if (params.isComplete()) {
            // Shield the caller's stream here too, exactly as detect() does.
            reader = new BufferedReader(new InputStreamReader(CloseShieldInputStream.wrap(stream), params.getCharset()));
            charset = params.getCharset();
        } else {
            reader = this.detect(params, textAndCSVConfig, stream, metadata, context);
            // detect() only leaves the charset unset on the path that returns an AutoDetectReader.
            charset = params.getCharset() != null ? params.getCharset() : ((AutoDetectReader) reader).getCharset();
        }
        this.updateMetadata(params, metadata, textAndCSVConfig);
        if (!TextAndCSVParser.isCSVOrTSV(params.getMediaType())) {
            this.handleText(reader, charset, handler, metadata);
            return;
        }
        this.handleCSV(reader, params, textAndCSVConfig, handler, metadata);
    }

    /**
     * Writes the delimited content of {@code reader} as an XHTML table and records the
     * delimiter name, column count of the first row and total row count in the metadata.
     */
    private void handleCSV(Reader reader, CSVParams params, TextAndCSVConfig textAndCSVConfig, ContentHandler handler, Metadata metadata) throws IOException, SAXException, TikaException {
        char delimiter = params.getDelimiter().charValue();
        CSVFormat csvFormat = CSVFormat.EXCEL.builder().setDelimiter(delimiter).get();
        // The lookup yields null for a delimiter that has no configured name.
        metadata.set(DELIMITER_PROPERTY, textAndCSVConfig.getDelimiterToNameMap().get(Character.valueOf(delimiter)));
        XHTMLContentHandler xhtmlContentHandler = new XHTMLContentHandler(handler, metadata);
        int totalRows = 0;
        try (CSVParser commonsParser = CSVParser.builder().setReader(reader).setFormat(csvFormat).get()) {
            xhtmlContentHandler.startDocument();
            xhtmlContentHandler.startElement(TABLE);
            int firstRowColCount = 0;
            try {
                for (CSVRecord row : commonsParser) {
                    xhtmlContentHandler.startElement(TR);
                    for (String cell : row) {
                        if (totalRows == 0) {
                            ++firstRowColCount;
                        }
                        xhtmlContentHandler.startElement(TD);
                        xhtmlContentHandler.characters(cell);
                        xhtmlContentHandler.endElement(TD);
                    }
                    xhtmlContentHandler.endElement(TR);
                    if (totalRows == 0) {
                        metadata.set(NUM_COLUMNS, firstRowColCount);
                    }
                    ++totalRows;
                }
                metadata.set(NUM_ROWS, totalRows);
            } catch (UncheckedIOException e) {
                Throwable cause = e.getCause();
                boolean encapsulationError = cause != null && cause.getMessage() != null && cause.getMessage().contains("encapsulated");
                if (encapsulationError) {
                    // Close the table, dump whatever is left in the reader as text, then report the failure.
                    xhtmlContentHandler.endElement(TABLE);
                    xhtmlContentHandler.startElement("div", "name", "after exception");
                    TextAndCSVParser.handleText(reader, xhtmlContentHandler);
                    xhtmlContentHandler.endElement("div");
                    xhtmlContentHandler.endDocument();
                    throw new TikaException("exception parsing the csv", e);
                }
                throw new TikaException("exception parsing the csv", cause != null ? cause : e);
            }
            xhtmlContentHandler.endElement(TABLE);
            xhtmlContentHandler.endDocument();
        }
    }

    /**
     * Emits the content of {@code reader} as plain text and stamps the metadata with the
     * charset: Content-Type becomes the incoming type (or {@code text/plain} if absent or
     * unparseable) combined with {@code charset}, and Content-Encoding becomes its name.
     */
    private void handleText(Reader reader, Charset charset, ContentHandler handler, Metadata metadata) throws SAXException, IOException, TikaException {
        MediaType mediaType = MediaType.TEXT_PLAIN;
        String incomingMime = metadata.get("Content-Type");
        if (incomingMime != null) {
            MediaType parsed = MediaType.parse(incomingMime);
            if (parsed != null) {
                mediaType = parsed;
            }
        }
        MediaType type = new MediaType(mediaType, charset);
        metadata.set("Content-Type", type.toString());
        metadata.set("Content-Encoding", charset.name());
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        TextAndCSVParser.handleText(reader, xhtml);
        xhtml.endDocument();
    }

    /**
     * Fills in the parts of {@code params} that the override did not provide and returns a
     * reader over the (close-shielded) stream.
     *
     * <ul>
     *   <li>If the metadata Content-Type parses to a type outside the supported set, that
     *       type is stored in {@code params} and an {@link AutoDetectReader} is returned
     *       without any sniffing.</li>
     *   <li>Otherwise a missing charset is taken from an {@link AutoDetectReader}.</li>
     *   <li>If the delimiter is still unknown and the media type is unset or CSV/TSV, the
     *       media type and delimiter are taken from {@link CSVSniffer#getBest}.</li>
     * </ul>
     */
    private Reader detect(CSVParams params, TextAndCSVConfig textAndCSVConfig, InputStream stream, Metadata metadata, ParseContext context) throws IOException, TikaException {
        String mediaString = metadata.get("Content-Type");
        if (mediaString != null) {
            // MediaType.parse yields null for strings it cannot parse; treat that like a missing type.
            MediaType declared = MediaType.parse(mediaString);
            if (declared != null && !SUPPORTED_TYPES.contains(declared.getBaseType())) {
                params.setMediaType(declared);
                return new AutoDetectReader(CloseShieldInputStream.wrap(stream), metadata, this.getEncodingDetector(context));
            }
        }
        BufferedReader reader;
        if (params.getCharset() == null) {
            AutoDetectReader autoDetectReader = new AutoDetectReader(CloseShieldInputStream.wrap(stream), metadata, this.getEncodingDetector(context));
            params.setCharset(autoDetectReader.getCharset());
            if (params.isComplete()) {
                return autoDetectReader;
            }
            reader = autoDetectReader;
        } else {
            reader = new BufferedReader(new InputStreamReader(CloseShieldInputStream.wrap(stream), params.getCharset()));
        }
        boolean mayBeDelimited = params.getMediaType() == null || TextAndCSVParser.isCSVOrTSV(params.getMediaType());
        if (params.getDelimiter() == null && mayBeDelimited) {
            CSVSniffer sniffer = new CSVSniffer(this.markLimit, textAndCSVConfig.getDelimiterToNameMap().keySet(), this.minConfidence);
            CSVResult result = sniffer.getBest(reader, metadata);
            params.setMediaType(result.getMediaType());
            params.setDelimiter(result.getDelimiter());
        }
        return reader;
    }

    /**
     * Builds the initial {@link CSVParams} from the user's content-type override.
     *
     * <p>The charset comes from the override's {@code charset} parameter; a name that is
     * malformed or unsupported is ignored. For CSV/TSV overrides, the {@code delimiter}
     * parameter is resolved first as a configured delimiter name and then, if it is exactly
     * one character long, as the literal delimiter.
     *
     * @return empty params if there is no parseable override, otherwise params carrying
     *         whatever the override specified
     */
    private CSVParams getOverride(Metadata metadata, TextAndCSVConfig textAndCSVConfig) {
        String override = metadata.get(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE);
        if (override == null) {
            return new CSVParams();
        }
        MediaType mediaType = MediaType.parse(override);
        if (mediaType == null) {
            return new CSVParams();
        }
        Charset charset = null;
        String charsetString = mediaType.getParameters().get(CHARSET);
        if (charsetString != null) {
            try {
                charset = Charset.forName(charsetString);
            } catch (IllegalArgumentException ignored) {
                // Covers both IllegalCharsetNameException and UnsupportedCharsetException:
                // an unusable charset name is treated as "no charset override".
            }
        }
        if (!TextAndCSVParser.isCSVOrTSV(mediaType)) {
            return new CSVParams(mediaType, charset);
        }
        String delimiterName = mediaType.getParameters().get(DELIMITER);
        if (delimiterName == null) {
            return new CSVParams(mediaType, charset);
        }
        Character namedDelimiter = textAndCSVConfig.getNameToDelimiterMap().get(delimiterName);
        if (namedDelimiter != null) {
            return new CSVParams(mediaType, charset, namedDelimiter);
        }
        if (delimiterName.length() == 1) {
            return new CSVParams(mediaType, charset, Character.valueOf(delimiterName.charAt(0)));
        }
        return new CSVParams(mediaType, charset);
    }

    /**
     * Writes the resolved Content-Type (with {@code charset} and {@code delimiter}
     * parameters where known) and Content-Encoding into the metadata.
     *
     * <p>The media type is {@code text/plain} if the params say so, otherwise TSV or CSV
     * depending on the delimiter, otherwise the existing Content-Type. If none of these
     * yields a type, the params' media type is used, and {@code text/plain} as a last resort.
     */
    private void updateMetadata(CSVParams params, Metadata metadata, TextAndCSVConfig textAndCSVConfig) {
        MediaType paramsType = params.getMediaType();
        Character delimiter = params.getDelimiter();
        MediaType mediaType = null;
        if (paramsType != null && MediaType.TEXT_PLAIN.equals(paramsType.getBaseType())) {
            mediaType = MediaType.TEXT_PLAIN;
        } else if (delimiter != null) {
            mediaType = delimiter.charValue() == '\t' ? TSV : CSV;
        } else if (metadata.get("Content-Type") != null) {
            mediaType = MediaType.parse(metadata.get("Content-Type"));
        }
        if (mediaType == null) {
            // Missing or unparseable Content-Type: never hand a null type to the MediaType constructor.
            mediaType = paramsType != null ? paramsType : MediaType.TEXT_PLAIN;
        }
        Map<String, String> attrs = new HashMap<>();
        if (params.getCharset() != null) {
            attrs.put(CHARSET, params.getCharset().name());
            metadata.set("Content-Encoding", params.getCharset().name());
        }
        if (!MediaType.TEXT_PLAIN.equals(mediaType) && delimiter != null) {
            String delimiterName = textAndCSVConfig.getDelimiterToNameMap().get(delimiter);
            if (textAndCSVConfig.getDelimiterToNameMap().containsKey(delimiter)) {
                attrs.put(DELIMITER, delimiterName);
            } else {
                // Delimiters without a configured name are written as their numeric char code.
                attrs.put(DELIMITER, Integer.toString(delimiter.charValue()));
            }
        }
        MediaType type = new MediaType(mediaType, attrs);
        metadata.set("Content-Type", type.toString());
    }

    /**
     * Replaces the name-to-delimiter mapping of the default configuration.
     *
     * @param map delimiter name to delimiter; every value must be exactly one character
     * @throws TikaConfigException if a value is {@code null}, empty or longer than one character
     */
    @Field
    public void setNameToDelimiterMap(Map<String, String> map) throws TikaConfigException {
        HashMap<String, Character> m = new HashMap<>();
        for (Map.Entry<String, String> e : map.entrySet()) {
            String value = e.getValue();
            if (value == null || value.length() != 1) {
                throw new TikaConfigException("delimiter must be a single character: " + value);
            }
            m.put(e.getKey(), Character.valueOf(value.charAt(0)));
        }
        this.defaultTextAndCSVConfig.setNameToDelimiterMap(m);
    }
}

