/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.steps.xmlcharfixing;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.util.IllegalFormatException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.BOMNewlineEncodingDetector;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.exceptions.OkapiIOException;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.steps.xmlcharfixing.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@UsingParameters(value=Parameters.class)
public class XMLCharFixingStep
extends BasePipelineStep {
    private final Logger LOGGER = LoggerFactory.getLogger(this.getClass());
    private final Pattern pattern = Pattern.compile("&#(x?)([0-9a-fA-F]+);");
    private Parameters params = new Parameters();
    private URI outputURI;
    private int count;

    @Override
    public String getDescription() {
        return "Fixes invalid characters in XML documents. Expects: raw document. Sends back: raw document.";
    }

    @Override
    public String getName() {
        return "XML Characters Fixing";
    }

    @Override
    public IParameters getParameters() {
        return this.params;
    }

    @Override
    public void setParameters(IParameters params) {
        this.params = (Parameters)params;
    }

    @StepParameterMapping(parameterType=StepParameterType.OUTPUT_URI)
    public void setOutputURI(URI outputURI) {
        this.outputURI = outputURI;
    }

    public URI getOutputURI() {
        return this.outputURI;
    }

    @Override
    protected Event handleStartBatch(Event event) {
        this.count = 0;
        return event;
    }

    @Override
    protected Event handleEndBatch(Event event) {
        this.LOGGER.info("Number of invalid characters replaced = {}", (Object)this.count);
        return event;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    @Override
    protected Event handleRawDocument(Event event) {
        FileOutputStream output = null;
        BufferedReader reader = null;
        OutputStreamWriter writer = null;
        try {
            String line;
            File outFile;
            RawDocument rd = event.getRawDocument();
            BOMNewlineEncodingDetector detector = new BOMNewlineEncodingDetector(rd.getStream(), rd.getEncoding());
            detector.detectAndRemoveBom();
            rd.setEncoding(detector.getEncoding());
            String lineBreak = detector.getNewlineType().toString();
            reader = new BufferedReader(new InputStreamReader(detector.getInputStream(), rd.getEncoding()));
            if (this.isLastOutputStep()) {
                outFile = rd.createOutputFile(this.outputURI);
            } else {
                try {
                    outFile = File.createTempFile("~okapi-72_okp-xcf_", ".tmp");
                }
                catch (Throwable e) {
                    throw new OkapiIOException("Cannot create temporary output.", e);
                }
            }
            output = new FileOutputStream(outFile);
            writer = new OutputStreamWriter((OutputStream)new BufferedOutputStream(output), rd.getEncoding());
            Util.writeBOMIfNeeded(writer, detector.hasUtf8Bom(), rd.getEncoding());
            StringBuilder tmp = new StringBuilder();
            Matcher m = null;
            while ((line = reader.readLine()) != null) {
                tmp.setLength(0);
                tmp.append(line);
                block23: for (int i = 0; i < tmp.length(); ++i) {
                    int ch = tmp.codePointAt(i);
                    switch (ch) {
                        case 9: 
                        case 10: 
                        case 13: {
                            continue block23;
                        }
                        default: {
                            if (ch >= 32 && ch <= 55295 || ch >= 57344 && ch <= 65535) continue block23;
                            if (ch >= 65536 && ch <= 0x10FFFF) {
                                ++i;
                                continue block23;
                            }
                            String repl = String.format(this.params.getReplacement(), ch);
                            tmp.replace(i, i + (ch > 65535 ? 2 : 1), repl);
                            i += repl.length() - 1;
                            ++this.count;
                            continue block23;
                        }
                    }
                }
                int start = 0;
                while ((m = this.pattern.matcher(tmp.toString())).find(start)) {
                    try {
                        int n = Integer.parseInt(m.group(2), m.group(1).isEmpty() ? 10 : 16);
                        start = m.start();
                        if (!this.isValid(n)) {
                            String repl = String.format(this.params.getReplacement(), n);
                            tmp.replace(start, m.end(), repl);
                            start += repl.length() - m.group().length();
                            ++this.count;
                            continue;
                        }
                        start = m.end();
                    }
                    catch (NumberFormatException e) {
                        this.LOGGER.error("Invalid NCR: '{}'", (Object)m.group());
                    }
                }
                writer.write(tmp.toString() + lineBreak);
            }
            reader.close();
            reader = null;
            writer.close();
            writer = null;
            rd.finalizeOutput();
            event.setResource(new RawDocument(outFile.toURI(), rd.getEncoding(), rd.getSourceLocale(), rd.getTargetLocale()));
        }
        catch (IllegalFormatException e) {
            this.LOGGER.error("Invalid replacement format: '{}'", (Object)this.params.getReplacement());
        }
        catch (Exception e) {
            this.LOGGER.error("Error while processing XML for invalid characters.");
        }
        finally {
            try {
                if (writer != null) {
                    writer.close();
                }
                if (reader != null) {
                    reader.close();
                }
            }
            catch (IOException e) {
                throw new OkapiIOException("IO error while closing.", e);
            }
        }
        return event;
    }

    private boolean isValid(int value) {
        switch (value) {
            case 9: 
            case 10: 
            case 13: {
                return true;
            }
        }
        if (value >= 32 && value <= 55295) {
            return true;
        }
        if (value >= 57344 && value <= 65535) {
            return true;
        }
        return value >= 65536 && value <= 0x10FFFF;
    }
}

