package net.kafujo.samples.wikidata;

import net.kafujo.samples.wikidata.parser.*;
import net.kafujo.units.Location;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.*;

/**
 * Immutable description of a single city, built from one record of a Wikidata CSV export.
 *
 * <p>Identity is defined by the Wikidata id alone: {@link #equals(Object)}, {@link #hashCode()} and
 * {@link #compareTo(WikidataCity)} only look at {@link #getWikidataId()}. Id, name and country are
 * mandatory; every other reference attribute may be {@code null} when the value is not available.
 */
public class WikidataCity implements Comparable<WikidataCity> {

    private static final Logger lgr = LoggerFactory.getLogger(WikidataCity.class);

    /** Wikidata identifier; never {@code null}. The only attribute used for equality and ordering. */
    private final String wikidataId;

    /** City name; never {@code null}. */
    private final String name;

    /** Country name; never {@code null}. */
    private final String country;

    /** Website of the city, or {@code null} if unknown. */
    private final URL website;

    /** Population, or {@code null} if unknown. */
    private final Long population;

    /** Whether the city is a capital. */
    private final boolean capital;

    /** Inception date, or {@code null} if unknown. */
    private final LocalDate inception;

    /** Elevation, or {@code null} if unknown. */
    private final Integer elevation;

    /** Geographic location, or {@code null} if unknown. */
    private final Location location;

    /** Time zone, or {@code null} if unknown. */
    private final ZoneId timeZone;

    /**
     * Creates a city from already parsed values.
     *
     * @param wikidataId identifier of the city, must not be {@code null}
     * @param name       name of the city, must not be {@code null}
     * @param country    name of the country, must not be {@code null}
     * @param website    website, may be {@code null}
     * @param population population, may be {@code null}
     * @param capital    {@code true} if the city is a capital
     * @param inception  inception date, may be {@code null}
     * @param elevation  elevation, may be {@code null}
     * @param location   location, may be {@code null}
     * @param timeZone   time zone, may be {@code null}
     * @throws NullPointerException if {@code wikidataId}, {@code name} or {@code country} is {@code null}
     */
    public WikidataCity(String wikidataId, String name, String country, URL website, Long population, boolean capital,
                        LocalDate inception, Integer elevation, Location location, ZoneId timeZone) {
        this.wikidataId = Objects.requireNonNull(wikidataId, "wikidataId");
        this.name = Objects.requireNonNull(name, "name");
        this.country = Objects.requireNonNull(country, "country");
        this.website = website;
        this.population = population;
        this.capital = capital;
        this.inception = inception;
        this.elevation = elevation;
        this.location = location;
        this.timeZone = timeZone;
    }


    /**
     * Reads a whole CSV stream into a set of cities. The stream is decoded as UTF-8 and is closed when
     * this method returns.
     *
     * <p>The first record is used as header, lines starting with {@code #} are treated as comments and
     * empty lines are skipped. Records which {@link #from(CSVRecord)} cannot convert are logged and
     * skipped. Because {@link #equals(Object)} uses only the Wikidata id, only the first record of each
     * id ends up in the result; later records with the same id are counted as duplicates.
     *
     * @param is stream to read from, must not be {@code null}; closed by this method
     * @return unordered set containing one city per Wikidata id
     * @throws UncheckedIOException if an {@link IOException} is raised while parsing or closing the stream
     */
    public static Set<WikidataCity> read(final InputStream is) {
        Objects.requireNonNull(is, "is");
        var cities = new HashSet<WikidataCity>();
        int count = 0;
        int ignoredForDuplicate = 0;
        int ignoredForCorruption = 0;
        // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
        try (is; var reader = new InputStreamReader(is, StandardCharsets.UTF_8)) {
            for (CSVRecord record : CSVFormat.RFC4180.withHeader().withCommentMarker('#').withIgnoreEmptyLines().parse(reader)) {
                count++;
                try {
                    if (!cities.add(from(record))) {
                        ignoredForDuplicate++;
                    }
                } catch (Exception rt) {
                    // Deliberate best effort: a single broken record must not abort the whole import.
                    lgr.info("Ignore {}", record);
                    // toString() is passed explicitly; handing over the Throwable itself would select the
                    // (String, Throwable) overload and leave the placeholder unresolved.
                    lgr.info("  Because of: {}", rt.toString());
                    ignoredForCorruption++;
                }
            }
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        lgr.info("IGNORED {} of total {}", ignoredForCorruption + ignoredForDuplicate, count);
        lgr.info("   corrupt datasets: {}", ignoredForCorruption);
        lgr.info("   duplicate ids:    {}", ignoredForDuplicate);
        return cities;
    }

    /**
     * Builds a city from one CSV record.
     *
     * <p>The columns {@code city}, {@code cityLabel} and {@code countryLabel} are taken as they are and
     * are mandatory. The columns {@code websiteLabel}, {@code inception}, {@code population},
     * {@code elevation}, {@code location} and {@code timezoneLabel} are handed to the matching parser
     * with a fallback of {@code null} (presumably returned when the value cannot be parsed; confirm
     * against the parser package). The city is a capital when the column {@code capitalOf} is not blank.
     *
     * @param rec data to build a city object
     * @return a city object, if the data in {@code rec} was ok
     * @throws NullPointerException  if the id, name or country obtained from {@code rec} is {@code null}
     * @throws RuntimeException      when something else goes wrong, so callers can easily ignore the
     *                               dataset (see {@link #read(InputStream)})
     * @throws MalformedURLException declared by the signature; presumably raised by the website parser.
     *                               Confirm against {@code UrlParser}
     */
    public static WikidataCity from(CSVRecord rec) throws MalformedURLException {
        final String wikidataId = rec.get("city");
        final String name = rec.get("cityLabel");
        final String country = rec.get("countryLabel");
        final URL website = UrlParser.DEFAULT.parseFbk(rec.get("websiteLabel"), null);

        final LocalDate inception = LocalDateParser.DEFAULT.parseFbk(rec.get("inception"), null);
        final Long population = LongParser.DEFAULT.parseFbk(rec.get("population"), null);
        final Integer elevation = IntParser.DEFAULT.parseFbk(rec.get("elevation"), null);
        final boolean capital = !rec.get("capitalOf").isBlank();

        final Location location = LocationParser.DEFAULT.parseFbk(rec.get("location"), null);
        final ZoneId timezone = TimeZoneParser.DEFAULT.parseFbk(rec.get("timezoneLabel"), null);

        return new WikidataCity(wikidataId, name, country, website, population, capital, inception,
                elevation, location, timezone);
    }


    /**
     * Collects all cities with the given name, ignoring case.
     *
     * @param set  cities to search
     * @param name name to look for, must not be {@code null}
     * @return all matching cities in iteration order of {@code set}; empty if there is no match
     */
    public static List<WikidataCity> byName(Collection<WikidataCity> set, String name) {
        List<WikidataCity> collect = new ArrayList<>();
        for (var city : set) {
            if (name.equalsIgnoreCase(city.getName())) {
                collect.add(city);
            }
        }
        return collect;
    }

    /**
     * Finds the first city with the given Wikidata id, ignoring case.
     *
     * @param set cities to search
     * @param id  Wikidata id to look for, must not be {@code null}
     * @return the first matching city in iteration order of {@code set}, or an empty optional
     */
    public static Optional<WikidataCity> byId(Collection<WikidataCity> set, String id) {
        for (var city : set) {
            if (id.equalsIgnoreCase(city.getWikidataId())) {
                return Optional.of(city);
            }
        }
        return Optional.empty();
    }


    /**
     * @return name, country and Wikidata id in the form {@code name, country [id]}
     */
    @Override
    public String toString() {
        return name + ", " + country + " [" + wikidataId + "]";
    }

    /**
     * Two cities are equal when they are of the same class and share the same Wikidata id.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        WikidataCity that = (WikidataCity) o;
        return wikidataId.equals(that.wikidataId);
    }

    /**
     * Hash code derived from the Wikidata id only, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return Objects.hash(wikidataId);
    }

    /**
     * Orders cities by the natural {@link String} order of their Wikidata ids.
     */
    @Override
    public int compareTo(WikidataCity o) {
        return wikidataId.compareTo(o.wikidataId);
    }

    /** @return the Wikidata id, never {@code null} */
    public String getWikidataId() {
        return wikidataId;
    }

    /** @return the city name, never {@code null} */
    public String getName() {
        return name;
    }

    /** @return the country name, never {@code null} */
    public String getCountry() {
        return country;
    }

    /** @return the inception date, or {@code null} if unknown */
    public LocalDate getInception() {
        return inception;
    }

    /** @return {@code true} if the city is a capital */
    public boolean isCapital() {
        return capital;
    }

    /** @return the population, or {@code null} if unknown */
    public Long getPopulation() {
        return population;
    }

    /** @return the elevation, or {@code null} if unknown */
    public Integer getElevation() {
        return elevation;
    }

    /** @return the website, or {@code null} if unknown */
    public URL getWebsite() {
        return website;
    }

    /** @return the location, or {@code null} if unknown */
    public Location getLocation() {
        return location;
    }

    /** @return the time zone, or {@code null} if unknown */
    public ZoneId getTimeZone() {
        return timeZone;
    }
}
