package jlama;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import javax.xml.parsers.DocumentBuilderFactory;
import java.io.IOException;
import java.io.StringReader;
import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;

/**
 * Loads article metadata from the arXiv Atom query API.
 *
 * <p>Every HTTP request issued through this class is spaced at least
 * {@link #MINIMUM_REQUEST_INTERVAL_MS} milliseconds after the previous one.
 * The request timestamp is a plain field with no synchronization, so an
 * instance should not be shared between threads without external locking.
 */
public class ArticleLoader {
    private static final String ARXIV_API = "https://export.arxiv.org/api/query";
    private static final int MINIMUM_REQUEST_INTERVAL_MS = 3000;

    private final HttpClient httpClient = HttpClient.newBuilder()
            .connectTimeout(Duration.ofSeconds(10))
            .build();

    /** Wall-clock time (ms) at which the most recent request finished; 0 before the first request. */
    private long lastRequestTime;

    /**
     * Queries arXiv for articles matching {@code all:llm}, sorted by relevance.
     *
     * @param start      zero-based index of the first result to return
     * @param maxResults maximum number of results to request
     * @return the articles that carried an id, a title and a summary; an empty
     *         list when the server did not answer with status 200
     * @throws IOException if the request fails or the response is not parseable XML
     */
    public List<Article> loadArticles(int start, int maxResults) throws IOException {
        String url = ARXIV_API
                     + "?search_query=all:llm"
                     + "&start=" + start
                     + "&max_results=" + maxResults
                     + "&sortBy=relevance";
        String body = httpGet(url);
        if (body == null) {
            System.err.println("ERROR: No articles found.");
            // Back off before the caller gets a chance to retry.
            sleep(MINIMUM_REQUEST_INTERVAL_MS);
            return List.of();
        }

        Document doc = parseXml(body);
        NodeList entries = doc.getElementsByTagName("entry");
        List<Article> articles = new ArrayList<>();
        for (int i = 0; i < entries.getLength(); i++) {
            if (entries.item(i) instanceof Element entry) {
                Article article = toArticle(entry);
                if (article != null) {
                    articles.add(article);
                }
            }
        }
        return articles;
    }

    /**
     * Asks arXiv how many results exist for {@code all:<keyword>}.
     *
     * @param keyword the search term; it is URL-encoded before being sent
     * @return the total result count reported by the server, or {@code -1} when
     *         the request fails, the count element is missing, or it is not a number
     */
    public int getKeywordResultCount(String keyword) {
        try {
            String queryResult = httpGet(ARXIV_API
                                         + "?search_query=" + URLEncoder.encode("all:" + keyword, StandardCharsets.UTF_8)
                                         + "&start=0&max_results=1"
            );
            if (queryResult == null) {
                return -1;
            }
            Document document = parseXml(queryResult);
            NodeList totalResultsNodeList = document.getElementsByTagName("opensearch:totalResults");
            if (totalResultsNodeList.getLength() == 0) {
                System.err.println("Error: total result count not found");
                return -1;
            }
            String totalResultsText = totalResultsNodeList.item(0).getTextContent();
            try {
                return Integer.parseInt(totalResultsText.trim());
            } catch (NumberFormatException e) {
                e.printStackTrace();
                return -1;
            }
        } catch (IOException e) {
            e.printStackTrace();
            return -1;
        }
    }

    /**
     * Builds an {@code Article} from one Atom {@code <entry>} element.
     *
     * @return the article, or {@code null} when the entry lacks an id, title or
     *         summary, or when its id is not a syntactically valid URI
     */
    private Article toArticle(Element entry) {
        String id = getChildText(entry, "id");
        String title = getChildText(entry, "title");
        String summary = getChildText(entry, "summary");
        if (id == null || title == null || summary == null) {
            System.err.println("ERROR: incomplete article " + id + " " + title + " " + summary);
            return null;
        }
        try {
            // Trim the id like the other fields: surrounding whitespace would make URI.create throw.
            return new Article(title.trim(), summary.trim(), URI.create(id.trim()));
        } catch (IllegalArgumentException e) {
            // One malformed id must not discard every other entry of the response.
            System.err.println("ERROR: invalid article id " + id);
            return null;
        }
    }

    /**
     * Performs a rate-limited GET request.
     *
     * @param url the absolute URL to fetch
     * @return the response body, or {@code null} when the status code is not 200
     * @throws IOException      if sending or receiving fails
     * @throws RuntimeException if the calling thread is interrupted while waiting
     *                          for the response; the interrupt flag is restored first
     */
    private String httpGet(String url) throws IOException {
        long timeSinceLastRequest = System.currentTimeMillis() - lastRequestTime;
        // A negative value means the wall clock moved backwards; do not turn that into a huge sleep.
        if (timeSinceLastRequest >= 0 && timeSinceLastRequest < MINIMUM_REQUEST_INTERVAL_MS) {
            sleep(MINIMUM_REQUEST_INTERVAL_MS - timeSinceLastRequest + 100);
        }
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(url))
                .timeout(Duration.ofSeconds(10))
                .GET()
                .build();
        try {
            HttpResponse<String> httpResponse = httpClient.send(request, HttpResponse.BodyHandlers.ofString());
            int statusCode = httpResponse.statusCode();
            if (statusCode == 200) {
                return httpResponse.body();
            }
            System.err.println("Status code " + statusCode + " for " + url + ": " + httpResponse.body());
            return null;
        } catch (InterruptedException e) {
            // Restore the flag so code further up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } finally {
            lastRequestTime = System.currentTimeMillis();
        }
    }

    /**
     * Parses an XML string into a DOM document.
     *
     * <p>The text comes from the network, so DOCTYPE declarations, XInclude and
     * entity expansion are disabled to rule out XXE-style attacks.
     *
     * @throws IOException if the parser cannot be configured or the text is not well-formed XML
     */
    private Document parseXml(String xml) throws IOException {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            factory.setXIncludeAware(false);
            factory.setExpandEntityReferences(false);
            return factory.newDocumentBuilder()
                    .parse(new InputSource(new StringReader(xml)));
        } catch (Exception e) {
            throw new IOException("Error parsing query result", e);
        }
    }

    /**
     * Returns the text content of the first descendant of {@code parent} named
     * {@code tag}, or {@code null} when there is no such element.
     */
    private String getChildText(Element parent, String tag) {
        NodeList nodes = parent.getElementsByTagName(tag);
        if (nodes.getLength() == 0) {
            return null;
        }
        return nodes.item(0).getTextContent();
    }

    /**
     * Sleeps for the given number of milliseconds. If the thread is interrupted
     * the method returns early and leaves the interrupt flag set.
     */
    private void sleep(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException e) {
            // Do not swallow the interruption: re-assert it for the caller.
            Thread.currentThread().interrupt();
        }
    }
}
