package de.ph87.kleinanzeigen.api;

import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.telegram.telegrambots.meta.api.objects.MaybeInaccessibleMessage;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.ZonedDateTime;
import java.util.*;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static de.ph87.kleinanzeigen.api.JSON.objectMapper;

/**
 * Scrapes the "zu verschenken" listing page, keeps the parsed offers in memory and persists
 * them as JSON in {@link #FILE}.
 *
 * <p>All access to the in-memory offer list is guarded by synchronizing on the list itself.
 */
@Slf4j
public class Kleinanzeigen {

  /** Maximum number of offers without a remember flag / remember-until date that are kept. */
  private static final int KEEP_LAST_OFFERS_COUNT = 50;

  /** Timeout passed to Jsoup for every page download. */
  private static final int FETCH_TIMEOUT_MILLIS = 3000;

  private static final File FILE = new File("./offers.json");

  private static final URI VERSCHENKEN_EPPELBORN_30KM = URI.create("https://www.kleinanzeigen.de/s-zu-verschenken/66571/c192l339r30");

  /** Matches location texts such as {@code "66571 Eppelborn (ca. 5 km)"}; the "ca." prefix is optional. */
  private static final Pattern LOCATION_PATTERN =
      Pattern.compile("^(?<zipcode>\\d+) (?<location>.+) \\((?:ca\\.?)?\\s*(?<distance>\\d+)\\s*km\\s*\\)$");

  /** Matches relative dates such as {@code "Heute, 12:30"} or {@code "Gestern, 08:05"}. */
  private static final Pattern DAY_NAME_PATTERN =
      Pattern.compile("(?<day>Gestern|Heute), (?<hour>\\d+):(?<minute>\\d+)");

  /** Matches absolute dates in day.month.year order, e.g. {@code "12.03.2024"}. */
  private static final Pattern LOCAL_DATE_PATTERN =
      Pattern.compile("(?<day>\\d+)\\.(?<month>\\d+)\\.(?<year>\\d+)");

  /** Known offers; also serves as the monitor for every read and write of the list. */
  private final List<Offer> offers;

  /** Callback invoked for every offer that {@link #_cleanUp()} dropped from the list. */
  private final Consumer<? super Offer> remove;

  /**
   * Creates the instance and loads previously persisted offers from {@link #FILE}.
   *
   * @param remove callback invoked for each offer removed during clean-up
   */
  public Kleinanzeigen(final Consumer<? super Offer> remove) {
    this.remove = remove;
    offers = load();
  }

  /**
   * Reads the persisted offers from {@link #FILE}.
   *
   * @return the deserialized offers, or a new empty list if reading fails
   */
  private List<Offer> load() {
    try {
      final List<Offer> loaded = objectMapper.readerForListOf(Offer.class).readValue(FILE);
      log.info("Loaded {} offers from file: {}", loaded.size(), FILE);
      return loaded;
    } catch (IOException e) {
      log.warn("Failed to load Offers from file={}: {}", FILE, e.toString());
      return new ArrayList<>();
    }
  }

  /**
   * Prunes surplus offers, writes the remaining ones to {@link #FILE} and reports every pruned
   * offer to the {@code remove} callback.
   *
   * <p>The callback is invoked even when writing the file fails: at that point the pruned offers
   * have already been dropped from the in-memory list and marked deleted, so skipping the
   * notification would leave the callback's side out of sync with the list.
   */
  private void save() {
    final List<Offer> removed;
    synchronized (offers) {
      removed = _cleanUp();
      try {
        objectMapper.writerWithDefaultPrettyPrinter().writeValue(FILE, offers);
        log.info("Wrote {} offers to file: {}", offers.size(), FILE);
      } catch (IOException e) {
        log.warn("Failed to write Offers to file={}: {}", FILE, e.toString());
      }
    }
    removed.forEach(remove);
  }

  /**
   * Sorts the offers by date and removes the oldest offers that are neither flagged as
   * remembered nor carry a remember-until date, until at most {@link #KEEP_LAST_OFFERS_COUNT}
   * of those remain. Must be called while holding the monitor of {@code offers}.
   *
   * @return the offers that were removed from the list and marked deleted
   * @throws IllegalStateException if the list already contains an offer flagged as deleted
   */
  private List<Offer> _cleanUp() {
    if (offers.stream().anyMatch(Offer::_deleted_)) {
      throw new IllegalStateException("Offer list contains an offer that is already marked deleted");
    }
    offers.sort(Comparator.comparing(Offer::getDate));
    final List<Offer> deleted = new ArrayList<>();
    // Sorted ascending by date, so the head of this list is the oldest removable offer.
    final List<Offer> removable = new ArrayList<>(
        offers.stream().filter(offer -> !offer.isRemember() && offer.getRememberUntil() == null).toList());
    while (removable.size() > KEEP_LAST_OFFERS_COUNT) {
      final Offer oldest = removable.removeFirst();
      offers.remove(oldest);
      oldest.markDeleted();
      deleted.add(oldest);
    }
    return deleted;
  }

  /**
   * Downloads the listing page, parses every non-top-ad article into an {@link Offer}, merges
   * it into the known offers and finally persists the list. An article that cannot be parsed
   * is logged and skipped; a failure to download the listing page is logged and ends the call.
   */
  public void fetch() {
    try {
      final Document document = Jsoup.parse(VERSCHENKEN_EPPELBORN_30KM.toURL(), FETCH_TIMEOUT_MILLIS);
      for (final Element article : document.select("li.ad-listitem:not(.is-topad) article.aditem")) {
        try {
          merge(parse(article));
        } catch (OfferParseException e) {
          log.error("Failed to parse Offer:", e);
        }
      }
      save();
    } catch (IOException e) {
      log.error("Failed to fetch Kleinanzeigen: {}", e.toString());
    }
  }

  /**
   * Builds an {@link Offer} from one listing article, including a download of the article's
   * detail page to find its image URL.
   *
   * @param article the {@code article.aditem} element of the listing page
   * @return the parsed offer
   * @throws OfferParseException if a number, date or URL in the article is malformed, or the
   *     detail page cannot be downloaded
   */
  private Offer parse(final Element article) throws OfferParseException {
    try {
      final String id = article.attr("data-adid");
      final String title = article.select(".text-module-begin").text();
      final String description = article.select(".aditem-main--middle--description").text();
      final ZonedDateTime date = parseDate(article.select(".aditem-main--top--right").text());
      final String articleURL = VERSCHENKEN_EPPELBORN_30KM.resolve(article.select(".aditem-image a").attr("href")).toString();

      final String zipcode;
      final String location;
      final Integer distance;
      final String locationString = article.select(".aditem-main--top--left").text();
      final Matcher locationMatcher = LOCATION_PATTERN.matcher(locationString);
      if (locationMatcher.find()) {
        zipcode = locationMatcher.group("zipcode");
        location = locationMatcher.group("location");
        distance = Integer.parseInt(locationMatcher.group("distance"));
      } else {
        // Unknown layout: keep the raw text as location rather than dropping the offer.
        zipcode = "";
        location = locationString;
        distance = null;
      }

      final String imageURL = getImageURL(articleURL);
      return new Offer(id, date, title, zipcode, location, distance, description, articleURL, imageURL);
    } catch (IllegalArgumentException | DateTimeException | IOException e) {
      // IllegalArgumentException covers NumberFormatException as well as invalid URIs;
      // DateTimeException covers out-of-range date/time fields. Without catching these, a
      // single malformed article would abort the whole fetch.
      throw new OfferParseException(article, e);
    }
  }

  /**
   * Downloads the article's detail page and extracts the {@code src} of the first gallery image.
   *
   * @param articleURL absolute URL of the article's detail page
   * @return the image URL, or an empty string if the page has no gallery image
   * @throws IOException if the detail page cannot be downloaded
   */
  private String getImageURL(final String articleURL) throws IOException {
    final Document document = Jsoup.parse(URI.create(articleURL).toURL(), FETCH_TIMEOUT_MILLIS);
    final Element image = document.select(".galleryimage-element img").first();
    return image == null ? "" : image.attr("src");
  }

  /**
   * Merges the given offer into the first known offer with the same id, or appends it to the
   * list if no such offer exists.
   *
   * @param offer the freshly parsed offer
   */
  private void merge(final Offer offer) {
    // Lock the shared list (not the parameter) so this excludes the other accessors.
    synchronized (offers) {
      for (final Offer existing : offers) {
        if (existing.getId().equals(offer.getId())) {
          existing.merge(offer);
          return;
        }
      }
      log.info("Created: {}", offer);
      offers.add(offer);
    }
  }

  /**
   * Parses the date text of a listing article. Supports {@code "Heute, HH:mm"},
   * {@code "Gestern, HH:mm"} and absolute {@code day.month.year} dates (interpreted as midnight
   * in the default time zone).
   *
   * @param text the raw date text
   * @return the parsed date
   * @throws NumberFormatException if the text matches none of the supported formats or a
   *     number in it is too large
   * @throws DateTimeException if a matched field is out of range (e.g. hour 25, month 13)
   */
  private ZonedDateTime parseDate(final String text) {
    final Matcher dayNameMatcher = DAY_NAME_PATTERN.matcher(text);
    if (dayNameMatcher.find()) {
      final long minusDays = dayNameMatcher.group("day").equals("Gestern") ? 1 : 0;
      final int hour = Integer.parseInt(dayNameMatcher.group("hour"));
      final int minute = Integer.parseInt(dayNameMatcher.group("minute"));
      return ZonedDateTime.now().minusDays(minusDays).withHour(hour).withMinute(minute).withSecond(0).withNano(0);
    }

    final Matcher localDateMatcher = LOCAL_DATE_PATTERN.matcher(text);
    if (localDateMatcher.find()) {
      final int day = Integer.parseInt(localDateMatcher.group("day"));
      final int month = Integer.parseInt(localDateMatcher.group("month"));
      final int year = Integer.parseInt(localDateMatcher.group("year"));
      // LocalDate.of expects (year, month, dayOfMonth).
      return ZonedDateTime.of(LocalDate.of(year, month, day), LocalTime.MIDNIGHT, TimeZone.getDefault().toZoneId());
    }

    throw new NumberFormatException("Failed to parse date: " + text);
  }

  /**
   * Returns a snapshot of all known offers.
   *
   * @return a new list containing the current offers
   */
  public List<Offer> findAll() {
    synchronized (offers) {
      return new ArrayList<>(offers);
    }
  }

  /**
   * Calls {@code ignore()} on the offer associated with the given Telegram message, if any,
   * and persists the list.
   *
   * @param message the Telegram message whose id identifies the offer
   */
  public void ignore(final MaybeInaccessibleMessage message) {
    synchronized (offers) {
      findByTelegramMessageId(message).ifPresent(offer -> {
        offer.ignore();
        save();
      });
    }
  }

  /**
   * Sets the remember flag on the offer associated with the given Telegram message, if any,
   * and persists the list.
   *
   * @param message the Telegram message whose id identifies the offer
   * @param remember the new value of the remember flag
   * @return the updated offer, or empty if no offer is associated with the message
   */
  public Optional<Offer> remember(final MaybeInaccessibleMessage message, final boolean remember) {
    synchronized (offers) {
      final Optional<Offer> optional = findByTelegramMessageId(message);
      optional.ifPresent(offer -> {
        offer.setRemember(remember);
        save();
      });
      return optional;
    }
  }

  /**
   * Looks up the first offer whose Telegram message id equals the id of the given message.
   *
   * @param message the Telegram message to look up
   * @return the matching offer, or empty if there is none
   */
  public Optional<Offer> findByTelegramMessageId(final MaybeInaccessibleMessage message) {
    synchronized (offers) {
      return offers.stream()
          .filter(offer -> Objects.equals(offer.getTelegramMessageId(), message.getMessageId()))
          .findFirst();
    }
  }

}