- highres image from article page - no-image-placeholder - all messages via SendPhoto - rememberUntil
200 lines
6.9 KiB
Java
200 lines
6.9 KiB
Java
package de.ph87.kleinanzeigen.api;
|
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
import org.telegram.telegrambots.meta.api.objects.MaybeInaccessibleMessage;
|
|
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.net.URI;
|
|
import java.time.LocalDate;
|
|
import java.time.LocalTime;
|
|
import java.time.ZonedDateTime;
|
|
import java.util.*;
|
|
import java.util.function.Consumer;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
import static de.ph87.kleinanzeigen.api.JSON.objectMapper;
|
|
|
|
@Slf4j
|
|
public class Kleinanzeigen {
|
|
|
|
private static final int KEEP_LAST_OFFERS_COUNT = 50;
|
|
|
|
private static final File FILE = new File("./offers.json");
|
|
|
|
private static final URI VERSCHENKEN_EPPELBORN_30KM = URI.create("https://www.kleinanzeigen.de/s-zu-verschenken/66571/c192l339r30");
|
|
|
|
private final List<Offer> offers;
|
|
|
|
private final Consumer<Offer> remove;
|
|
|
|
public Kleinanzeigen(final Consumer<Offer> remove) {
|
|
this.remove = remove;
|
|
offers = load();
|
|
}
|
|
|
|
private List<Offer> load() {
|
|
try {
|
|
final List<Offer> offers = objectMapper.readerForListOf(Offer.class).readValue(FILE);
|
|
log.info("Loaded {} offers from file: {}", offers.size(), FILE);
|
|
return offers;
|
|
} catch (IOException e) {
|
|
log.warn("Failed to load Offers from file={}: {}", FILE, e.toString());
|
|
return new ArrayList<>();
|
|
}
|
|
}
|
|
|
|
private void save() {
|
|
try {
|
|
final List<Offer> removed;
|
|
synchronized (offers) {
|
|
removed = _cleanUp();
|
|
objectMapper.writerWithDefaultPrettyPrinter().writeValue(FILE, offers);
|
|
log.info("Wrote {} offers to file: {}", offers.size(), FILE);
|
|
}
|
|
removed.forEach(remove);
|
|
} catch (IOException e) {
|
|
log.warn("Failed to write Offers to file={}: {}", FILE, e.toString());
|
|
}
|
|
}
|
|
|
|
private List<Offer> _cleanUp() {
|
|
if (offers.stream().anyMatch(Offer::_deleted_)) {
|
|
throw new RuntimeException();
|
|
}
|
|
|
|
offers.sort(Comparator.comparing(Offer::getDate));
|
|
|
|
final List<Offer> deleted = new ArrayList<>();
|
|
final List<Offer> removable = new ArrayList<>(offers.stream().filter(offer -> !offer.isRemember() && offer.getRememberUntil() == null).toList());
|
|
while (!removable.isEmpty() && removable.size() > Kleinanzeigen.KEEP_LAST_OFFERS_COUNT) {
|
|
final Offer offer = removable.removeFirst();
|
|
offers.remove(offer);
|
|
offer.markDeleted();
|
|
deleted.add(offer);
|
|
}
|
|
|
|
return deleted;
|
|
}
|
|
|
|
public void fetch() {
|
|
try {
|
|
final Document document = Jsoup.parse(VERSCHENKEN_EPPELBORN_30KM.toURL(), 3000);
|
|
for (Element article : document.select("li.ad-listitem:not(.is-topad) article.aditem")) {
|
|
try {
|
|
final Offer offer = parse(article);
|
|
merge(offer);
|
|
} catch (OfferParseException e) {
|
|
log.error("Failed to parse Offer:", e);
|
|
}
|
|
}
|
|
save();
|
|
} catch (IOException e) {
|
|
log.error("Failed to fetch Kleinanzeigen: {}", e.toString());
|
|
}
|
|
}
|
|
|
|
private Offer parse(final Element article) throws OfferParseException {
|
|
try {
|
|
final String id = article.attr("data-adid");
|
|
final String title = article.select(".text-module-begin").text();
|
|
final String description = article.select(".aditem-main--middle--description").text();
|
|
final ZonedDateTime date = parseDate(article.select(".aditem-main--top--right").text());
|
|
final String articleURL = VERSCHENKEN_EPPELBORN_30KM.resolve(article.select(".aditem-image a").attr("href")).toString();
|
|
final String zipcode;
|
|
final String location;
|
|
final Integer distance;
|
|
final String locationString = article.select(".aditem-main--top--left").text();
|
|
final Matcher locationMatcher = Pattern.compile("^(?<zipcode>\\d+) (?<location>.+) \\((:?ca.)?\\s*(?<distance>\\d+)\\s*km\\s*\\)$").matcher(locationString);
|
|
if (!locationMatcher.find()) {
|
|
zipcode = "";
|
|
location = locationString;
|
|
distance = null;
|
|
} else {
|
|
zipcode = locationMatcher.group("zipcode");
|
|
location = locationMatcher.group("location");
|
|
distance = Integer.parseInt(locationMatcher.group("distance"));
|
|
}
|
|
|
|
final String imageURL = getImageURL(articleURL);
|
|
|
|
return new Offer(id, date, title, zipcode, location, distance, description, articleURL, imageURL);
|
|
} catch (NumberFormatException | IOException e) {
|
|
throw new OfferParseException(article, e);
|
|
}
|
|
}
|
|
|
|
private String getImageURL(final String articleURL) throws IOException {
|
|
final String imageURL;
|
|
final Document document = Jsoup.parse(URI.create(articleURL).toURL(), 3000);
|
|
final Element image = document.select(".galleryimage-element img").first();
|
|
if (image == null) {
|
|
imageURL = "";
|
|
} else {
|
|
imageURL = image.attr("src");
|
|
}
|
|
return imageURL;
|
|
}
|
|
|
|
private void merge(final Offer offer) {
|
|
synchronized (offer) {
|
|
offers.stream().filter(existing -> existing.getId().equals(offer.getId())).peek(existing -> existing.merge(offer)).findFirst().orElseGet(() -> {
|
|
log.info("Created: {}", offer);
|
|
offers.add(offer);
|
|
return offer;
|
|
});
|
|
}
|
|
}
|
|
|
|
private ZonedDateTime parseDate(final String text) {
|
|
final Matcher dayNameMatcher = Pattern.compile("(?<day>Gestern|Heute), (?<hour>\\d+):(?<minute>\\d+)").matcher(text);
|
|
if (dayNameMatcher.find()) {
|
|
final long minusDays = dayNameMatcher.group("day").equals("Gestern") ? 1 : 0;
|
|
return ZonedDateTime.now().minusDays(minusDays).withHour(Integer.parseInt(dayNameMatcher.group("hour"))).withMinute(Integer.parseInt(dayNameMatcher.group("minute"))).withSecond(0).withNano(0);
|
|
}
|
|
|
|
final Matcher localDateMatcher = Pattern.compile("(?<day>\\d+).(?<month>\\d+).(?<year>\\d+)").matcher(text);
|
|
if (localDateMatcher.find()) {
|
|
return ZonedDateTime.of(LocalDate.of(Integer.parseInt(localDateMatcher.group("day")), Integer.parseInt(localDateMatcher.group("month")), Integer.parseInt(localDateMatcher.group("year"))), LocalTime.MIDNIGHT, TimeZone.getDefault().toZoneId());
|
|
}
|
|
throw new NumberFormatException("Failed to parse date: " + text);
|
|
}
|
|
|
|
public List<Offer> findAll() {
|
|
synchronized (offers) {
|
|
return new ArrayList<>(offers);
|
|
}
|
|
}
|
|
|
|
public void ignore(final MaybeInaccessibleMessage message) {
|
|
synchronized (offers) {
|
|
findByTelegramMessageId(message).ifPresent(offer -> {
|
|
offer.ignore();
|
|
save();
|
|
});
|
|
}
|
|
}
|
|
|
|
public Optional<Offer> remember(final MaybeInaccessibleMessage message, final boolean remember) {
|
|
synchronized (offers) {
|
|
final Optional<Offer> optional = findByTelegramMessageId(message);
|
|
optional.ifPresent(offer -> {
|
|
offer.setRemember(remember);
|
|
save();
|
|
});
|
|
return optional;
|
|
}
|
|
}
|
|
|
|
public Optional<Offer> findByTelegramMessageId(final MaybeInaccessibleMessage message) {
|
|
synchronized (offers) {
|
|
return offers.stream().filter(offer -> Objects.equals(offer.getTelegramMessageId(), message.getMessageId())).findFirst();
|
|
}
|
|
}
|
|
|
|
}
|