diff --git a/src/main/java/de/ph87/kleinanzeigen/api/FetchResult.java b/src/main/java/de/ph87/kleinanzeigen/api/FetchResult.java
new file mode 100644
index 0000000..35adff1
--- /dev/null
+++ b/src/main/java/de/ph87/kleinanzeigen/api/FetchResult.java
@@ -0,0 +1,41 @@
+package de.ph87.kleinanzeigen.api;
+
+import lombok.Data;
+
+/**
+ * Mutable tally of the merge outcomes observed while fetching one or more result pages.
+ */
+@Data
+public class FetchResult {
+
+    private int created = 0;
+
+    private int updated = 0;
+
+    private int error = 0;
+
+    /**
+     * Increments the counter that corresponds to the given outcome.
+     *
+     * @param mergeResult outcome of handling a single offer
+     */
+    public void add(final MergeResult mergeResult) {
+        switch (mergeResult) {
+            case CREATED -> created++;
+            case UPDATED -> updated++;
+            case ERROR -> error++;
+        }
+    }
+
+    /**
+     * Adds all counters of {@code other} onto this instance.
+     *
+     * @param other tally to fold into this one; it is not modified
+     */
+    public void merge(final FetchResult other) {
+        this.created += other.created;
+        this.updated += other.updated;
+        this.error += other.error;
+    }
+
+}
diff --git a/src/main/java/de/ph87/kleinanzeigen/api/Kleinanzeigen.java b/src/main/java/de/ph87/kleinanzeigen/api/Kleinanzeigen.java
index 920fad8..26c54ad 100644
--- a/src/main/java/de/ph87/kleinanzeigen/api/Kleinanzeigen.java
+++ b/src/main/java/de/ph87/kleinanzeigen/api/Kleinanzeigen.java
@@ -8,6 +8,7 @@ import org.telegram.telegrambots.meta.api.objects.MaybeInaccessibleMessage;
 
 import java.io.File;
 import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URI;
 import java.time.LocalDate;
 import java.time.LocalTime;
@@ -22,11 +23,11 @@ import static de.ph87.kleinanzeigen.api.JSON.objectMapper;
 @Slf4j
 public class Kleinanzeigen {
 
-    private static final int KEEP_LAST_OFFERS_COUNT = 50;
+    private static final int KEEP_LAST_OFFERS_COUNT = 200;
 
     private static final File FILE = new File("./offers.json");
 
-    private static final URI VERSCHENKEN_EPPELBORN_30KM = URI.create("https://www.kleinanzeigen.de/s-zu-verschenken/66571/c192l339r30");
+    private static final String VERSCHENKEN_EPPELBORN_30KM = "https://www.kleinanzeigen.de/s-zu-verschenken/66571/seite:%d/c192l339r30";
 
     private final List<Offer> offers;
 
@@ -54,7 +55,6 @@ public class Kleinanzeigen {
             synchronized (offers) {
                 removed = _cleanUp();
                 objectMapper.writerWithDefaultPrettyPrinter().writeValue(FILE, offers);
-                log.info("Wrote {} offers to file: {}", offers.size(), FILE);
             }
             removed.forEach(remove);
         } catch (IOException e) {
@@ -82,30 +82,68 @@ public class Kleinanzeigen {
         return deleted;
     }
 
-    public void fetch() {
+    /**
+     * Fetches result pages, starting at page 1, until a page yields an already-known offer
+     * or {@code maxPageCount} pages have been fetched.
+     *
+     * @param maxPageCount upper bound on the number of pages to fetch
+     */
+    public void fetchUntilDuplicate(final int maxPageCount) {
+        int page = 0;
+        final FetchResult totalFetchResult = new FetchResult();
+        // "page < maxPageCount" because the page counter is pre-incremented inside the loop.
+        while (totalFetchResult.getUpdated() <= 0 && page < maxPageCount) {
+            final FetchResult pageFetchResult = fetch(++page);
+            totalFetchResult.merge(pageFetchResult);
+        }
+        log.info("FetchResult: {}", totalFetchResult);
+    }
+
+    /**
+     * Fetches a single result page, merges every offer found on it and saves afterwards.
+     *
+     * @param page page number inserted into the search URL
+     * @return tally of created, updated and unparsable offers; all zero if loading the page failed
+     */
+    private FetchResult fetch(final int page) {
+        final FetchResult fetchResult = new FetchResult();
         try {
-            final Document document = Jsoup.parse(VERSCHENKEN_EPPELBORN_30KM.toURL(), 3000);
+            final URI uri = getPageURI(page);
+            log.info("Fetching page: {}", uri);
+            final Document document = Jsoup.parse(uri.toURL(), 3000);
             for (Element article : document.select("li.ad-listitem:not(.is-topad) article.aditem")) {
+                final Offer offer;
                 try {
-                    final Offer offer = parse(article);
-                    merge(offer);
+                    offer = parse(article, uri);
                 } catch (OfferParseException e) {
                     log.error("Failed to parse Offer:", e);
+                    fetchResult.add(MergeResult.ERROR);
+                    continue;
                 }
+                final MergeResult mergeResult = merge(offer);
+                fetchResult.add(mergeResult);
             }
             save();
         } catch (IOException e) {
             log.error("Failed to fetch Kleinanzeigen: {}", e.toString());
         }
+        return fetchResult;
     }
 
-    private Offer parse(final Element article) throws OfferParseException {
+    /**
+     * Builds the search URL for the given page number.
+     */
+    private URI getPageURI(final int page) throws MalformedURLException {
+        return URI.create(VERSCHENKEN_EPPELBORN_30KM.formatted(page));
+    }
+
+    private Offer parse(final Element article, final URI uri) throws OfferParseException {
         try {
             final String id = article.attr("data-adid");
             final String title = article.select(".text-module-begin").text();
             final String description = article.select(".aditem-main--middle--description").text();
             final ZonedDateTime date = parseDate(article.select(".aditem-main--top--right").text());
-            final String articleURL = VERSCHENKEN_EPPELBORN_30KM.resolve(article.select(".aditem-image a").attr("href")).toString();
+            final String articleURL = uri.resolve(article.select(".aditem-image a").attr("href")).toString();
             final String zipcode;
             final String location;
             final Integer distance;
@@ -124,30 +162,48 @@ public class Kleinanzeigen {
 
             final String imageURL = getImageURL(articleURL);
             return new Offer(id, date, title, zipcode, location, distance, description, articleURL, imageURL);
-        } catch (NumberFormatException | IOException e) {
+        } catch (NumberFormatException e) {
             throw new OfferParseException(article, e);
         }
     }
 
-    private String getImageURL(final String articleURL) throws IOException {
-        final String imageURL;
-        final Document document = Jsoup.parse(URI.create(articleURL).toURL(), 3000);
-        final Element image = document.select(".galleryimage-element img").first();
-        if (image == null) {
-            imageURL = "";
-        } else {
-            imageURL = image.attr("src");
+    /**
+     * Loads the article page and extracts the source of its first gallery image.
+     *
+     * @param articleURL absolute URL of the article page
+     * @return the image URL, or an empty string if there is no image or the page could not be loaded
+     */
+    private String getImageURL(final String articleURL) {
+        try {
+            final Document document = Jsoup.parse(URI.create(articleURL).toURL(), 3000);
+            final Element image = document.select(".galleryimage-element img").first();
+            if (image != null) {
+                return image.attr("src");
+            }
+        } catch (IOException e) {
+            log.error("Failed to load Article page: {}: {}", articleURL, e.toString());
         }
-        return imageURL;
+        return "";
     }
 
-    private void merge(final Offer offer) {
-        synchronized (offer) {
-            offers.stream().filter(existing -> existing.getId().equals(offer.getId())).peek(existing -> existing.merge(offer)).findFirst().orElseGet(() -> {
+    /**
+     * Merges the offer into the known offers, matching by id.
+     *
+     * @param offer freshly parsed offer
+     * @return {@link MergeResult#UPDATED} if an offer with the same id existed, otherwise {@link MergeResult#CREATED}
+     */
+    private MergeResult merge(final Offer offer) {
+        // Lock the shared list (not the incoming offer) so this is mutually exclusive with save().
+        synchronized (offers) {
+            final Optional<Offer> existingOptional = offers.stream().filter(existing -> existing.getId().equals(offer.getId())).findFirst();
+            if (existingOptional.isPresent()) {
+                existingOptional.get().merge(offer);
+                return MergeResult.UPDATED;
+            } else {
                 log.info("Created: {}", offer);
                 offers.add(offer);
-                return offer;
-            });
+                return MergeResult.CREATED;
+            }
         }
     }
 
diff --git a/src/main/java/de/ph87/kleinanzeigen/api/Main.java b/src/main/java/de/ph87/kleinanzeigen/api/Main.java
index 05c7ae5..d7f14d9 100644
--- a/src/main/java/de/ph87/kleinanzeigen/api/Main.java
+++ b/src/main/java/de/ph87/kleinanzeigen/api/Main.java
@@ -29,7 +29,7 @@ public class Main {
     }
 
     private static void handle(final Bot bot) {
-        kleinanzeigen.fetch();
+        kleinanzeigen.fetchUntilDuplicate(5);
         kleinanzeigen.findAll().stream().filter(offer -> offer.getTelegramMessageId() == null).forEach(bot::send);
     }
 
diff --git a/src/main/java/de/ph87/kleinanzeigen/api/MergeResult.java b/src/main/java/de/ph87/kleinanzeigen/api/MergeResult.java
new file mode 100644
index 0000000..b1ce9fe
--- /dev/null
+++ b/src/main/java/de/ph87/kleinanzeigen/api/MergeResult.java
@@ -0,0 +1,8 @@
+package de.ph87.kleinanzeigen.api;
+
+/**
+ * Outcome of handling a single offer from a result page.
+ */
+public enum MergeResult {
+    CREATED, UPDATED, ERROR
+}