fetchUntilDuplicate
This commit is contained in:
parent
bf674edde0
commit
2838afc843
28
src/main/java/de/ph87/kleinanzeigen/api/FetchResult.java
Normal file
28
src/main/java/de/ph87/kleinanzeigen/api/FetchResult.java
Normal file
@ -0,0 +1,28 @@
|
||||
package de.ph87.kleinanzeigen.api;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class FetchResult {
|
||||
|
||||
private int created = 0;
|
||||
|
||||
private int updated = 0;
|
||||
|
||||
private int error = 0;
|
||||
|
||||
public void add(final MergeResult mergeResult) {
|
||||
switch (mergeResult) {
|
||||
case CREATED -> created++;
|
||||
case UPDATED -> updated++;
|
||||
case ERROR -> error++;
|
||||
}
|
||||
}
|
||||
|
||||
public void merge(final FetchResult other) {
|
||||
this.created += other.created;
|
||||
this.updated += other.updated;
|
||||
this.error += other.error;
|
||||
}
|
||||
|
||||
}
|
||||
@ -8,6 +8,7 @@ import org.telegram.telegrambots.meta.api.objects.MaybeInaccessibleMessage;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
@ -22,11 +23,11 @@ import static de.ph87.kleinanzeigen.api.JSON.objectMapper;
|
||||
@Slf4j
|
||||
public class Kleinanzeigen {
|
||||
|
||||
private static final int KEEP_LAST_OFFERS_COUNT = 50;
|
||||
private static final int KEEP_LAST_OFFERS_COUNT = 200;
|
||||
|
||||
private static final File FILE = new File("./offers.json");
|
||||
|
||||
private static final URI VERSCHENKEN_EPPELBORN_30KM = URI.create("https://www.kleinanzeigen.de/s-zu-verschenken/66571/c192l339r30");
|
||||
private static final String VERSCHENKEN_EPPELBORN_30KM = "https://www.kleinanzeigen.de/s-zu-verschenken/66571/seite:%d/c192l339r30";
|
||||
|
||||
private final List<Offer> offers;
|
||||
|
||||
@ -54,7 +55,6 @@ public class Kleinanzeigen {
|
||||
synchronized (offers) {
|
||||
removed = _cleanUp();
|
||||
objectMapper.writerWithDefaultPrettyPrinter().writeValue(FILE, offers);
|
||||
log.info("Wrote {} offers to file: {}", offers.size(), FILE);
|
||||
}
|
||||
removed.forEach(remove);
|
||||
} catch (IOException e) {
|
||||
@ -82,30 +82,52 @@ public class Kleinanzeigen {
|
||||
return deleted;
|
||||
}
|
||||
|
||||
public void fetch() {
|
||||
public void fetchUntilDuplicate(final int maxPageCount) {
|
||||
int page = 0;
|
||||
final FetchResult totalFetchResult = new FetchResult();
|
||||
while (totalFetchResult.getUpdated() <= 0 && page <= maxPageCount) {
|
||||
final FetchResult pageFetchResult = fetch(++page);
|
||||
totalFetchResult.merge(pageFetchResult);
|
||||
}
|
||||
log.info("FetchResult: {}", totalFetchResult);
|
||||
}
|
||||
|
||||
private FetchResult fetch(final int page) {
|
||||
final FetchResult fetchResult = new FetchResult();
|
||||
try {
|
||||
final Document document = Jsoup.parse(VERSCHENKEN_EPPELBORN_30KM.toURL(), 3000);
|
||||
final URI uri = getPageURI(page);
|
||||
log.info("Fetching page: {}", uri);
|
||||
final Document document = Jsoup.parse(uri.toURL(), 3000);
|
||||
for (Element article : document.select("li.ad-listitem:not(.is-topad) article.aditem")) {
|
||||
final Offer offer;
|
||||
try {
|
||||
final Offer offer = parse(article);
|
||||
merge(offer);
|
||||
offer = parse(article, uri);
|
||||
} catch (OfferParseException e) {
|
||||
log.error("Failed to parse Offer:", e);
|
||||
fetchResult.add(MergeResult.ERROR);
|
||||
continue;
|
||||
}
|
||||
final MergeResult mergeResult = merge(offer);
|
||||
fetchResult.add(mergeResult);
|
||||
}
|
||||
save();
|
||||
} catch (IOException e) {
|
||||
log.error("Failed to fetch Kleinanzeigen: {}", e.toString());
|
||||
}
|
||||
return fetchResult;
|
||||
}
|
||||
|
||||
private Offer parse(final Element article) throws OfferParseException {
|
||||
private URI getPageURI(final int page) throws MalformedURLException {
|
||||
return URI.create(VERSCHENKEN_EPPELBORN_30KM.formatted(page));
|
||||
}
|
||||
|
||||
private Offer parse(final Element article, final URI uri) throws OfferParseException {
|
||||
try {
|
||||
final String id = article.attr("data-adid");
|
||||
final String title = article.select(".text-module-begin").text();
|
||||
final String description = article.select(".aditem-main--middle--description").text();
|
||||
final ZonedDateTime date = parseDate(article.select(".aditem-main--top--right").text());
|
||||
final String articleURL = VERSCHENKEN_EPPELBORN_30KM.resolve(article.select(".aditem-image a").attr("href")).toString();
|
||||
final String articleURL = uri.resolve(article.select(".aditem-image a").attr("href")).toString();
|
||||
final String zipcode;
|
||||
final String location;
|
||||
final Integer distance;
|
||||
@ -124,30 +146,35 @@ public class Kleinanzeigen {
|
||||
final String imageURL = getImageURL(articleURL);
|
||||
|
||||
return new Offer(id, date, title, zipcode, location, distance, description, articleURL, imageURL);
|
||||
} catch (NumberFormatException | IOException e) {
|
||||
} catch (NumberFormatException e) {
|
||||
throw new OfferParseException(article, e);
|
||||
}
|
||||
}
|
||||
|
||||
private String getImageURL(final String articleURL) throws IOException {
|
||||
final String imageURL;
|
||||
final Document document = Jsoup.parse(URI.create(articleURL).toURL(), 3000);
|
||||
final Element image = document.select(".galleryimage-element img").first();
|
||||
if (image == null) {
|
||||
imageURL = "";
|
||||
} else {
|
||||
imageURL = image.attr("src");
|
||||
private String getImageURL(final String articleURL) {
|
||||
try {
|
||||
final Document document = Jsoup.parse(URI.create(articleURL).toURL(), 3000);
|
||||
final Element image = document.select(".galleryimage-element img").first();
|
||||
if (image != null) {
|
||||
return image.attr("src");
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("Failed to load Article page: {}", articleURL);
|
||||
}
|
||||
return imageURL;
|
||||
return "";
|
||||
}
|
||||
|
||||
private void merge(final Offer offer) {
|
||||
private MergeResult merge(final Offer offer) {
|
||||
synchronized (offer) {
|
||||
offers.stream().filter(existing -> existing.getId().equals(offer.getId())).peek(existing -> existing.merge(offer)).findFirst().orElseGet(() -> {
|
||||
final Optional<Offer> existingOptional = offers.stream().filter(existing -> existing.getId().equals(offer.getId())).findFirst();
|
||||
if (existingOptional.isPresent()) {
|
||||
existingOptional.get().merge(offer);
|
||||
return MergeResult.UPDATED;
|
||||
} else {
|
||||
log.info("Created: {}", offer);
|
||||
offers.add(offer);
|
||||
return offer;
|
||||
});
|
||||
return MergeResult.CREATED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -29,7 +29,7 @@ public class Main {
|
||||
}
|
||||
|
||||
private static void handle(final Bot bot) {
|
||||
kleinanzeigen.fetch();
|
||||
kleinanzeigen.fetchUntilDuplicate(5);
|
||||
kleinanzeigen.findAll().stream().filter(offer -> offer.getTelegramMessageId() == null).forEach(bot::send);
|
||||
}
|
||||
|
||||
|
||||
5
src/main/java/de/ph87/kleinanzeigen/api/MergeResult.java
Normal file
5
src/main/java/de/ph87/kleinanzeigen/api/MergeResult.java
Normal file
@ -0,0 +1,5 @@
|
||||
package de.ph87.kleinanzeigen.api;
|
||||
|
||||
/**
 * Outcome of handling a single offer while fetching a listing page.
 */
public enum MergeResult {

  /** The offer was not known before and has been added. */
  CREATED,

  /** An offer with the same id already existed and was merged with the new data. */
  UPDATED,

  /** The offer could not be parsed. */
  ERROR

}
|
||||
Loading…
Reference in New Issue
Block a user