104 lines
3.4 KiB
Java
104 lines
3.4 KiB
Java
package de.ph87.kleinanzeigen.kleinanzeigen.offer;
|
|
|
|
import jakarta.annotation.Nullable;
|
|
import lombok.Getter;
|
|
import lombok.NonNull;
|
|
import lombok.ToString;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
|
|
import java.io.IOException;
|
|
import java.net.URI;
|
|
import java.time.LocalDate;
|
|
import java.time.LocalTime;
|
|
import java.time.ZonedDateTime;
|
|
import java.util.TimeZone;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
@Slf4j
|
|
@Getter
|
|
@ToString(onlyExplicitlyIncluded = true)
|
|
public class OfferCreate {
|
|
|
|
@NonNull
|
|
@ToString.Include
|
|
private final String articleId;
|
|
|
|
@NonNull
|
|
private final ZonedDateTime articleDate;
|
|
|
|
@NonNull
|
|
@ToString.Include
|
|
private final String title;
|
|
|
|
@NonNull
|
|
@ToString.Include
|
|
private final String location;
|
|
|
|
@Nullable
|
|
private final String zipcode;
|
|
|
|
@Nullable
|
|
private final Integer distance;
|
|
|
|
@NonNull
|
|
private final String description;
|
|
|
|
@NonNull
|
|
private final String articleURL;
|
|
|
|
@Nullable
|
|
private final String imageURL;
|
|
|
|
public OfferCreate(final Element article, final URI uri) {
|
|
articleId = article.attr("data-adid");
|
|
title = article.select(".text-module-begin").text();
|
|
description = article.select(".aditem-main--middle--description").text();
|
|
articleDate = parseDate(article.select(".aditem-main--top--right").text());
|
|
articleURL = uri.resolve(article.select(".aditem-image a").attr("href")).toString();
|
|
final String locationString = article.select(".aditem-main--top--left").text();
|
|
final Matcher locationMatcher = Pattern.compile("^(?<zipcode>\\d+) (?<location>.+) \\((:?ca.)?\\s*(?<distance>\\d+)\\s*km\\s*\\)$").matcher(locationString);
|
|
if (!locationMatcher.find()) {
|
|
zipcode = "";
|
|
location = locationString;
|
|
distance = null;
|
|
} else {
|
|
zipcode = locationMatcher.group("zipcode");
|
|
location = locationMatcher.group("location");
|
|
distance = Integer.parseInt(locationMatcher.group("distance"));
|
|
}
|
|
imageURL = getImageURL(articleURL);
|
|
}
|
|
|
|
private ZonedDateTime parseDate(final String text) {
|
|
final Matcher dayNameMatcher = Pattern.compile("(?<day>Gestern|Heute), (?<hour>\\d+):(?<minute>\\d+)").matcher(text);
|
|
if (dayNameMatcher.find()) {
|
|
final long minusDays = dayNameMatcher.group("day").equals("Gestern") ? 1 : 0;
|
|
return ZonedDateTime.now().minusDays(minusDays).withHour(Integer.parseInt(dayNameMatcher.group("hour"))).withMinute(Integer.parseInt(dayNameMatcher.group("minute"))).withSecond(0).withNano(0);
|
|
}
|
|
|
|
final Matcher localDateMatcher = Pattern.compile("(?<day>\\d+).(?<month>\\d+).(?<year>\\d+)").matcher(text);
|
|
if (localDateMatcher.find()) {
|
|
return ZonedDateTime.of(LocalDate.of(Integer.parseInt(localDateMatcher.group("day")), Integer.parseInt(localDateMatcher.group("month")), Integer.parseInt(localDateMatcher.group("year"))), LocalTime.MIDNIGHT, TimeZone.getDefault().toZoneId());
|
|
}
|
|
throw new NumberFormatException("Failed to parse date: " + text);
|
|
}
|
|
|
|
private String getImageURL(final String articleURL) {
|
|
try {
|
|
final Document document = Jsoup.parse(URI.create(articleURL).toURL(), 3000);
|
|
final Element image = document.select(".galleryimage-element img").first();
|
|
if (image != null) {
|
|
return image.attr("src");
|
|
}
|
|
} catch (IOException e) {
|
|
log.error("Failed to load Article page: {}", articleURL);
|
|
}
|
|
return null;
|
|
}
|
|
|
|
}
|