diff --git a/doc/spacemedia.uml b/doc/spacemedia.uml
index c5b4adbd..8de836ef 100644
--- a/doc/spacemedia.uml
+++ b/doc/spacemedia.uml
@@ -1,286 +1,286 @@
-
+
diff --git a/doc/spacemedia_fr_FR.properties b/doc/spacemedia_fr_FR.properties
index 2f5a39e5..3263a636 100644
--- a/doc/spacemedia_fr_FR.properties
+++ b/doc/spacemedia_fr_FR.properties
@@ -1 +1 @@
-#Sun May 30 01:37:45 CEST 2021
+#Sun May 30 02:48:30 CEST 2021
diff --git a/pom.xml b/pom.xml
index 8629d27e..3fa280bb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,86 +1,92 @@
4.0.0
org.wikimedia.commons.donvip
spacemedia
0.5.0-SNAPSHOT
pom
Find free media released by space agencies missing in Wikimedia Commons
sm-apps
sm-commons-api
sm-commons-data
+ sm-core
sm-data
sm-legacyapp
sm-repositories
sm-utils
11
${java.version}
${java.version}
UTF-8
https://sonarcloud.io
toolforge
tool-spacemedia
4.4
3.0.6
v3-rev20210410-1.31.0
8.3.1
2.5.0
scm:git:https://phabricator.wikimedia.org/source/tool-spacemedia.git
HEAD
https://phabricator.wikimedia.org/source/tool-spacemedia/
org.springframework.boot
spring-boot-dependencies
${springboot-version}
pom
import
com.flickr4java
flickr4java
${flickr4java-version}
com.github.scribejava
scribejava-apis
${scribejava-version}
com.google.apis
google-api-services-youtube
${google-api-services-youtube-version}
org.apache.commons
commons-collections4
${collections4-version}
+
+ org.jsoup
+ jsoup
+ 1.13.1
+
geosolutions
GeoSolutions Repository
http://maven.geo-solutions.it
jcenter
https://jcenter.bintray.com/
diff --git a/sm-apps/pom.xml b/sm-apps/pom.xml
index 07463c77..ef96abe0 100644
--- a/sm-apps/pom.xml
+++ b/sm-apps/pom.xml
@@ -1,37 +1,37 @@
4.0.0
org.wikimedia.commons.donvip
spacemedia
0.5.0-SNAPSHOT
sm-apps
pom
Applications
sm-backend
sm-cronjobs
sm-webapp
org.wikimedia.commons.donvip
- sm-data
+ sm-core
${project.version}
org.springframework.boot
spring-boot-starter
org.mariadb.jdbc
mariadb-java-client
diff --git a/sm-apps/sm-backend/src/main/resources/logback-spring-toolforge.xml b/sm-apps/sm-backend/src/main/resources/logback-spring-toolforge.xml
index df02b423..02d28ba8 100644
--- a/sm-apps/sm-backend/src/main/resources/logback-spring-toolforge.xml
+++ b/sm-apps/sm-backend/src/main/resources/logback-spring-toolforge.xml
@@ -1,9 +1,9 @@
-
+
diff --git a/sm-apps/sm-cronjobs/sm-harvester-esa-website/pom.xml b/sm-apps/sm-cronjobs/sm-harvester-esa-website/pom.xml
index 92b44577..15a9d6ed 100644
--- a/sm-apps/sm-cronjobs/sm-harvester-esa-website/pom.xml
+++ b/sm-apps/sm-cronjobs/sm-harvester-esa-website/pom.xml
@@ -1,12 +1,19 @@
4.0.0
org.wikimedia.commons.donvip
sm-cronjobs
0.5.0-SNAPSHOT
sm-harvester-esa-website
Harvests media from ESA (European Space Agency) website
+
+
+ org.jsoup
+ jsoup
+
+
+
diff --git a/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/java/org/wikimedia/commons/donvip/spacemedia/harvester/esa/website/EsaWebsiteApplication.java b/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/java/org/wikimedia/commons/donvip/spacemedia/harvester/esa/website/EsaWebsiteApplication.java
new file mode 100644
index 00000000..e4c3d4d4
--- /dev/null
+++ b/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/java/org/wikimedia/commons/donvip/spacemedia/harvester/esa/website/EsaWebsiteApplication.java
@@ -0,0 +1,12 @@
+package org.wikimedia.commons.donvip.spacemedia.harvester.esa.website;
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.wikimedia.commons.donvip.spacemedia.core.AbstractHarvestingApplication;
+
+/**
+ * Spring Boot entry point of the ESA website harvester.
+ * <p>
+ * The class carries no logic of its own: everything it does beyond bootstrapping Spring is
+ * inherited from {@link AbstractHarvestingApplication}.
+ */
+@SpringBootApplication
+public class EsaWebsiteApplication extends AbstractHarvestingApplication {
+
+    /**
+     * Boots the Spring application context using this class as primary configuration source.
+     *
+     * @param args command line arguments, handed over to Spring Boot untouched
+     */
+    public static void main(String[] args) {
+        final SpringApplication application = new SpringApplication(EsaWebsiteApplication.class);
+        application.run(args);
+    }
+}
diff --git a/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/java/org/wikimedia/commons/donvip/spacemedia/harvester/esa/website/service/EsaWebsiteScrappingService.java b/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/java/org/wikimedia/commons/donvip/spacemedia/harvester/esa/website/service/EsaWebsiteScrappingService.java
new file mode 100644
index 00000000..bc78342e
--- /dev/null
+++ b/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/java/org/wikimedia/commons/donvip/spacemedia/harvester/esa/website/service/EsaWebsiteScrappingService.java
@@ -0,0 +1,65 @@
+package org.wikimedia.commons.donvip.spacemedia.harvester.esa.website.service;
+
+import java.io.IOException;
+import java.net.SocketTimeoutException;
+import java.net.URL;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+import org.wikimedia.commons.donvip.spacemedia.core.Harvester;
+
+@Service
+public class EsaWebsiteScrappingService implements Harvester {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(EsaWebsiteScrappingService.class);
+
+ @Value("${esa.search.link}")
+ private String searchLink;
+
+ @Value("${esa.max.tries}")
+ private int maxTries;
+
+ @Value("${esa.date.pattern}")
+ private String datePattern;
+
+ @Override
+ public void harvestMedia() throws IOException {
+ final URL url = new URL(searchLink);
+ final String proto = url.getProtocol();
+ final String host = url.getHost();
+ boolean moreImages = true;
+ int index = 0;
+ do {
+ String searchUrl = searchLink.replace("", Integer.toString(index));
+ try {
+ boolean ok = false;
+ for (int i = 0; i < maxTries && !ok; i++) {
+ try {
+ LOGGER.debug("Fetching ESA images: {}", searchUrl);
+ Document html = Jsoup.connect(searchUrl).timeout(15_000).get();
+ Elements divs = html.getElementsByClass("grid-item");
+ for (Element div : divs) {
+ URL imageUrl = new URL(proto, host, div.select("a").get(0).attr("href"));
+ index++;
+ LOGGER.debug("Checking ESA image {}: {}", index, imageUrl);
+ }
+ moreImages = !html.getElementsByClass("paging").get(0)
+ .getElementsByAttributeValue("title", "Next").isEmpty();
+ ok = true;
+ } catch (SocketTimeoutException e) {
+ LOGGER.debug(searchUrl, e);
+ }
+ }
+ } catch (IOException | RuntimeException e) {
+ LOGGER.error(searchUrl, e);
+ moreImages = false;
+ }
+ } while (moreImages);
+ }
+}
diff --git a/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/resources/application-dev.properties b/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/resources/application-dev.properties
new file mode 100644
index 00000000..5e7173ba
--- /dev/null
+++ b/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/resources/application-dev.properties
@@ -0,0 +1 @@
+logging.level.org.wikimedia.commons.donvip.spacemedia=DEBUG
diff --git a/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/resources/application.properties b/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/resources/application.properties
new file mode 100644
index 00000000..684e91d0
--- /dev/null
+++ b/sm-apps/sm-cronjobs/sm-harvester-esa-website/src/main/resources/application.properties
@@ -0,0 +1,24 @@
+# Spring
+
+spring.jackson.default-property-inclusion=non_empty
+spring.jackson.property-naming-strategy=SNAKE_CASE
+
+spring.jpa.defer-datasource-initialization=true
+spring.jpa.hibernate.ddl-auto=update
+spring.jpa.open-in-view=false
+spring.jpa.show-sql=false
+spring.jpa.properties.hibernate.format_sql=false
+
+# Local database for development
+domain.datasource.url=jdbc:mariadb://localhost:3306/root_spacemedia
+domain.datasource.username=root
+domain.datasource.password=
+domain.datasource.driver-class-name=org.mariadb.jdbc.Driver
+domain.datasource.hikari.maximum-pool-size=2
+domain.datasource.hikari.max-lifetime=300000
+domain.datasource.hikari.connectionInitSql = SET NAMES 'utf8mb4'
+
+# ESA
+esa.search.link = https://www.esa.int/ESA_Multimedia/Search/(offset)//(sortBy)/published?SearchText=by-sa
+esa.max.tries = 5
+esa.date.pattern = dd/MM/yyyy
diff --git a/sm-apps/pom.xml b/sm-core/pom.xml
similarity index 61%
copy from sm-apps/pom.xml
copy to sm-core/pom.xml
index 07463c77..f934274b 100644
--- a/sm-apps/pom.xml
+++ b/sm-core/pom.xml
@@ -1,37 +1,34 @@
4.0.0
org.wikimedia.commons.donvip
spacemedia
0.5.0-SNAPSHOT
- sm-apps
- pom
- Applications
-
-
- sm-backend
- sm-cronjobs
- sm-webapp
-
+ sm-core
+ Core application classes
org.wikimedia.commons.donvip
sm-data
- ${project.version}
+ ${project.version}
- org.springframework.boot
- spring-boot-starter
+ org.springframework
+ spring-beans
+
+
+ org.springframework
+ spring-context
- org.mariadb.jdbc
- mariadb-java-client
+ jakarta.annotation
+ jakarta.annotation-api
diff --git a/sm-core/src/main/java/org/wikimedia/commons/donvip/spacemedia/core/AbstractHarvestingApplication.java b/sm-core/src/main/java/org/wikimedia/commons/donvip/spacemedia/core/AbstractHarvestingApplication.java
new file mode 100644
index 00000000..847d5d45
--- /dev/null
+++ b/sm-core/src/main/java/org/wikimedia/commons/donvip/spacemedia/core/AbstractHarvestingApplication.java
@@ -0,0 +1,21 @@
+package org.wikimedia.commons.donvip.spacemedia.core;
+
+import java.io.IOException;
+
+import javax.annotation.PostConstruct;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.annotation.Import;
+import org.wikimedia.commons.donvip.spacemedia.data.DomainDbConfiguration;
+
+/**
+ * Base class of the harvesting applications.
+ * <p>
+ * It imports {@link DomainDbConfiguration} into the application context and runs the injected
+ * {@link Harvester} from its {@code @PostConstruct} callback.
+ */
+@Import(DomainDbConfiguration.class)
+public abstract class AbstractHarvestingApplication {
+
+ // Injected by type. NOTE(review): presumably each application exposes exactly one Harvester
+ // bean - confirm.
+ @Autowired
+ private Harvester harvester;
+
+ /**
+  * Lifecycle callback that starts the harvest by calling {@link Harvester#harvestMedia()}.
+  *
+  * @throws IOException if the harvester throws it
+  */
+ @PostConstruct
+ public void init() throws IOException {
+ harvester.harvestMedia();
+ }
+}
diff --git a/sm-core/src/main/java/org/wikimedia/commons/donvip/spacemedia/core/Harvester.java b/sm-core/src/main/java/org/wikimedia/commons/donvip/spacemedia/core/Harvester.java
new file mode 100644
index 00000000..65c0bb5f
--- /dev/null
+++ b/sm-core/src/main/java/org/wikimedia/commons/donvip/spacemedia/core/Harvester.java
@@ -0,0 +1,8 @@
+package org.wikimedia.commons.donvip.spacemedia.core;
+
+import java.io.IOException;
+
+/**
+ * Contract of a component able to harvest media.
+ */
+public interface Harvester {
+
+ /**
+  * Harvests media.
+  *
+  * @throws IOException if the implementation reports an I/O error
+  */
+ void harvestMedia() throws IOException;
+}