diff --git a/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsDbConfiguration.java b/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsDbConfiguration.java
index 779c092b..edeecd82 100644
--- a/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsDbConfiguration.java
+++ b/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsDbConfiguration.java
@@ -1,47 +1,49 @@
package org.wikimedia.commons.donvip.spacemedia.data.commons;
+import javax.persistence.EntityManagerFactory;
import javax.sql.DataSource;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.boot.orm.jpa.EntityManagerFactoryBuilder;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
-import org.springframework.jdbc.datasource.DataSourceTransactionManager;
+import org.springframework.orm.jpa.JpaTransactionManager;
import org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean;
import org.springframework.transaction.PlatformTransactionManager;
import org.springframework.transaction.annotation.EnableTransactionManagement;
/**
 * Spring configuration of the JPA persistence unit named {@code commons}.
 * <p>
 * Declares a dedicated data source, entity manager factory and transaction
 * manager, and wires them into the JPA repositories enabled from this class's
 * package (see {@code basePackageClasses}).
 */
@Configuration
@EnableTransactionManagement
@EnableJpaRepositories(
entityManagerFactoryRef = "commonsEntityManagerFactory",
transactionManagerRef = "commonsTransactionManager",
basePackageClasses = {CommonsDbConfiguration.class})
public class CommonsDbConfiguration {
/**
 * Connection settings bound from the {@code commons.datasource} properties.
 *
 * @return a new {@link DataSourceProperties} instance
 */
@Bean(name = "commonsDataSourceProperties")
@ConfigurationProperties("commons.datasource")
public DataSourceProperties dataSourceProperties() {
return new DataSourceProperties();
}
/**
 * Data source built from {@link #dataSourceProperties()}; the resulting bean is
 * additionally bound to the {@code commons.datasource.hikari} properties.
 *
 * @return the data source of the {@code commons} persistence unit
 */
@Bean(name = "commonsDataSource")
@ConfigurationProperties("commons.datasource.hikari")
public DataSource dataSource() {
return dataSourceProperties().initializeDataSourceBuilder().build();
}
/**
 * Entity manager factory scanning this class's package for entities.
 *
 * @param builder    Spring Boot entity manager factory builder
 * @param dataSource the {@code commonsDataSource} bean
 * @return the factory bean of the {@code commons} persistence unit
 */
@Bean(name = "commonsEntityManagerFactory")
public LocalContainerEntityManagerFactoryBean entityManagerFactory(EntityManagerFactoryBuilder builder,
@Qualifier("commonsDataSource") DataSource dataSource) {
return builder.dataSource(dataSource).packages(getClass().getPackage().getName()).persistenceUnit("commons").build();
}
/**
 * Transaction manager referenced by {@code transactionManagerRef} above. The
 * change below switches it from a plain JDBC transaction manager to a JPA one
 * built on the {@code commonsEntityManagerFactory} bean.
 */
@Bean(name = "commonsTransactionManager")
- public PlatformTransactionManager transactionManager() {
- return new DataSourceTransactionManager(dataSource());
+ public PlatformTransactionManager transactionManager(
+ @Qualifier("commonsEntityManagerFactory") EntityManagerFactory entityManagerFactory) {
+ return new JpaTransactionManager(entityManagerFactory);
}
}
diff --git a/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsPageProp.java b/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsPageProp.java
index 05993769..1fa38ce8 100644
--- a/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsPageProp.java
+++ b/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsPageProp.java
@@ -1,103 +1,104 @@
package org.wikimedia.commons.donvip.spacemedia.data.commons;
import java.io.Serializable;
import java.util.Objects;
import javax.persistence.Column;
import javax.persistence.Entity;
+import javax.persistence.FetchType;
import javax.persistence.Id;
import javax.persistence.JoinColumn;
import javax.persistence.Lob;
import javax.persistence.ManyToOne;
import javax.persistence.Table;
/**
* Mediawiki
* page_props table
- *
+ *
*
* +-------------+---------------+------+-----+---------+-------+
* | Field | Type | Null | Key | Default | Extra |
* +-------------+---------------+------+-----+---------+-------+
* | pp_page | int(11) | NO | PRI | NULL | |
* | pp_propname | varbinary(60) | NO | PRI | NULL | |
* | pp_value | blob | NO | | NULL | |
* | pp_sortkey | float | YES | | NULL | |
* +-------------+---------------+------+-----+---------+-------+
*
*/
@Entity
@Table(name = "page_props")
public class CommonsPageProp implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Owning page ({@code pp_page}); one half of the composite identifier. */
    @Id
- @ManyToOne
+ @ManyToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "pp_page", nullable = false)
    private CommonsPage page;

    /** Property name ({@code pp_propname}); other half of the composite identifier. */
    @Id
    @Column(name = "pp_propname", nullable = false, length = 60, columnDefinition = "VARBINARY")
    private String propname;

    /** Raw property value ({@code pp_value}). */
    @Lob
    @Column(name = "pp_value", nullable = false, columnDefinition = "BLOB")
    private byte[] value;

    /** Optional sort key ({@code pp_sortkey}). */
    @Column(name = "pp_sortkey", nullable = true)
    private Float sortkey;

    public CommonsPage getPage() {
        return page;
    }

    public void setPage(CommonsPage page) {
        this.page = page;
    }

    public String getPropname() {
        return propname;
    }

    public void setPropname(String propname) {
        this.propname = propname;
    }

    public byte[] getValue() {
        return value;
    }

    public void setValue(byte[] value) {
        this.value = value;
    }

    public Float getSortkey() {
        return sortkey;
    }

    public void setSortkey(Float sortkey) {
        this.sortkey = sortkey;
    }

    /** Hash based on the two identifier attributes only, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return Objects.hash(page, propname);
    }

    /** Two properties are equal when they have the same page and the same property name. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null || getClass() != obj.getClass())
            return false;
        CommonsPageProp other = (CommonsPageProp) obj;
        return Objects.equals(page, other.page) && Objects.equals(propname, other.propname);
    }

    /**
     * Returns a debug representation. The page is deliberately left out, as in
     * the original; the value bytes are rendered with
     * {@link java.util.Arrays#toString(byte[])} because concatenating a
     * {@code byte[]} directly only prints its identity (e.g. {@code [B@1b2c3d}).
     */
    @Override
    public String toString() {
        return "CommonsPageProp [" + (propname != null ? "propname=" + propname + ", " : "")
                + (value != null ? "value=" + java.util.Arrays.toString(value) + ", " : "")
                + "sortkey=" + sortkey + "]";
    }
}
diff --git a/src/main/java/org/wikimedia/commons/donvip/spacemedia/service/CommonsService.java b/src/main/java/org/wikimedia/commons/donvip/spacemedia/service/CommonsService.java
index 3d94ca8f..08e60a15 100644
--- a/src/main/java/org/wikimedia/commons/donvip/spacemedia/service/CommonsService.java
+++ b/src/main/java/org/wikimedia/commons/donvip/spacemedia/service/CommonsService.java
@@ -1,572 +1,572 @@
package org.wikimedia.commons.donvip.spacemedia.service;
import static java.time.LocalDateTime.now;
import static java.time.temporal.ChronoUnit.SECONDS;
import java.io.IOException;
import java.math.BigInteger;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import javax.annotation.Resource;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsCategoryLinkId;
import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsCategoryLinkRepository;
import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsCategoryLinkType;
import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsCategoryRepository;
import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsImage;
import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsImageRepository;
import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsOldImage;
import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsOldImageRepository;
import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsPage;
import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsPageRepository;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.FileArchive;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.FileArchiveQuery;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.FileArchiveQueryResponse;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.Limit;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.MetaQueryResponse;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.Revision;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.RevisionsPage;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.RevisionsQueryResponse;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.Slot;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.Tokens;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.UploadApiResponse;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.UploadError;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.UploadResponse;
import org.wikimedia.commons.donvip.spacemedia.data.commons.api.UserInfo;
import org.wikimedia.commons.donvip.spacemedia.exception.CategoryNotFoundException;
import org.wikimedia.commons.donvip.spacemedia.exception.CategoryPageNotFoundException;
import org.wikimedia.commons.donvip.spacemedia.utils.Utils;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.scribejava.apis.MediaWikiApi;
import com.github.scribejava.core.builder.ServiceBuilder;
import com.github.scribejava.core.model.OAuth1AccessToken;
import com.github.scribejava.core.model.OAuthRequest;
import com.github.scribejava.core.model.Verb;
import com.github.scribejava.core.oauth.OAuth10aService;
@Service
public class CommonsService {
private static final Logger LOGGER = LoggerFactory.getLogger(CommonsService.class);
private static final Pattern EXACT_DUPE_ERROR = Pattern.compile(
"The upload is an exact duplicate of the current version of \\[\\[:File:(.+)\\]\\]\\.");
/**
* Minimal delay between successive uploads, in seconds.
*/
private static final int DELAY = 5;
@Autowired
private CommonsImageRepository imageRepository;
@Autowired
private CommonsOldImageRepository oldImageRepository;
@Autowired
private CommonsCategoryRepository categoryRepository;
@Autowired
private CommonsPageRepository pageRepository;
@Autowired
private CommonsCategoryLinkRepository categoryLinkRepository;
@Autowired
private ObjectMapper jackson;
/**
* Self-autowiring to call {@link Cacheable} methods, otherwise the cache is
* skipped. Spring cache is only triggered on external calls.
*/
@Resource
private CommonsService self;
@Value("${commons.api.url}")
private URL apiUrl;
@Value("${commons.api.rest.url}")
private URL restApiUrl;
@Value("${commons.cat.search.depth}")
private int catSearchDepth;
@Value("${commons.img.preview.width}")
private int imgPreviewWidth;
private final String account;
private final String userAgent;
private final OAuth10aService oAuthService;
private final OAuth1AccessToken oAuthAccessToken;
private UserInfo userInfo;
private String token;
private LocalDateTime lastUpload;
public CommonsService(
@Value("${application.version}") String appVersion,
@Value("${application.contact}") String appContact,
@Value("${flickr4java.version}") String flickr4javaVersion,
@Value("${spring-boot.version}") String bootVersion,
@Value("${scribejava.version}") String scribeVersion,
@Value("${commons.api.account}") String apiAccount,
@Value("${commons.api.oauth1.consumer-token}") String consumerToken,
@Value("${commons.api.oauth1.consumer-secret}") String consumerSecret,
@Value("${commons.api.oauth1.access-token}") String accessToken,
@Value("${commons.api.oauth1.access-secret}") String accessSecret
) {
account = apiAccount;
// Comply to Wikimedia User-Agent Policy: https://meta.wikimedia.org/wiki/User-Agent_policy
if (!account.toLowerCase(Locale.ENGLISH).contains("bot")) {
throw new IllegalArgumentException("Bot account must include 'bot' in its name!");
}
userAgent = String.format("%s/%s (%s - %s) %s/%s %s/%s %s/%s",
"Spacemedia", appVersion, appContact, apiAccount, "SpringBoot", bootVersion, "ScribeJava",
scribeVersion, "Flickr4Java", flickr4javaVersion);
oAuthService = new ServiceBuilder(consumerToken).apiSecret(consumerSecret).build(MediaWikiApi.instance());
oAuthAccessToken = new OAuth1AccessToken(accessToken, accessSecret);
}
/**
 * Runs once after construction: identifies the account against the API, logs
 * its upload rights and rate limit, then fetches the CSRF token.
 *
 * @throws IOException if an API call fails
 */
@PostConstruct
public void init() throws IOException {
    userInfo = queryUserInfo();
    LOGGER.info("Identified to Wikimedia Commons API as {}", userInfo.getName());
    boolean canUpload = hasUploadRight() || hasUploadByUrlRight();
    if (!canUpload) {
        LOGGER.warn("Wikimedia Commons user account has no upload right!");
    }
    Limit userUploadLimit = userInfo.getRateLimits().getUpload().getUser();
    LOGGER.info("Upload rate limited to {} hits every {} seconds.",
            userUploadLimit.getHits(), userUploadLimit.getSeconds());
    // Fetch CSRF token, mandatory for upload using the Mediawiki API
    Tokens tokens = queryTokens();
    token = tokens.getCsrftoken();
}
/** Tells whether the identified account holds the {@code upload_by_url} right. */
private boolean hasUploadByUrlRight() {
    final String right = "upload_by_url";
    return userInfo.getRights().contains(right);
}
/** Tells whether the identified account holds the {@code upload} right. */
private boolean hasUploadRight() {
    final String right = "upload";
    return userInfo.getRights().contains(right);
}
/**
 * Finds the names of files whose content matches the given SHA-1 digest.
 * <p>
 * Sources are tried in order and the search stops at the first one returning
 * something: current images, then old image versions, then the file archive
 * queried through the API.
 *
 * @param sha1 hexadecimal SHA-1 digest of the file contents
 * @return the matching file names, possibly empty
 * @throws IOException if the file archive API call fails
 */
public Set<String> findFilesWithSha1(String sha1) throws IOException {
    // See https://www.mediawiki.org/wiki/Manual:Image_table#img_sha1
    // The SHA-1 hash of the file contents in base 36 format, zero-padded to 31 characters
    String sha1base36 = String.format("%31s", new BigInteger(sha1, 16).toString(36)).replace(' ', '0');
    // Collect into an explicit HashSet: Collectors.toSet() does not guarantee a
    // mutable result, and the set is extended below.
    Set<String> files = imageRepository.findBySha1(sha1base36).stream().map(CommonsImage::getName)
            .collect(Collectors.toCollection(HashSet::new));
    if (files.isEmpty()) {
        files.addAll(oldImageRepository.findBySha1(sha1base36).stream().map(CommonsOldImage::getName)
                .collect(Collectors.toSet()));
    }
    if (files.isEmpty()) {
        files.addAll(queryFileArchive(sha1base36).stream().map(FileArchive::getName)
                .collect(Collectors.toSet()));
    }
    return files;
}
/**
 * Requests the {@code tokens} meta information from the API.
 *
 * @return the tokens found in the query response
 * @throws IOException if the API call fails
 */
public synchronized Tokens queryTokens() throws IOException {
    MetaQueryResponse response = apiHttpGet("?action=query&meta=tokens", MetaQueryResponse.class);
    return response.getQuery().getTokens();
}
/**
 * Requests the {@code userinfo} meta information (block info, groups, rights
 * and rate limits) from the API.
 *
 * @return the user info found in the query response
 * @throws IOException if the API call fails
 */
public UserInfo queryUserInfo() throws IOException {
    String path = "?action=query&meta=userinfo&uiprop=blockinfo|groups|rights|ratelimits";
    MetaQueryResponse response = apiHttpGet(path, MetaQueryResponse.class);
    return response.getQuery().getUserInfo();
}
/**
 * Lists file archive entries matching a base-36 SHA-1 digest.
 *
 * @param sha1base36 SHA-1 digest in base 36, as sent in {@code fasha1base36}
 * @return the archive entries, or an empty list if the response has no query part
 * @throws IOException if the API call fails
 */
public List<FileArchive> queryFileArchive(String sha1base36) throws IOException {
    FileArchiveQueryResponse response = apiHttpGet(
            "?action=query&list=filearchive&fasha1base36=" + sha1base36, FileArchiveQueryResponse.class);
    FileArchiveQuery query = response.getQuery();
    if (query == null) {
        return Collections.emptyList();
    }
    return query.getFilearchive();
}
/**
 * Performs a GET on the API URL, appending {@code &format=json} to the path.
 *
 * @param <T>           type the JSON response is mapped to
 * @param path          query string, starting with {@code ?}
 * @param responseClass class of the expected response
 * @return the deserialized response
 * @throws IOException if the call fails
 */
private <T> T apiHttpGet(String path, Class<T> responseClass) throws IOException {
    return httpGet(apiUrl.toExternalForm() + path + "&format=json", responseClass);
}
/**
 * Performs a POST on the API URL with the given form parameters.
 *
 * @param <T>           type the JSON response is mapped to
 * @param params        request parameters
 * @param responseClass class of the expected response
 * @return the deserialized response
 * @throws IOException if the call fails
 */
private <T> T apiHttpPost(Map<String, String> params, Class<T> responseClass) throws IOException {
    return httpPost(apiUrl.toExternalForm(), responseClass, params);
}
/**
 * Performs a GET without extra headers or parameters, retrying once on timeout.
 *
 * @param <T>           type the JSON response is mapped to
 * @param url           full request URL
 * @param responseClass class of the expected response
 * @return the deserialized response
 * @throws IOException if the call fails
 */
private <T> T httpGet(String url, Class<T> responseClass) throws IOException {
    return httpCall(Verb.GET, url, responseClass, Collections.emptyMap(), Collections.emptyMap(), true);
}
/**
 * Performs a form-encoded POST, retrying once on timeout.
 *
 * @param <T>           type the JSON response is mapped to
 * @param url           full request URL
 * @param responseClass class of the expected response
 * @param params        request parameters
 * @return the deserialized response
 * @throws IOException if the call fails
 */
private <T> T httpPost(String url, Class<T> responseClass, Map<String, String> params) throws IOException {
    return httpCall(Verb.POST, url, responseClass,
            Map.of("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"), params, true);
}
/**
 * Builds, signs and executes an OAuth request, then maps the JSON body to
 * {@code responseClass}.
 *
 * @param <T>            type the JSON response is mapped to
 * @param verb           HTTP verb
 * @param url            full request URL
 * @param responseClass  class of the expected response
 * @param headers        extra request headers (User-Agent is always added)
 * @param params         request parameters
 * @param retryOnTimeout if {@code true}, a socket timeout triggers exactly one retry
 * @return the deserialized response
 * @throws IOException if the call fails, times out without retry left, or the
 *                     thread is interrupted while waiting
 */
private <T> T httpCall(Verb verb, String url, Class<T> responseClass, Map<String, String> headers,
        Map<String, String> params, boolean retryOnTimeout) throws IOException {
    OAuthRequest request = new OAuthRequest(verb, url);
    request.setCharset(StandardCharsets.UTF_8.name());
    params.forEach(request::addParameter);
    headers.forEach(request::addHeader);
    request.addHeader("User-Agent", userAgent);
    oAuthService.signRequest(oAuthAccessToken, request);
    try {
        return jackson.readValue(oAuthService.execute(request).getBody(), responseClass);
    } catch (SocketTimeoutException e) {
        if (retryOnTimeout) {
            // Second attempt is made with retry disabled, so at most one retry happens
            return httpCall(verb, url, responseClass, headers, params, false);
        }
        throw e;
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers up the stack can still observe it
        Thread.currentThread().interrupt();
        throw new IOException(e);
    } catch (ExecutionException e) {
        throw new IOException(e);
    }
}
/**
 * Fetches the content of the {@code main} slot of the revision returned for a
 * page (the request asks for a single revision).
 *
 * @param pageId identifier of the page
 * @return the slot content, or {@code null} (after logging an error) if the
 *         page, its revisions or its main slot cannot be found
 * @throws IOException if the API call fails
 */
public String queryRevisionContent(int pageId) throws IOException {
    RevisionsQueryResponse response = apiHttpGet(
            "?action=query&prop=revisions&rvprop=content&rvslots=main&rvlimit=1&pageids=" + pageId,
            RevisionsQueryResponse.class);
    RevisionsPage rp = response.getQuery().getPages().get(pageId);
    if (rp != null) {
        List<Revision> revisions = rp.getRevisions();
        if (CollectionUtils.isNotEmpty(revisions)) {
            Map<String, Slot> slots = revisions.get(0).getSlots();
            if (MapUtils.isNotEmpty(slots)) {
                Slot main = slots.get("main");
                if (main != null) {
                    return main.getContent();
                }
            }
        }
    }
    LOGGER.error("Couldn't find page content for {}: {}", pageId, rp);
    return null;
}
/**
 * Asks the API {@code visualeditor} action to parse wikicode and returns the
 * resulting content.
 *
 * @param wikiCode  wikicode to render
 * @param pageTitle title of the page the wikicode belongs to
 * @return the content of the visual editor response
 * @throws IOException              if the API call fails
 * @throws IllegalArgumentException if the response result is not {@code success}
 */
public String getWikiHtmlPreview(String wikiCode, String pageTitle) throws IOException {
    VeApiResponse response = apiHttpPost(Map.of(
            "action", "visualeditor",
            "format", "json",
            "formatversion", "2",
            "paction", "parsedoc",
            "page", pageTitle,
            "wikitext", wikiCode,
            "pst", "true"
    ), VeApiResponse.class);
    VisualEditorResponse visualEditor = response.getVisualeditor();
    if ("success".equals(visualEditor.getResult())) {
        return visualEditor.getContent();
    }
    throw new IllegalArgumentException(visualEditor.toString());
}
/**
 * Renders wikicode to HTML and decorates the result for preview: a linked
 * image is inserted at the top of the body and the category links found in the
 * last section are turned into "Categories" / "Hidden categories" lists.
 *
 * @param wikiCode  wikicode to render
 * @param pageTitle title of the page the wikicode belongs to
 * @param imgUrl    URL used both as link target and image source
 * @return the decorated HTML document, serialized as a string
 * @throws IOException if the underlying API call fails
 */
public String getWikiHtmlPreview(String wikiCode, String pageTitle, String imgUrl) throws IOException {
Document doc = Jsoup.parse(getWikiHtmlPreview(wikiCode, pageTitle));
Element body = doc.getElementsByTag("body").get(0);
// Display image
Element imgLink = Utils.prependChildElement(body, "a", null, Map.of("href", imgUrl));
Utils.appendChildElement(imgLink, "img", null,
Map.of("src", imgUrl, "width", Integer.toString(imgPreviewWidth)));
// Display categories
// NOTE(review): lastSection is dereferenced unconditionally below; presumably the
// rendered HTML always contains at least one <section> - confirm.
Element lastSection = body.getElementsByTag("section").last();
Element catLinksDiv = Utils.appendChildElement(lastSection, "div", null,
Map.of("id", "catlinks", "class", "catlinks", "data-mw", "interface"));
Element normalCatLinksDiv = Utils.appendChildElement(catLinksDiv, "div", null,
Map.of("id", "mw-normal-catlinks", "class", "mw-normal-catlinks"));
Utils.appendChildElement(normalCatLinksDiv, "a", "Categories",
Map.of("href", "https://commons.wikimedia.org/wiki/Special:Categories", "title", "Special:Categories"));
normalCatLinksDiv.appendText(": ");
Element normalCatLinksList = new Element("ul");
normalCatLinksDiv.appendChild(normalCatLinksList);
Element hiddenCatLinksList = new Element("ul");
Utils.appendChildElement(catLinksDiv, "div", "Hidden categories: ",
Map.of("id", "mw-hidden-catlinks", "class", "mw-hidden-catlinks mw-hidden-cats-user-shown"))
.appendChild(hiddenCatLinksList);
// Each <link> of the last section is converted to a list item, then removed
for (Element link : lastSection.getElementsByTag("link")) {
// Strip the "#<page title>" fragment and the "./Category:" prefix from the href
String category = link.attr("href").replace("#" + pageTitle.replace(" ", "%20"), "").replace("./Category:", "");
String href = "https://commons.wikimedia.org/wiki/Category:" + category;
// Defaults to the normal list when the category lookup fails
Element list = normalCatLinksList;
try {
// Goes through the self reference so that the cached method is used
list = self.isHiddenCategory(category) ? hiddenCatLinksList : normalCatLinksList;
} catch (CategoryNotFoundException | CategoryPageNotFoundException e) {
LOGGER.warn("Category/page not found: {}", e.getMessage());
}
Element item = new Element("li");
list.appendChild(item);
Utils.appendChildElement(item, "a", sanitizeCategory(category),
Map.of("href", href, "title", "Category:" + category));
link.remove();
}
return doc.toString();
}
/** Top-level API response envelope holding the {@code visualeditor} member. */
static class VeApiResponse {

    /** Payload of the visual editor action. */
    private VisualEditorResponse visualeditor;

    public void setVisualeditor(VisualEditorResponse visualeditor) {
        this.visualeditor = visualeditor;
    }

    public VisualEditorResponse getVisualeditor() {
        return this.visualeditor;
    }
}
/** Payload of the visual editor action: a result status and the produced content. */
static class VisualEditorResponse {

    /** Status reported by the API. */
    private String result;

    /** Content produced by the API. */
    private String content;

    public void setResult(String result) {
        this.result = result;
    }

    public String getResult() {
        return this.result;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String getContent() {
        return this.content;
    }

    /** Lists the non-null members only; the result is followed by a comma and a space. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("VisualEditorResponse [");
        if (result != null) {
            text.append("result=").append(result).append(", ");
        }
        if (content != null) {
            text.append("content=").append(content);
        }
        return text.append("]").toString();
    }
}
/**
 * Gives the name of the bot account used against the API. The same name is
 * used for the User-Agent and for Commons categories.
 *
 * @return the API bot account name
 */
public String getAccount() {
    return this.account;
}
/**
* Determines if a Commons category is hidden, using the special
* {@code __HIDDENCAT__} behavior switch. See documentation.
*
* @param category category to check
* @return {@code true} if the category is hidden
* @throws CategoryNotFoundException if the category is not found
* @throws CategoryPageNotFoundException if no page is found for the category
*/
- @Transactional
+ @Transactional(transactionManager = "commonsTransactionManager")
@Cacheable("hiddenCategories")
public boolean isHiddenCategory(String category) {
    // Looked up through the self reference so that the cached method is used
    CommonsPage categoryPage = self.getCategoryPage(category);
    return categoryPage.getProps().stream()
            .anyMatch(prop -> "hiddencat".equals(prop.getPropname()));
}
/**
 * Tells whether a Commons category exists and is not a redirect.
 *
 * @param category category to check
 * @return {@code true} if the category page is found and has no redirect,
 *         {@code false} if it redirects or if the category or its page is missing
 */
@Cacheable("upToDateCategories")
public boolean isUpToDateCategory(String category) {
    try {
        CommonsPage categoryPage = self.getCategoryPage(category);
        return categoryPage.getRedirect() == null;
    } catch (CategoryNotFoundException | CategoryPageNotFoundException e) {
        // A missing category or page is simply reported as not up to date
        return false;
    }
}
/**
 * Returns the categories that are missing or are redirects.
 *
 * @param categories category strings; each may hold several names separated by {@code ;}
 * @return the non-empty names for which {@link #isUpToDateCategory(String)} is {@code false}
 */
public Set<String> findNonUpToDateCategories(Collection<String> categories) {
    return categories.parallelStream()
            .flatMap(s -> Arrays.stream(s.split(";")))
            .filter(c -> !c.isEmpty() && !self.isUpToDateCategory(c))
            .collect(Collectors.toSet());
}
/**
 * Normalizes a category name: spaces become underscores and everything from
 * the first {@code #} onwards is dropped.
 * <p>
 * Uses {@code indexOf} instead of {@code split("#")[0]}: splitting a string
 * made only of {@code #} characters yields an empty array, so indexing it
 * throws {@link ArrayIndexOutOfBoundsException}.
 *
 * @param category raw category name
 * @return the normalized name, possibly empty
 */
private static String sanitizeCategory(String category) {
    String underscored = category.replace(' ', '_');
    int fragmentStart = underscored.indexOf('#');
    return fragmentStart < 0 ? underscored : underscored.substring(0, fragmentStart);
}
/**
 * Finds the page of a category.
 *
 * @param category category name; it is sanitized before lookup
 * @return the category page
 * @throws CategoryNotFoundException     if the category is not found
 * @throws CategoryPageNotFoundException if no page is found for the category
 */
@Cacheable("categoryPages")
public CommonsPage getCategoryPage(String category) {
    String title = categoryRepository.findByTitle(sanitizeCategory(category))
            .orElseThrow(() -> new CategoryNotFoundException(category))
            .getTitle();
    return pageRepository.findByCategoryTitle(title)
            .orElseThrow(() -> new CategoryPageNotFoundException(category));
}
- @Transactional
+ @Transactional(transactionManager = "commonsTransactionManager")
@Cacheable("subCategories")
public Set<String> getSubCategories(String category) {
    // Titles of the pages linked to the (sanitized) category as sub-categories
    return categoryLinkRepository
            .findIdByTypeAndIdTo(CommonsCategoryLinkType.subcat, sanitizeCategory(category)).stream()
            .map(c -> c.getFrom().getTitle()).collect(Collectors.toSet());
}
/**
 * Collects the sub-categories of a category down to the given depth.
 *
 * @param category category to explore
 * @param depth    number of extra levels to descend; {@code 0} returns the
 *                 direct sub-categories only
 * @return the sanitized names of all sub-categories found
 */
@Cacheable("subCategoriesByDepth")
public Set<String> getSubCategories(String category, int depth) {
    LocalDateTime start = now();
    LOGGER.debug("Fetching '{}' subcategories with depth {}...", category, depth);
    Set<String> subcats = self.getSubCategories(category);
    // Concurrent set: it is filled from a parallel stream below
    Set<String> result = subcats.stream().map(CommonsService::sanitizeCategory)
            .collect(Collectors.toCollection(ConcurrentHashMap::newKeySet));
    if (depth > 0) {
        subcats.parallelStream().forEach(s -> result.addAll(self.getSubCategories(s, depth - 1)));
    }
    // Duration.between(start, end): the reversed order logged a negative duration
    LOGGER.debug("Fetching '{}' subcategories with depth {} completed in {}", category, depth,
            Duration.between(start, now()));
    return result;
}
- @Transactional
+ @Transactional(transactionManager = "commonsTransactionManager")
@Cacheable("filesInCategory")
public Set<CommonsCategoryLinkId> getFilesInCategory(String category) {
    // Identifiers of the file links pointing to the (sanitized) category
    return categoryLinkRepository
            .findIdByTypeAndIdTo(CommonsCategoryLinkType.file, sanitizeCategory(category));
}
- @Transactional
+ @Transactional(transactionManager = "commonsTransactionManager")
@Cacheable("filesPageInCategory")
public Page<CommonsCategoryLinkId> getFilesInCategory(String category, Pageable page) {
    // Paged variant: identifiers of the file links pointing to the (sanitized) category
    return categoryLinkRepository
            .findIdByTypeAndIdTo(CommonsCategoryLinkType.file, sanitizeCategory(category), page);
}
/**
 * Fetches the revision content of a page, looked up by its identifier.
 *
 * @param page page to read
 * @return the content, or {@code null} if it cannot be found
 * @throws IOException if the API call fails
 */
public String getPageContent(CommonsPage page) throws IOException {
    final int pageId = page.getId();
    return queryRevisionContent(pageId);
}
/**
 * Reduces a set of categories to the most specific ones and adds the review
 * category.
 * <p>
 * First, a category is removed from {@code categories} when another category
 * contains its lower-cased, singular form between parentheses. Then only the
 * categories having none of the remaining ones among their sub-categories
 * (searched down to the configured depth) are kept.
 * <p>
 * Note that the first step removes entries from the given set itself.
 *
 * @param categories categories to clean; modified in place
 * @return the kept categories plus {@code "Spacemedia files (review needed)"}
 * @throws IllegalStateException if categories remained but none was kept
 */
public Set<String> cleanupCategories(Set<String> categories) {
    LocalDateTime start = now();
    LOGGER.info("Cleaning {} categories with depth {}...", categories.size(), catSearchDepth);
    Set<String> result = new HashSet<>();
    Set<String> lowerCategories = categories.stream().map(c -> c.toLowerCase(Locale.ENGLISH))
            .collect(Collectors.toSet());
    for (Iterator<String> it = categories.iterator(); it.hasNext();) {
        String c = it.next().toLowerCase(Locale.ENGLISH);
        if (c.endsWith("s")) {
            c = c.substring(0, c.length() - 1);
        }
        final String fc = c;
        // Quickly remove instances of rockets, spacecraft, satellites and so on
        if (lowerCategories.stream().anyMatch(lc -> lc.contains("(" + fc + ")"))) {
            it.remove();
        }
    }
    for (String cat : categories) {
        Set<String> subcats = self.getSubCategories(cat, catSearchDepth);
        if (subcats.parallelStream().noneMatch(categories::contains)) {
            result.add(cat);
        }
    }
    // Duration.between(start, end): the reversed order logged a negative duration
    LOGGER.info("Cleaning {} categories with depth {} completed in {}", categories.size(), catSearchDepth,
            Duration.between(start, now()));
    if (!categories.isEmpty() && result.isEmpty()) {
        throw new IllegalStateException("Cleaning " + categories + " removed all categories!");
    }
    // Make sure all imported files get reviewed
    result.add("Spacemedia files (review needed)");
    return result;
}
/** Matches an HTML anchor; group 1 is the {@code href} value, group 2 the link text. */
private static final Pattern HTML_LINK = Pattern.compile("<a [^>]*href=\"([^\"]*)\"[^>]*>([^<]*)</a>");

/**
 * Converts HTML anchors to wiki external links: {@code <a href="URL">text</a>}
 * becomes {@code [URL text]}.
 * <p>
 * The pattern covers the whole anchor element, opening and closing tags
 * included, so that no tag residue is left around the generated link.
 *
 * @param badWikiCode wikicode possibly containing HTML anchors
 * @return the wikicode with anchors replaced by wiki external links
 */
public static String formatWikiCode(String badWikiCode) {
    return HTML_LINK.matcher(badWikiCode).replaceAll("[$1 $2]");
}
/**
 * Uploads the file found at the given URL under a normalized file name. A
 * rejected token is renewed and the upload retried once.
 *
 * @param wikiCode wikicode of the file page
 * @param filename desired file name; forbidden characters are replaced
 * @param url      URL of the file to upload
 * @param sha1     expected SHA-1 of the file
 * @return the name of the uploaded (or already existing identical) file
 * @throws IOException if the upload call fails
 */
public String upload(String wikiCode, String filename, URL url, String sha1) throws IOException {
    String safeFilename = normalizeFilename(filename);
    return doUpload(wikiCode, safeFilename, url, sha1, true);
}
/**
 * Replaces characters that are not kept in file names: {@code /}, {@code :}
 * and {@code \} become {@code -}, and {@code .} becomes {@code _}.
 *
 * @param filename file name to normalize
 * @return the normalized file name
 */
public String normalizeFilename(String filename) {
    // replace forbidden chars, see https://www.mediawiki.org/wiki/Manual:$wgIllegalFileChars
    StringBuilder normalized = new StringBuilder(filename.length());
    for (int i = 0; i < filename.length(); i++) {
        char c = filename.charAt(i);
        switch (c) {
        case '/':
        case ':':
        case '\\':
            normalized.append('-');
            break;
        case '.':
            normalized.append('_');
            break;
        default:
            normalized.append(c);
            break;
        }
    }
    return normalized.toString();
}
/**
 * Uploads a file by URL through the API {@code upload} action and checks the
 * outcome.
 *
 * @param wikiCode             wikicode of the file page; must not be null
 * @param filename             target file name; must not be null
 * @param url                  URL of the file to upload
 * @param sha1                 expected SHA-1, compared ignoring case with the one
 *                             reported by the API
 * @param renewTokenIfBadToken if {@code true}, a {@code badtoken} error triggers a
 *                             token renewal and exactly one retry
 * @return the uploaded file name, or the name of the existing file when the API
 *         reports an exact duplicate
 * @throws IOException                   if an API call fails
 * @throws UnsupportedOperationException if the account lacks the upload-by-URL right
 * @throws IllegalArgumentException      if the API reports another error or a
 *                                       non-successful result
 * @throws IllegalStateException         if the reported SHA-1 differs from the expected one
 */
private synchronized String doUpload(String wikiCode, String filename, URL url, String sha1, boolean renewTokenIfBadToken)
throws IOException {
// Mutable copy, since the "url" parameter is added afterwards
Map params = new HashMap<>(Map.of(
"action", "upload",
"comment", "#Spacemedia - Upload of " + url + " via [[:Commons:Spacemedia]]",
"format", "json",
"filename", Objects.requireNonNull(filename, "filename"),
"ignorewarnings", "1",
"text", Objects.requireNonNull(wikiCode, "wikiCode"),
"token", token
));
if (hasUploadByUrlRight()) {
params.put("url", url.toExternalForm());
} else {
throw new UnsupportedOperationException("Application is not yet able to upload by file, only by URL");
}
// Wait if needed to respect the minimal delay between uploads
ensureUploadRate();
LOGGER.info("Uploading {} as {}..", url, filename);
UploadApiResponse apiResponse = apiHttpPost(params, UploadApiResponse.class);
LOGGER.info("Upload of {} as {}: {}", url, filename, apiResponse);
UploadResponse upload = apiResponse.getUpload();
UploadError error = apiResponse.getError();
if (error != null) {
if (renewTokenIfBadToken && "badtoken".equals(error.getCode())) {
// Renew the CSRF token and retry once (the retry cannot renew again)
token = queryTokens().getCsrftoken();
return doUpload(wikiCode, filename, url, sha1, false);
}
if ("fileexists-no-change".equals(error.getCode())) {
// Exact duplicate: return the name of the existing file parsed from the error text
Matcher m = EXACT_DUPE_ERROR.matcher(error.getInfo());
if (m.matches()) {
return m.group(1);
}
}
throw new IllegalArgumentException(error.toString());
// NOTE(review): upload is dereferenced whenever error is null; presumably the API
// always returns one of the two - confirm.
} else if (!"Success".equals(upload.getResult())) {
throw new IllegalArgumentException(apiResponse.toString());
}
if (!sha1.equalsIgnoreCase(upload.getImageInfo().getSha1())) {
throw new IllegalStateException(String.format(
"SHA1 mismatch for %s ! Expected %s, got %s", url, sha1, upload.getImageInfo().getSha1()));
}
return upload.getFilename();
}
/**
 * Blocks until at least {@code DELAY} seconds have elapsed since the previous
 * upload, then records the current time as the last upload time.
 * <p>
 * The wait is computed in milliseconds, as expected by
 * {@link Thread#sleep(long)}, and equals the time remaining until
 * {@code lastUpload + DELAY}. The earlier computation passed a number of
 * seconds, and the complement of the remaining time at that.
 *
 * @throws IOException if the thread is interrupted while waiting; the
 *                     interrupt flag is restored before throwing
 */
private void ensureUploadRate() throws IOException {
    if (lastUpload != null) {
        long remainingMillis = Duration.between(now(), lastUpload.plusSeconds(DELAY)).toMillis();
        if (remainingMillis > 0) {
            try {
                Thread.sleep(remainingMillis);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IOException(e);
            }
        }
    }
    lastUpload = now();
}
}