diff --git a/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsDbConfiguration.java b/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsDbConfiguration.java index 779c092b..edeecd82 100644 --- a/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsDbConfiguration.java +++ b/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsDbConfiguration.java @@ -1,47 +1,49 @@ package org.wikimedia.commons.donvip.spacemedia.data.commons; +import javax.persistence.EntityManagerFactory; import javax.sql.DataSource; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties; import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.boot.orm.jpa.EntityManagerFactoryBuilder; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.data.jpa.repository.config.EnableJpaRepositories; -import org.springframework.jdbc.datasource.DataSourceTransactionManager; +import org.springframework.orm.jpa.JpaTransactionManager; import org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean; import org.springframework.transaction.PlatformTransactionManager; import org.springframework.transaction.annotation.EnableTransactionManagement; @Configuration @EnableTransactionManagement @EnableJpaRepositories( entityManagerFactoryRef = "commonsEntityManagerFactory", transactionManagerRef = "commonsTransactionManager", basePackageClasses = {CommonsDbConfiguration.class}) public class CommonsDbConfiguration { @Bean(name = "commonsDataSourceProperties") @ConfigurationProperties("commons.datasource") public DataSourceProperties dataSourceProperties() { return new DataSourceProperties(); } @Bean(name = "commonsDataSource") @ConfigurationProperties("commons.datasource.hikari") public DataSource dataSource() { return 
dataSourceProperties().initializeDataSourceBuilder().build(); } @Bean(name = "commonsEntityManagerFactory") public LocalContainerEntityManagerFactoryBean entityManagerFactory(EntityManagerFactoryBuilder builder, @Qualifier("commonsDataSource") DataSource dataSource) { return builder.dataSource(dataSource).packages(getClass().getPackage().getName()).persistenceUnit("commons").build(); } @Bean(name = "commonsTransactionManager") - public PlatformTransactionManager transactionManager() { - return new DataSourceTransactionManager(dataSource()); + public PlatformTransactionManager transactionManager( + @Qualifier("commonsEntityManagerFactory") EntityManagerFactory entityManagerFactory) { + return new JpaTransactionManager(entityManagerFactory); } } diff --git a/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsPageProp.java b/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsPageProp.java index 05993769..1fa38ce8 100644 --- a/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsPageProp.java +++ b/src/main/java/org/wikimedia/commons/donvip/spacemedia/data/commons/CommonsPageProp.java @@ -1,103 +1,104 @@ package org.wikimedia.commons.donvip.spacemedia.data.commons; import java.io.Serializable; import java.util.Objects; import javax.persistence.Column; import javax.persistence.Entity; +import javax.persistence.FetchType; import javax.persistence.Id; import javax.persistence.JoinColumn; import javax.persistence.Lob; import javax.persistence.ManyToOne; import javax.persistence.Table; /** * Mediawiki * page_props table - * + * *
  * +-------------+---------------+------+-----+---------+-------+
  * | Field       | Type          | Null | Key | Default | Extra |
  * +-------------+---------------+------+-----+---------+-------+
  * | pp_page     | int(11)       | NO   | PRI | NULL    |       |
  * | pp_propname | varbinary(60) | NO   | PRI | NULL    |       |
  * | pp_value    | blob          | NO   |     | NULL    |       |
  * | pp_sortkey  | float         | YES  |     | NULL    |       |
  * +-------------+---------------+------+-----+---------+-------+
  * 
*/ @Entity @Table(name = "page_props") public class CommonsPageProp implements Serializable { private static final long serialVersionUID = 1L; @Id - @ManyToOne + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "pp_page", nullable = false) private CommonsPage page; @Id @Column(name = "pp_propname", nullable = false, length = 60, columnDefinition = "VARBINARY") private String propname; @Lob @Column(name = "pp_value", nullable = false, columnDefinition = "BLOB") private byte[] value; @Column(name = "pp_sortkey", nullable = true) private Float sortkey; public CommonsPage getPage() { return page; } public void setPage(CommonsPage page) { this.page = page; } public String getPropname() { return propname; } public void setPropname(String propname) { this.propname = propname; } public byte[] getValue() { return value; } public void setValue(byte[] value) { this.value = value; } public Float getSortkey() { return sortkey; } public void setSortkey(Float sortkey) { this.sortkey = sortkey; } @Override public int hashCode() { return Objects.hash(page, propname); } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null || getClass() != obj.getClass()) return false; CommonsPageProp other = (CommonsPageProp) obj; return Objects.equals(page, other.page) && Objects.equals(propname, other.propname); } /* The blob is rendered with Arrays.toString: concatenating a byte[] directly only prints its type and identity hash. */ @Override public String toString() { return "CommonsPageProp [" + (propname != null ? "propname=" + propname + ", " : "") + (value != null ? 
"value=" + java.util.Arrays.toString(value) + ", " : "") + "sortkey=" + sortkey + "]"; } } diff --git a/src/main/java/org/wikimedia/commons/donvip/spacemedia/service/CommonsService.java b/src/main/java/org/wikimedia/commons/donvip/spacemedia/service/CommonsService.java index 3d94ca8f..08e60a15 100644 --- a/src/main/java/org/wikimedia/commons/donvip/spacemedia/service/CommonsService.java +++ b/src/main/java/org/wikimedia/commons/donvip/spacemedia/service/CommonsService.java @@ -1,572 +1,572 @@ package org.wikimedia.commons.donvip.spacemedia.service; import static java.time.LocalDateTime.now; import static java.time.temporal.ChronoUnit.SECONDS; import java.io.IOException; import java.math.BigInteger; import java.net.SocketTimeoutException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.LocalDateTime; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; import javax.annotation.PostConstruct; import javax.annotation.Resource; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.MapUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.cache.annotation.Cacheable; import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; import org.springframework.stereotype.Service; import 
org.springframework.transaction.annotation.Transactional; import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsCategoryLinkId; import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsCategoryLinkRepository; import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsCategoryLinkType; import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsCategoryRepository; import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsImage; import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsImageRepository; import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsOldImage; import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsOldImageRepository; import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsPage; import org.wikimedia.commons.donvip.spacemedia.data.commons.CommonsPageRepository; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.FileArchive; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.FileArchiveQuery; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.FileArchiveQueryResponse; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.Limit; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.MetaQueryResponse; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.Revision; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.RevisionsPage; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.RevisionsQueryResponse; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.Slot; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.Tokens; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.UploadApiResponse; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.UploadError; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.UploadResponse; import org.wikimedia.commons.donvip.spacemedia.data.commons.api.UserInfo; import 
org.wikimedia.commons.donvip.spacemedia.exception.CategoryNotFoundException; import org.wikimedia.commons.donvip.spacemedia.exception.CategoryPageNotFoundException; import org.wikimedia.commons.donvip.spacemedia.utils.Utils; import com.fasterxml.jackson.databind.ObjectMapper; import com.github.scribejava.apis.MediaWikiApi; import com.github.scribejava.core.builder.ServiceBuilder; import com.github.scribejava.core.model.OAuth1AccessToken; import com.github.scribejava.core.model.OAuthRequest; import com.github.scribejava.core.model.Verb; import com.github.scribejava.core.oauth.OAuth10aService; @Service public class CommonsService { private static final Logger LOGGER = LoggerFactory.getLogger(CommonsService.class); private static final Pattern EXACT_DUPE_ERROR = Pattern.compile( "The upload is an exact duplicate of the current version of \\[\\[:File:(.+)\\]\\]\\."); /** * Minimal delay between successive uploads, in seconds. */ private static final int DELAY = 5; @Autowired private CommonsImageRepository imageRepository; @Autowired private CommonsOldImageRepository oldImageRepository; @Autowired private CommonsCategoryRepository categoryRepository; @Autowired private CommonsPageRepository pageRepository; @Autowired private CommonsCategoryLinkRepository categoryLinkRepository; @Autowired private ObjectMapper jackson; /** * Self-autowiring to call {@link Cacheable} methods, otherwise the cache is * skipped. Spring cache is only trigerred on external calls. 
*/ @Resource private CommonsService self; @Value("${commons.api.url}") private URL apiUrl; @Value("${commons.api.rest.url}") private URL restApiUrl; @Value("${commons.cat.search.depth}") private int catSearchDepth; @Value("${commons.img.preview.width}") private int imgPreviewWidth; private final String account; private final String userAgent; private final OAuth10aService oAuthService; private final OAuth1AccessToken oAuthAccessToken; private UserInfo userInfo; private String token; private LocalDateTime lastUpload; public CommonsService( @Value("${application.version}") String appVersion, @Value("${application.contact}") String appContact, @Value("${flickr4java.version}") String flickr4javaVersion, @Value("${spring-boot.version}") String bootVersion, @Value("${scribejava.version}") String scribeVersion, @Value("${commons.api.account}") String apiAccount, @Value("${commons.api.oauth1.consumer-token}") String consumerToken, @Value("${commons.api.oauth1.consumer-secret}") String consumerSecret, @Value("${commons.api.oauth1.access-token}") String accessToken, @Value("${commons.api.oauth1.access-secret}") String accessSecret ) { account = apiAccount; // Comply to Wikimedia User-Agent Policy: https://meta.wikimedia.org/wiki/User-Agent_policy if (!account.toLowerCase(Locale.ENGLISH).contains("bot")) { throw new IllegalArgumentException("Bot account must include 'bot' in its name!"); } userAgent = String.format("%s/%s (%s - %s) %s/%s %s/%s %s/%s", "Spacemedia", appVersion, appContact, apiAccount, "SpringBoot", bootVersion, "ScribeJava", scribeVersion, "Flickr4Java", flickr4javaVersion); oAuthService = new ServiceBuilder(consumerToken).apiSecret(consumerSecret).build(MediaWikiApi.instance()); oAuthAccessToken = new OAuth1AccessToken(accessToken, accessSecret); } @PostConstruct public void init() throws IOException { userInfo = queryUserInfo(); LOGGER.info("Identified to Wikimedia Commons API as {}", userInfo.getName()); if (!hasUploadRight() && !hasUploadByUrlRight()) { 
LOGGER.warn("Wikimedia Commons user account has no upload right!"); } Limit uploadRate = userInfo.getRateLimits().getUpload().getUser(); LOGGER.info("Upload rate limited to {} hits every {} seconds.", uploadRate.getHits(), uploadRate.getSeconds()); // Fetch CSRF token, mandatory for upload using the Mediawiki API token = queryTokens().getCsrftoken(); } private boolean hasUploadByUrlRight() { return userInfo.getRights().contains("upload_by_url"); } private boolean hasUploadRight() { return userInfo.getRights().contains("upload"); } public Set findFilesWithSha1(String sha1) throws IOException { // See https://www.mediawiki.org/wiki/Manual:Image_table#img_sha1 // The SHA-1 hash of the file contents in base 36 format, zero-padded to 31 characters String sha1base36 = String.format("%31s", new BigInteger(sha1, 16).toString(36)).replace(' ', '0'); Set files = imageRepository.findBySha1(sha1base36).stream().map(CommonsImage::getName).collect(Collectors.toSet()); if (files.isEmpty()) { files.addAll(oldImageRepository.findBySha1(sha1base36).stream().map(CommonsOldImage::getName).collect(Collectors.toSet())); } if (files.isEmpty()) { files.addAll(queryFileArchive(sha1base36).stream().map(FileArchive::getName).collect(Collectors.toSet())); } return files; } public synchronized Tokens queryTokens() throws IOException { return apiHttpGet("?action=query&meta=tokens", MetaQueryResponse.class).getQuery().getTokens(); } public UserInfo queryUserInfo() throws IOException { return apiHttpGet("?action=query&meta=userinfo&uiprop=blockinfo|groups|rights|ratelimits", MetaQueryResponse.class).getQuery().getUserInfo(); } public List queryFileArchive(String sha1base36) throws IOException { FileArchiveQuery query = apiHttpGet("?action=query&list=filearchive&fasha1base36=" + sha1base36, FileArchiveQueryResponse.class).getQuery(); return query != null ? 
query.getFilearchive() : Collections.emptyList(); } private T apiHttpGet(String path, Class responseClass) throws IOException { return httpGet(apiUrl.toExternalForm() + path + "&format=json", responseClass); } private T apiHttpPost(Map params, Class responseClass) throws IOException { return httpPost(apiUrl.toExternalForm(), responseClass, params); } private T httpGet(String url, Class responseClass) throws IOException { return httpCall(Verb.GET, url, responseClass, Collections.emptyMap(), Collections.emptyMap(), true); } private T httpPost(String url, Class responseClass, Map params) throws IOException { return httpCall(Verb.POST, url, responseClass, Map.of("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"), params, true); } private T httpCall(Verb verb, String url, Class responseClass, Map headers, Map params, boolean retryOnTimeout) throws IOException { OAuthRequest request = new OAuthRequest(verb, url); request.setCharset(StandardCharsets.UTF_8.name()); params.forEach(request::addParameter); headers.forEach(request::addHeader); request.addHeader("User-Agent", userAgent); oAuthService.signRequest(oAuthAccessToken, request); try { return jackson.readValue(oAuthService.execute(request).getBody(), responseClass); } catch (SocketTimeoutException e) { if (retryOnTimeout) { return httpCall(verb, url, responseClass, headers, params, false); } else { throw e; } } catch (InterruptedException | ExecutionException e) { throw new IOException(e); } } public String queryRevisionContent(int pageId) throws IOException { RevisionsPage rp = apiHttpGet("?action=query&prop=revisions&rvprop=content&rvslots=main&rvlimit=1&pageids=" + pageId, RevisionsQueryResponse.class).getQuery().getPages().get(pageId); if (rp != null) { List revisions = rp.getRevisions(); if (CollectionUtils.isNotEmpty(revisions)) { Map slots = revisions.get(0).getSlots(); if (MapUtils.isNotEmpty(slots)) { Slot main = slots.get("main"); if (main != null) { return main.getContent(); } } } } 
LOGGER.error("Couldn't find page content for {}: {}", pageId, rp); return null; } public String getWikiHtmlPreview(String wikiCode, String pageTitle) throws IOException { VisualEditorResponse apiResponse = apiHttpPost(Map.of( "action", "visualeditor", "format", "json", "formatversion", "2", "paction", "parsedoc", "page", pageTitle, "wikitext", wikiCode, "pst", "true" ), VeApiResponse.class).getVisualeditor(); if (!"success".equals(apiResponse.getResult())) { throw new IllegalArgumentException(apiResponse.toString()); } return apiResponse.getContent(); } public String getWikiHtmlPreview(String wikiCode, String pageTitle, String imgUrl) throws IOException { Document doc = Jsoup.parse(getWikiHtmlPreview(wikiCode, pageTitle)); Element body = doc.getElementsByTag("body").get(0); // Display image Element imgLink = Utils.prependChildElement(body, "a", null, Map.of("href", imgUrl)); Utils.appendChildElement(imgLink, "img", null, Map.of("src", imgUrl, "width", Integer.toString(imgPreviewWidth))); // Display categories Element lastSection = body.getElementsByTag("section").last(); Element catLinksDiv = Utils.appendChildElement(lastSection, "div", null, Map.of("id", "catlinks", "class", "catlinks", "data-mw", "interface")); Element normalCatLinksDiv = Utils.appendChildElement(catLinksDiv, "div", null, Map.of("id", "mw-normal-catlinks", "class", "mw-normal-catlinks")); Utils.appendChildElement(normalCatLinksDiv, "a", "Categories", Map.of("href", "https://commons.wikimedia.org/wiki/Special:Categories", "title", "Special:Categories")); normalCatLinksDiv.appendText(": "); Element normalCatLinksList = new Element("ul"); normalCatLinksDiv.appendChild(normalCatLinksList); Element hiddenCatLinksList = new Element("ul"); Utils.appendChildElement(catLinksDiv, "div", "Hidden categories: ", Map.of("id", "mw-hidden-catlinks", "class", "mw-hidden-catlinks mw-hidden-cats-user-shown")) .appendChild(hiddenCatLinksList); for (Element link : lastSection.getElementsByTag("link")) { String 
category = link.attr("href").replace("#" + pageTitle.replace(" ", "%20"), "").replace("./Category:", ""); String href = "https://commons.wikimedia.org/wiki/Category:" + category; Element list = normalCatLinksList; try { list = self.isHiddenCategory(category) ? hiddenCatLinksList : normalCatLinksList; } catch (CategoryNotFoundException | CategoryPageNotFoundException e) { LOGGER.warn("Category/page not found: {}", e.getMessage()); } Element item = new Element("li"); list.appendChild(item); Utils.appendChildElement(item, "a", sanitizeCategory(category), Map.of("href", href, "title", "Category:" + category)); link.remove(); } return doc.toString(); } static class VeApiResponse { private VisualEditorResponse visualeditor; public VisualEditorResponse getVisualeditor() { return visualeditor; } public void setVisualeditor(VisualEditorResponse visualeditor) { this.visualeditor = visualeditor; } } static class VisualEditorResponse { private String result; private String content; public String getResult() { return result; } public void setResult(String result) { this.result = result; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } @Override public String toString() { return "VisualEditorResponse [" + (result != null ? "result=" + result + ", " : "") + (content != null ? "content=" + content : "") + "]"; } } /** * Returns the API bot account name. Used for User-Agent and Commons categories. * * @return the API bot account name */ public String getAccount() { return account; } /** * Determines if a Commons category is hidden, using the special * {@code __HIDDENCAT__} behavior switch. See documentation. 
* * @param category category to check * @return {@code true} if the category is hidden * @throws CategoryNotFoundException if the category is not found * @throws CategoryPageNotFoundException if no page is found for the category */ - @Transactional + @Transactional(transactionManager = "commonsTransactionManager") @Cacheable("hiddenCategories") public boolean isHiddenCategory(String category) { return self.getCategoryPage(category).getProps().stream().anyMatch(pp -> "hiddencat".equals(pp.getPropname())); } /** * Determines if a Commons category exists and is not a redirect. * * @param category category to check * @return {@code true} if the category exists and is not a redirect */ @Cacheable("upToDateCategories") public boolean isUpToDateCategory(String category) { try { return self.getCategoryPage(category).getRedirect() == null; } catch (CategoryNotFoundException | CategoryPageNotFoundException e) { return false; } } public Set findNonUpToDateCategories(Collection categories) { return categories.parallelStream() .flatMap(s -> Arrays.stream(s.split(";"))) .filter(c -> !c.isEmpty() && !self.isUpToDateCategory(c)) .collect(Collectors.toSet()); } private static String sanitizeCategory(String category) { return category.replace(' ', '_').split("#")[0]; } @Cacheable("categoryPages") public CommonsPage getCategoryPage(String category) { return pageRepository.findByCategoryTitle(categoryRepository .findByTitle(sanitizeCategory(category)) .orElseThrow(() -> new CategoryNotFoundException(category)).getTitle()) .orElseThrow(() -> new CategoryPageNotFoundException(category)); } - @Transactional + @Transactional(transactionManager = "commonsTransactionManager") @Cacheable("subCategories") public Set getSubCategories(String category) { return categoryLinkRepository .findIdByTypeAndIdTo(CommonsCategoryLinkType.subcat, sanitizeCategory(category)).stream() .map(c -> c.getFrom().getTitle()).collect(Collectors.toSet()); } @Cacheable("subCategoriesByDepth") public Set 
getSubCategories(String category, int depth) { LocalDateTime start = now(); LOGGER.debug("Fetching '{}' subcategories with depth {}...", category, depth); Set subcats = self.getSubCategories(category); Set result = subcats.stream().map(CommonsService::sanitizeCategory) .collect(Collectors.toCollection(ConcurrentHashMap::newKeySet)); if (depth > 0) { subcats.parallelStream().forEach(s -> result.addAll(self.getSubCategories(s, depth - 1))); } /* Duration.between takes (start, end): passing start first keeps the logged elapsed time positive. */ LOGGER.debug("Fetching '{}' subcategories with depth {} completed in {}", category, depth, Duration.between(start, now())); return result; } - @Transactional + @Transactional(transactionManager = "commonsTransactionManager") @Cacheable("filesInCategory") public Set getFilesInCategory(String category) { return categoryLinkRepository .findIdByTypeAndIdTo(CommonsCategoryLinkType.file, sanitizeCategory(category)); } - @Transactional + @Transactional(transactionManager = "commonsTransactionManager") @Cacheable("filesPageInCategory") public Page getFilesInCategory(String category, Pageable page) { return categoryLinkRepository .findIdByTypeAndIdTo(CommonsCategoryLinkType.file, sanitizeCategory(category), page); } public String getPageContent(CommonsPage page) throws IOException { return queryRevisionContent(page.getId()); } public Set cleanupCategories(Set categories) { LocalDateTime start = now(); LOGGER.info("Cleaning {} categories with depth {}...", categories.size(), catSearchDepth); Set result = new HashSet<>(); Set lowerCategories = categories.stream().map(c -> c.toLowerCase(Locale.ENGLISH)) .collect(Collectors.toSet()); for (Iterator it = categories.iterator(); it.hasNext();) { String c = it.next().toLowerCase(Locale.ENGLISH); if (c.endsWith("s")) { c = c.substring(0, c.length() - 1); } final String fc = c; // Quickly remove instances of rockets, spacecraft, satellites and so on if (lowerCategories.stream().anyMatch(lc -> lc.contains("(" + fc + ")"))) { it.remove(); } } for (String cat : categories) { Set subcats = 
self.getSubCategories(cat, catSearchDepth); if (subcats.parallelStream().noneMatch(categories::contains)) { result.add(cat); } } LOGGER.info("Cleaning {} categories with depth {} completed in {}", categories.size(), catSearchDepth, Duration.between(now(), start)); if (!categories.isEmpty() && result.isEmpty()) { throw new IllegalStateException("Cleaning " + categories + " removed all categories!"); } // Make sure all imported files get reviewed result.add("Spacemedia files (review needed)"); return result; } public static String formatWikiCode(String badWikiCode) { return badWikiCode.replaceAll("]*href=\"([^\"]*)\"[^>]*>([^<]*)", "[$1 $2]"); } public String upload(String wikiCode, String filename, URL url, String sha1) throws IOException { return doUpload(wikiCode, normalizeFilename(filename), url, sha1, true); } public String normalizeFilename(String filename) { // replace forbidden chars, see https://www.mediawiki.org/wiki/Manual:$wgIllegalFileChars return filename.replace('/', '-').replace(':', '-').replace('\\', '-').replace('.', '_'); } private synchronized String doUpload(String wikiCode, String filename, URL url, String sha1, boolean renewTokenIfBadToken) throws IOException { Map params = new HashMap<>(Map.of( "action", "upload", "comment", "#Spacemedia - Upload of " + url + " via [[:Commons:Spacemedia]]", "format", "json", "filename", Objects.requireNonNull(filename, "filename"), "ignorewarnings", "1", "text", Objects.requireNonNull(wikiCode, "wikiCode"), "token", token )); if (hasUploadByUrlRight()) { params.put("url", url.toExternalForm()); } else { throw new UnsupportedOperationException("Application is not yet able to upload by file, only by URL"); } ensureUploadRate(); LOGGER.info("Uploading {} as {}..", url, filename); UploadApiResponse apiResponse = apiHttpPost(params, UploadApiResponse.class); LOGGER.info("Upload of {} as {}: {}", url, filename, apiResponse); UploadResponse upload = apiResponse.getUpload(); UploadError error = 
apiResponse.getError(); if (error != null) { if (renewTokenIfBadToken && "badtoken".equals(error.getCode())) { token = queryTokens().getCsrftoken(); return doUpload(wikiCode, filename, url, sha1, false); } if ("fileexists-no-change".equals(error.getCode())) { Matcher m = EXACT_DUPE_ERROR.matcher(error.getInfo()); if (m.matches()) { return m.group(1); } } throw new IllegalArgumentException(error.toString()); } else if (!"Success".equals(upload.getResult())) { throw new IllegalArgumentException(apiResponse.toString()); } if (!sha1.equalsIgnoreCase(upload.getImageInfo().getSha1())) { throw new IllegalStateException(String.format( "SHA1 mismatch for %s ! Expected %s, got %s", url, sha1, upload.getImageInfo().getSha1())); } return upload.getFilename(); } private void ensureUploadRate() throws IOException { LocalDateTime fiveSecondsAgo = now().minusSeconds(DELAY); if (lastUpload != null && lastUpload.isAfter(fiveSecondsAgo)) { try { Thread.sleep(DELAY - SECONDS.between(now(), lastUpload.plusSeconds(DELAY))); } catch (InterruptedException e) { throw new IOException(e); } } lastUpload = now(); } }