diff --git a/panda.py b/panda.py index 0edce04..fc4c2a9 100644 --- a/panda.py +++ b/panda.py @@ -1,294 +1,304 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ A tool to transfer Flickr Explore photos to Wikimedia Commons. Fork flickrripper.py """ # # (C) Multichill, 2009 # (C) Pywikibot team, 2009-2020 # (C) shizhao, 2020 # # Distributed under the terms of the MIT license. # from __future__ import absolute_import, division, unicode_literals import base64 import hashlib import io import re import json import pywikibot from pywikibot import config, textlib from pywikibot.comms.http import fetch from pywikibot.specialbots import UploadRobot from pywikibot.tools import PY2 try: import flickrapi # see: http://stuvel.eu/projects/flickrapi except ImportError as e: flickrapi = e from scripts import flickrripper if not PY2: from urllib.parse import urlencode else: from urllib import urlencode import datetime # see https://www.flickr.com/services/api/flickr.interestingness.getList.html def interestingness(flickr, date, per_page='1'): while True: try: json_bytes = flickr.do_flickr_call('flickr.interestingness.getList', date=date, format='json', extras='license', per_page=per_page) json_str = json_bytes.decode('UTF-8') return json.loads(json_str) except flickrapi.exceptions.FlickrError: pywikibot.output('Flickr api problem, sleeping') pywikibot.sleep(30) def insertTags(insert_str, str, pos): '''插入字符 @str 原始字符串 @insert_str 要插入的字符串 @pos 要插入的位置(在该位置之前插入) ''' str_list = list(str) nPos = str_list.index(pos) str_list.insert(nPos, insert_str) return "".join(str_list) def isAllowedLicense(license): """ Fork flickrripper.py Check if the image contains the right license. """ if flickrripper.flickr_allowed_license[int(license)]: return True else: return False def getFlinfoDescription(photo_id): """ Fork flickrripper.py Get the description from https://tools.wmflabs.org/redpanda/flinfo/flinfo.php TODO: Add exception handling, try a couple of times """ parameters = urlencode({'id': photo_id, 'raw': 'on'}) return fetch( 'https://tools.wmflabs.org/redpanda/flinfo/flinfo.php?%s' % parameters).text +def blackCategory(text): + blacklist = ['[[Category:United Kingdom]]', '[[Category:London]]'] + for black in blacklist: + if black in text: + text = text.replace(black, '') + pywikibot.output('Remove category: ' + black) + return text + + def buildDescription(photoInfo, flinfoDescription='', flickrreview=False, reviewer='', addCategory=''): """Fork flickrripper.py Build the final description for the image. The description is based on the info from flickrinfo and improved. """ description = flinfoDescription # https://tools.wmflabs.org/redpanda/flinfo/flinfo.php?id=47526502491&repo=flickr&raw=on&user_lang=zh if 'Blacklisted user' not in description: datetaken = re.search(r'\|Date=(.*)\n', description).group(1) if datetaken: datetaken = '{{Taken on|%s}}' % (datetaken) description = re.sub(r'\|Date=.*\n', "|Date=%s\n" % (datetaken), description) if flickrreview: if reviewer: description = description.replace( '{{flickrreview}}', '{{flickrreview|%s|' '{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-' '{{subst:CURRENTDAY2}}}}' % reviewer) if addCategory: description = description + addCategory if '{{subst:unc}}' not in description: # Request category check description = description + '{{subst:chc}}\n' tags = flickrripper.getTags(photoInfo) if tags: tags_str = ', '.join(tags) tags_str = '|other_fields={{Information field|Name=Flickr tags|Value=%s}}\n' % tags_str #re_str = re.search( r'\|other_versions=.*?\n}}',description).group() #insertTags(tags_str, re_str, '}') description = re.sub(r'\|other_versions=.*?\n}}', "|other_versions=\n%s}}" % (tags_str), description) + description = blackCategory(description) description = description.replace('\r\n', '\n') return description def PhotoSize(photoSizes): for size in photoSizes.find('sizes').findall('size'): height = size.attrib['height'] width = size.attrib['width'] size = int(height) * int(width) return size def processPhoto(date, flickr, license, photo_id='', flickrreview=False, reviewer='', addCategory=''): """Fork flickrripper.py Process a single Flickr photo. For each image: * Check the license * Check if it isn't already on Commons * Build suggested filename * Check for name collision and maybe alter it * Pull description from Flinfo * Show image and description to user * Add a nice hotcat lookalike for the adding of categories * Filter the categories * Upload the image """ if isAllowedLicense(license): site = pywikibot.Site('commons', 'commons') if photo_id: pywikibot.output('photo_id is ' + str(photo_id)) (photoInfo, photoSizes) = flickrripper.getPhoto(flickr, photo_id) # Get the url of the largest photo photoUrl = flickrripper.getPhotoUrl(photoSizes) # Should download the photo only once photo = flickrripper.downloadPhoto(photoUrl) # Don't upload duplicate images, should add override option duplicates = flickrripper.findDuplicateImages(photo) size = PhotoSize(photoSizes) if duplicates: dupfilename = duplicates.pop() pywikibot.output('Found duplicate image at {}' .format(dupfilename)) filepage = pywikibot.Page(site, u"File:"+dupfilename) filepage_text = filepage.text inFlickr = '[[Category:Photos in Flickr Explore]]' if inFlickr not in filepage_text: filepage.text = filepage_text + '\n%s\n' % (inFlickr) filepage.save( u'[[User:Red panda bot|BOT]]: + [[Category:Photos in Flickr Explore]]: ' + date) elif size < 200 * 1024: pywikibot.output( 'Photo size is {}. too small! '.format(size)) else: pywikibot.output('Photo size is {}.'.format(size)) filename = flickrripper.getFilename(photoInfo, photo_url=photoUrl) #if not photoInfo.find('photo').attrib['originalformat']: # urlformat = photoUrl.split(".")[-1] # filename = filename + urlformat flinfoDescription = getFlinfoDescription(photo_id) if 'Blacklisted user' not in flinfoDescription: photoDescription = buildDescription(photoInfo, flinfoDescription, flickrreview, reviewer, addCategory) pywikibot.output(filename) bot = UploadRobot(photoUrl, - description=photoDescription, - useFilename=filename, - keepFilename=True, - verifyDescription=False, - uploadByUrl=True, - summary='[[:Category:Photos in Flickr Explore|In Flickr Explore]]: ' + date) + description=photoDescription, + useFilename=filename, + keepFilename=True, + verifyDescription=False, + uploadByUrl=True, + summary='[[:Category:Photos in Flickr Explore|In Flickr Explore]]: ' + date) bot.upload_image(debug=False) if license == '7': page = pywikibot.Page(site, u'User:Red panda bot/license7') page_text = page.text page.text = page_text + u'\n* [[:File:' + filename + ']]\n' page.save( u'[[User:Red panda bot|BOT]]: + [[:File:' + filename + ']]' ) elif license == '8': page = pywikibot.Page(site, u'User:Red panda bot/license8') page_text = page.text page.text = page_text + u'\n* [[:File:' + filename + ']]\n' page.save( u'[[User:Red panda bot|BOT]]: + [[:File:' + filename + ']]' ) elif license == '10': page = pywikibot.Page(site, u'User:Red panda bot/license10') page_text = page.text page.text = page_text + u'\n* [[:File:' + filename + ']]\n' page.save( u'[[User:Red panda bot|BOT]]: + [[:File:' + filename + ']]' ) pywikibot.output('=' * 25) return 1 else: pywikibot.output('Error: ' + flinfoDescription) pywikibot.output('=' * 25) # else: # pywikibot.output('Invalid license') return 0 # ======================================= addCategory = '[[Category:Flickr files uploaded by ' + \ config.usernames['commons']['commons'] + \ ']]\n[[Category:Photos in Flickr Explore]]\n' totalPhotos = 0 uploadedPhotos = 0 flickrreview = False reviewer = '' site = pywikibot.Site('commons', 'commons') flickr = flickrapi.FlickrAPI( config.flickr['api_key'], config.flickr['api_secret']) """ ============================================== cron ============================================== """ dt = datetime.datetime.now() - datetime.timedelta(days=3) date = dt.strftime('%Y-%m-%d') #pywikibot.output('Flickr Explore: ' + date) # begin in 2004-01-07 for arg in pywikibot.handleArgs(): if arg.startswith('-start:'): date = arg[7:] #datepage = pywikibot.Page(site, u'User:Red panda bot/status') #date = datepage.text #for i in range(0, 30): # dt = datetime.date.fromisoformat(date) + datetime.timedelta(days=1) # date = dt.strftime('%Y-%m-%d') # pywikibot.output('Flickr Explore: ' + date) # i+=1 data = interestingness(flickr, date, per_page='500') for photo in data['photos']['photo']: photo_id = photo['id'] license = photo['license'] uploadedPhotos += processPhoto(date, flickr, license, photo_id, flickrreview, reviewer, addCategory) totalPhotos += 1 pywikibot.output('Finished running') pywikibot.output('Total photos: ' + str(totalPhotos)) pywikibot.output('Uploaded photos: ' + str(uploadedPhotos)) page = pywikibot.Page(site, u'User:Red panda bot/status') page.text = date page.save(u'update to ' + date) pywikibot.output('Date: ' + date)