#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Copy photos from Flickr Explore to Wikimedia Commons.

The script asks Flickr for the "interestingness" list of one day, and for
every photo with an allowed license either uploads it to Commons or, when
the image is already there, adds the Flickr Explore category to the
existing file page.

Usage: pass ``-start:<date>`` to select the day to process. The value is
handed to the Flickr API unchanged and is written to the bot's status page
when the run finishes.
"""

from __future__ import absolute_import, division, unicode_literals

import base64
import hashlib
import io
import json
import re

import pywikibot
from pywikibot import config, textlib
from pywikibot.comms.http import fetch
from pywikibot.specialbots import UploadRobot
from pywikibot.tools import PY2

try:
    import flickrapi  # see: http://stuvel.eu/projects/flickrapi
except ImportError as e:
    # Keep the exception so main() can report the missing dependency.
    flickrapi = e
from scripts import flickrripper

if not PY2:
    from urllib.parse import urlencode
else:
    from urllib import urlencode

# Category added to every file that was found in Flickr Explore.
EXPLORE_CATEGORY = '[[Category:Photos in Flickr Explore]]'

# Prefix of the edit summaries used for the bot's own bookkeeping edits.
BOT_SUMMARY_PREFIX = '[[User:Red panda bot|BOT]]: + '

# Uploads with these Flickr license ids are additionally listed on a log page.
LICENSE_LOG_PAGES = {
    '7': 'User:Red panda bot/license7',
    '8': 'User:Red panda bot/license8',
    '10': 'User:Red panda bot/license10',
}


def interestingness(flickr, date, per_page='1'):
    """Return the decoded ``flickr.interestingness.getList`` response.

    See https://www.flickr.com/services/api/flickr.interestingness.getList.html

    @param flickr: API object providing ``do_flickr_call``
    @param date: day to query, passed to the API unchanged
    @param per_page: number of photos to request
    @return: the JSON response parsed with ``json.loads``
    """
    # The raw response is decoded as UTF-8, so it is expected to be bytes.
    raw = flickr.do_flickr_call('flickr.interestingness.getList',
                                date=date,
                                format='json',
                                extras='license',
                                per_page=per_page)
    return json.loads(raw.decode('UTF-8'))


def insertTags(insert_str, str, pos):
    """Insert a string in front of a marker.

    @param insert_str: the string to insert
    @param str: the original string (the name shadows the builtin; it is
        kept because it is part of the signature)
    @param pos: marker to look for; the text is inserted before its first
        occurrence
    @return: the string with ``insert_str`` inserted
    @raise ValueError: if ``pos`` does not occur in ``str``
    """
    index = str.index(pos)
    return str[:index] + insert_str + str[index:]


def isAllowedLicense(license):
    """
    Fork flickrripper.py

    Check if the image contains the right license.

    @param license: Flickr license id, as a number or numeric string
    @return: True if ``flickrripper.flickr_allowed_license`` marks the id
        as allowed; False otherwise, including for unknown or non-numeric
        ids (an unrecognised license must never be uploaded)
    """
    try:
        return bool(flickrripper.flickr_allowed_license[int(license)])
    except (KeyError, IndexError, TypeError, ValueError):
        return False


def getFlinfoDescription(photo_id):
    """
    Fork flickrripper.py

    Get the description from the Flinfo tool at
    https://tools.wmflabs.org/redpanda/flinfo/flinfo.php.

    TODO: Add exception handling, try a couple of times

    @param photo_id: Flickr photo id
    @return: the text of the Flinfo response
    """
    parameters = urlencode({'id': photo_id, 'raw': 'on'})

    return fetch(
        'https://tools.wmflabs.org/redpanda/flinfo/flinfo.php?%s'
        % parameters).text


def buildDescription(photoInfo, flinfoDescription='', flickrreview=False,
                     reviewer='', addCategory=''):
    """Fork flickrripper.py

    Build the final description for the image.

    The description is based on the info from flickrinfo and improved.

    @param photoInfo: photo info object, passed to ``flickrripper.getTags``
    @param flinfoDescription: wikitext returned by Flinfo
    @param flickrreview: whether to fill in the ``{{flickrreview}}`` template
    @param reviewer: reviewer name for the ``{{flickrreview}}`` template
    @param addCategory: wikitext appended to the end of the description
    @return: the finished description using ``\\n`` line endings
    """
    # Normalise line endings first: the patterns below anchor on '\n', and a
    # stray '\r' would otherwise end up inside the {{Taken on}} template.
    description = flinfoDescription.replace('\r\n', '\n')

    date_match = re.search(r'\|Date=(.*)\n', description)
    if date_match and date_match.group(1):
        date_line = '|Date={{Taken on|%s}}\n' % date_match.group(1)
        # A callable replacement inserts the text literally; a replacement
        # string would interpret backslashes as escapes or group references.
        description = re.sub(r'\|Date=.*\n', lambda match: date_line,
                             description)

    if flickrreview and reviewer:
        description = description.replace(
            '{{flickrreview}}',
            '{{flickrreview|%s|'
            '{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-'
            '{{subst:CURRENTDAY2}}}}' % reviewer)

    if addCategory:
        description = description + addCategory
    # NOTE(review): the nesting of this check relative to ``if addCategory``
    # could not be recovered from the flattened source; the script always
    # passes a non-empty addCategory, so both readings behave the same there.
    if '{{subst:unc}}' not in description:
        # Request category check
        description = description + '{{subst:chc}}\n'

    tags = flickrripper.getTags(photoInfo)
    if tags:
        tags_field = ('|other_fields={{Information field|Name=Flickr tags'
                      '|Value=%s}}\n' % ', '.join(tags))
        # Tags are free text written by Flickr users, so they are inserted
        # through a callable. Any existing other_versions value is kept.
        description = re.sub(
            r'(\|other_versions=.*?\n)}}',
            lambda match: match.group(1) + tags_field + '}}',
            description)

    # addCategory may bring its own '\r\n' sequences.
    return description.replace('\r\n', '\n')


def _tagDuplicate(site, dupfilename, date):
    """Add the Flickr Explore category to a file that already exists."""
    pywikibot.output('Found duplicate image at {}'.format(dupfilename))
    filepage = pywikibot.Page(site, 'File:' + dupfilename)
    filepage_text = filepage.text
    if EXPLORE_CATEGORY not in filepage_text:
        filepage.text = filepage_text + '\n' + EXPLORE_CATEGORY + '\n'
        filepage.save(BOT_SUMMARY_PREFIX + EXPLORE_CATEGORY + ': ' + date)


def _logLicense(site, license, filename):
    """List an upload on the log page of its license, if there is one."""
    log_title = LICENSE_LOG_PAGES.get(str(license))
    if log_title is None:
        return
    file_link = '[[:File:' + filename + ']]'
    page = pywikibot.Page(site, log_title)
    page.text = page.text + '\n* ' + file_link + '\n'
    page.save(BOT_SUMMARY_PREFIX + file_link)


def processPhoto(date, flickr, license, photo_id='', flickrreview=False,
                 reviewer='', addCategory=''):
    """Fork flickrripper.py

    Process a single Flickr photo.

    For each image:
      * Check the license
      * Check if it isn't already on Commons; if it is, only add the
        Flickr Explore category to the existing file page
      * Build the filename
      * Pull description from Flinfo
      * Upload the image by URL
      * List the upload on a log page for license ids 7, 8 and 10

    @param date: Explore date, used in edit and upload summaries
    @param flickr: Flickr API object
    @param license: Flickr license id of the photo
    @param photo_id: Flickr photo id
    @param flickrreview: passed on to buildDescription
    @param reviewer: passed on to buildDescription
    @param addCategory: passed on to buildDescription
    @return: 1 if an upload was attempted, 0 otherwise
    """
    if not isAllowedLicense(license) or not photo_id:
        return 0

    site = pywikibot.Site('commons', 'commons')
    pywikibot.output(str(photo_id))
    (photoInfo, photoSizes) = flickrripper.getPhoto(flickr, photo_id)
    # Get the url of the largest photo
    photoUrl = flickrripper.getPhotoUrl(photoSizes)
    # Should download the photo only once
    photo = flickrripper.downloadPhoto(photoUrl)

    # Don't upload duplicate images, should add override option
    duplicates = flickrripper.findDuplicateImages(photo)
    if duplicates:
        _tagDuplicate(site, duplicates.pop(), date)
        pywikibot.output('=' * 25)
        return 0

    filename = flickrripper.getFilename(photoInfo)
    flinfoDescription = getFlinfoDescription(photo_id)
    photoDescription = buildDescription(photoInfo, flinfoDescription,
                                        flickrreview, reviewer,
                                        addCategory)
    pywikibot.output(filename)
    bot = UploadRobot(photoUrl,
                      description=photoDescription,
                      useFilename=filename,
                      keepFilename=True,
                      verifyDescription=False,
                      uploadByUrl=True,
                      summary='[[:Category:Photos in Flickr Explore|'
                              'In Flickr Explore]]: ' + date)
    bot.upload_image(debug=False)
    _logLicense(site, license, filename)
    pywikibot.output('=' * 25)
    # NOTE(review): the result of upload_image() is not checked, so this
    # counts attempted uploads.
    return 1


def main():
    """Process the Flickr Explore photos of the day given by ``-start:``."""
    if isinstance(flickrapi, ImportError):
        pywikibot.error('This script requires the python flickrapi module: '
                        '{}'.format(flickrapi))
        return

    addCategory = ('[[Category:Flickr files uploaded by '
                   + config.usernames['commons']['commons'] + ']]\n'
                   + EXPLORE_CATEGORY + '\n')
    totalPhotos = 0
    uploadedPhotos = 0
    flickrreview = False
    reviewer = ''
    site = pywikibot.Site('commons', 'commons')

    flickr = flickrapi.FlickrAPI(config.flickr['api_key'],
                                 config.flickr['api_secret'])

    # Author's note from the original script: "begin in 2004-01-07".
    date = None
    for arg in pywikibot.handleArgs():
        if arg.startswith('-start:'):
            date = arg[len('-start:'):]
    if not date:
        pywikibot.error('Missing required argument: -start:<date>')
        return

    data = interestingness(flickr, date, per_page='500')
    if 'photos' not in data:
        # Do not record the day as processed when Flickr sent no photo list.
        pywikibot.error('Unexpected Flickr response: {}'.format(data))
        return

    for photo in data['photos']['photo']:
        uploadedPhotos += processPhoto(date, flickr, photo['license'],
                                       photo['id'], flickrreview,
                                       reviewer, addCategory)
        totalPhotos += 1
    pywikibot.output('Finished running')
    pywikibot.output('Total photos: ' + str(totalPhotos))
    pywikibot.output('Uploaded photos: ' + str(uploadedPhotos))

    # Record the processed day on the bot's status page.
    page = pywikibot.Page(site, 'User:Red panda bot/status')
    page.text = date
    page.save('update to ' + date)
    pywikibot.output('Date: ' + date)


if __name__ == '__main__':
    main()