Page MenuHomePhabricator
Paste P129

"Bot" to migrate cards from mingle to phabricator
ActivePublic

Authored by Gilles on Dec 8 2014, 6:07 PM.
Tokens
"Evil Spooky Haunted Tree" token, awarded by hashar. "The World Burns" token, awarded by greg. "Mountain of Wealth" token, awarded by Qgil.
#!/usr/bin/env python
'''
This script is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This script is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
See <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
'''
import os
import re
import sys
import urllib2
import socket
from xml.dom import minidom
from bs4 import BeautifulSoup
from phabricator import Phabricator
import unicodedata
# Give every socket opened from here on a 30 second default timeout, so a
# stalled HTTP call cannot hang the migration forever.
socket.setdefaulttimeout( 30 )
# Conduit client shared by every function below. No host or credentials are
# passed here; per the paste's notes the library relies on the local arcanist
# configuration.
phab = Phabricator()
# Refresh the client's interface definitions before any API call is made
# (presumably fetched from the configured server -- confirm against the
# python-phabricator documentation).
phab.update_interfaces()
# --- Project tagging ---------------------------------------------------------
# Name of the Mingle card property whose referenced card number selects an
# extra Phabricator project for the migrated task.
mingleProjectProperty = 'Release tree - Epic Story'

# Every migrated task is tagged with this project.
defaultProject = 'PHID-PROJ-qfqb3v2nklkvljicr6ak' # Multimedia

# Mingle card number referenced by mingleProjectProperty -> Phabricator
# project PHID added on top of defaultProject.
projectMap = {
    # Media Viewer
    534: 'PHID-PROJ-cabyqp5sf4hyvauln3sq',
    8: 'PHID-PROJ-cabyqp5sf4hyvauln3sq',
    12: 'PHID-PROJ-cabyqp5sf4hyvauln3sq',
    184: 'PHID-PROJ-cabyqp5sf4hyvauln3sq',
    72: 'PHID-PROJ-cabyqp5sf4hyvauln3sq',
    60: 'PHID-PROJ-cabyqp5sf4hyvauln3sq',
    62: 'PHID-PROJ-cabyqp5sf4hyvauln3sq',
    # Upload Wizard
    532: 'PHID-PROJ-7gh7qm2t4b6ny5rtublq',
    76: 'PHID-PROJ-7gh7qm2t4b6ny5rtublq',
    77: 'PHID-PROJ-7gh7qm2t4b6ny5rtublq',
    10: 'PHID-PROJ-7gh7qm2t4b6ny5rtublq',
    941: 'PHID-PROJ-7gh7qm2t4b6ny5rtublq',
    # Structured data
    531: 'PHID-PROJ-nwze4kl6xadc2dokzxxs',
}

# --- People ------------------------------------------------------------------
# Mingle login -> Phabricator username, used to @-mention comment authors.
userMap = {
    'fflorin': 'Fabrice_Florin',
    'gdubuc': 'Gilles',
    'mholmquist': 'MarkTraceur',
    'gtisza': 'Tgr',
    'pginer': 'Pginer-WMF',
}

# --- Priority ----------------------------------------------------------------
# Name of the Mingle property holding the card priority, and the numeric
# priority handed to Phabricator for each of its values.
minglePriorityProperty = 'Priority'
priorityMap = {
    'Must have': 80,
    'Should have': 50,
    'Could have': 25,
    "Won't have": 10,
}

# --- Ownership ---------------------------------------------------------------
# Name of the Mingle property holding the card owner, and the Phabricator
# user PHID assigned as task owner for each Mingle login.
mingleOwnerProperty = 'Owner'
ownerMap = {
    'fflorin': 'PHID-USER-dbudsaorcqut7sg3vvbi',
    'gdubuc': 'PHID-USER-papbtlagfolot4dzerne',
    'mholmquist': 'PHID-USER-nvavrb7ko66hv3xap6sb',
    'gtisza': 'PHID-USER-a6p24cvyblhfzc7we7nc',
    'pginer': 'PHID-USER-c47vnc2yxmwfvvc4367q',
}

# --- Status ------------------------------------------------------------------
# Name of the Mingle property holding the card status. Values missing from
# statusMap leave the migrated task open.
mingleStatusProperty = 'Status'
statusMap = {
    'Accepted': 'resolved',
}

# Only cards of these Mingle card types are migrated.
cardTypeWhitelist = [
    'Bug',
    'Story',
    'Tech debt',
    'Scope Increase (UNPLANNED)',
    'Task',
]
def getText(nodelist):
    '''
    Return the concatenated data of every text node in nodelist.

    Nodes of any other type are skipped; an empty nodelist yields ''.
    '''
    return ''.join(
        node.data
        for node in nodelist
        if node.nodeType == node.TEXT_NODE
    )
def uploadImageToPhabricator( image64, name ):
    '''
    Upload a base64-encoded file through the shared Conduit client.

    image64 -- file contents, base64 encoded
    name    -- file name passed to file.upload

    Returns the 'objectName' field reported by file.info for the new file.
    '''
    # file.upload answers with the PHID of the stored file ...
    uploaded = phab.file.upload( data_base64=image64, name=name )
    # ... which is then looked up to obtain its object name.
    fileInfo = phab.file.info( phid=uploaded.response )
    return fileInfo.response['objectName']
def _imagePattern( parsedImg ):
    '''
    Build the regular expression used to find an image in a description.

    The alt text is used when present, otherwise the serialized <img> tag
    with flexible whitespace. All literal text is regex-escaped so that
    characters such as '?', '.' or '(' in URLs and alt texts match themselves.
    '''
    alt = parsedImg.get( 'alt' )
    if alt:
        return re.escape( alt )

    tag = str( parsedImg )
    selfClosing = tag.endswith( '/>' )
    if selfClosing:
        tag = tag[:-2]
    # Every space in the serialized tag may be any amount of whitespace
    # (or none) in the text being searched.
    pattern = r'\s*'.join( re.escape( chunk ) for chunk in tag.split( ' ' ) )
    if selfClosing:
        # Tolerate both '<img .../>' and '<img ... />'.
        pattern += r'\s*/>'
    return pattern


def transloadImages( html ):
    '''
    Copy every <img> found in html to Phabricator.

    html -- rendered HTML of a card description

    Returns a dict mapping a regular expression pattern (see _imagePattern)
    to the object name returned by uploadImageToPhabricator for that image.

    Images that cannot be downloaded are skipped, so one broken image never
    aborts the migration of a card.
    '''
    images = {}
    parsedHtml = BeautifulSoup( html )
    for parsedImg in parsedHtml.find_all( 'img' ):
        try:
            image = urllib2.urlopen( parsedImg.get( 'src' ), timeout=30 )
            try:
                imageBinary = image.read()
            finally:
                # Release the connection even when the read fails.
                image.close()
        except ( urllib2.URLError, socket.error, ValueError, AttributeError ):
            # URLError covers HTTP errors, unreachable hosts and unsupported
            # schemes (e.g. data: URIs); socket.error covers timeouts;
            # ValueError is raised for scheme-less (relative) URLs;
            # AttributeError is raised when the tag has no src at all.
            continue
        image64 = imageBinary.encode( 'base64' )
        # The uploaded file is named after the alt text (None when absent).
        images[ _imagePattern( parsedImg ) ] = uploadImageToPhabricator( image64, parsedImg.get( 'alt' ) )
    return images
def ghettoHtmlToRemarkup( html, images, mingleSite, project ):
    '''
    Convert a fragment of Mingle HTML to Phabricator Remarkup, best effort.

    html       -- HTML text to convert
    images     -- dict of regular expression pattern -> Phabricator file
                  object name; every match is replaced by a full-size embed
    mingleSite -- base URL of the Mingle site, without trailing slash
    project    -- Mingle project identifier

    Returns the converted text. Links pointing into the same Mingle project
    are reduced to their link text, and every '#<number>' is turned into a
    link to the Mingle card with that number.
    '''
    remarkup = html

    # Embedded images. Keys are regular expressions; the replacement is
    # produced by a function so it is always inserted literally.
    for pattern, objectName in images.items():
        embed = '{' + objectName + ', size=full}'
        remarkup = re.sub( pattern, lambda match, embed=embed: embed, remarkup )

    # Closing tags (and &nbsp;) have a fixed spelling: plain replacement.
    literalReplacements = (
        ( '&nbsp;', '' ),
        ( '</p>', '' ),
        ( '</ol>', '' ),
        ( '</span>', '' ),
        ( '</ul>', '' ),
        ( '</li>', '' ),
        ( '</div>', '' ),
        ( '</h1>', ' =' ),
        ( '</h2>', ' ==' ),
        ( '</h3>', ' ===' ),
        ( '</h4>', ' ====' ),
        ( '</blockquote>', '```' ),
        ( '</b>', '**' ),
        ( '</strong>', '** ' ),
        ( '</s>', '~~' ),
        ( '</strike>', '~~' ),
        ( '</em>', '//' ),
    )
    for needle, replacement in literalReplacements:
        remarkup = remarkup.replace( needle, replacement )

    # Line breaks in any spelling: <br>, <br/>, <br />.
    remarkup = re.sub( r'<br\s*/?>', '', remarkup )

    # Purely structural tags are dropped. The \b keeps a tag name from
    # matching longer names sharing its prefix (<p> vs <pre>).
    for tag in ( 'span', 'p', 'div', 'ol', 'ul' ):
        remarkup = re.sub( r'<' + tag + r'\b[^>]*>', '', remarkup )

    # Links. [^>]*? keeps the match inside a single <a ...> tag, so several
    # links on one line are converted one by one.
    projectUrl = mingleSite + '/projects/' + project
    linkStart = r'<a\s[^>]*?href="'
    # Special case for redundant Mingle links: keep only the link text
    # (typically a card number, which is linked again further down).
    remarkup = re.sub( linkStart + re.escape( projectUrl ) + r'[^"]+"[^>]*>([^<]+)</a>', r'\1', remarkup )
    remarkup = re.sub( linkStart + r'([^"]+)"[^>]*>([^<]+)</a>', r' [[\1 | \2]] ', remarkup )

    # List items start on their own line.
    remarkup = re.sub( r'\s*<li\b[^>]*>', '\n * ', remarkup )

    # Opening tags that have a Remarkup equivalent.
    openingTags = (
        ( 'h1', '= ' ),
        ( 'h2', '== ' ),
        ( 'h3', '=== ' ),
        ( 'h4', '==== ' ),
        ( 'strong', ' **' ),
        ( 's', '~~' ),
        ( 'strike', '~~' ),
        ( 'blockquote', '```' ),
        ( 'b', '**' ),
        ( 'em', '//' ),
    )
    for tag, markup in openingTags:
        remarkup = re.sub( r'<' + tag + r'\b[^>]*>', markup, remarkup )

    # Card numbers become links to the Mingle card. The lookbehind skips
    # numeric character references such as &#123;.
    def linkCard( match ):
        number = match.group( 1 )
        return ' [[' + projectUrl + '/cards/' + number + ' | #' + number + ']] '
    remarkup = re.sub( r'(?<!&)#([0-9]+)', linkCard, remarkup )

    # Entities are decoded last, so that escaped text such as &lt;b&gt; is
    # never mistaken for real markup by the steps above.
    for entity, char in ( ( '&lt;', '<' ), ( '&gt;', '>' ), ( '&#123;', '{' ), ( '&#125;', '}' ) ):
        remarkup = remarkup.replace( entity, char )

    return remarkup
def postComment( cardUrl, phabId, username, datetime, comment, mingleSite, project ):
    '''
    Re-post one Mingle comment on a Phabricator task as a quoted remark.

    cardUrl    -- URL of the originating Mingle card, linked in the header
    phabId     -- id of the Maniphest task receiving the comment
    username   -- Mingle login of the author; replaced by an @-mention when
                  the login is listed in userMap
    datetime   -- creation timestamp string; 'T' becomes ' at ' and 'Z' is
                  removed
    comment    -- HTML body of the comment
    mingleSite -- base URL of the Mingle site
    project    -- Mingle project identifier
    '''
    author = '@' + userMap[ username ] if username in userMap else username
    body = ghettoHtmlToRemarkup( comment, {}, mingleSite, project )
    when = datetime.replace( 'T', ' at ' ).replace( 'Z', '' )

    # '>>!' opens a Remarkup quote carrying its own attribution line.
    header = '>>! In [[' + cardUrl + ' | mingle]] on ' + when + ', ' + author + ' wrote:\n\n'
    phab.maniphest.update( id=phabId, comments=header + body )
def _readUrl( url ):
    '''Fetch url and return the response body, always closing the response.'''
    response = urllib2.urlopen( url, timeout=30 )
    try:
        return response.read()
    finally:
        response.close()


def _firstChildValue( element ):
    '''Return the nodeValue of element's first child, or None if it has no children.'''
    child = element.firstChild
    if child is None:
        return None
    return child.nodeValue


def processCard( mingleSite, project, cardNumber ):
    '''
    Migrate one Mingle card, with its comments, to a new Maniphest task.

    mingleSite -- base URL of the Mingle site, without trailing slash
    project    -- Mingle project identifier
    cardNumber -- number of the card to migrate

    A card that cannot be fetched, or whose type is not in cardTypeWhitelist,
    is reported on standard output and skipped. On success the URI of the
    new task is printed.
    '''
    cardApiUrl = mingleSite + '/api/v2/projects/' + project + '/cards/' + str( cardNumber )
    try:
        cardXml = _readUrl( cardApiUrl + '.xml' )
    except urllib2.URLError:
        print( "Card " + str( cardNumber ) + " not found" )
        return

    xmlDocument = minidom.parseString( cardXml )
    cardType = xmlDocument.getElementsByTagName( 'card_type' )[0].getElementsByTagName( 'name' )[0].firstChild.nodeValue
    if cardType not in cardTypeWhitelist:
        print( "Card " + str( cardNumber ) + " is an undesirable card type (" + cardType + ")" )
        return

    # Images are collected from the rendered description and uploaded first,
    # so the converted description can embed the uploaded copies.
    descriptionUrl = xmlDocument.getElementsByTagName( 'rendered_description' )[0].attributes['url'].value
    images = transloadImages( _readUrl( descriptionUrl ) )

    name = xmlDocument.getElementsByTagName( 'name' )[0].firstChild.nodeValue
    # An empty <description/> has no child node at all.
    descriptionSimpleHtml = _firstChildValue( xmlDocument.getElementsByTagName( 'description' )[0] ) or ''
    cardUrl = mingleSite + '/projects/' + project + '/cards/' + str( cardNumber )
    description = '//Migrated from: ' + cardUrl + ' //\n\n' + ghettoHtmlToRemarkup( descriptionSimpleHtml, images, mingleSite, project )

    # Defaults used when a property is missing or its value is not mapped.
    projects = [ defaultProject ]
    projectCardId = 0
    projectPriority = 90
    projectOwner = None
    projectStatus = 'open'

    for prop in xmlDocument.getElementsByTagName( 'property' ):
        propName = prop.getElementsByTagName( 'name' )[0].firstChild.nodeValue
        if propName == mingleProjectProperty:
            numberElements = prop.getElementsByTagName( 'number' )
            if numberElements:
                projectCardId = int( numberElements[0].firstChild.nodeValue )
        if propName == minglePriorityProperty:
            priority = _firstChildValue( prop.getElementsByTagName( 'value' )[0] )
            if priority in priorityMap:
                projectPriority = priorityMap[ priority ]
        if propName == mingleOwnerProperty:
            ownerElements = prop.getElementsByTagName( 'login' )
            if ownerElements:
                owner = ownerElements[0].firstChild.nodeValue
                if owner in ownerMap:
                    projectOwner = ownerMap[ owner ]
        if propName == mingleStatusProperty:
            status = _firstChildValue( prop.getElementsByTagName( 'value' )[0] )
            if status in statusMap:
                projectStatus = statusMap[ status ]

    if projectCardId in projectMap:
        projects.append( projectMap[ projectCardId ] )

    result = phab.maniphest.createtask( title=name, description=description, projectPHIDs=projects, priority=projectPriority, ownerPHID=projectOwner )
    taskId = result.response['id']

    commentsXmlDocument = minidom.parseString( _readUrl( cardApiUrl + '/comments.xml' ) )
    comments = commentsXmlDocument.getElementsByTagName( 'comment' )
    # Comments are posted in the reverse of the order the API lists them.
    for comment in reversed( comments ):
        # A comment with an empty body has no text node; post it as ''.
        content = _firstChildValue( comment.getElementsByTagName( 'content' )[0] ) or ''
        createdAt = comment.getElementsByTagName( 'created_at' )[0].firstChild.nodeValue
        username = comment.getElementsByTagName( 'login' )[0].firstChild.nodeValue
        postComment( cardUrl, taskId, username, createdAt, content, mingleSite, project )

    # The status is changed only after all comments have been posted.
    if projectStatus != 'open':
        phab.maniphest.update( id=taskId, status=projectStatus )

    print( result.response['uri'] )
# Try every card number from 1 to 1099 in turn; processCard reports and skips
# the numbers it cannot fetch or does not want.
for cardNumber in range( 1, 1100 ):
    processCard( 'https://wikimedia.mingle.thoughtworks.com', 'multimedia', cardNumber )

Event Timeline

Gilles edited the content of this paste. (Show Details)Dec 8 2014, 6:07 PM
Gilles changed the title of this paste from untitled to "Bot" to migrate cards from mingle to phabricator.
Gilles updated the paste's language from autodetect to python.
Gilles added a project: Phabricator.
Gilles added a comment.EditedDec 8 2014, 6:10 PM

Requires these libraries:
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
https://github.com/disqus/python-phabricator

The latter relies on the local arcanist configuration: https://secure.phabricator.com/book/phabricator/article/arcanist/.

You must point this to https://phab-01.wmflabs.org/ while you're developing and testing.

Qgil awarded a token.Dec 8 2014, 6:56 PM
greg awarded a token.Dec 8 2014, 7:11 PM
hashar awarded a token.Dec 8 2014, 8:57 PM
hashar added a subscriber: hashar.
hashar removed a subscriber: hashar.
Gilles edited the content of this paste. (Show Details)Dec 10 2014, 3:52 PM
Tgr added a subscriber: Tgr.Dec 10 2014, 8:57 PM

Should probably be changed to make bugs searchable by Mingle id - the backlink is not enough due to the shortcomings of Phabricator's search. See T78191.

@Gilles this is phabulous! You should commit it to the phabricator/tools repo.

Arrbee added a subscriber: Arrbee.Dec 11 2014, 3:10 PM