# Scrape the deployment calendar for patches to SWAT.
#
# Authored by mmodell on Aug 19 2016, 6:38 AM.
import re
from datetime import datetime

import dateutil.parser
import requests
from bs4 import BeautifulSoup
from dateutil.relativedelta import relativedelta
from dateutil.tz import tzlocal

import scap.cli as cli
@cli.command('swat', help='Mediawiki SWAT deployment helper.')
class Swat(cli.Application):
    """Command-line helper for Mediawiki SWAT deployments.

    Only the ``--start`` flag is acted on by the code in this class; the
    other declared arguments (``--cherry-pick``, ``--branch``, ``--base``,
    ``--changeid``) are parsed but not read here.
    """

    @cli.argument('--cherry-pick', action='store_true',
                  help='Cherry-pick the patches instead of merging.')
    @cli.argument('-b', '--branch', nargs="+",
                  help='One or more branches to merge into. Default: current branch.')
    @cli.argument('--base', nargs=1,
                  help='Rebase the patch against this commit before merging.')
    @cli.argument('--changeid', nargs="+",
                  help='The ChangeId of a patch to merge.')
    @cli.argument('--start', action='store_true',
                  help='Enqueue patches from the deployment calendar and begin SWAT '
                  + 'deployment.')
    def main(self, *extra_args):
        """Entry point for the ``swat`` command.

        :param extra_args: extra positional arguments; not used here.
        """
        # NOTE(review): the body of this branch was missing from the source.
        # Running the calendar scraper is the presumed intent, since it is
        # the only other method on the class -- confirm.
        if self.arguments.start:
            self.scrape_deployment_calendar()

    def scrape_deployment_calendar(self):
        """Print the Gerrit patches listed in the current SWAT window.

        Fetches the deployment calendar page, finds every element titled
        "SWAT deploys", and for the window that contains the current time
        prints the window bounds followed by the text and href of each
        Gerrit link listed under each developer.
        """
        # NOTE(review): this pattern is empty in the source (it looks like
        # the original URL regex was stripped). An empty pattern matches
        # every href, so restore the real Gerrit URL pattern.
        gerrit_link_re = re.compile('')

        def match_gerrit_link(tag):
            """Return a truthy value for <a> tags whose href matches."""
            return (tag.name == 'a'
                    and tag.has_attr('href')
                    and gerrit_link_re.match(tag['href']))

        # NOTE(review): the calendar URL is empty in the source and must be
        # restored; requests rejects an empty URL.
        response = requests.get('')
        soup = BeautifulSoup(response.text, 'lxml')

        # Timezone-aware "now", so it can be compared with the UTC window
        # bounds parsed below.
        now = datetime.now(tzlocal())

        # find all swat deploy windows on the deployment calendar
        for tag in soup.find_all(title="SWAT deploys"):
            # look up 3 levels to find the TR tag
            row = tag.parent.parent.parent
            if row.name != 'tr':
                continue

            # get the timestamp from the TR tag's id attribute; the part
            # after the last '-' is parsed as a UTC time ("Z" suffix)
            datestring = row['id'].rsplit('-', 1)[1] + "Z"
            window_start = dateutil.parser.parse(datestring)
            window_end = window_start + relativedelta(hours=+1)

            # if the current time is not within this window, continue to next
            if now <= window_start or now > window_end:
                continue

            # get all the names for developers with patches to deploy
            people = row.find_all(class_='ircnick-container')
            if people:
                print("from %s to %s" % (window_start, window_end))

            for person in people:
                if person.parent.name == 'p':
                    # All but the first developers' names are nested
                    # inside a <p>, so the patch list follows the <p>.
                    anchor = person.parent
                else:
                    anchor = person

                # get the list immediately following the developer name
                patch_list = anchor.find_next_sibling('ul')
                if not patch_list:
                    continue

                # find all gerrit links in this list
                links = patch_list.find_all(match_gerrit_link)
                if not links:
                    continue

                # NOTE(review): the developer's nick (person.text) is not
                # printed; the separator may originally have included it.
                print("------ ------------------------------------------")
                for link in links:
                    print("%s - %s" % (link.text, link['href']))