Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Paste
P13068
(An Untitled Masterwork)
Active
Public
Actions
Authored by
RLazarus
on Oct 26 2020, 1:50 PM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Award Token
Flag For Later
Tags
None
Referenced Files
F32414686: raw-paste-data.txt
Oct 26 2020, 1:50 PM
2020-10-26 13:50:13 (UTC+0)
Subscribers
None
rzl@cumin1001:~$ cat pool_services.py
import
datetime
import
json
import
logging
import
pprint
import
subprocess
import
sys
import
time
from
typing
import
Dict
# Datacenter the services are being pooled in, and the datacenter they are
# expected to be already pooled in when the script starts.
DC_TO = 'eqiad'
DC_FROM = 'codfw'

# /srv/deployment/spicerack$ python3
# >>> from cookbooks.sre.switchdc import services
# >>> set(services.load_services().keys()) - set(services.EXCLUDED_SERVICES) - set(services.MEDIAWIKI_SERVICES)
SERVICES = [
    'apertium',
    'api-gateway',
    'citoid',
    'cxserver',
    'echostore',
    'eventgate-analytics',
    'eventgate-analytics-external',
    'eventgate-logging-external',
    'eventgate-main',
    'eventstreams',
    'graphoid',
    'kartotherian',
    'mathoid',
    'mobileapps',
    'ores',
    'parsoid',
    'proton',
    'push-notifications',
    'recommendation-api',
    'restbase',
    'restbase-async',
    'schema',
    'search',
    'sessionstore',
    'termbox',
    'wdqs',
    'wdqs-internal',
    'wikifeeds',
    'zotero',
]

# Regex alternation matching any one of SERVICES, used in confctl selectors.
# The alternation is wrapped in a group so that it stays a single unit when the
# consumer anchors the expression: '^a|b|c$' only anchors the first and last
# alternatives (so 'search' would also match 'search-omega'), whereas
# '^(a|b|c)$' matches exactly the listed names.
# NOTE(review): this assumes conftool anchors selector expressions as ^expr$ --
# confirm; the group is harmless if it does not.
SERVICE_RE = '({})'.format('|'.join(SERVICES))

logging.basicConfig(format='[%(levelname)s %(asctime)s] %(message)s', level=logging.INFO)
def check_preconditions() -> bool:
    """Check that every service in SERVICES is pooled in DC_FROM.

    Runs ``confctl ... get`` for all services at once, builds a
    ``service -> datacenter -> pooled`` mapping from its output, logs that
    mapping as the starting state, and logs an error for every service that is
    either absent from the conftool data or not pooled in DC_FROM. DC_TO status
    is not checked because repooling is idempotent.

    Returns:
        True if at least one precondition failed, False if all services passed.

    Raises:
        subprocess.CalledProcessError: if confctl exits with a non-zero status.
        json.JSONDecodeError: if a non-blank output line is not valid JSON.
    """
    out = subprocess.run(
        ['/usr/bin/confctl', '--object-type', 'discovery', 'select', f'dnsdisc={SERVICE_RE}', 'get'],
        check=True, capture_output=True, text=True).stdout

    pooled: Dict[str, Dict[str, bool]] = {}  # e.g. pooled['swift']['eqiad'] = False (depooled)
    for line in out.splitlines():
        if not line.strip():
            # Tolerate blank lines instead of crashing in json.loads.
            continue
        data = json.loads(line)
        # The parsing assumes each line is a JSON object holding a 'tags' entry
        # of the form '<tag>=<service>' plus one other key naming the datacenter.
        dc = next(key for key in data if key != 'tags')
        service = data['tags'].split('=', 1)[1]
        pooled.setdefault(service, {})[dc] = data[dc]['pooled']
    logging.info(f'Starting state:\n{pprint.pformat(pooled)}')

    errors = False
    for service in SERVICES:
        if service not in pooled:
            logging.error(f'{service} is not in conftool data.')
            errors = True
            continue
        # .get() so that a service with records only for other datacenters is
        # reported as a failed precondition rather than raising KeyError.
        if not pooled[service].get(DC_FROM):
            logging.error(f'{service} is not pooled in {DC_FROM}.')
            errors = True
    return errors
def set_ttls(seconds: int):
    """Set the discovery TTL of every service's DC_TO record via confctl.

    Args:
        seconds: the TTL value passed to confctl's ``set/ttl`` action.

    Raises:
        subprocess.CalledProcessError: if confctl exits with a non-zero status.
    """
    logging.info(f'Setting TTLs to {seconds} seconds...')
    # One confctl call covers all services: the selector matches every name in
    # SERVICE_RE, restricted to the DC_TO record.
    selector = f'dnsdisc={SERVICE_RE},name={DC_TO}'
    action = f'set/ttl={seconds}'
    command = ['/usr/bin/confctl', '--object-type', 'discovery', 'select', selector, action]
    subprocess.run(command, check=True)
def pool_services():
    """Pool each service in DC_TO, one at a time, pausing between services.

    Raises:
        subprocess.CalledProcessError: if a confctl call exits non-zero; the
            remaining services are then left untouched.
    """
    for position, service in enumerate(SERVICES):
        is_first = position == 0
        if not is_first:
            # Only pause between services; nothing to wait for before the first.
            logging.info(f'Pausing 3m before pooling {service}...')
            time.sleep(180)

        logging.info(f'Pooling {service}...')
        selector = f'dnsdisc={service},name={DC_TO}'
        command = [
            '/usr/bin/confctl',
            '--object-type', 'discovery',
            'select', selector,
            'set/pooled=true',
        ]
        subprocess.run(command, check=True)
def main() -> int:
    """Run the full repooling procedure and return a process exit status.

    Steps: verify preconditions, lower TTLs, wait for the old TTL to expire,
    pool the services one by one, then restore the TTLs.

    Returns:
        1 if the precondition check reported errors (nothing is changed in
        that case), 0 once all steps have completed.
    """
    if check_preconditions():
        # Failures were already logged by check_preconditions().
        return 1

    # Lower the TTLs first. Repooling does not strictly require it, but a short
    # TTL lets us revert faster and gives sharper data in graphs while
    # troubleshooting.
    set_ttls(10)
    logging.info('Waiting 5m for original TTL to expire...')
    time.sleep(300)

    pool_services()

    # Put the TTLs back.
    set_ttls(300)
    logging.info('Done.')
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
Event Timeline
RLazarus
created this paste.
Oct 26 2020, 1:50 PM
2020-10-26 13:50:13 (UTC+0)
Log In to Comment