Event Timeline
root@db1077.eqiad.wmnet[parsercache]> select count(*) from pc199; +----------+ | count(*) | +----------+ | 1248326 | +----------+ root@db1077.eqiad.wmnet[parsercache]> optimize table pc199; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc199 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc199 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (3 min 29.366 sec) root@db1077:/srv/sqldata/parsercache# du -ms pc199.ibd 9925 pc199.ibd root@db1077.eqiad.wmnet[parsercache]> select count(keyname) from pc199 where keyname like 'arwiki:%dtreply=1%'; +----------------+ | count(keyname) | +----------------+ | 2130 | +----------------+ root@db1077.eqiad.wmnet[parsercache]> select count(keyname) from pc199 where keyname like 'arwiki:%'; +----------------+ | count(keyname) | +----------------+ | 20632 | +----------------+ 10% of arwiki rows are dtreply=1 root@db1077.eqiad.wmnet[parsercache]> delete from pc199 where keyname like 'arwiki:%dtreply=1%'; Query OK, 2130 rows affected (0.185 sec) root@db1077.eqiad.wmnet[parsercache]> optimize table pc199; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc199 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc199 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (42.342 sec) root@db1077:/srv/sqldata/parsercache# du -ms pc199.ibd 9905 pc199.ibd root@db1077.eqiad.wmnet[parsercache]> delete from pc199 where keyname like 'arwiki:%'; Query OK, 18502 rows affected (0.668 sec) root@db1077.eqiad.wmnet[parsercache]> optimize table pc199; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc199 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc199 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (46.510 sec) root@db1077:/srv/sqldata/parsercache# du -ms pc199.ibd 9749 pc199.ibd arwiki total: >>> 9925-9749 176 arwiki dtreply=1: >>> 9925-9905 20 arwiki dtreply=1 percentage: >>> 20/176 0.11363636363636363
root@db1077.eqiad.wmnet[parsercache]> select count(*) from pc127; +----------+ | count(*) | +----------+ | 1249354 | +----------+ root@db1077:/srv/sqldata/parsercache# du -ms pc127.ibd 11197 pc127.ibd root@db1077.eqiad.wmnet[parsercache]> optimize table pc127; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc127 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc127 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (3 min 18.082 sec) root@db1077:/srv/sqldata/parsercache# du -ms pc127.ibd 9929 pc127.ibd root@db1077.eqiad.wmnet[parsercache]> select count(keyname) from pc127 where keyname like 'arwiki:%dtreply=1%'; +----------------+ | count(keyname) | +----------------+ | 2073 | +----------------+ root@db1077.eqiad.wmnet[parsercache]> select count(keyname) from pc127 where keyname like 'arwiki:%'; +----------------+ | count(keyname) | +----------------+ | 20564 | +----------------+ 10% of arwiki keys have dtreply=1 root@db1077.eqiad.wmnet[parsercache]> delete from pc127 where keyname like 'arwiki:%dtreply=1%'; Query OK, 2073 rows affected (0.150 sec) root@db1077.eqiad.wmnet[parsercache]> optimize table pc127; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc127 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc127 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ root@db1077:/srv/sqldata/parsercache# du -ms pc127.ibd 9905 pc127.ibd root@db1077.eqiad.wmnet[parsercache]> delete from pc127 where keyname like 'arwiki:%'; Query OK, 18491 rows affected (0.648 sec) root@db1077.eqiad.wmnet[parsercache]> optimize table pc127; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc127 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc127 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (48.506 sec) root@db1077:/srv/sqldata/parsercache# du -ms pc127.ibd 9749 pc127.ibd arwiki total: >>> 9929-9749 180 arwiki dtreply=1: >>> 9929-9905 24 arwiki dtreply=1 percentage: >>> 24/180 0.13333333333333333
Summary
dtreply=1 keys currently are responsible for ~0.4% of disk space usage in total. This on it's own is fine. However, it's only enabled for 3 wikis currently (ar/cs/hu).
Looking at arwiki, we can see the extension is responsible for ~12% of arwiki's disk space. This implies that rolling it out to large wikis will have a very significant impact on disk usage.
Testing with pc1010 (do not use db1077 anymore as it was truncated two days ago due to disk space issues - I think Stevie's results might have been a bit affected by that, as it was also recently truncated (a few weeks ago), I don't recall exactly when):
Table pc200:
mysql:root@localhost [parsercache]> select count(*) from pc200; +----------+ | count(*) | +----------+ | 1417394 | +----------+ 1 row in set (0.861 sec) mysql:root@localhost [parsercache]> select count(keyname) from pc200 where keyname like '%dtreply%'; +----------------+ | count(keyname) | +----------------+ | 4211 | +----------------+ 1 row in set (0.440 sec) root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc200.ibd 12513 pc200.ibd mysql:root@localhost [parsercache]> optimize table pc200; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc200 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc200 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (3 min 41.359 sec) root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc200.ibd 11337 pc200.ibd mysql:root@localhost [parsercache]> delete from pc200 where keyname like '%dtreply=1%'; Query OK, 4211 rows affected (2.924 sec) mysql:root@localhost [parsercache]> optimize table pc200; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc200 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc200 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (52.777 sec) root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc200.ibd 11241 pc200.ibd mysql:root@localhost [parsercache]> delete from pc210 where keyname like '%dtreply=1%'; Query OK, 4143 rows affected (3.114 sec) mysql:root@localhost [parsercache]> optimize table pc210; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc210 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc210 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (43.946 sec) root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc210.ibd 11249 pc210.ibd
0.2% of the keys on pc200 table are dtreply.
Deleting those rows+optimizing gave us 96MB back, that is around 0.8% disk space of the table.
Table pc210:
mysql:root@localhost [parsercache]> select count(*) from pc210; +----------+ | count(*) | +----------+ | 1417722 | +----------+ 1 row in set (0.398 sec) mysql:root@localhost [parsercache]> select count(keyname) from pc210 where keyname like '%dtreply%'; +----------------+ | count(keyname) | +----------------+ | 4143 | +----------------+ 1 row in set (0.448 sec) root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc210.ibd 12481 pc210.ibd mysql:root@localhost [parsercache]> optimize table pc210; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc210 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc210 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (3 min 25.917 sec) root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc210.ibd 11341 pc210.ibd
0.29% of the keys on pc210 table are dtreply.
Deleting those rows+optimizing gave us 110MB back, that is around 1% disk space of the table.
Table pc150:
mysql:root@localhost [parsercache]> select count(*) from pc150; +----------+ | count(*) | +----------+ | 1415264 | +----------+ 1 row in set (1.374 sec) mysql:root@localhost [parsercache]> select count(keyname) from pc150 where keyname like '%dtreply%'; +----------------+ | count(keyname) | +----------------+ | 4149 | +----------------+ 1 row in set (0.501 sec) root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc150.ibd 12393 pc150.ibd mysql:root@localhost [parsercache]> optimize table pc150; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc150 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc150 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (3 min 34.128 sec) root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc150.ibd 11313 pc150.ibd mysql:root@localhost [parsercache]> delete from pc150 where keyname like '%dtreply=1%'; Query OK, 4149 rows affected (2.755 sec) mysql:root@localhost [parsercache]> optimize table pc150; +-------------------+----------+----------+-------------------------------------------------------------------+ | Table | Op | Msg_type | Msg_text | +-------------------+----------+----------+-------------------------------------------------------------------+ | parsercache.pc150 | optimize | note | Table does not support optimize, doing recreate + analyze instead | | parsercache.pc150 | optimize | status | OK | +-------------------+----------+----------+-------------------------------------------------------------------+ 2 rows in set (43.764 sec) root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc150.ibd 11221 pc150.ibd
0.29% of the keys on pc150 table are dtreply.
Deleting those rows+optimizing gave us 92MB back, that is around 0.8% disk space of the table.
We can probably assume that for all the 255 tables, they all have 1% of those keys (despite on which wiki the feature is enabled by default or not)
root@pc1010.eqiad.wmnet[parsercache]> create temporary table pc001_dtvals_arwiki select length(value) as l from pc001 where keyname like 'arwiki:%dtreply=1%'; Query OK, 926 rows affected (0.090 sec) Records: 926 Duplicates: 0 Warnings: 0 root@pc1010.eqiad.wmnet[parsercache]> create temporary table pc001_vals_arwiki select length(value) as l from pc001 where keyname like 'arwiki:%'; Query OK, 15544 rows affected (0.098 sec) Records: 15544 Duplicates: 0 Warnings: 0 root@pc1010.eqiad.wmnet[parsercache]> select sum(l) from pc001_dtvals_arwiki; +---------+ | sum(l) | +---------+ | 7005452 | +---------+ 1 row in set (0.002 sec) root@pc1010.eqiad.wmnet[parsercache]> select sum(l) from pc001_vals_arwiki; +----------+ | sum(l) | +----------+ | 97593298 | +----------+ 1 row in set (0.019 sec) percentage rows: (926/15544)*100 = 5.95% percentage space: (7005452/97593298)*100 = 7.18%
pc1, table pc123:
root@pc1010.eqiad.wmnet[parsercache]> select keyname,length(value) from pc123 where keyname like 'enwiki:pcache:idhash:14596159%'; +--------------------------------------------------------------------------------------+---------------+ | keyname | length(value) | +--------------------------------------------------------------------------------------+---------------+ | enwiki:pcache:idhash:14596159-0!dtreply=1!thumbsize=7!tmh-videojs!responsiveimages=0 | 12848 | +--------------------------------------------------------------------------------------+---------------+ 1 row in set (0.001 sec)
pc3, table pc129:
root@pc1009.eqiad.wmnet[parsercache]> select keyname,length(value) from pc129 where keyname='enwiki:pcache:idhash:14596159-0!dtreply=1!tmh-videojs'; +-------------------------------------------------------+---------------+ | keyname | length(value) | +-------------------------------------------------------+---------------+ | enwiki:pcache:idhash:14596159-0!dtreply=1!tmh-videojs | 12889 | +-------------------------------------------------------+---------------+ 1 row in set (0.002 sec)
pc3, table pc153:
root@pc1009.eqiad.wmnet[parsercache]> select keyname,length(value) from pc153 where keyname='enwiki:pcache:idhash:14596159-0!canonical'; +-------------------------------------------+---------------+ | keyname | length(value) | +-------------------------------------------+---------------+ | enwiki:pcache:idhash:14596159-0!canonical | 12431 | +-------------------------------------------+---------------+ 1 row in set (0.001 sec)
(The queries to find these take about 15-20mins.)
For this one example, the closest thing we have to a comparison between the canonical entry and a dtreply entry shows a 3% larger value.