Page MenuHomePhabricator
Paste P15202

(An Untitled Masterwork)
ActivePublic

Authored by Kormat on Wed, Apr 7, 7:43 AM.
root@db1077.eqiad.wmnet[parsercache]> stop slave;
Query OK, 0 rows affected (0.080 sec)
root@db1077:/srv/sqldata/parsercache# du -ms pc052.ibd
11277 pc052.ibd
root@db1077.eqiad.wmnet[parsercache]> optimize table pc052;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table | Op | Msg_type | Msg_text |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc052 | optimize | note | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc052 | optimize | status | OK |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (3 min 22.271 sec)
root@db1077:/srv/sqldata/parsercache# du -ms pc052.ibd
9921 pc052.ibd
root@db1077.eqiad.wmnet[parsercache]> select count(keyname) from pc052 where keyname like '%dtreply=1%';
+----------------+
| count(keyname) |
+----------------+
| 3305 |
+----------------+
root@db1077.eqiad.wmnet[parsercache]> delete from pc052 where keyname like '%dtreply=1%';
Query OK, 3305 rows affected (3.007 sec)
root@db1077.eqiad.wmnet[parsercache]> optimize table pc052;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table | Op | Msg_type | Msg_text |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc052 | optimize | note | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc052 | optimize | status | OK |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (50.657 sec)
root@db1077:/srv/sqldata/parsercache# du -ms pc052.ibd
9881 pc052.ibd
root@db1077.eqiad.wmnet[parsercache]> select count(*) from pc052;
+----------+
| count(*) |
+----------+
| 1247236 |
+----------+
>>> 3305/(3305+1247236)
0.0026428561718488237
>>> 1-(9881/9921)
0.004031851627860128
Percentage of keys that contain dtreply=1: 0.26%
Disk space percentage used by dtreply=1 keys: 0.4%

Event Timeline

root@db1077.eqiad.wmnet[parsercache]> select count(*) from pc199;
+----------+
| count(*) |
+----------+
|  1248326 |
+----------+

root@db1077.eqiad.wmnet[parsercache]> optimize table pc199;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc199 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc199 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (3 min 29.366 sec)

root@db1077:/srv/sqldata/parsercache# du -ms pc199.ibd
9925	pc199.ibd

root@db1077.eqiad.wmnet[parsercache]> select count(keyname) from pc199 where keyname like 'arwiki:%dtreply=1%';
+----------------+
| count(keyname) |
+----------------+
|           2130 |
+----------------+

root@db1077.eqiad.wmnet[parsercache]> select count(keyname) from pc199 where keyname like 'arwiki:%';
+----------------+
| count(keyname) |
+----------------+
|          20632 |
+----------------+

10% of arwiki rows are dtreply=1

root@db1077.eqiad.wmnet[parsercache]> delete from pc199 where keyname like 'arwiki:%dtreply=1%';
Query OK, 2130 rows affected (0.185 sec)

root@db1077.eqiad.wmnet[parsercache]> optimize table pc199;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc199 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc199 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (42.342 sec)

root@db1077:/srv/sqldata/parsercache# du -ms pc199.ibd
9905	pc199.ibd

root@db1077.eqiad.wmnet[parsercache]> delete from pc199 where keyname like 'arwiki:%';
Query OK, 18502 rows affected (0.668 sec)


root@db1077.eqiad.wmnet[parsercache]> optimize table pc199;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc199 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc199 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (46.510 sec)

root@db1077:/srv/sqldata/parsercache# du -ms pc199.ibd
9749	pc199.ibd

arwiki total:
>>> 9925-9749
176

arwiki dtreply=1:
>>> 9925-9905
20

arwiki dtreply=1 percentage:
>>> 20/176
0.11363636363636363
root@db1077.eqiad.wmnet[parsercache]> select count(*) from pc127;
+----------+
| count(*) |
+----------+
|  1249354 |
+----------+

root@db1077:/srv/sqldata/parsercache# du -ms pc127.ibd
11197	pc127.ibd

root@db1077.eqiad.wmnet[parsercache]> optimize table pc127;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc127 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc127 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (3 min 18.082 sec)

root@db1077:/srv/sqldata/parsercache# du -ms pc127.ibd
9929	pc127.ibd

root@db1077.eqiad.wmnet[parsercache]> select count(keyname) from pc127 where keyname like 'arwiki:%dtreply=1%';
+----------------+
| count(keyname) |
+----------------+
|           2073 |
+----------------+

root@db1077.eqiad.wmnet[parsercache]> select count(keyname) from pc127 where keyname like 'arwiki:%';
+----------------+
| count(keyname) |
+----------------+
|          20564 |
+----------------+

10% of arwiki keys have dtreply=1

root@db1077.eqiad.wmnet[parsercache]> delete from pc127 where keyname like 'arwiki:%dtreply=1%';
Query OK, 2073 rows affected (0.150 sec)

root@db1077.eqiad.wmnet[parsercache]> optimize table pc127;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc127 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc127 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+

root@db1077:/srv/sqldata/parsercache# du -ms pc127.ibd
9905	pc127.ibd

root@db1077.eqiad.wmnet[parsercache]> delete from pc127 where keyname like 'arwiki:%';
Query OK, 18491 rows affected (0.648 sec)

root@db1077.eqiad.wmnet[parsercache]> optimize table pc127;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc127 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc127 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (48.506 sec)

root@db1077:/srv/sqldata/parsercache# du -ms pc127.ibd
9749	pc127.ibd

arwiki total:
>>> 9929-9749
180

arwiki dtreply=1:
>>> 9929-9905
24

arwiki dtreply=1 percentage:
>>> 24/180
0.13333333333333333

Summary

dtreply=1 keys currently are responsible for ~0.4% of disk space usage in total. This on it's own is fine. However, it's only enabled for 3 wikis currently (ar/cs/hu).

Looking at arwiki, we can see the extension is responsible for ~12% of arwiki's disk space. This implies that rolling it out to large wikis will have a very significant impact on disk usage.

Testing with pc1010 (do not use db1077 anymore as it was truncated two days ago due to disk space issues - I think Stevie's results might have been a bit affected by that, as it was also recently truncated (a few weeks ago), I don't recall exactly when):

Table pc200:

mysql:root@localhost [parsercache]> select count(*) from pc200;
+----------+
| count(*) |
+----------+
|  1417394 |
+----------+
1 row in set (0.861 sec)

mysql:root@localhost [parsercache]> select count(keyname) from pc200 where keyname like '%dtreply%';
+----------------+
| count(keyname) |
+----------------+
|           4211 |
+----------------+
1 row in set (0.440 sec)

root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc200.ibd
12513	pc200.ibd

mysql:root@localhost [parsercache]> optimize table pc200;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc200 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc200 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (3 min 41.359 sec)

root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc200.ibd
11337	pc200.ibd


mysql:root@localhost [parsercache]> delete from pc200 where keyname like '%dtreply=1%';
Query OK, 4211 rows affected (2.924 sec)

mysql:root@localhost [parsercache]> optimize table pc200;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc200 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc200 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (52.777 sec)

root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc200.ibd
11241	pc200.ibd

mysql:root@localhost [parsercache]> delete from pc210 where keyname like '%dtreply=1%';
Query OK, 4143 rows affected (3.114 sec)

mysql:root@localhost [parsercache]> optimize table pc210;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc210 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc210 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (43.946 sec)

root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc210.ibd
11249	pc210.ibd

0.2% of the keys on pc200 table are dtreply.
Deleting those rows+optimizing gave us 96MB back, that is around 0.8% disk space of the table.

Table pc210:

mysql:root@localhost [parsercache]> select count(*) from pc210;
+----------+
| count(*) |
+----------+
|  1417722 |
+----------+
1 row in set (0.398 sec)


mysql:root@localhost [parsercache]> select count(keyname) from pc210 where keyname like '%dtreply%';
+----------------+
| count(keyname) |
+----------------+
|           4143 |
+----------------+
1 row in set (0.448 sec)

root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc210.ibd
12481	pc210.ibd


mysql:root@localhost [parsercache]> optimize table pc210;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc210 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc210 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (3 min 25.917 sec)

root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc210.ibd
11341	pc210.ibd

0.29% of the keys on pc210 table are dtreply.
Deleting those rows+optimizing gave us 110MB back, that is around 1% disk space of the table.

Table pc150:

mysql:root@localhost [parsercache]> select count(*) from pc150;
+----------+
| count(*) |
+----------+
|  1415264 |
+----------+
1 row in set (1.374 sec)

mysql:root@localhost [parsercache]> select count(keyname) from pc150 where keyname like '%dtreply%';
+----------------+
| count(keyname) |
+----------------+
|           4149 |
+----------------+
1 row in set (0.501 sec)

root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc150.ibd
12393	pc150.ibd

mysql:root@localhost [parsercache]> optimize table pc150;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc150 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc150 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (3 min 34.128 sec)

root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc150.ibd
11313	pc150.ibd

mysql:root@localhost [parsercache]> delete from pc150 where keyname like '%dtreply=1%';
Query OK, 4149 rows affected (2.755 sec)

mysql:root@localhost [parsercache]> optimize table pc150;
+-------------------+----------+----------+-------------------------------------------------------------------+
| Table             | Op       | Msg_type | Msg_text                                                          |
+-------------------+----------+----------+-------------------------------------------------------------------+
| parsercache.pc150 | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| parsercache.pc150 | optimize | status   | OK                                                                |
+-------------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (43.764 sec)

root@pc1010:/srv/sqldata-cache/parsercache# du -ms pc150.ibd
11221	pc150.ibd

0.29% of the keys on pc150 table are dtreply.
Deleting those rows+optimizing gave us 92MB back, that is around 0.8% disk space of the table.

We can probably assume that for all the 255 tables, they all have 1% of those keys (despite on which wiki the feature is enabled by default or not)