Page MenuHomePhabricator
Paste P43339

es data check
ActivePublic

Authored by jcrespo on Jan 25 2023, 10:45 AM.
Tags
None
Referenced Files
F36487556: es data check
Jan 25 2023, 10:45 AM
Subscribers
None
# gather the current last blob_id of every wiki's blobs_cluster26 table from a "good" db (es2021)
root@cumin2002:~$ mysql.py -BN -h es2021 information_schema -e "SELECT table_schema FROM tables WHERE table_name='blobs_cluster26'" | while read db; do echo -n "$db "; mysql.py -BN -h es2021 $db -e "SELECT max(blob_id) FROM blobs_cluster26"; done > tables_to_check.txt
# check growth on the largest wikis (e.g. Wikidata grew 3.8 million records since 14th Jan, enwiki 1 million)
root@es2021:/srv/sqldata$ mysqlbinlog es2021-bin.008050 | less
mysql:root@localhost [wikidatawiki]> select count(*) FROM blobs_cluster26 WHERE blob_id >= 354204267;
+----------+
| count(*) |
+----------+
| 3800281 |
+----------+
1 row in set (1.668 sec)
mysql:root@localhost [enwiki]> select count(*) FROM blobs_cluster26 WHERE blob_id >= 105902404;
+----------+
| count(*) |
+----------+
| 999381 |
+----------+
1 row in set (0.424 sec)
# compare a buffer of the latest 4 million ids on every table (this covers all rows on all wikis except the largest ones)
root@cumin2002:~$ grep -v NULL tables_to_check.txt | while read db rows; do echo -e "\n== $db ==\n"; db-compare $db blobs_cluster26 blob_id es1021 es2021 es2020 --step=100 --from-value=$(($rows - 4000000)) || break ; done