Page Menu · Home · Phabricator
Paste P16317

Script to get word count of all wikis from Toolforge CirrusSearch elasticsearch replicas
Active · Public

Authored by LucasWerkmeister on Jun 7 2021, 9:21 PM.
#!/usr/bin/env bash
#
# Print the summed word count of namespace 0 for every "*_content" alias
# exposed by the cloudelastic.wikimedia.org Elasticsearch endpoints.
#
# Output: one line per alias on stdout, formatted as
#   <alias without the "_content" suffix, right-aligned to 25 columns>
#   <word count, right-aligned to 15 columns>
# Diagnostics go to stderr.
#
# Exit status: 0 when every request succeeded, 1 when at least one alias
# listing or word-count query failed (the remaining ones are still tried).
#
# Requires: bash, curl, jq.

set -euo pipefail

readonly ES_HOST='https://cloudelastic.wikimedia.org'
readonly ES_PORTS=(8243 8443 8643)

# Search body: restrict to namespace 0 and sum the text.word_count field.
readonly WORD_COUNT_QUERY='{"query":{"bool":{"filter":[{"terms":{"namespace":[0]}}]}},"aggs":{"word_count":{"sum":{"field":"text.word_count"}}},"stats":["sum_word_count"]}'

#######################################
# Write a diagnostic message to stderr, prefixed with the script name.
# Arguments: $@ - message words
#######################################
warn() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

#######################################
# List every alias ending in "_content" known to one endpoint.
# Globals:   ES_HOST (read)
# Arguments: $1 - port of the endpoint to query
# Outputs:   one alias name per line on stdout
# Returns:   non-zero if the HTTP request or the JSON processing fails
#######################################
list_content_aliases() {
  local port=$1

  # -f turns HTTP error responses into a non-zero exit status, and -S keeps
  # the error message visible even though -s silences the progress meter.
  # The jq filter skips the ".ltrstore" and ".tasks" entries, tolerates
  # entries without an "aliases" object, and matches the alias by suffix
  # rather than by its position among the sorted keys.
  curl -fsS "${ES_HOST}:${port}/_aliases" |
    jq -r '
      del(.[".ltrstore"], .[".tasks"])
      | .[]
      | (.aliases // {})
      | keys[]
      | select(endswith("_content"))
    '
}

#######################################
# Fetch the summed word count of namespace 0 for one alias.
# Globals:   ES_HOST, WORD_COUNT_QUERY (read)
# Arguments: $1 - port of the endpoint to query
#            $2 - alias (index) name to search
# Outputs:   the aggregation value as plain integer text on stdout
# Returns:   non-zero if the request fails or the response carries no
#            numeric .aggregations.word_count.value
#######################################
fetch_word_count() {
  local port=$1
  local index=$2

  # floor makes jq print the value in integer form, so that a float-style
  # rendering (e.g. "123.0") never reaches printf %d; it also makes jq fail
  # when the value is missing instead of printing "null".
  curl -fsS \
    -XGET \
    -H 'Content-Type: application/json' \
    -d "$WORD_COUNT_QUERY" \
    "${ES_HOST}:${port}/${index}/_search" |
    jq -r '.aggregations.word_count.value | floor'
}

#######################################
# Walk all endpoints and print one result line per "_content" alias.
# Globals:   ES_PORTS (read)
# Outputs:   result lines on stdout, warnings on stderr
# Returns:   0 if everything succeeded, 1 otherwise
#######################################
main() {
  local port aliases index words
  local status=0

  for port in "${ES_PORTS[@]}"; do
    # Capture the listing first so that a failure is detectable; a loop fed
    # directly by a pipeline would hide the exit status of curl and jq.
    if ! aliases=$(list_content_aliases "$port"); then
      warn "port ${port}: could not list aliases, skipping"
      status=1
      continue
    fi

    while IFS= read -r index; do
      # An empty listing still yields one empty line from the here-string.
      [[ -n "$index" ]] || continue

      if ! words=$(fetch_word_count "$port" "$index"); then
        warn "port ${port}: word count query failed for ${index}"
        status=1
        continue
      fi

      # Never hand printf %d something it would silently turn into 0.
      if [[ ! "$words" =~ ^[0-9]+$ ]]; then
        warn "port ${port}: unexpected word count '${words}' for ${index}"
        status=1
        continue
      fi

      printf '%25s %15d\n' "${index%_content}" "$words"
    done <<<"$aliases"
  done

  return "$status"
}

main "$@"