Page MenuHomePhabricator
Paste P32761

percentiles for source data
ActivePublic

Authored by EBernhardson on Aug 22 2022, 11:07 PM.
Tags
None
Referenced Files
F35484503: percentiles for source data
Aug 22 2022, 11:07 PM
Subscribers
None
# Send the query stored in agg_source_percentiles.json as the request body to the
# viwiki_content _search endpoint. The URL is quoted so the shell does not treat
# the '?' in '?pretty' as a glob character (zsh otherwise fails with "no matches found").
curl -H 'Content-Type: application/json' 'https://search.svc.codfw.wmnet:9243/viwiki_content/_search?pretty' -d @agg_source_percentiles.json
{
"took" : 26749,
"timed_out" : false,
"_shards" : {
"total" : 6,
"successful" : 6,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1275406,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"source_size_percentiles" : {
"values" : {
"95.0" : 24481.140067274257,
"99.0" : 64590.40516208515,
"99.9" : 190648.60697161386,
"99.99" : 410348.0594599379,
"99.999" : 640872.1392799616,
"99.9999" : 966307.698398486,
"99.99999" : 1088590.0
}
}
}
}
Query source (see https://phabricator.wikimedia.org/P32760 for the unmangled version):
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"source_size_percentiles": {
"percentiles": {
"percents": [95, 99, 99.9, 99.99, 99.999, 99.9999, 99.99999],
"script": {
"source": "int fieldLength(def source, String field) { if (!source.containsKey(field)) { return 0; } return valueLength(source.get(field));}int valueLength(def value) { if (value == null) { return 0; } else if (value instanceof List) { int size = 0; for (def item : value) { size += valueLength(item); } return size; } else if (value instanceof String) { return value.length(); } else if (value instanceof Map) { int size = 0; for (def entry : value.entrySet()) { size += valueLength(entry.getKey()); size += valueLength(entry.getValue()); } return size; } else { /* int, long, etc */ return value.toString().length(); }}return fieldLength(params._source, 'auxiliary_text') + fieldLength(params._source, 'category') + fieldLength(params._source, 'external_link') + fieldLength(params._source, 'heading') + fieldLength(params._source, 'language') + fieldLength(params._source, 'outgoing_link') + fieldLength(params._source, 'redirect') + fieldLength(params._source, 'template') + fieldLength(params._source, 'weighted_tags') + fieldLength(params._source, 'content_model') + fieldLength(params._source, 'create_timestamp') + fieldLength(params._source, 'file_bits') + fieldLength(params._source, 'file_height') + fieldLength(params._source, 'file_media_type') + fieldLength(params._source, 'file_mime') + fieldLength(params._source, 'file_resolution') + fieldLength(params._source, 'file_size') + fieldLength(params._source, 'file_text') + fieldLength(params._source, 'file_width') + fieldLength(params._source, 'local_sites_with_dupe') + fieldLength(params._source, 'namespace_text') + fieldLength(params._source, 'opening_text') + fieldLength(params._source, 'source_text') + fieldLength(params._source, 'text') + fieldLength(params._source, 'timestamp') + fieldLength(params._source, 'title') + fieldLength(params._source, 'wiki') + fieldLength(params._source, 'wikibase_item');"
}
}
}
}
}