Page MenuHomePhabricator

ORES blocking itself on insert and delete
Closed, ResolvedPublic

Description

From T201412#4500670

Multiple instances of ORES blocking itself on insert and delete (needs better ordering so it doesn't cause contention):

{
  "_index": "logstash-2018.08.14",
  "_type": "mediawiki",
  "_id": "AWU3ARTb8VFZOIjHvo7Q",
  "_version": 1,
  "_score": null,
  "_source": {
    "server": "eu.wikipedia.org",
    "db_server": "10.64.0.205",
    "wiki": "euwiki",
    "channel": "DBQuery",
    "type": "mediawiki",
    "error": "Deadlock found when trying to get lock; try restarting transaction (10.64.0.205)",
    "http_method": "GET",
    "@version": 1,
    "host": "mw1312",
    "shard": "s3",
    "sql1line": "INSERT IGNORE INTO `ores_classification` (oresc_rev,oresc_model,oresc_class,oresc_probability,oresc_is_predicted) VALUES ('6382416','77','0','0.52696843053581','0'),('6382417','77','0','0.34148302655215','0'),('6382419','77','0','0.34148302655215','0'),('6382420','77','0','0.34148302655215','0'),('6382421','77','0','0.22037256092671','0'),('6382422','77','0','0.48006704197061','0'),('6382423','77','0','0.15567784718498','0'),('6382426','77','0','0.22797251192274','0'),('6382427','77','0','0.22797251192274','0'),('6382428','77','0','0.56836638467799','0'),('6382429','77','0','0.56836638467799','0'),('6382430','77','0','0.34202639222287','0'),('6382431','77','0','0.34202639222287','0'),('6382432','77','0','0.33486016107173','0'),('6382433','77','0','0.33486016107173','0'),('6382434','77','0','0.34613081505756','0'),('6382435','77','0','0.56836638467799','0'),('6382436','77','0','0.35359614666786','0'),('6382437','77','0','0.56836638467799','0'),('6382438','77','0','0.56836638467799','0'),('6382439','77','0','0.34857276119443','0'),('6382440','77','0','0.56836638467799','0'),('6382441','77','0','0.56836638467799','0'),('6382442','77','0','0.45711496506235','0'),('6382443','77','0','0.34451258218223','0'),('6382444','77','0','0.56836638467799','0'),('6382445','77','0','0.56836638467799','0'),('6382446','77','0','0.17095028291659','0'),('6382447','77','0','0.45711496506235','0'),('6382448','77','0','0.16681720837499','0')",
    "fname": "ORES\\Storage\\SqlScoreStorage::storeScores",
    "errno": 1213,
    "unique_id": "W3Ju3QpAAK4AAIyy@g8AAABE",
    "method": "Wikimedia\\Rdbms\\Database::makeQueryException",
    "level": "ERROR",
    "ip": "10.64.16.22",
    "mwversion": "1.32.0-wmf.16",
    "message": "ORES\\Storage\\SqlScoreStorage::storeScores\t10.64.0.205\t1213\tDeadlock found when trying to get lock; try restarting transaction (10.64.0.205)\tINSERT IGNORE INTO `ores_classification` (oresc_rev,oresc_model,oresc_class,oresc_probability,oresc_is_predicted) VALUES ('6382416','77','0','0.52696843053581','0'),('6382417','77','0','0.34148302655215','0'),('6382419','77','0','0.34148302655215','0'),('6382420','77','0','0.34148302655215','0'),('6382421','77','0','0.22037256092671','0'),('6382422','77','0','0.48006704197061','0'),('6382423','77','0','0.15567784718498','0'),('6382426','77','0','0.22797251192274','0'),('6382427','77','0','0.22797251192274','0'),('6382428','77','0','0.56836638467799','0'),('6382429','77','0','0.56836638467799','0'),('6382430','77','0','0.34202639222287','0'),('6382431','77','0','0.34202639222287','0'),('6382432','77','0','0.33486016107173','0'),('6382433','77','0','0.33486016107173','0'),('6382434','77','0','0.34613081505756','0'),('6382435','77','0','0.56836638467799','0'),('6382436','77','0','0.35359614666786','0'),('6382437','77','0','0.56836638467799','0'),('6382438','77','0','0.56836638467799','0'),('6382439','77','0','0.34857276119443','0'),('6382440','77','0','0.56836638467799','0'),('6382441','77','0','0.56836638467799','0'),('6382442','77','0','0.45711496506235','0'),('6382443','77','0','0.34451258218223','0'),('6382444','77','0','0.56836638467799','0'),('6382445','77','0','0.56836638467799','0'),('6382446','77','0','0.17095028291659','0'),('6382447','77','0','0.45711496506235','0'),('6382448','77','0','0.16681720837499','0')",
    "normalized_message": "{fname}\t{db_server}\t{errno}\t{error}\t{sql1line}",
    "url": "/w/api.php?action=query&format=json&continue=-%7C%7C&rccontinue=20180716144359%7C22924687&list=recentchanges&rclimit=500&rcnamespace=0%7C2%7C4%7C6%7C10%7C14%7C100%7C828&rcprop=comment%7Cflags%7Cids%7Cloginfo%7Coresscores%7Csizes%7Ctags%7Ctimestamp%7Ctitle%7Cuser",
    "tags": [
      "syslog",
      "es",
      "es"
    ],
    "reqId": "W3Ju3QpAAK4AAIyy@g8AAABE",
    "referrer": null,
    "@timestamp": "2018-08-14T05:55:42.000Z",
    "db_name": "euwiki",
    "db_user": "wikiuser"
  },
  "fields": {
    "@timestamp": [
      1534226142000
    ]
  },
  "sort": [
    1534226142000
  ]
}
{
  "_index": "logstash-2018.08.14",
  "_type": "mediawiki",
  "_id": "AWU3ARQ6KHrb1-g-Ps_P",
  "_version": 1,
  "_score": null,
  "_source": {
    "server": "jobrunner.discovery.wmnet",
    "db_server": "10.64.0.205",
    "wiki": "euwiki",
    "channel": "DBQuery",
    "type": "mediawiki",
    "error": "Deadlock found when trying to get lock; try restarting transaction (10.64.0.205)",
    "http_method": "POST",
    "@version": 1,
    "host": "mw1300",
    "shard": "s3",
    "sql1line": "DELETE FROM `ores_classification` WHERE oresc_rev IN ('6380744','6382466','6382466','5794247','6353451','6382465','6381000','6046271','6382463','6382471','6382474','6382475','6382368','5627264','6112872','6382175','6382494','0','6382496','6382496','6382497','6382461','6382498','6382499','6206904','6382506','5953789','5962015','5960567','0','0','0','0','0','0','0','6382519','6382508','6382498','6382498','6382498','6382484','6382515','6382484','6382489','6382469','6382513','6382489','6382489','6382515','6382484','6382515','6382518','6382473')  AND oresc_model = '77'",
    "fname": "ORES\\Storage\\SqlScoreStorage::cleanUpOldScores",
    "errno": 1213,
    "method": "Wikimedia\\Rdbms\\Database::makeQueryException",
    "level": "ERROR",
    "ip": "10.64.16.65",
    "mwversion": "1.32.0-wmf.16",
    "message": "ORES\\Storage\\SqlScoreStorage::cleanUpOldScores\t10.64.0.205\t1213\tDeadlock found when trying to get lock; try restarting transaction (10.64.0.205)\tDELETE FROM `ores_classification` WHERE oresc_rev IN ('6380744','6382466','6382466','5794247','6353451','6382465','6381000','6046271','6382463','6382471','6382474','6382475','6382368','5627264','6112872','6382175','6382494','0','6382496','6382496','6382497','6382461','6382498','6382499','6206904','6382506','5953789','5962015','5960567','0','0','0','0','0','0','0','6382519','6382508','6382498','6382498','6382498','6382484','6382515','6382484','6382489','6382469','6382513','6382489','6382489','6382515','6382484','6382515','6382518','6382473')  AND oresc_model = '77'",
    "normalized_message": "{fname}\t{db_server}\t{errno}\t{error}\t{sql1line}",
    "url": "/rpc/RunSingleJob.php",
    "tags": [
      "syslog",
      "es",
      "es"
    ],
    "reqId": "W3JuzApAEDUAAGUTSS0AAABL",
    "referrer": null,
    "@timestamp": "2018-08-14T05:55:41.000Z",
    "db_name": "euwiki",
    "db_user": "wikiuser"
  },
  "fields": {
    "@timestamp": [
      1534226141000
    ]
  },
  "sort": [
    1534226141000
  ]
}

Event Timeline

Restricted Application added a subscriber: Aklapper. · View Herald Transcript

I see what's going on here now. It's because the logic for score storage has been duplicated several places and they are getting out of sync. More precisely, the API hook handler in the ores extension caches scores for any queried revision (that was asked about ores scores too) which can be not the latest revision of the page and then tries to cache it, which is basically telling the ScoreStorage to cleanup old scores but the score that is just put is already old and then another revision comes in.
I was against putting this logic in the API hook handler. Now I just remove it.

Change 452990 had a related patch set uploaded (by Ladsgroup; owner: Amir Sarabadani):
[mediawiki/extensions/ORES@master] Do not try to cache scores that have been requested via API

https://gerrit.wikimedia.org/r/452990

Change 452990 merged by jenkins-bot:
[mediawiki/extensions/ORES@master] Do not try to cache scores that have been requested via API

https://gerrit.wikimedia.org/r/452990