mobileapps experienced some downtime today, with about 7 minutes between failure and recovery:
15:25 -icinga-wm:#wikimedia-operations- PROBLEM - Mobileapps LVS eqiad on mobileapps.svc.eqiad.wmnet is CRITICAL:
/{domain}/v1/page/mobile-sections-lead/{title} (retrieve lead section of en.wp main page via
mobile-sections-lead) is CRITICAL: Test retrieve lead section of en.wp main page via mobile-sections-lead
returned the unexpected status 500 (expecting: 200): /{domain}/v1/page/mobile-sections-remaining/{title}
(retrieve remaining sections of en.wp main page via mobile-s
15:32 -icinga-wm:#wikimedia-operations- RECOVERY - Mobileapps LVS eqiad on mobileapps.svc.eqiad.wmnet is OK: All
endpoints are healthylooking at the logs on scb1001
with tail -20 /srv/log/mobileapps/main.log | jq 'select(.status == 500)'
mobileapps logs are not in logstash (as per T121571: Improve logstash logging for graphoid, mobileapps, mathoid) but taking for example request-id "5fd74013-0696-44a2-892b-43fd113d486d" yields two logs from restbase
{
"_index": "logstash-2016.06.21",
"_type": "restbase",
"_id": "AVVzbQ_ww3dCNxx2ZHRh",
"_score": null,
"_source": {
"host": "restbase1013",
"level": "ERROR",
"version": "1.0",
"@version": "1",
"@timestamp": "2016-06-21T14:46:14.750Z",
"source_host": "10.64.32.80",
"pid": 1963,
"message": "500: https://mediawiki.org/wiki/HyperSwitch/errors/query_error",
"res.name": "HTTPError",
"res.message": "500: https://mediawiki.org/wiki/HyperSwitch/errors/query_error",
"res.status": 500,
"res.headers.access-control-allow-origin": "*",
"res.headers.access-control-allow-headers": "accept, content-type",
"res.headers.access-control-expose-headers": "etag",
"res.headers.x-xss-protection": "1; mode=block",
"res.headers.x-content-type-options": "nosniff",
"res.headers.x-frame-options": "SAMEORIGIN",
"res.headers.content-security-policy": "default-src 'none'; frame-ancestors 'none'",
"res.headers.x-content-security-policy": "default-src 'none'; frame-ancestors 'none'",
"res.headers.x-webkit-csp": "default-src 'none'; frame-ancestors 'none'",
"res.headers.content-type": "application/json; charset=utf-8",
"res.headers.vary": "Accept-Encoding",
"res.headers.date": "Tue, 21 Jun 2016 14:46:14 GMT",
"res.headers.access-control-allow-methods": "GET",
"res.headers.cache-control": "private, max-age=0, s-maxage=0, must-revalidate",
"res.headers.x-request-id": "5fd74013-0696-44a2-892b-43fd113d486d",
"res.body.status": 500,
"res.body.type": "https://mediawiki.org/wiki/HyperSwitch/errors/query_error",
"res.body.title": "Error in Cassandra table storage backend",
"res.body.detail": "500: https://mediawiki.org/wiki/HyperSwitch/errors/query_error",
"res.body.method": "get",
"res.body.uri": "/www.wikidata.org/v1/page/html/Q24510758",
"res.requestName": "from_backend",
"stack": "HTTPError: 500: https://mediawiki.org/wiki/HyperSwitch/errors/query_error\n at Request.P.try.bind.then.err.retry.HTTPError.status (/srv/deployment/restbase/deploy/node_modules/hyperswitch/node_modules/preq/index.js:219:23)\n at Request.tryCatcher (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/util.js:16:23)\n at Promise._settlePromiseFromHandler (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/promise.js:502:31)\n at Promise._settlePromise (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/promise.js:559:18)\n at Promise._settlePromise0 (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/promise.js:604:10)\n at Promise._settlePromises (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/promise.js:683:18)\n at Promise._fulfill (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/promise.js:628:18)\n at Request._callback (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/nodeback.js:45:21)\n at Request.self.callback (/srv/deployment/restbase/deploy/node_modules/hyperswitch/node_modules/preq/node_modules/request/request.js:200:22)\n at emitTwo (events.js:87:13)\n at Request.emit (events.js:172:7)\n at Request.<anonymous> (/srv/deployment/restbase/deploy/node_modules/hyperswitch/node_modules/preq/node_modules/request/request.js:1067:10)\n at emitOne (events.js:82:20)\n at Request.emit (events.js:169:7)\n at IncomingMessage.<anonymous> (/srv/deployment/restbase/deploy/node_modules/hyperswitch/node_modules/preq/node_modules/request/request.js:988:12)\n at emitNone (events.js:72:20)\n at IncomingMessage.emit (events.js:166:7)",
"latency": 5068,
"levelPath": "error/request",
"root_req.method": "get",
"root_req.uri": "/www.wikidata.org/v1/page/mobile-sections/Q24510758?redirect=false",
"root_req.headers.cache-control": "no-cache",
"root_req.headers.content-length": "0",
"root_req.headers.user-agent": "ChangePropagation/WMF",
"root_req.headers.x-client-ip": "::ffff:10.64.0.16",
"root_req.headers.x-forwarded-for": "::ffff:10.64.0.16",
"root_req.headers.x-request-id": "5fd74013-0696-44a2-892b-43fd113d486d",
"root_req.headers.x-request-class": "internal_update",
"root_req.headers.x-triggered-by": "resource_change:http://www.wikidata.org/api/rest_v1/page/html/Q24510758",
"request_id": "5fd74013-0696-44a2-892b-43fd113d486d",
"type": "restbase",
"tags": [
"es",
"gelf",
"normalized_message_untrimmed"
],
"normalized_message": "500: https://mediawiki.org/wiki/HyperSwitch/errors/query_error",
"gelf_level": "3"
},
"sort": [
1466520374750
]
}{
"_index": "logstash-2016.06.21",
"_type": "restbase",
"_id": "AVVzbQ_ww3dCNxx2ZHRf",
"_score": null,
"_source": {
"host": "restbase1011",
"level": "ERROR",
"version": "1.0",
"@version": "1",
"@timestamp": "2016-06-21T14:46:14.693Z",
"source_host": "10.64.0.113",
"pid": 1875,
"message": "500: query_error",
"res.name": "HTTPError",
"res.message": "500: query_error",
"res.status": 500,
"res.body.type": "query_error",
"res.body.title": "Error in Cassandra table storage backend",
"res.body.stack": "Error: Operation timed out - received only 1 responses.\n at ResponseError.DriverError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/errors.js:14:19)\n at new ResponseError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/errors.js:51:24)\n at FrameReader.readError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/readers.js:309:13)\n at Parser.parseBody (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/streams.js:201:66)\n at Parser._transform (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/streams.js:113:10)\n at Parser.Transform._read (_stream_transform.js:167:10)\n at Parser.Transform._write (_stream_transform.js:155:12)\n at doWrite (_stream_writable.js:292:12)\n at writeOrBuffer (_stream_writable.js:278:5)\n at Parser.Writable.write (_stream_writable.js:207:11)\n at Protocol.ondata (_stream_readable.js:528:20)\n at emitOne (events.js:77:13)\n at Protocol.emit (events.js:169:7)\n at readableAddChunk (_stream_readable.js:146:16)\n at Protocol.Readable.push (_stream_readable.js:110:10)\n at Protocol.Transform.push (_stream_transform.js:128:32)",
"res.body.err.name": "ResponseError",
"res.body.err.stack": "Error: Operation timed out - received only 1 responses.\n at ResponseError.DriverError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/errors.js:14:19)\n at new ResponseError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/errors.js:51:24)\n at FrameReader.readError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/readers.js:309:13)\n at Parser.parseBody (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/streams.js:201:66)\n at Parser._transform (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/streams.js:113:10)\n at Parser.Transform._read (_stream_transform.js:167:10)\n at Parser.Transform._write (_stream_transform.js:155:12)\n at doWrite (_stream_writable.js:292:12)\n at writeOrBuffer (_stream_writable.js:278:5)\n at Parser.Writable.write (_stream_writable.js:207:11)\n at Protocol.ondata (_stream_readable.js:528:20)\n at emitOne (events.js:77:13)\n at Protocol.emit (events.js:169:7)\n at readableAddChunk (_stream_readable.js:146:16)\n at Protocol.Readable.push (_stream_readable.js:110:10)\n at Protocol.Transform.push (_stream_transform.js:128:32)",
"res.body.err.message": "Operation timed out - received only 1 responses.",
"res.body.err.info": "Represents an error message from the server",
"res.body.err.code": 4608,
"res.body.err.consistencies": 6,
"res.body.err.received": 1,
"res.body.err.blockFor": 2,
"res.body.err.isDataPresent": 1,
"res.body.err.coordinator": "10.64.0.232:9042",
"res.body.err.query": "select \"title\",\"page_id\",\"rev\",\"latest_rev\",\"tid\",\"namespace\",\"restrictions\",\"tags\",\"renames\",\"nextrev_tid\",\"latest_tid\",\"user_id\",\"user_text\",\"timestamp\",\"comment\",\"redirect\",\"_domain\",\"_del\" from \"local_group_default_T_title__revisions\".\"data\" where \"title\" = ? AND \"_domain\" = ?",
"res.body.req.uri.path.0": "www.wikidata.org",
"res.body.req.uri.path.1": "sys",
"res.body.req.uri.path.2": "table",
"res.body.req.uri.path.3": "title_revisions",
"res.body.req.uri.path.4": "",
"res.body.req.body.table": "title_revisions",
"res.body.req.body.attributes.title": "Q24510758",
"res.body.req.body.attributes._domain": "www.wikidata.org",
"res.body.req.body.limit": 1,
"res.body.req.method": "get",
"res.body.req.params.domain": "www.wikidata.org",
"res.body.req.params.api": "sys",
"res.body.req.params.table": "title_revisions",
"res.innerBody.type": "query_error",
"res.innerBody.title": "Error in Cassandra table storage backend",
"res.innerBody.stack": "Error: Operation timed out - received only 1 responses.\n at ResponseError.DriverError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/errors.js:14:19)\n at new ResponseError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/errors.js:51:24)\n at FrameReader.readError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/readers.js:309:13)\n at Parser.parseBody (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/streams.js:201:66)\n at Parser._transform (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/streams.js:113:10)\n at Parser.Transform._read (_stream_transform.js:167:10)\n at Parser.Transform._write (_stream_transform.js:155:12)\n at doWrite (_stream_writable.js:292:12)\n at writeOrBuffer (_stream_writable.js:278:5)\n at Parser.Writable.write (_stream_writable.js:207:11)\n at Protocol.ondata (_stream_readable.js:528:20)\n at emitOne (events.js:77:13)\n at Protocol.emit (events.js:169:7)\n at readableAddChunk (_stream_readable.js:146:16)\n at Protocol.Readable.push (_stream_readable.js:110:10)\n at Protocol.Transform.push (_stream_transform.js:128:32)",
"res.innerBody.err.name": "ResponseError",
"res.innerBody.err.stack": "Error: Operation timed out - received only 1 responses.\n at ResponseError.DriverError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/errors.js:14:19)\n at new ResponseError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/errors.js:51:24)\n at FrameReader.readError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/readers.js:309:13)\n at Parser.parseBody (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/streams.js:201:66)\n at Parser._transform (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/streams.js:113:10)\n at Parser.Transform._read (_stream_transform.js:167:10)\n at Parser.Transform._write (_stream_transform.js:155:12)\n at doWrite (_stream_writable.js:292:12)\n at writeOrBuffer (_stream_writable.js:278:5)\n at Parser.Writable.write (_stream_writable.js:207:11)\n at Protocol.ondata (_stream_readable.js:528:20)\n at emitOne (events.js:77:13)\n at Protocol.emit (events.js:169:7)\n at readableAddChunk (_stream_readable.js:146:16)\n at Protocol.Readable.push (_stream_readable.js:110:10)\n at Protocol.Transform.push (_stream_transform.js:128:32)",
"res.innerBody.err.message": "Operation timed out - received only 1 responses.",
"res.innerBody.err.info": "Represents an error message from the server",
"res.innerBody.err.code": 4608,
"res.innerBody.err.consistencies": 6,
"res.innerBody.err.received": 1,
"res.innerBody.err.blockFor": 2,
"res.innerBody.err.isDataPresent": 1,
"res.innerBody.err.coordinator": "10.64.0.232:9042",
"res.innerBody.err.query": "select \"title\",\"page_id\",\"rev\",\"latest_rev\",\"tid\",\"namespace\",\"restrictions\",\"tags\",\"renames\",\"nextrev_tid\",\"latest_tid\",\"user_id\",\"user_text\",\"timestamp\",\"comment\",\"redirect\",\"_domain\",\"_del\" from \"local_group_default_T_title__revisions\".\"data\" where \"title\" = ? AND \"_domain\" = ?",
"res.innerBody.req.uri.path.0": "www.wikidata.org",
"res.innerBody.req.uri.path.1": "sys",
"res.innerBody.req.uri.path.2": "table",
"res.innerBody.req.uri.path.3": "title_revisions",
"res.innerBody.req.uri.path.4": "",
"res.innerBody.req.body.table": "title_revisions",
"res.innerBody.req.body.attributes.title": "Q24510758",
"res.innerBody.req.body.attributes._domain": "www.wikidata.org",
"res.innerBody.req.body.limit": 1,
"res.innerBody.req.method": "get",
"res.innerBody.req.params.domain": "www.wikidata.org",
"res.innerBody.req.params.api": "sys",
"res.innerBody.req.params.table": "title_revisions",
"res.internalReq.uri.path.0": "www.wikidata.org",
"res.internalReq.uri.path.1": "sys",
"res.internalReq.uri.path.2": "table",
"res.internalReq.uri.path.3": "title_revisions",
"res.internalReq.uri.path.4": "",
"res.internalReq.body.table": "title_revisions",
"res.internalReq.body.attributes.title": "Q24510758",
"res.internalReq.body.attributes._domain": "www.wikidata.org",
"res.internalReq.body.limit": 1,
"res.internalReq.method": "get",
"res.internalReq.params.domain": "www.wikidata.org",
"res.internalReq.params.api": "sys",
"res.internalReq.params.table": "title_revisions",
"res.headers.access-control-allow-origin": "*",
"res.headers.access-control-allow-methods": "GET",
"res.headers.access-control-allow-headers": "accept, content-type",
"res.headers.access-control-expose-headers": "etag",
"res.headers.cache-control": "private, max-age=0, s-maxage=0, must-revalidate",
"res.headers.x-content-type-options": "nosniff",
"res.headers.x-frame-options": "SAMEORIGIN",
"res.headers.x-xss-protection": "1; mode=block",
"res.headers.content-security-policy": "default-src 'none'; frame-ancestors 'none'",
"res.headers.x-content-security-policy": "default-src 'none'; frame-ancestors 'none'",
"res.headers.x-webkit-csp": "default-src 'none'; frame-ancestors 'none'",
"res.headers.x-request-id": "5fd74013-0696-44a2-892b-43fd113d486d",
"stack": "Error: Operation timed out - received only 1 responses.\n at ResponseError.DriverError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/errors.js:14:19)\n at new ResponseError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/errors.js:51:24)\n at FrameReader.readError (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/readers.js:309:13)\n at Parser.parseBody (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/streams.js:201:66)\n at Parser._transform (/srv/deployment/restbase/deploy/node_modules/restbase-mod-table-cassandra/node_modules/cassandra-driver/lib/streams.js:113:10)\n at Parser.Transform._read (_stream_transform.js:167:10)\n at Parser.Transform._write (_stream_transform.js:155:12)\n at doWrite (_stream_writable.js:292:12)\n at writeOrBuffer (_stream_writable.js:278:5)\n at Parser.Writable.write (_stream_writable.js:207:11)\n at Protocol.ondata (_stream_readable.js:528:20)\n at emitOne (events.js:77:13)\n at Protocol.emit (events.js:169:7)\n at readableAddChunk (_stream_readable.js:146:16)\n at Protocol.Readable.push (_stream_readable.js:110:10)\n at Protocol.Transform.push (_stream_transform.js:128:32)",
"latency": 5005,
"levelPath": "error/request",
"root_req.method": "get",
"root_req.uri": "/www.wikidata.org/v1/page/summary/Q24510758?redirect=false",
"root_req.headers.cache-control": "no-cache",
"root_req.headers.content-length": "0",
"root_req.headers.user-agent": "ChangePropagation/WMF",
"root_req.headers.x-client-ip": "::ffff:10.64.16.21",
"root_req.headers.x-forwarded-for": "::ffff:10.64.16.21",
"root_req.headers.x-request-id": "5fd74013-0696-44a2-892b-43fd113d486d",
"root_req.headers.x-request-class": "internal_update",
"root_req.headers.x-triggered-by": "resource_change:http://www.wikidata.org/api/rest_v1/page/html/Q24510758",
"request_id": "5fd74013-0696-44a2-892b-43fd113d486d",
"type": "restbase",
"tags": [
"es",
"gelf",
"normalized_message_untrimmed"
],
"normalized_message": "500: query_error",
"gelf_level": "3"
},
"sort": [
1466520374693
]
}which indicates a problem talking to cassandra "res.innerBody.err.message": "Operation timed out - received only 1 responses.", and this lines up with a reboot for kernel upgrade of restbase1007 (mobileapps dashboard: https://grafana.wikimedia.org/dashboard/db/mobileapps?from=1466518372060&to=1466519972060)