Page MenuHomePhabricator

Parsoid should return 400 for transforms with missing info
Closed, Resolved · Public

Description

We are receiving erroneous transform requests relatively frequently (~200 req/day); see https://logstash.wikimedia.org/#dashboard/temp/AVTDBDtJDxp7yus25R9g for the dashboard. When data-parsoid (dp) or data-mw is not supplied, Parsoid returns a 500 status. The fault in that case clearly lies with the client, not with Parsoid, so it should return a 400 instead. A sample RESTBase log entry:

{
  "_index": "logstash-2016.05.18",
  "_type": "restbase",
  "_id": "AVTC4FoLctfB5PjeEa_k",
  "_score": null,
  "_source": {
    "host": "restbase1009",
    "level": "ERROR",
    "version": "1.0",
    "@version": "1",
    "@timestamp": "2016-05-18T07:59:23.086Z",
    "source_host": "10.64.48.110",
    "pid": 289,
    "message": "500",
    "res.name": "HTTPError",
    "res.message": "500",
    "res.status": 500,
    "res.headers.x-powered-by": "Express",
    "res.headers.access-control-allow-origin": "*",
    "res.headers.content-type": "text/plain; charset=utf-8",
    "res.headers.etag": "W/\"478-hf1obi0lJJE0HGFMCWk5oQ\"",
    "res.headers.vary": "Accept-Encoding",
    "res.headers.date": "Wed, 18 May 2016 07:59:23 GMT",
    "res.headers.access-control-allow-methods": "GET",
    "res.headers.access-control-allow-headers": "accept, content-type",
    "res.headers.access-control-expose-headers": "etag",
    "res.headers.cache-control": "private, max-age=0, s-maxage=0, must-revalidate",
    "res.headers.x-content-type-options": "nosniff",
    "res.headers.x-frame-options": "SAMEORIGIN",
    "res.headers.x-xss-protection": "1; mode=block",
    "res.headers.content-security-policy": "default-src 'none'; frame-ancestors 'none'",
    "res.headers.x-content-security-policy": "default-src 'none'; frame-ancestors 'none'",
    "res.headers.x-webkit-csp": "default-src 'none'; frame-ancestors 'none'",
    "res.headers.x-request-id": "6ee30f08-1cce-11e6-ba17-4825b2ee06db",
    "res.body": "Cannot serialize transclusion without data-mw.parts or data-parsoid.src.\nError: Cannot serialize transclusion without data-mw.parts or data-parsoid.src.\n    at Object.handle (/srv/deployment/parsoid/deploy/src/lib/html2wt/DOMHandlers.js:1367:12)\n    at /srv/deployment/parsoid/deploy/node_modules/prfun/lib/index.js:532:26\n    at tryCatch2 (/srv/deployment/parsoid/deploy/node_modules/babybird/lib/promise.js:48:12)\n    at PrFunPromise.Promise (/srv/deployment/parsoid/deploy/node_modules/babybird/lib/promise.js:458:15)\n    at new PrFunPromise (/srv/deployment/parsoid/deploy/node_modules/prfun/lib/index.js:57:21)\n    at /srv/deployment/parsoid/deploy/node_modules/prfun/lib/index.js:530:18\n    at tryCatch1 (/srv/deployment/parsoid/deploy/node_modules/babybird/lib/promise.js:40:12)\n    at promiseReactionJob (/srv/deployment/parsoid/deploy/node_modules/babybird/lib/promise.js:269:19)\n    at PromiseReactionJobTask.call (/srv/deployment/parsoid/deploy/node_modules/babybird/lib/promise.js:284:3)\n    at flush (/srv/deployment/parsoid/deploy/node_modules/babybird/node_modules/asap/raw.js:50:29)\n    at process._tickCallback (node.js:415:13)",
    "res.requestName": "get_from_backend",
    "stack": "HTTPError: 500\n    at Request.P.try.bind.then.err.retry.HTTPError.status (/srv/deployment/restbase/deploy/node_modules/hyperswitch/node_modules/preq/index.js:219:23)\n    at Request.tryCatcher (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/util.js:16:23)\n    at Promise._settlePromiseFromHandler (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/promise.js:502:31)\n    at Promise._settlePromise (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/promise.js:559:18)\n    at Promise._settlePromise0 (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/promise.js:604:10)\n    at Promise._settlePromises (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/promise.js:683:18)\n    at Promise._fulfill (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/promise.js:628:18)\n    at Request._callback (/srv/deployment/restbase/deploy/node_modules/bluebird/js/release/nodeback.js:45:21)\n    at Request.self.callback (/srv/deployment/restbase/deploy/node_modules/hyperswitch/node_modules/preq/node_modules/request/request.js:200:22)\n    at emitTwo (events.js:87:13)\n    at Request.emit (events.js:172:7)\n    at Request.<anonymous> (/srv/deployment/restbase/deploy/node_modules/hyperswitch/node_modules/preq/node_modules/request/request.js:1067:10)\n    at emitOne (events.js:82:20)\n    at Request.emit (events.js:169:7)\n    at IncomingMessage.<anonymous> (/srv/deployment/restbase/deploy/node_modules/hyperswitch/node_modules/preq/node_modules/request/request.js:988:12)\n    at emitNone (events.js:72:20)\n    at IncomingMessage.emit (events.js:166:7)\n    at endReadableNT (_stream_readable.js:905:12)\n    at nextTickCallbackWith2Args (node.js:441:9)\n    at process._tickCallback (node.js:355:17)",
    "latency": 27,
    "levelPath": "error/request",
    "root_req.method": "post",
    "root_req.uri": "/uk.wikipedia.org/v1/transform/html/to/wikitext/%D0%AF%D0%BD%D0%B0_%D0%9C%D0%BE%D1%80%D0%B0%D0%B2%D1%86%D0%BE%D0%B2%D0%B0",
    "root_req.headers.content-length": "4193",
    "root_req.headers.content-type": "application/x-www-form-urlencoded",
    "root_req.headers.user-agent": "wikimedia/multi-http-client v1.0",
    "root_req.headers.x-client-ip": "::ffff:10.64.48.62",
    "root_req.headers.x-forwarded-for": "::ffff:10.64.48.62",
    "root_req.headers.x-request-id": "6ee30f08-1cce-11e6-ba17-4825b2ee06db",
    "root_req.headers.x-request-class": "internal",
    "request_id": "6ee30f08-1cce-11e6-ba17-4825b2ee06db",
    "type": "restbase",
    "tags": [
      "es",
      "gelf",
      "normalized_message_untrimmed"
    ],
    "normalized_message": "500",
    "gelf_level": "3"
  },
  "sort": [
    1463558363086
  ]
}

Parsoid stack trace:

Cannot serialize transclusion without data-mw.parts or data-parsoid.src.
Error: Cannot serialize transclusion without data-mw.parts or data-parsoid.src.
    at Object.handle (/srv/deployment/parsoid/deploy/src/lib/html2wt/DOMHandlers.js:1367:12)
    at /srv/deployment/parsoid/deploy/node_modules/prfun/lib/index.js:532:26
    at tryCatch2 (/srv/deployment/parsoid/deploy/node_modules/babybird/lib/promise.js:48:12)
    at PrFunPromise.Promise (/srv/deployment/parsoid/deploy/node_modules/babybird/lib/promise.js:458:15)
    at new PrFunPromise (/srv/deployment/parsoid/deploy/node_modules/prfun/lib/index.js:57:21)
    at /srv/deployment/parsoid/deploy/node_modules/prfun/lib/index.js:530:18
    at tryCatch1 (/srv/deployment/parsoid/deploy/node_modules/babybird/lib/promise.js:40:12)
    at promiseReactionJob (/srv/deployment/parsoid/deploy/node_modules/babybird/lib/promise.js:269:19)
    at PromiseReactionJobTask.call (/srv/deployment/parsoid/deploy/node_modules/babybird/lib/promise.js:284:3)
    at flush (/srv/deployment/parsoid/deploy/node_modules/babybird/node_modules/asap/raw.js:50:29)
    at process._tickCallback (node.js:415:13)

Event Timeline

mobrovac created this task. · May 18 2016, 8:46 AM
Restricted Application added subscribers: Zppix, Aklapper. · View Herald Transcript · May 18 2016, 8:46 AM
mobrovac updated the task description. (Show Details) · May 18 2016, 8:47 AM

Change 289439 had a related patch set uploaded (by Arlolra):
T135596: Return client error for missing data attributes

https://gerrit.wikimedia.org/r/289439

Arlolra claimed this task. · May 18 2016, 4:02 PM
Arlolra triaged this task as Normal priority.
Arlolra added a project: Parsoid-Web-API.

Change 289439 merged by jenkins-bot:
T135596: Return client error for missing data attributes

https://gerrit.wikimedia.org/r/289439

Arlolra closed this task as Resolved. · May 18 2016, 9:20 PM