Page MenuHomePhabricator

Citoid logs fields explosion
Closed, ResolvedPublic

Description

It looks like citoid is logging garbage on certain errors in the form of "err_body_<number>: <number>" in its gelf / json logs. Additionally it looks like all HTTP response headers are also logged as key: value, e.g. err_headers_x-app-status: 1.

This can cause "fields explosion" problems in elasticsearch/logstash indices. See also T180051 for more context. One fix would be to use sth like err_response: <whole http response>, in other words using only one nested key/value pair as opposed to multiple key:value at top level.

{
  "_index": "logstash-2019.11.12",
  "_type": "citoid",
  "_id": "AW5fRqjXKWrIH1QREwG7",
  "_score": 1,
  "_source": {
    "err_body_192": 105,
    "err_body_191": 97,
    "err_body_190": 116,
    "err_body_199": 116,
    "err_body_198": 104,
    "err_body_197": 46,
    "err_body_196": 116,
    "err_body_195": 110,
    "err_body_194": 101,
    "err_body_193": 109,
    "err_body_29": 70,
    "err_body_27": 69,
    "err_body_28": 84,
    "err_body_25": 47,
    "err_body_26": 73,
    "err_body_23": 45,
    "err_body_24": 47,
    "err_body_21": 32,
    "err_body_22": 34,
    "err_body_20": 67,
    "version": "1.0",
    "tags": [
      "input-gelf-12201",
      "deprecated-input",
      "es",
      "gelf",
      "normalized_message_untrimmed"
    ],
    "err_body_18": 76,
    "err_headers_content-type": "text/html; charset=iso-8859-1",
    "err_body_19": 73,
    "err_body_16": 85,
    "err_body_17": 66,
    "err_body_14": 32,
    "err_body_15": 80,
    "err_body_12": 77,
    "err_body_13": 76,
    "err_body_10": 72,
    "err_body_11": 84,
    "err_body_2": 68,
    "err_body_50": 10,
    "err_body_1": 33,
    "err_body_51": 60,
    "err_body_0": 60,
    "err_levelPath": "warn/scraper",
    "err_body_49": 62,
    "err_body_47": 78,
    "err_body_9": 32,
    "err_body_48": 34,
    "err_body_8": 69,
    "err_body_45": 47,
    "err_body_7": 80,
    "err_body_46": 69,
    "err_body_6": 89,
    "err_body_43": 48,
    "err_body_5": 84,
    "err_body_44": 47,
    "err_body_4": 67,
    "err_body_41": 50,
    "err_body_3": 79,
    "err_body_42": 46,
    "err_body_40": 32,
    "err_body_38": 77,
    "err_body_39": 76,
    "err_body_36": 72,
    "err_body_37": 84,
    "err_body_34": 68,
    "err_body_35": 32,
    "err_body_32": 68,
    "err_body_33": 84,
    "err_body_30": 47,
    "err_body_31": 47,
    "err_body_72": 48,
    "err_body_73": 52,
    "err_body_70": 62,
    "err_body_71": 52,
    "err_body_156": 112,
    "err_body_155": 115,
    "err_body_154": 47,
    "err_body_153": 115,
    "err_body_152": 112,
    "err_body_151": 117,
    "err_body_150": 111,
    "err_body_159": 110,
    "err_body_158": 110,
    "err_body_157": 105,
    "err_body_69": 101,
    "err_body_67": 116,
    "err_body_68": 108,
    "err_body_65": 116,
    "err_body_66": 105,
    "err_body_63": 10,
    "err_body_64": 60,
    "err_body_61": 100,
    "err_body_62": 62,
    "err_body_60": 97,
    "err_body_167": 99,
    "err_body_166": 114,
    "err_body_165": 97,
    "err_body_164": 47,
    "err_body_163": 114,
    "err_body_162": 101,
    "normalized_message": "404",
    "err_body_161": 107,
    "err_body_160": 97,
    "err_body_169": 105,
    "err_body_168": 104,
    "err_body_58": 104,
    "err_body_59": 101,
    "err_body_56": 62,
    "err_body_57": 60,
    "err_body_54": 109,
    "err_body_55": 108,
    "err_body_52": 104,
    "err_body_53": 116,
    "err_body_94": 47,
    "err_body_170": 118,
    "err_body_95": 104,
    "err_body_92": 10,
    "err_body_93": 60,
    "err_body_90": 101,
    "err_body_91": 62,
    "err_body_178": 47,
    "err_body_177": 52,
    "err_body_176": 48,
    "err_body_175": 48,
    "err_body_174": 50,
    "err_body_173": 47,
    "err_body_172": 115,
    "err_body_171": 101,
    "err_body_179": 48,
    "gelf_level": "4",
    "err_body_89": 108,
    "err_headers_cache-control": "private",
    "err_body_87": 105,
    "err_body_88": 116,
    "err_body_85": 47,
    "err_body_86": 116,
    "err_body_83": 100,
    "err_body_181": 45,
    "err_body_84": 60,
    "err_body_180": 52,
    "err_body_81": 117,
    "err_body_82": 110,
    "err_body_80": 111,
    "err_body_189": 114,
    "err_body_188": 101,
    "err_body_187": 116,
    "err_body_186": 110,
    "message": "404",
    "err_body_185": 101,
    "err_body_184": 47,
    "err_body_183": 52,
    "err_body_182": 49,
    "err_body_78": 32,
    "err_body_79": 70,
    "err_body_76": 111,
    "err_body_77": 116,
    "err_body_74": 32,
    "err_body_75": 78,
    "err_body_112": 111,
    "err_body_233": 47,
    "err_body_111": 78,
    "err_body_232": 60,
    "err_body_110": 62,
    "err_body_231": 46,
    "err_body_230": 114,
    "type": "citoid",
    "err_body_119": 100,
    "err_body_118": 110,
    "err_body_239": 98,
    "err_body_117": 117,
    "err_body_238": 47,
    "err_body_116": 111,
    "err_body_237": 60,
    "err_body_115": 70,
    "err_body_236": 10,
    "err_body_114": 32,
    "err_body_235": 62,
    "err_body_113": 116,
    "err_body_234": 112,
    "err_headers_server": "Apache",
    "err_body_123": 49,
    "err_body_244": 60,
    "err_body_122": 104,
    "err_body_243": 62,
    "err_body_121": 47,
    "err_body_242": 121,
    "err_body_120": 60,
    "err_body_241": 100,
    "err_body_240": 111,
    "err_body_129": 84,
    "err_body_128": 62,
    "err_body_249": 108,
    "err_body_127": 112,
    "err_body_248": 109,
    "err_body_126": 60,
    "err_body_247": 116,
    "err_body_125": 10,
    "err_body_246": 104,
    "err_body_124": 62,
    "err_body_245": 47,
    "err_body_98": 100,
    "err_body_99": 62,
    "err_body_96": 101,
    "err_body_97": 97,
    "err_body_134": 101,
    "err_body_133": 114,
    "err_body_132": 32,
    "err_body_131": 101,
    "err_body_130": 104,
    "err_body_251": 10,
    "err_body_250": 62,
    "file": "/srv/service/node_modules/preq/index.js",
    "err_body_139": 116,
    "err_body_138": 115,
    "err_body_137": 101,
    "err_body_136": 117,
    "err_body_135": 113,
    "err_stack": "HTTPError: 404\n    at request.then (/srv/service/node_modules/preq/index.js:212:23)\n    at tryCatcher (/srv/service/node_modules/bluebird/js/release/util.js:16:23)\n    at Promise._settlePromiseFromHandler (/srv/service/node_modules/bluebird/js/release/promise.js:517:31)\n    at Promise._settlePromise (/srv/service/node_modules/bluebird/js/release/promise.js:574:18)\n    at Promise._settlePromise0 (/srv/service/node_modules/bluebird/js/release/promise.js:619:10)\n    at Promise._settlePromises (/srv/service/node_modules/bluebird/js/release/promise.js:699:18)\n    at _drainQueueStep (/srv/service/node_modules/bluebird/js/release/async.js:138:12)\n    at _drainQueue (/srv/service/node_modules/bluebird/js/release/async.js:131:9)\n    at Async._drainQueues (/srv/service/node_modules/bluebird/js/release/async.js:147:5)\n    at Immediate.Async.drainQueues (/srv/service/node_modules/bluebird/js/release/async.js:17:14)\n    at runCallback (timers.js:672:20)\n    at tryOnImmediate (timers.js:645:5)\n    at processImmediate [as _immediateCallback] (timers.js:617:5)",
    "err_body_145": 76,
    "err_body_144": 82,
    "err_body_143": 85,
    "err_body_142": 32,
    "err_body_141": 100,
    "err_body_140": 101,
    "@timestamp": "2019-11-12T11:01:36.448Z",
    "err_body_149": 114,
    "err_body_148": 103,
    "err_name": "citoid",
    "err_body_147": 47,
    "err_body_146": 32,
    "pid": 27,
    "host": "citoid-production-699db9c4-4bbsx",
    "source_host": "10.64.65.0",
    "level": "WARNING",
    "err_body_200": 109,
    "err_body_208": 111,
    "err_body_207": 110,
    "err_body_206": 32,
    "err_body_205": 115,
    "err_body_204": 97,
    "err_body_203": 119,
    "err_body_202": 32,
    "err_body_201": 108,
    "err_body_209": 116,
    "line": "212",
    "err_body_211": 102,
    "err_body_210": 32,
    "err_message": "404",
    "err_body_219": 32,
    "err_body_218": 110,
    "err_body_217": 111,
    "err_body_216": 32,
    "err_body_215": 100,
    "err_body_214": 110,
    "err_body_213": 117,
    "err_body_212": 111,
    "@version": "1",
    "err_headers_cteonnt-length": "252",
    "err_body_101": 98,
    "err_body_222": 105,
    "err_headers_connection": "close",
    "err_body_100": 60,
    "err_body_221": 104,
    "err_body_220": 116,
    "err_body_109": 49,
    "err_body_108": 104,
    "err_body_229": 101,
    "err_body_107": 60,
    "err_body_228": 118,
    "err_body_106": 10,
    "err_body_227": 114,
    "err_body_105": 62,
    "err_body_226": 101,
    "err_body_104": 121,
    "err_body_225": 115,
    "err_body_103": 100,
    "err_body_224": 32,
    "err_body_102": 111,
    "err_body_223": 115,
    "err_status": 404,
    "err_headers_date": "Tue, 12 Nov 2019 11:01:36 GMT"
  },
  "fields": {
    "@timestamp": [
      1573556496448
    ]
  }
}

Event Timeline

mobrovac triaged this task as Medium priority.
mobrovac subscribed.

This is actually a problem with the underlying preq library, which doesn't decode the body properly in cases of error. I shall fix that.

Also PR #85 for html-metadata is needed as that library is used under the hood by Citoid and also uses preq.

Change 551967 had a related patch set uploaded (by Mobrovac; owner: Mobrovac):
[mediawiki/services/citoid@master] Update preq and html-metadata

https://gerrit.wikimedia.org/r/551967

Change 551967 merged by jenkins-bot:
[mediawiki/services/citoid@master] Update preq and html-metadata

https://gerrit.wikimedia.org/r/551967

Change 552094 had a related patch set uploaded (by Mobrovac; owner: Mobrovac):
[operations/deployment-charts@master] Citoid: Update image to 2019-11-20-144606-production

https://gerrit.wikimedia.org/r/552094

Change 552094 merged by jenkins-bot:
[operations/deployment-charts@master] Citoid: Update image to 2019-11-20-144606-production

https://gerrit.wikimedia.org/r/552094

All of the patches have been merged and the new version of Citoid has been deployed, so this issue should be no more. Please reopen if that's not the case.

LGTM so far, thanks @mobrovac for working on this!