Page MenuHomePhabricator

Citoid logs fields explosion
Closed, ResolvedPublic

Description

It looks like citoid is logging garbage on certain errors in the form of "err_body_<number>: <number>" in its gelf / json logs. Additionally it looks like all HTTP response headers are also logged as key: value, e.g. err_headers_x-app-status: 1.

This can cause "fields explosion" problems in elasticsearch/logstash indices. See also T180051 for more context. One fix would be to use sth like err_response: <whole http response>, in other words using only one nested key/value pair as opposed to multiple key:value at top level.

{
  "_index": "logstash-2019.11.12",
  "_type": "citoid",
  "_id": "AW5fRqjXKWrIH1QREwG7",
  "_score": 1,
  "_source": {
    "err_body_192": 105,
    "err_body_191": 97,
    "err_body_190": 116,
    "err_body_199": 116,
    "err_body_198": 104,
    "err_body_197": 46,
    "err_body_196": 116,
    "err_body_195": 110,
    "err_body_194": 101,
    "err_body_193": 109,
    "err_body_29": 70,
    "err_body_27": 69,
    "err_body_28": 84,
    "err_body_25": 47,
    "err_body_26": 73,
    "err_body_23": 45,
    "err_body_24": 47,
    "err_body_21": 32,
    "err_body_22": 34,
    "err_body_20": 67,
    "version": "1.0",
    "tags": [
      "input-gelf-12201",
      "deprecated-input",
      "es",
      "gelf",
      "normalized_message_untrimmed"
    ],
    "err_body_18": 76,
    "err_headers_content-type": "text/html; charset=iso-8859-1",
    "err_body_19": 73,
    "err_body_16": 85,
    "err_body_17": 66,
    "err_body_14": 32,
    "err_body_15": 80,
    "err_body_12": 77,
    "err_body_13": 76,
    "err_body_10": 72,
    "err_body_11": 84,
    "err_body_2": 68,
    "err_body_50": 10,
    "err_body_1": 33,
    "err_body_51": 60,
    "err_body_0": 60,
    "err_levelPath": "warn/scraper",
    "err_body_49": 62,
    "err_body_47": 78,
    "err_body_9": 32,
    "err_body_48": 34,
    "err_body_8": 69,
    "err_body_45": 47,
    "err_body_7": 80,
    "err_body_46": 69,
    "err_body_6": 89,
    "err_body_43": 48,
    "err_body_5": 84,
    "err_body_44": 47,
    "err_body_4": 67,
    "err_body_41": 50,
    "err_body_3": 79,
    "err_body_42": 46,
    "err_body_40": 32,
    "err_body_38": 77,
    "err_body_39": 76,
    "err_body_36": 72,
    "err_body_37": 84,
    "err_body_34": 68,
    "err_body_35": 32,
    "err_body_32": 68,
    "err_body_33": 84,
    "err_body_30": 47,
    "err_body_31": 47,
    "err_body_72": 48,
    "err_body_73": 52,
    "err_body_70": 62,
    "err_body_71": 52,
    "err_body_156": 112,
    "err_body_155": 115,
    "err_body_154": 47,
    "err_body_153": 115,
    "err_body_152": 112,
    "err_body_151": 117,
    "err_body_150": 111,
    "err_body_159": 110,
    "err_body_158": 110,
    "err_body_157": 105,
    "err_body_69": 101,
    "err_body_67": 116,
    "err_body_68": 108,
    "err_body_65": 116,
    "err_body_66": 105,
    "err_body_63": 10,
    "err_body_64": 60,
    "err_body_61": 100,
    "err_body_62": 62,
    "err_body_60": 97,
    "err_body_167": 99,
    "err_body_166": 114,
    "err_body_165": 97,
    "err_body_164": 47,
    "err_body_163": 114,
    "err_body_162": 101,
    "normalized_message": "404",
    "err_body_161": 107,
    "err_body_160": 97,
    "err_body_169": 105,
    "err_body_168": 104,
    "err_body_58": 104,
    "err_body_59": 101,
    "err_body_56": 62,
    "err_body_57": 60,
    "err_body_54": 109,
    "err_body_55": 108,
    "err_body_52": 104,
    "err_body_53": 116,
    "err_body_94": 47,
    "err_body_170": 118,
    "err_body_95": 104,
    "err_body_92": 10,
    "err_body_93": 60,
    "err_body_90": 101,
    "err_body_91": 62,
    "err_body_178": 47,
    "err_body_177": 52,
    "err_body_176": 48,
    "err_body_175": 48,
    "err_body_174": 50,
    "err_body_173": 47,
    "err_body_172": 115,
    "err_body_171": 101,
    "err_body_179": 48,
    "gelf_level": "4",
    "err_body_89": 108,
    "err_headers_cache-control": "private",
    "err_body_87": 105,
    "err_body_88": 116,
    "err_body_85": 47,
    "err_body_86": 116,
    "err_body_83": 100,
    "err_body_181": 45,
    "err_body_84": 60,
    "err_body_180": 52,
    "err_body_81": 117,
    "err_body_82": 110,
    "err_body_80": 111,
    "err_body_189": 114,
    "err_body_188": 101,
    "err_body_187": 116,
    "err_body_186": 110,
    "message": "404",
    "err_body_185": 101,
    "err_body_184": 47,
    "err_body_183": 52,
    "err_body_182": 49,
    "err_body_78": 32,
    "err_body_79": 70,
    "err_body_76": 111,
    "err_body_77": 116,
    "err_body_74": 32,
    "err_body_75": 78,
    "err_body_112": 111,
    "err_body_233": 47,
    "err_body_111": 78,
    "err_body_232": 60,
    "err_body_110": 62,
    "err_body_231": 46,
    "err_body_230": 114,
    "type": "citoid",
    "err_body_119": 100,
    "err_body_118": 110,
    "err_body_239": 98,
    "err_body_117": 117,
    "err_body_238": 47,
    "err_body_116": 111,
    "err_body_237": 60,
    "err_body_115": 70,
    "err_body_236": 10,
    "err_body_114": 32,
    "err_body_235": 62,
    "err_body_113": 116,
    "err_body_234": 112,
    "err_headers_server": "Apache",
    "err_body_123": 49,
    "err_body_244": 60,
    "err_body_122": 104,
    "err_body_243": 62,
    "err_body_121": 47,
    "err_body_242": 121,
    "err_body_120": 60,
    "err_body_241": 100,
    "err_body_240": 111,
    "err_body_129": 84,
    "err_body_128": 62,
    "err_body_249": 108,
    "err_body_127": 112,
    "err_body_248": 109,
    "err_body_126": 60,
    "err_body_247": 116,
    "err_body_125": 10,
    "err_body_246": 104,
    "err_body_124": 62,
    "err_body_245": 47,
    "err_body_98": 100,
    "err_body_99": 62,
    "err_body_96": 101,
    "err_body_97": 97,
    "err_body_134": 101,
    "err_body_133": 114,
    "err_body_132": 32,
    "err_body_131": 101,
    "err_body_130": 104,
    "err_body_251": 10,
    "err_body_250": 62,
    "file": "/srv/service/node_modules/preq/index.js",
    "err_body_139": 116,
    "err_body_138": 115,
    "err_body_137": 101,
    "err_body_136": 117,
    "err_body_135": 113,
    "err_stack": "HTTPError: 404\n    at request.then (/srv/service/node_modules/preq/index.js:212:23)\n    at tryCatcher (/srv/service/node_modules/bluebird/js/release/util.js:16:23)\n    at Promise._settlePromiseFromHandler (/srv/service/node_modules/bluebird/js/release/promise.js:517:31)\n    at Promise._settlePromise (/srv/service/node_modules/bluebird/js/release/promise.js:574:18)\n    at Promise._settlePromise0 (/srv/service/node_modules/bluebird/js/release/promise.js:619:10)\n    at Promise._settlePromises (/srv/service/node_modules/bluebird/js/release/promise.js:699:18)\n    at _drainQueueStep (/srv/service/node_modules/bluebird/js/release/async.js:138:12)\n    at _drainQueue (/srv/service/node_modules/bluebird/js/release/async.js:131:9)\n    at Async._drainQueues (/srv/service/node_modules/bluebird/js/release/async.js:147:5)\n    at Immediate.Async.drainQueues (/srv/service/node_modules/bluebird/js/release/async.js:17:14)\n    at runCallback (timers.js:672:20)\n    at tryOnImmediate (timers.js:645:5)\n    at processImmediate [as _immediateCallback] (timers.js:617:5)",
    "err_body_145": 76,
    "err_body_144": 82,
    "err_body_143": 85,
    "err_body_142": 32,
    "err_body_141": 100,
    "err_body_140": 101,
    "@timestamp": "2019-11-12T11:01:36.448Z",
    "err_body_149": 114,
    "err_body_148": 103,
    "err_name": "citoid",
    "err_body_147": 47,
    "err_body_146": 32,
    "pid": 27,
    "host": "citoid-production-699db9c4-4bbsx",
    "source_host": "10.64.65.0",
    "level": "WARNING",
    "err_body_200": 109,
    "err_body_208": 111,
    "err_body_207": 110,
    "err_body_206": 32,
    "err_body_205": 115,
    "err_body_204": 97,
    "err_body_203": 119,
    "err_body_202": 32,
    "err_body_201": 108,
    "err_body_209": 116,
    "line": "212",
    "err_body_211": 102,
    "err_body_210": 32,
    "err_message": "404",
    "err_body_219": 32,
    "err_body_218": 110,
    "err_body_217": 111,
    "err_body_216": 32,
    "err_body_215": 100,
    "err_body_214": 110,
    "err_body_213": 117,
    "err_body_212": 111,
    "@version": "1",
    "err_headers_cteonnt-length": "252",
    "err_body_101": 98,
    "err_body_222": 105,
    "err_headers_connection": "close",
    "err_body_100": 60,
    "err_body_221": 104,
    "err_body_220": 116,
    "err_body_109": 49,
    "err_body_108": 104,
    "err_body_229": 101,
    "err_body_107": 60,
    "err_body_228": 118,
    "err_body_106": 10,
    "err_body_227": 114,
    "err_body_105": 62,
    "err_body_226": 101,
    "err_body_104": 121,
    "err_body_225": 115,
    "err_body_103": 100,
    "err_body_224": 32,
    "err_body_102": 111,
    "err_body_223": 115,
    "err_status": 404,
    "err_headers_date": "Tue, 12 Nov 2019 11:01:36 GMT"
  },
  "fields": {
    "@timestamp": [
      1573556496448
    ]
  }
}

Details

Related Gerrit Patches:
operations/deployment-charts : masterCitoid: Update image to 2019-11-20-144606-production
mediawiki/services/citoid : masterUpdate preq and html-metadata

Event Timeline

Restricted Application added a subscriber: Aklapper. · View Herald TranscriptNov 12 2019, 1:08 PM
fgiunchedi updated the task description. (Show Details)Nov 12 2019, 1:12 PM
mobrovac claimed this task.Nov 12 2019, 1:33 PM
mobrovac triaged this task as Medium priority.
mobrovac added a subscriber: mobrovac.

This is actually a problem with the underlying preq library, which doesn't decode the body properly in cases of error. I shall fix that.

mobrovac updated the task description. (Show Details)Nov 12 2019, 1:40 PM

Also PR #85 for html-metadata is needed as that library is used under the hood by Citoid and also uses preq.

Change 551967 had a related patch set uploaded (by Mobrovac; owner: Mobrovac):
[mediawiki/services/citoid@master] Update preq and html-metadata

https://gerrit.wikimedia.org/r/551967

Change 551967 merged by jenkins-bot:
[mediawiki/services/citoid@master] Update preq and html-metadata

https://gerrit.wikimedia.org/r/551967

Change 552094 had a related patch set uploaded (by Mobrovac; owner: Mobrovac):
[operations/deployment-charts@master] Citoid: Update image to 2019-11-20-144606-production

https://gerrit.wikimedia.org/r/552094

Change 552094 merged by jenkins-bot:
[operations/deployment-charts@master] Citoid: Update image to 2019-11-20-144606-production

https://gerrit.wikimedia.org/r/552094

mobrovac closed this task as Resolved.Nov 20 2019, 5:26 PM

All of the patches have been merged and the new version of Citoid has been deployed, so this issue should be no more. Please reopen if that's not the case.

LGTM so far, thanks @mobrovac for working on this!