Page MenuHomePhabricator

[components-api,beta] Config not updated from remote source
Open, HighPublicBUG REPORT

Description

Bug

Tool: cluebotng-staging
What are you trying to do?

Deploy the latest component config. Unexpectedly, the latest config was not deployed.

What does happen?

Previous build:
ID: 20250813-203850-tbophfxva3
Triggered: https://github.com/cluebotng/component-configs/actions/runs/16948749357/job/48036264958
Config: https://github.com/cluebotng/component-configs/blob/9c67af39f47ea48c10c4c2e7748d9d48a8711ae3/cluebotng-staging.yaml

Note: Tag for grafana-alloy is 0.1.5

Latest build:
ID: 20250813-204239-xwrb287a2s
Triggered: https://github.com/cluebotng/component-configs/actions/runs/16948828300
Config: https://github.com/cluebotng/component-configs/commit/cb8b1d4644d5d1b860f73724c4133ef0bfa8ed29 (current main)

Note: Tag for grafana-alloy is 0.1.6

Previous deployment:

{
    "deploy_id": "20250813-203850-tbophfxva3",
    "creation_time": "20250813-203850",
    "builds": {
        "bot": {
            "build_id": "cluebotng-staging-buildpacks-pipelinerun-r97b2",
            "build_status": "successful",
            "build_long_status": "You can see the logs with `toolforge build logs cluebotng-staging-buildpacks-pipelinerun-r97b2`"
        },
        "core": {
            "build_id": "cluebotng-staging-buildpacks-pipelinerun-ffprr",
            "build_status": "skipped",
            "build_long_status": "Reusing existing build"
        },
        "grafana-alloy": {
            "build_id": "cluebotng-staging-buildpacks-pipelinerun-ptlnt",
            "build_status": "successful",
            "build_long_status": "You can see the logs with `toolforge build logs cluebotng-staging-buildpacks-pipelinerun-ptlnt`"
        }
    },
    "runs": {
        "bot": {
            "run_status": "successful",
            "run_long_status": "[info] (Job bot created)"
        },
        "core": {
            "run_status": "successful",
            "run_long_status": "[info] (Job core is already up to date)"
        },
        "grafana-alloy": {
            "run_status": "successful",
            "run_long_status": "[info] (Job grafana-alloy created)"
        }
    },
    "tool_config": {
        "config_version": "v1beta1",
        "source_url": "https://raw.githubusercontent.com/cluebotng/component-configs/refs/heads/main/cluebotng-staging.yaml",
        "components": {
            "bot": {
                "build": {
                    "repository": "https://github.com/cluebotng/botng.git",
                    "ref": "v0.0.39",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "botng --irc-relay --debug",
                    "cpu": "0.5",
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": "1Gi",
                    "mount": null,
                    "port": 8118,
                    "replicas": null,
                    "health_check_script": null,
                    "health_check_http": null
                },
                "component_type": "continuous"
            },
            "core": {
                "build": {
                    "repository": "https://github.com/cluebotng/external-core.git",
                    "ref": "refs/tags/v0.0.2",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "./cluebotng -l -m live_run",
                    "cpu": "0.5",
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": null,
                    "mount": null,
                    "port": 3565,
                    "replicas": null,
                    "health_check_script": null,
                    "health_check_http": null
                },
                "component_type": "continuous"
            },
            "grafana-alloy": {
                "build": {
                    "repository": "https://github.com/cluebotng/external-grafana-alloy.git",
                    "ref": "refs/tags/v0.1.5",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "run-alloy",
                    "cpu": "0.5",
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": "0.5Gi",
                    "mount": "all",
                    "port": 8118,
                    "replicas": null,
                    "health_check_script": null,
                    "health_check_http": null
                },
                "component_type": "continuous"
            }
        }
    },
    "status": "successful",
    "long_status": "Finished at 2025-08-13 20:40:53.412594",
    "force_build": false,
    "force_run": false
}

Latest deployment:

{
    "deploy_id": "20250813-204239-xwrb287a2s",
    "creation_time": "20250813-204239",
    "builds": {
        "bot": {
            "build_id": "cluebotng-staging-buildpacks-pipelinerun-r97b2",
            "build_status": "skipped",
            "build_long_status": "Reusing existing build"
        },
        "core": {
            "build_id": "cluebotng-staging-buildpacks-pipelinerun-ffprr",
            "build_status": "skipped",
            "build_long_status": "Reusing existing build"
        },
        "grafana-alloy": {
            "build_id": "cluebotng-staging-buildpacks-pipelinerun-ptlnt",
            "build_status": "skipped",
            "build_long_status": "Reusing existing build"
        }
    },
    "runs": {
        "bot": {
            "run_status": "successful",
            "run_long_status": "[info] (Job bot is already up to date)"
        },
        "core": {
            "run_status": "successful",
            "run_long_status": "[info] (Job core is already up to date)"
        },
        "grafana-alloy": {
            "run_status": "successful",
            "run_long_status": "[info] (Job grafana-alloy is already up to date)"
        }
    },
    "tool_config": {
        "config_version": "v1beta1",
        "source_url": "https://raw.githubusercontent.com/cluebotng/component-configs/refs/heads/main/cluebotng-staging.yaml",
        "components": {
            "bot": {
                "build": {
                    "repository": "https://github.com/cluebotng/botng.git",
                    "ref": "v0.0.39",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "botng --irc-relay --debug",
                    "cpu": "0.5",
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": "1Gi",
                    "mount": null,
                    "port": 8118,
                    "replicas": null,
                    "health_check_script": null,
                    "health_check_http": null
                },
                "component_type": "continuous"
            },
            "core": {
                "build": {
                    "repository": "https://github.com/cluebotng/external-core.git",
                    "ref": "refs/tags/v0.0.2",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "./cluebotng -l -m live_run",
                    "cpu": "0.5",
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": null,
                    "mount": null,
                    "port": 3565,
                    "replicas": null,
                    "health_check_script": null,
                    "health_check_http": null
                },
                "component_type": "continuous"
            },
            "grafana-alloy": {
                "build": {
                    "repository": "https://github.com/cluebotng/external-grafana-alloy.git",
                    "ref": "refs/tags/v0.1.5",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "run-alloy",
                    "cpu": "0.5",
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": "0.5Gi",
                    "mount": "all",
                    "port": 8118,
                    "replicas": null,
                    "health_check_script": null,
                    "health_check_http": null
                },
                "component_type": "continuous"
            }
        }
    },
    "status": "successful",
    "long_status": "Finished at 2025-08-13 20:42:45.579403",
    "force_build": false,
    "force_run": false
}

The tag is not updated and the build is re-used, which suggests the latest config was not fetched?

Running `toolforge components deployment create` manually afterwards updated the config as expected.

Event Timeline

@JJMC89 just FYI the link https://w.wiki/EYoq on https://wikitech.wikimedia.org/wiki/Help:Toolforge/Deploy_your_tool added the current iteration label, rather than the generic one... in case your change implies that was incorrect.

dcaro changed the task status from Open to In Progress. (Aug 14 2025, 9:44 AM)
dcaro claimed this task.
dcaro triaged this task as High priority.
dcaro edited projects, added Toolforge (Toolforge iteration 23); removed Toolforge.
dcaro moved this task from Next Up to In Progress on the Toolforge (Toolforge iteration 23) board.

This is weird, as both calls are to the same exact endpoint, so it's not likely a change in behavior between calls.

Has this happened again?

I'm thinking that there might be some caching involved (between the merge and GitHub serving the new file on the main ref HTTP endpoint).

I'll do more local tests, but I have not been able to reproduce it so far. I'll also try some tests using GitHub (same process).

I haven't noticed it again, but I also haven't really been looking. There are a number of deploys over the last week whose logs could be checked, but unfortunately I don't have time right now as I'm trying to get wrapped up for crossing back over the Atlantic.

I wouldn't be surprised if there is caching in place and the purge just happened to be delayed slightly for some reason.

If the url could be passed a git repo, or be more dynamic (à la T401388), then the explicit (or latest resolved) sha could be used, in which case it wouldn't matter if the response was cached (e.g. https://raw.githubusercontent.com/cluebotng/component-configs/e7ca2ea5bb29d5d7583b0b286387374d8545f49a/cluebotng-staging.yaml rather than https://raw.githubusercontent.com/cluebotng/component-configs/refs/heads/main/cluebotng-staging.yaml).

Alternatively perhaps some parameterized string support to pass e.g. the deployment id into the source url as a cache busting mechanism (if supported by the remote).

Sorry, happened again....

Previous build from https://github.com/cluebotng/component-configs/actions/runs/17179445014/job/48739880611, triggered from https://github.com/cluebotng/component-configs/commit/f0a95292c4b0a952738219abff3fbc8522b4910d

tools.cluebotng-trainer@tools-bastion-13:~$ toolforge components deployment show 20250823-194435-equzziwtk2 --json
{
    "deploy_id": "20250823-194435-equzziwtk2",
    "creation_time": "20250823-194435",
    "builds": {
        "trainer": {
            "build_id": "cluebotng-trainer-buildpacks-pipelinerun-wxbbc",
            "build_status": "successful",
            "build_long_status": "You can see the logs with `toolforge build logs cluebotng-trainer-buildpacks-pipelinerun-wxbbc`"
        }
    },
    "runs": {
        "trainer": {
            "run_status": "successful",
            "run_long_status": "[info] (Job trainer created)"
        }
    },
    "tool_config": {
        "config_version": "v1beta1",
        "source_url": "https://raw.githubusercontent.com/cluebotng/component-configs/refs/heads/main/cluebotng-trainer.yaml",
        "components": {
            "trainer": {
                "build": {
                    "repository": "https://github.com/cluebotng/trainer.git",
                    "ref": "refs/tags/v1.0.6",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "cbng-trainer run-edit-sets",
                    "cpu": null,
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": null,
                    "mount": "none",
                    "retry": null,
                    "schedule": "13 */6 * * *",
                    "timeout": null
                },
                "component_type": "scheduled"
            }
        }
    },
    "status": "successful",
    "long_status": "Finished at 2025-08-23 19:46:20.345392",
    "force_build": false,
    "force_run": false
}

https://github.com/cluebotng/component-configs/actions/runs/17179544414/job/48740128290 triggered from https://github.com/cluebotng/component-configs/commit/1ed74821059dc9eff722a07a8a7da797a6e4d783

tools.cluebotng-trainer@tools-bastion-13:~$ toolforge components deployment show 20250823-195731-wq2edu3ujq --json
{
    "deploy_id": "20250823-195731-wq2edu3ujq",
    "creation_time": "20250823-195731",
    "builds": {
        "trainer": {
            "build_id": "cluebotng-trainer-buildpacks-pipelinerun-wxbbc",
            "build_status": "skipped",
            "build_long_status": "Reusing existing build"
        }
    },
    "runs": {
        "trainer": {
            "run_status": "successful",
            "run_long_status": "[info] (Job trainer is already up to date)"
        }
    },
    "tool_config": {
        "config_version": "v1beta1",
        "source_url": "https://raw.githubusercontent.com/cluebotng/component-configs/refs/heads/main/cluebotng-trainer.yaml",
        "components": {
            "trainer": {
                "build": {
                    "repository": "https://github.com/cluebotng/trainer.git",
                    "ref": "refs/tags/v1.0.6",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "./deployment/entrypoint.sh run-edit-sets",
                    "cpu": null,
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": null,
                    "mount": "none",
                    "retry": null,
                    "schedule": "13 */6 * * *",
                    "timeout": null
                },
                "component_type": "scheduled"
            }
        }
    },
    "status": "successful",
    "long_status": "Finished at 2025-08-23 19:57:33.706324",
    "force_build": false,
    "force_run": false
}

This endpoint is being cached:

via: 1.1 varnish
x-served-by: cache-lis1490020-LIS
x-cache: HIT

raw.githubusercontent.com is pointing at Fastly:

$ dig +short -x 185.199.110.133 @ns1.fastly.host
133.http3.github.edgecdn.cloud.
133.http3.global.github.edgecdn.cloud.
m00m00:~ damian$ dig +short raw.githubusercontent.com
185.199.108.133
185.199.111.133
185.199.110.133
185.199.109.133

$ dig +short -x 185.199.110.133 @ns1.fastly.host
133.http3.github.edgecdn.cloud.
133.http3.global.github.edgecdn.cloud.

So this is probably the cache purge not making it in time...

I guess as a workaround I can add some sleep/loop/check, but since the cache is distributed that doesn't provide guarantees.

Basic cache busting does not appear to work:

$ for x in {1..3}; do curl -si https://raw.githubusercontent.com/cluebotng/component-configs/refs/heads/main/cluebotng-trainer.yaml | grep x-cache:; done
x-cache: HIT
x-cache: HIT
x-cache: HIT
$ for x in {1..3}; do curl -si 'https://raw.githubusercontent.com/cluebotng/component-configs/refs/heads/main/cluebotng-trainer.yaml?deployment='$(uuidgen) | grep x-cache:; done
x-cache: HIT
x-cache: HIT
x-cache: HIT
$ for x in {1..3}; do curl -si -H 'X-Deployment-Id: '$(uuidgen) 'https://raw.githubusercontent.com/cluebotng/component-configs/refs/heads/main/cluebotng-trainer.yaml' | grep x-cache:; done
x-cache: HIT
x-cache: HIT
x-cache: HIT

I see 2 reasonable paths forward:

  • Deploy API accepts a sha and/or url, which is then used in combination with, or in place of, source_url
  • Components support source_repo / source_path (maybe source_branch) in addition to source_url, which explicitly resolves the latest ref, which shouldn't be cached

For now I've basically replaced source_url with re-creating the config directly and then triggering a deploy - https://github.com/cluebotng/component-configs/compare/c285dcc662486ed6cba9b8c016f89655300af164...2a4b1dea1705c01553e558dc0c97caa873b261c8

This goes back to relying on SSH and has some side effects around how the secrets are managed, but until the API is easily usable externally or there is some solution here this is pretty much the only workable solution for now.

Example run using the above: https://github.com/cluebotng/component-configs/actions/runs/17190716103/job/48765956567

Deployment:

{
    "deploy_id": "20250824-154816-cwejvxwv88",
    "creation_time": "20250824-154816",
    "builds": {
        "bot": {
            "build_id": "cluebotng-staging-buildpacks-pipelinerun-5pnwv",
            "build_status": "skipped",
            "build_long_status": "Reusing existing build"
        },
        "core": {
            "build_id": "cluebotng-staging-buildpacks-pipelinerun-xls4z",
            "build_status": "skipped",
            "build_long_status": "Reusing existing build"
        },
        "grafana-alloy": {
            "build_id": "cluebotng-staging-buildpacks-pipelinerun-wbrcr",
            "build_status": "skipped",
            "build_long_status": "Reusing existing build"
        }
    },
    "runs": {
        "bot": {
            "run_status": "successful",
            "run_long_status": "[info] (Job bot is already up to date)"
        },
        "core": {
            "run_status": "successful",
            "run_long_status": "[info] (Job core is already up to date)"
        },
        "grafana-alloy": {
            "run_status": "successful",
            "run_long_status": "[info] (Job grafana-alloy is already up to date)"
        }
    },
    "tool_config": {
        "config_version": "v1beta1",
        "source_url": null,
        "components": {
            "bot": {
                "build": {
                    "repository": "https://github.com/cluebotng/botng.git",
                    "ref": "v0.0.39",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "botng --irc-relay --debug",
                    "cpu": "0.5",
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": "1Gi",
                    "mount": null,
                    "port": 8118,
                    "replicas": null,
                    "health_check_script": null,
                    "health_check_http": "/health"
                },
                "component_type": "continuous"
            },
            "core": {
                "build": {
                    "repository": "https://github.com/cluebotng/external-core.git",
                    "ref": "refs/tags/v0.0.2",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "./cluebotng -l -m live_run",
                    "cpu": "0.5",
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": "0.5Gi",
                    "mount": null,
                    "port": 3565,
                    "replicas": null,
                    "health_check_script": null,
                    "health_check_http": null
                },
                "component_type": "continuous"
            },
            "grafana-alloy": {
                "build": {
                    "repository": "https://github.com/cluebotng/external-grafana-alloy.git",
                    "ref": "refs/tags/v0.1.7",
                    "use_latest_versions": true
                },
                "run": {
                    "command": "run-alloy",
                    "cpu": "0.5",
                    "emails": null,
                    "filelog": null,
                    "filelog_stderr": null,
                    "filelog_stdout": null,
                    "memory": "0.5Gi",
                    "mount": "all",
                    "port": 8118,
                    "replicas": null,
                    "health_check_script": null,
                    "health_check_http": "/health"
                },
                "component_type": "continuous"
            }
        }
    },
    "status": "successful",
    "long_status": "Finished at 2025-08-24 15:48:22.556093",
    "force_build": false,
    "force_run": false
}

> Components support source_repo / source_path (maybe source_branch) in addition to source_url, which explicitly resolves the latest ref, which shouldn't be cached

I think this might be the solution for now, should be easy to implement, future-proof, as secure as the source_url (no ref injection). Note that this is going to have to work using the git protocol directly, because if we use http (ex. gh raw file), then we will probably end up with the same issue xd

>> Components support source_repo / source_path (maybe source_branch) in addition to source_url, which explicitly resolves the latest ref, which shouldn't be cached
>
> I think this might be the solution for now, should be easy to implement, future-proof, as secure as the source_url (no ref injection). Note that this is going to have to work using the git protocol directly, because if we use http (ex. gh raw file), then we will probably end up with the same issue xd

I think git over https should also not be cached in this way - the GitHub API is also not cached, but returns base64 in json, which is a bit too specific for this use case.

fnegri changed the task status from In Progress to Open. (Jan 13 2026, 5:45 PM)
fnegri removed dcaro as the assignee of this task.
fnegri changed the subtype of this task from "Task" to "Bug Report".