Load the table with language data from CSV files.
Description
Description
| Status | Subtype | Assigned | Task | ||
|---|---|---|---|---|---|
| Open | None | T305468 Deploy the GDI Equity Landscape Dashboard | |||
| Open | None | T305475 Milestone: Ingest and Transform Input Data | |||
| Resolved | • ntsako | T315886 Load language data | |||
| Resolved | KCVelaga_WMF | T336047 Fix average_monthly_editors metric in Equity Landscape |
Event Timeline
Comment Actions
Loaded
SELECT * FROM ntsako.brief_projects_edited_metrics WHERE year=2021 SELECT * FROM ntsako.official_language_metrics SELECT * FROM ntsako.unesco_endangered_lang_metrics WHERE year=2021
Comment Actions
SELECT * FROM ntsako.brief_projects_edited_metrics WHERE year=2021 SELECT * FROM ntsako.unesco_endangered_lang_metrics WHERE year=2021
Take the above to production.
Comment Actions
Note that the active projects are those which are edited by at least 1% and 5 or more editors in the region each month on average during
Comment Actions
query
WITH
monthly_avg AS (
SELECT
country_code,
ROUND(AVG(distinct_editors)) AS average_monthly_acitve_editors,
wiki_db
FROM
wmf.geoeditors_monthly
WHERE
YEAR(month) = 2021
GROUP BY
country_code,
wiki_db),
monthly_sum AS (
SELECT
country_code,
SUM(average_monthly_acitve_editors) AS total_editors
FROM
monthly_avg
GROUP BY
country_code),
projects_edited AS (
SELECT
avg.country_code,
average_monthly_acitve_editors,
ROUND(average_monthly_acitve_editors / total_editors * 100, 2) AS proportion,
wiki_db
FROM
monthly_avg avg
LEFT JOIN
monthly_sum sum
ON avg.country_code = sum.country_code),
brief_projects_edited AS (
SELECT
country_code,
cdc.name AS country_name,
average_monthly_acitve_editors,
proportion,
wiki_db,
english_name AS project_label,
language_name AS language
FROM
projects_edited pe
LEFT JOIN
canonical_data.wikis cdw
ON pe.wiki_db = cdw.database_code
LEFT JOIN
canonical_data.countries cdc
ON pe.country_code = cdc.iso_code
WHERE
average_monthly_acitve_editors >= 5 AND
proportion >= 1)
SELECT *
FROM brief_projects_editedthe last WHERE statement can be removed for a non-filtered version
Comment Actions
Table loaded with the below query so as to use our country meta data table instead of canonical countries table.
WITH
all_countries AS (
SELECT DISTINCT *
FROM
(
SELECT '--' AS country_code, 'Unknown' AS name
UNION ALL
SELECT country_code_iso_2 AS country_code, canonical_country_name
FROM ntsako.country_meta_data
) AS countries
),
monthly_avg AS (
SELECT country_code,
ROUND(AVG(distinct_editors)) AS average_monthly_acitve_editors,
wiki_db
FROM wmf.geoeditors_monthly
WHERE month like '2021-%'
GROUP BY country_code, wiki_db
),
monthly_sum AS (
SELECT country_code,
SUM(average_monthly_acitve_editors) AS total_editors
FROM monthly_avg
GROUP BY country_code
),
projects_edited AS (
SELECT monthly_avg.country_code,
monthly_avg.average_monthly_acitve_editors,
ROUND(monthly_avg.average_monthly_acitve_editors / monthly_sum.total_editors * 100, 2) AS proportion,
monthly_avg.wiki_db
FROM monthly_avg
LEFT JOIN monthly_sum ON (monthly_avg.country_code = monthly_sum.country_code)
),
brief_projects_edited AS (
SELECT projects_edited.country_code,
countries.name AS country_name,
projects_edited.average_monthly_acitve_editors,
projects_edited.proportion,
projects_edited.wiki_db,
wikis.english_name AS project_label,
wikis.language_name AS language
FROM projects_edited
LEFT JOIN canonical_data.wikis wikis ON (projects_edited.wiki_db = wikis.database_code)
LEFT JOIN all_countries countries ON (projects_edited.country_code = countries.country_code)
)
SELECT country_code,
country_name,
average_monthly_acitve_editors,
proportion,
wiki_db,
project_label,
language
FROM brief_projects_edited