Page MenuHomePhabricator

Access output metrics
Closed, ResolvedPublic

Event Timeline

KCVelaga_WMF claimed this task.
KCVelaga_WMF moved this task from Backlog to In QA Review on the Equity-Landscape board.
KCVelaga_WMF added a subscriber: ntsako.
KCVelaga_WMF added a subscriber: JAnstee_WMF.

@JAnstee_WMF Access outputs QC is complete (sheet). Please review and sign-off.

@KCVelaga_WMF @ntsako Signing off on the data QA with the caveat that we need to relabel one change to the output labels FROM: access_presence_growth
TO: access

final query (outputs verified)

WITH
    access_inputs_hstack AS (
        SELECT
            geography_level,
            country_code_iso_2 AS country_code,
            COALESCE(internet_user_access, 0) AS internet_user_access,
            COALESCE(mobile_subscriptions, 0) AS mobile_subscriptions,
            COALESCE(access_to_basic_knowledge, 0) AS access_to_basic_knowledge,
            COALESCE(annual_connectivity_index_growth, 0) AS annual_connectivity_index_growth,
            COALESCE(annual_internet_growth, 0) AS annual_internet_growth,
            COALESCE(access_to_information, 0) AS access_to_information
        FROM
            input_metrics),
    
    access_inputs_vstack AS (
        SELECT
            geography_level,
            country_code,
            STACK(6, 
                    internet_user_access, 'internet_user_access', 
                    mobile_subscriptions, 'mobile_subscriptions',
                    access_to_basic_knowledge, 'access_to_basic_knowledge',
                    annual_connectivity_index_growth, 'annual_connectivity_index_growth',
                    annual_internet_growth, 'annual_internet_growth',
                    access_to_information, 'access_to_information')
                        AS (metric_value, metric)
        FROM
            access_inputs_hstack),
    
    access_input_ranks AS (
        SELECT
            geography_level,
            country_code,
            metric,
            PERCENT_RANK() OVER(
                PARTITION BY geography_level, metric
                ORDER BY metric_value) AS p_rank
        FROM 
            access_inputs_vstack),
        
    presence AS (
        SELECT
            geography_level,
            country_code,
            ROUND(
                PERCENT_RANK() OVER (
                    PARTITION BY geography_level
                    ORDER BY AVG(p_rank))*10, 2) AS metric_value,
            'access_presence' AS metric,
            AVG(p_rank) AS avg_presence_rank_input
        FROM 
            access_input_ranks
        WHERE
            metric IN ('internet_user_access', 'mobile_subscriptions',
                        'access_to_basic_knowledge', 'access_to_information')
        GROUP BY geography_level, country_code),
    
    growth AS (
        SELECT
            geography_level,
            country_code,
            ROUND(
                PERCENT_RANK() OVER (
                    PARTITION BY geography_level
                    ORDER BY AVG(p_rank))*10, 2) AS metric_value,
            'access_growth' AS metric
        FROM 
            access_input_ranks
        WHERE
            metric IN ('annual_internet_growth', 'annual_connectivity_index_growth')
        GROUP BY geography_level, country_code),
    
    access_pg_input AS (
        SELECT
            ip.geography_level,
            ip.country_code,
            avg_growth*avg_presence_rank_input AS average_growth_rate
        FROM (
            SELECT
                geography_level,
                country_code,
                AVG(metric_value) AS avg_growth
            FROM
                access_inputs_vstack
            WHERE 
                metric IN ('annual_internet_growth', 'annual_connectivity_index_growth')
            GROUP BY 
                geography_level, country_code) ip
        JOIN
            presence p
            ON ip.geography_level = p.geography_level AND ip.country_code = p.country_code),
    
    access_final AS (
        SELECT
            geography_level,
            country_code,
            ROUND(
                PERCENT_RANK() OVER (
                    PARTITION BY geography_level
                    ORDER BY average_growth_rate)*10, 2) AS metric_value,
            'access' AS metric
        FROM 
            access_pg_input)
        
SELECT
    geography_level,
    country_code,
    metric_value,
    metric
FROM 
    presence
UNION ALL
SELECT * FROM growth
UNION ALL
SELECT * FROM access_final