Page MenuHomePhabricator

Initial user-measured magru latency as violin plots, per country, Latin/South America

Authored By
CDanis
May 3 2024, 12:54 PM
Size
421 KB
Dimensions
755px × 6,354px
Referenced Files
None
Subscribers
None

Initial user-measured magru latency as violin plots, per country, Latin/South America

Initial user-measured magru latency as violin plots, per country, Latin/South America (755×6,354 px, 421 KB)

File Metadata

Mime Type
image/png
Attributes
Image
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16690340
Default Alt Text
Initial user-measured magru latency as violin plots, per country, Latin/South America (755×6,354 px, 421 KB)

Event Timeline

CDanis updated the name for this file from "image.png" to "Initial user-measured magru latency, per country, Latin/South America". May 3 2024, 12:55 PM
CDanis updated the name for this file from "Initial user-measured magru latency, per country, Latin/South America" to "Initial user-measured magru latency as violin plots, per country, Latin/South America". May 3 2024, 1:00 PM
import wmfdata
spark = wmfdata.spark.create_session(type='yarn-regular')

import pyspark.sql.functions as F
from pyspark.sql.functions import col

# Builds the dataset behind the plot: network-probe events are flattened to
# one row per pulse, narrowed to Latin American clients and the targets of
# interest, then pulled into pandas.

# Partition filter: from hour 18 of 2 May 2024 through all of 3 May, i.e.
# every latency measurement since magru was enabled for measurements.
probe_events = spark.table('event.development_network_probe').where(
    'year=2024 and month=05 and ((day=2 and hour>=18) or (day=3))'
)

# Pulses sit two arrays deep (event -> reports -> pulses), so explode both
# levels; only pulse number 2 of each report is kept.
per_report = probe_events.withColumn('report', F.explode('reports'))
per_pulse = per_report.withColumn('probe', F.explode('report.pulses'))
pulses = per_pulse.where('probe.pulse_number==2').select(
    col('report.target_name'),
    col('geocoded_data'),
    col('probe.request_time_ms'),
)

# Clients: anything geocoded to South America, plus Mexico and Central America.
central_america_codes = ['MX', 'GT', 'BZ', 'SV', 'HN', 'NI', 'CR', 'PA']
from_south_america = col('geocoded_data.continent') == 'South America'
from_central_america = col('geocoded_data.country_code').isin(central_america_codes)

# Targets: only compare DCs in the Americas.
americas_targets = ['codfw', 'eqiad', 'ulsfo', 'magru']
targets_americas = col('target_name').isin(americas_targets)

latam_pulses = (
    pulses
    .where(from_south_america | from_central_america)
    .where(targets_americas)
)

# Scraped from geo-maps: country code -> DC it is currently mapped to.
current_mappings = {
    'MX': 'codfw',
    'BR': 'eqiad',
    'GT': 'eqiad',
    'VE': 'eqiad',
    'CR': 'eqiad',
    'UY': 'eqiad',
    'CL': 'eqiad',
    'NI': 'eqiad',
    'HN': 'eqiad',
    'AR': 'eqiad',
    'SV': 'eqiad',
    'PY': 'eqiad',
    'SR': 'eqiad',
    'GF': 'eqiad',
    'BO': 'eqiad',
    'PE': 'eqiad',
    'FK': 'eqiad',
    'GY': 'eqiad',
    'EC': 'eqiad',
    'PA': 'eqiad',
    'BZ': 'eqiad',
    'CO': 'eqiad',
}

# Collect just the three columns the plot needs onto the driver.
plot_columns = latam_pulses.select(
    'target_name', 'geocoded_data.country_code', 'request_time_ms'
)
pandas_df = plot_columns.toPandas()

import seaborn as sns
import matplotlib.pyplot as plt

# Draws one horizontal violin per (country, target) pair from pandas_df and
# annotates each country's tick label with its sample size and the DC it is
# currently mapped to.

# Row count per country (summed over all targets); drives both the figure
# size and the "n=" part of the tick labels.
sample_sizes = pandas_df['country_code'].value_counts().to_dict()
num_countries = len(sample_sizes)
if num_countries == 0:
    # Without this guard the aspect computation below fails with an opaque
    # ZeroDivisionError.
    raise ValueError('No latency measurements matched the filters; nothing to plot')

# Facet width is height * aspect, so the figure grows by 3 in per country
# while its width stays fixed at 6 in.
g = sns.catplot(
    y='country_code', x='request_time_ms', hue='target_name',
    data=pandas_df,
    kind='violin',
    height=3 * num_countries,
    aspect=2 / num_countries,
    split=False,
)
plt.title('2024-05-03 LatAm RTT by Target PoP and Country Code')
plt.xlabel('rtt (ms)')
plt.ylabel('Country code')

# ax.collections contains the PolyCollections (violin bodies); give each one
# an outline in its own fill color.
for facet_ax in g.axes.flat:
    for body in facet_ax.collections:
        body.set_edgecolor(body.get_facecolor())

# Adjust y-axis labels to include sample sizes and current mappings.
ax = g.facet_axis(0, 0)
labels = [tick.get_text() for tick in ax.get_yticklabels()]
new_labels = [
    # A country may be present in the data but absent from the scraped
    # mapping; label it 'unknown' rather than aborting with a KeyError.
    f"{label} (n={sample_sizes[label]}) --> {current_mappings.get(label, 'unknown')}"
    if label in sample_sizes else label
    for label in labels
]
ax.set_yticklabels(new_labels)

# Dotted grid all the way down the graph, with a second x axis on top so the
# scale can be read without scrolling to the bottom of the tall figure.
ax.grid(True, which='major', linestyle='--', linewidth=1, color='gray', axis='x')
ax.secondary_xaxis('top')

# Adjust the legend
sns.move_legend(g, 'upper left')

# Only show 0..1000 ms because there is a long long tail
g.set(xlim=(0, 1000))
# Show plot
plt.show()