File Metadata
File Metadata
- Mime Type
- image/png
- Attributes
- Image
- Storage Engine
- blob
- Storage Format
- Raw Data
- Storage Handle
- 16690340
- Default Alt Text
- Initial user-measured magru latency as violin plots, per country, Latin/South America (6×755 px, 421 KB)
Event Timeline
Comment Actions
# Notebook snippet: violin plots of user-measured request latency
# (request_time_ms) from Latin American clients to each candidate PoP,
# broken down by client country, for the window selected below.
import wmfdata
import pyspark.sql.functions as F
from pyspark.sql.functions import col
import seaborn as sns
import matplotlib.pyplot as plt

spark = wmfdata.spark.create_session(type='yarn-regular')

# PoPs being compared (only DCs in the Americas).
TARGET_POPS = ['codfw', 'eqiad', 'ulsfo', 'magru']

# Countries added to the 'South America' continent filter: Mexico plus
# Central America.
EXTRA_COUNTRY_CODES = ['MX', 'GT', 'BZ', 'SV', 'HN', 'NI', 'CR', 'PA']

# Scraped from geo-maps: the PoP each country is currently routed to.
current_mappings = {
    'MX': 'codfw', 'BR': 'eqiad', 'GT': 'eqiad', 'VE': 'eqiad',
    'CR': 'eqiad', 'UY': 'eqiad', 'CL': 'eqiad', 'NI': 'eqiad',
    'HN': 'eqiad', 'AR': 'eqiad', 'SV': 'eqiad', 'PY': 'eqiad',
    'SR': 'eqiad', 'GF': 'eqiad', 'BO': 'eqiad', 'PE': 'eqiad',
    'FK': 'eqiad', 'GY': 'eqiad', 'EC': 'eqiad', 'PA': 'eqiad',
    'BZ': 'eqiad', 'CO': 'eqiad',
}

# All latency measurements since magru was enabled for measurements
# (2024-05-02 18:00 onwards, through 2024-05-03).
measurements = (
    spark.table('event.development_network_probe')
    .where('year=2024 and month=05 and ((day=2 and hour>=18) or (day=3))')
)

# Each probe pulse is nested deeply inside the event; explode reports, then
# pulses, so that every row is a single pulse.
# NOTE(review): only pulse_number == 2 is kept -- presumably to skip the
# first pulse's connection-setup cost; confirm against the probe's design.
pulses = (
    measurements
    .withColumn('report', F.explode('reports'))
    .withColumn('probe', F.explode('report.pulses'))
    .where('probe.pulse_number==2')
    .select(
        col('report.target_name'),
        col('geocoded_data'),
        col('probe.request_time_ms'),
    )
)

# Filter down to South America plus Mexico/Central America, and keep only
# measurements against the PoPs being compared.
latam_pulses = (
    pulses
    .where(
        (col('geocoded_data.continent') == 'South America')
        | col('geocoded_data.country_code').isin(EXTRA_COUNTRY_CODES)
    )
    .where(col('target_name').isin(TARGET_POPS))
)

# Time to plot: pull the three columns we need onto the driver.
pandas_df = latam_pulses.select(
    'target_name', 'geocoded_data.country_code', 'request_time_ms'
).toPandas()

sample_sizes = pandas_df['country_code'].value_counts().to_dict()
num_countries = len(sample_sizes)
if not sample_sizes:
    # Without this guard the aspect computation below fails with an
    # unexplained ZeroDivisionError.
    raise RuntimeError('No matching probe measurements found; nothing to plot.')

# Height grows with the number of countries; aspect is chosen so that the
# facet width (height * aspect) stays constant at 6 inches.
g = sns.catplot(
    y='country_code',
    x='request_time_ms',
    hue='target_name',
    data=pandas_df,
    kind='violin',
    height=3 * num_countries,
    aspect=2 / num_countries,
    split=False,
)
plt.title('2024-05-03 LatAm RTT by Target PoP and Country Code')
plt.xlabel('rtt (ms)')
plt.ylabel('Country code')

for facet_ax in g.axes.flat:
    # ax.collections contains the PolyCollections (violin plots).
    for violin in facet_ax.collections:
        # Set edge color to match face color.
        violin.set_edgecolor(violin.get_facecolor())

# Adjust y-axis labels to include sample sizes and current mappings.
ax = g.facet_axis(0, 0)
labels = [tick.get_text() for tick in ax.get_yticklabels()]
new_labels = [
    (
        f"{label} (n={sample_sizes[label]}) --> "
        # The continent filter can admit countries missing from the
        # hand-scraped mapping; label them instead of raising KeyError.
        f"{current_mappings.get(label, 'unknown')}"
    )
    if label in sample_sizes
    else label
    for label in labels
]
# Pin the tick positions before replacing their labels, as matplotlib
# requires for set_yticklabels.
ax.set_yticks(ax.get_yticks())
ax.set_yticklabels(new_labels)

# Dotted grid all the way down the graph, with a second x axis on top.
ax.grid(True, which='major', linestyle='--', linewidth=1, color='gray', axis='x')
ax.secondary_xaxis('top')

# Adjust the legend.
sns.move_legend(g, 'upper left')

# Only show 0..1000 ms because there is a long long tail.
g.set(xlim=(0, 1000))

# Show plot.
plt.show()