Part of T358707.
In this task, we want to get tables created in the production Cassandra instance so that we can start testing our data loading HQL.
Context:
- Commons Impact Metrics is an effort to support the GLAM community see the impact of their contributions.
- We have a set of 6 Iceberg tables that live in the Datalake.
- These 6 tables will be transformed into 14 Cassandra tables optimized for serving the data. (Serving Layer Design)
- A set of 14 AQS endpoints will receive HTTP requests, query Cassandra and serve the data. (API design)
As per the Serving Layer Design, we have drafted the Cassandra DDL below.
CREATE KEYSPACE commons WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : '1' }; -- this needs to be changed to whatever prod settings we typically use. CREATE TABLE commons.category_metrics_snapshot ( category VARCHAR, media_file_count BIGINT, media_file_count_deep BIGINT, used_media_file_count BIGINT, used_media_file_count_deep BIGINT, leveraging_wiki_count BIGINT, leveraging_wiki_count_deep BIGINT, leveraging_page_count BIGINT, leveraging_page_count_deep BIGINT, month TIMESTAMP, PRIMARY KEY (category, month) ); CREATE TABLE commons.media_file_metrics_snapshot ( media_file VARCHAR, leveraging_wiki_count BIGINT, leveraging_page_count BIGINT, month TIMESTAMP, PRIMARY KEY (media_file, month) ); CREATE TABLE commons.pageviews_by_category ( category VARCHAR, category_scope VARCHAR, wiki VARCHAR, pageview_count BIGINT, month TIMESTAMP, PRIMARY KEY ((category, category_scope, wiki), month) ); CREATE TABLE commons.pageviews_by_media_file ( media_file VARCHAR, wiki VARCHAR, pageview_count BIGINT, month TIMESTAMP, PRIMARY KEY ((media_file, wiki), month) ); CREATE TABLE commons.edits_by_category ( category VARCHAR, category_scope VARCHAR, edit_type VARCHAR, edit_count BIGINT, month TIMESTAMP, PRIMARY KEY ((category, category_scope, edit_type), month) ); CREATE TABLE commons.edits_by_user ( user_name VARCHAR, edit_type VARCHAR, edit_count BIGINT, month TIMESTAMP, PRIMARY KEY ((user_name, edit_type), month) ); CREATE TABLE commons.top_pages_by_category ( category VARCHAR, category_scope VARCHAR, wiki VARCHAR, top_data TEXT, year INT, month INT, PRIMARY KEY ((category, category_scope, wiki, year, month)) ); CREATE TABLE commons.top_wikis_by_category ( category VARCHAR, category_scope VARCHAR, top_data TEXT, year INT, month INT, PRIMARY KEY ((category, category_scope, year, month)) ); CREATE TABLE commons.top_viewed_categories ( category_scope VARCHAR, wiki VARCHAR, top_data TEXT, year INT, month INT, PRIMARY KEY ((category_scope, wiki, year, month)) ); CREATE TABLE commons.top_pages_by_media_file ( media_file VARCHAR, wiki VARCHAR, top_data TEXT, year INT, month INT, PRIMARY KEY ((media_file, wiki, year, month)) ); CREATE TABLE commons.top_wikis_by_media_file ( media_file VARCHAR, top_data TEXT, year INT, month INT, PRIMARY KEY ((media_file, year, month)) ); CREATE TABLE commons.top_viewed_media_files ( category VARCHAR, category_scope VARCHAR, wiki VARCHAR, top_data TEXT, year INT, month INT, PRIMARY KEY ((category, category_scope, wiki, year, month)) ); CREATE TABLE commons.top_edited_categories ( category_scope VARCHAR, edit_type VARCHAR, top_data TEXT, year INT, month INT, PRIMARY KEY ((category_scope, edit_type, year, month)) ); CREATE TABLE commons.top_editors ( category VARCHAR, category_scope VARCHAR, edit_type VARCHAR, top_data TEXT, year INT, month INT, PRIMARY KEY ((category, category_scope, edit_type, year, month)) );