Part of T358707.
In this task, we want to get tables created in the production Cassandra instance so that we can start testing our data loading HQL.
Context:
- Commons Impact Metrics is an effort to support the GLAM community see the impact of their contributions.
- We have a set of 6 Iceberg tables that live in the Datalake.
- These 6 tables will be transformed into 14 Cassandra tables optimized for serving the data. ([[ https://docs.google.com/document/d/1sWPJzO9J6nwfhzJAwrbWZQBcvwiT8UhBBfn2kFW3pNQ/edit#heading=h.trp207kw0b93 | Serving Layer Design]])
- A set of 14 AQS endpoints will receive HTTP requests, query Cassandra and serve the data. ([[ https://docs.google.com/document/d/1sWPJzO9J6nwfhzJAwrbWZQBcvwiT8UhBBfn2kFW3pNQ/edit#heading=h.wrb2efe43iw5 | API design ]])
As per the Serving Layer Design, we have drafted the Cassandra DDL below.
```
CREATE KEYSPACE commons
WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : '1' }; -- this needs to be changed to whatever prod settings we typically use.
CREATE TABLE commons.category_metrics_snapshot (
category VARCHAR,
media_file_count BIGINT,
media_file_count_deep BIGINT,
used_media_file_count BIGINT,
used_media_file_count_deep BIGINT,
leveraging_wiki_count BIGINT,
leveraging_wiki_count_deep BIGINT,
leveraging_page_count BIGINT,
leveraging_page_count_deep BIGINT,
month TIMESTAMP,
PRIMARY KEY (category, month)
);
CREATE TABLE commons.media_file_metrics_snapshot (
media_file VARCHAR,
leveraging_wiki_count BIGINT,
leveraging_page_count BIGINT,
month TIMESTAMP,
PRIMARY KEY (media_file, month)
);
CREATE TABLE commons.pageviews_by_category (
category VARCHAR,
category_scope VARCHAR,
wiki VARCHAR,
page_title VARCHAR,
pageview_count BIGINT,
month TIMESTAMP,
PRIMARY KEY ((category, category_scope, wiki, page_title), month)
);
CREATE TABLE commons.pageviews_by_media_file (
media_file VARCHAR,
wiki VARCHAR,
page_title VARCHAR,
pageview_count BIGINT,
month TIMESTAMP,
PRIMARY KEY ((media_file, wiki, page_title), month)
);
CREATE TABLE commons.edits_by_category (
category VARCHAR,
category_scope VARCHAR,
edit_type VARCHAR,
edit_count BIGINT,
month TIMESTAMP,
PRIMARY KEY ((category, category_scope, edit_type), month)
);
CREATE TABLE commons.edits_by_user (
user_name VARCHAR,
edit_type VARCHAR,
edit_count BIGINT,
month TIMESTAMP,
PRIMARY KEY ((user_name, edit_type), month)
);
CREATE TABLE commons.top_pages_by_category (
category VARCHAR,
category_scope VARCHAR,
wiki VARCHAR,
top_data TEXT,
year INT,
month INT,
PRIMARY KEY ((category, category_scope, wiki, year, month))
);
CREATE TABLE commons.top_wikis_by_category (
category VARCHAR,
category_scope VARCHAR,
top_data TEXT,
year INT,
month INT,
PRIMARY KEY ((category, category_scope, year, month))
);
CREATE TABLE commons.top_viewed_categories (
category_scope VARCHAR,
wiki VARCHAR,
top_data TEXT,
year INT,
month INT,
PRIMARY KEY ((category_scope, wiki, year, month))
);
CREATE TABLE commons.top_pages_by_media_file (
media_file VARCHAR,
wiki VARCHAR,
top_data TEXT,
year INT,
month INT,
PRIMARY KEY ((media_file, wiki, year, month))
);
CREATE TABLE commons.top_wikis_by_media_file (
media_file VARCHAR,
top_data TEXT,
year INT,
month INT,
PRIMARY KEY ((media_file, year, month))
);
CREATE TABLE commons.top_viewed_media_files (
category VARCHAR,
category_scope VARCHAR,
wiki VARCHAR,
top_data TEXT,
year INT,
month INT,
PRIMARY KEY ((category, category_scope, wiki, year, month))
);
CREATE TABLE commons.top_edited_categories (
category_scope VARCHAR,
edit_type VARCHAR,
top_data TEXT,
year INT,
month INT,
PRIMARY KEY ((category_scope, edit_type, year, month))
);
CREATE TABLE commons.top_editors (
category VARCHAR,
category_scope VARCHAR,
edit_type VARCHAR,
top_data TEXT,
year INT,
month INT,
PRIMARY KEY ((category, category_scope, edit_type, year, month))
);
```