Page MenuHomePhabricator

Migrate revisions and restrictions from legacy to new storage
Closed, ResolvedPublic

Description

Migrate revisions and restrictions from legacy storage (Cassandra 2.x) to the new strategy and cluster (Cassandra 3.x).

Event Timeline

Eevans created this task.Oct 31 2017, 5:01 PM

Script used to generate the table creation statements:

1"use strict";
2
3
4const crypto = require('crypto');
5const colors = require('colors/safe');
6const P = require('bluebird');
7const yargs = require('yargs');
8
9const green = colors.green;
10const red = colors.red;
11const yellow = colors.yellow;
12
13
/**
 * Derive a short, identifier-safe digest of a key.
 *
 * Computes the SHA-1 of `key`, base64-encodes it, replaces the `+` and `/`
 * characters with `_`, and strips the trailing `=` padding.
 *
 * @param {string|Buffer} key - value to hash
 * @returns {string} 27-character string over [A-Za-z0-9_]
 */
function hashKey(key) {
    // crypto.createHash() is the supported factory; invoking the Hash
    // constructor directly is deprecated in Node.js.
    return crypto.createHash('sha1')
        .update(key)
        .digest('base64')
        // Replace [+/] from base64 with _ (illegal in Cassandra)
        .replace(/[+/]/g, '_')
        // Remove base64 padding, has no entropy
        .replace(/=+$/, '');
}
24
25
/**
 * Return the longest leading run of `key` made only of [a-zA-Z0-9_].
 *
 * @param {string} key - candidate identifier
 * @returns {string} the valid leading run, or '' when the first character
 *     is already outside the allowed set (or `key` is empty)
 */
function getValidPrefix(key) {
    const prefixMatch = /^[a-zA-Z0-9_]+/.exec(key);
    return prefixMatch ? prefixMatch[0] : '';
}
34
35
/**
 * Turn an arbitrary key into an identifier over [a-zA-Z0-9_] that is at most
 * `length` characters long.
 *
 * Existing underscores are doubled and dots become single underscores. When
 * the result still contains other characters, or is longer than `length`, a
 * leading part of it is kept and the remainder is filled with characters
 * from hashKey() of the original key.
 *
 * @param {string} key - original key
 * @param {number} length - maximum length of the returned identifier
 * @returns {string} identifier derived from `key`
 */
function makeValidKey(key, length) {
    const escaped = key.replace(/_/g, '__').replace(/\./g, '_');
    // First `n` characters of `str`; fractional counts are truncated and
    // non-positive counts yield '' (same result as the legacy substr(0, n)).
    const take = (str, n) => str.slice(0, Math.max(0, Math.trunc(n)));

    if (!/^[a-zA-Z0-9_]+$/.test(escaped)) {
        // Keep at most two thirds of the budget from the valid leading run
        // and fill the remainder with hash characters.
        const validPrefix = take(getValidPrefix(escaped), length * 2 / 3);
        return validPrefix + take(hashKey(key), length - validPrefix.length);
    }
    if (escaped.length > length) {
        // Too long: two thirds from the key itself, one third from the hash.
        return take(escaped, length * 2 / 3) + take(hashKey(key), length / 3);
    }
    return escaped;
}
50
51
/**
 * Build the keyspace name for a storage group / table pair.
 *
 * The lower-cased, dot-reversed `name` and the `table` are each passed
 * through makeValidKey() and joined with the separator `_T_`. The length
 * budgets are derived from 48 so that prefix + separator + table part never
 * exceeds 48 characters (Cassandra's keyspace-name limit).
 *
 * @param {string} name - storage group or domain name
 * @param {string} table - logical table name
 * @returns {string} unquoted keyspace name
 */
function keyspaceName(name, table) {
    const reversedName = name.toLowerCase().split('.').reverse().join('.');
    // The prefix gets whatever the table name leaves over, but at least 26.
    const prefix = makeValidKey(reversedName, Math.max(26, 48 - table.length - 3));
    // 3-char separator _T_ between the domain part and the table part
    return `${prefix}_T_${makeValidKey(table, 48 - prefix.length - 3)}`;
}
59
60
/**
 * Quote a name as a CQL identifier.
 *
 * The name is wrapped in double quotes and any embedded double quote is
 * doubled. Names consisting only of [a-zA-Z0-9_] contain no quotes, so a
 * single code path covers both cases.
 *
 * @param {string} name - raw identifier
 * @returns {string} double-quoted identifier
 */
function cassID(name) {
    return `"${String(name).replace(/"/g, '""')}"`;
}
68
69const tables = {
70 'page_restrictions': 'blob'
71};
72
73const storages = [
74 'enwiki',
75 'commons',
76 'wikipedia',
77 'others'
78];
79
80const qKs = `CREATE KEYSPACE IF NOT EXISTS <keyspace> WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;`;
81
82const qMeta = `CREATE TABLE IF NOT EXISTS <keyspace>.meta (
83 key text PRIMARY KEY,
84 value text
85) WITH bloom_filter_fp_chance = 0.1
86 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
87 AND comment = ''
88 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
89 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
90 AND crc_check_chance = 1.0
91 AND dclocal_read_repair_chance = 0.1
92 AND default_time_to_live = 0
93 AND gc_grace_seconds = 864000
94 AND max_index_interval = 2048
95 AND memtable_flush_period_in_ms = 0
96 AND min_index_interval = 128
97 AND read_repair_chance = 0.0
98 AND speculative_retry = '99PERCENTILE';`;
99
100const qData = `CREATE TABLE <keyspace>.data (
101 "_domain" text,
102 title text,
103 rev int,
104 page_deleted int static,
105 redirect text,
106 restrictions set<text>,
107 PRIMARY KEY (("_domain", title), rev)
108) WITH CLUSTERING ORDER BY (rev DESC)
109 AND bloom_filter_fp_chance = 0.01
110 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
111 AND comment = ''
112 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
113 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
114 AND crc_check_chance = 1.0
115 AND dclocal_read_repair_chance = 0.1
116 AND default_time_to_live = 0
117 AND gc_grace_seconds = 86400
118 AND max_index_interval = 2048
119 AND memtable_flush_period_in_ms = 0
120 AND min_index_interval = 128
121 AND read_repair_chance = 0.0
122 AND speculative_retry = '99PERCENTILE';`;
123
124const qDataRevision = `CREATE TABLE IF NOT EXISTS <keyspace>.data (
125 "_domain" text,
126 key text,
127 ts timestamp,
128 rev int,
129 PRIMARY KEY (("_domain", key), ts)
130) WITH CLUSTERING ORDER BY (ts DESC)
131 AND bloom_filter_fp_chance = 0.1
132 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
133 AND comment = ''
134 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
135 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
136 AND crc_check_chance = 1.0
137 AND dclocal_read_repair_chance = 0.1
138 AND default_time_to_live = 864000
139 AND gc_grace_seconds = 864000
140 AND max_index_interval = 2048
141 AND memtable_flush_period_in_ms = 0
142 AND min_index_interval = 128
143 AND read_repair_chance = 0.0
144 AND speculative_retry = '99PERCENTILE';`;
145
146const qDataRender = `CREATE TABLE IF NOT EXISTS <keyspace>.data (
147 "_domain" text,
148 key text,
149 rev int,
150 ts timestamp,
151 tid timeuuid,
152 PRIMARY KEY (("_domain", key), rev, ts)
153) WITH CLUSTERING ORDER BY (rev DESC, ts DESC)
154 AND bloom_filter_fp_chance = 0.1
155 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
156 AND comment = ''
157 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
158 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
159 AND crc_check_chance = 1.0
160 AND dclocal_read_repair_chance = 0.1
161 AND default_time_to_live = 864000
162 AND gc_grace_seconds = 864000
163 AND max_index_interval = 2048
164 AND memtable_flush_period_in_ms = 0
165 AND min_index_interval = 128
166 AND read_repair_chance = 0.0
167 AND speculative_retry = '99PERCENTILE';`;
168
169const qKvData = `CREATE TABLE IF NOT EXISTS <keyspace>.data (
170 "_domain" text,
171 key text,
172 tid timeuuid,
173 headers blob,
174 value <type>,
175 PRIMARY KEY (("_domain", key))
176) WITH bloom_filter_fp_chance = 0.01
177 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
178 AND comment = ''
179 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
180 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
181 AND crc_check_chance = 1.0
182 AND dclocal_read_repair_chance = 0.1
183 AND default_time_to_live = 0
184 AND gc_grace_seconds = 86400
185 AND max_index_interval = 2048
186 AND memtable_flush_period_in_ms = 0
187 AND min_index_interval = 128
188 AND read_repair_chance = 0.0
189 AND speculative_retry = '99PERCENTILE';`;
190
191
// Command-line handling: the only option declared here is -h / --help.
const argv = yargs
    .usage('Usage: $0 [options]')
    .options('h', { alias: 'help' })
    .argv;

// Show the usage text and exit before any YAML is printed.
if (argv.help) {
    yargs.showHelp();
    process.exit(0);
}
200
201
/**
 * Print a CQL statement as the body of a YAML block scalar.
 *
 * Substitutes the `<keyspace>` placeholder (and `<type>` when a type is
 * given) and writes every line of the statement indented below its key.
 *
 * NOTE(review): the indent widths of the original script were lost in the
 * paste; any widths work as long as the statement body is indented deeper
 * than the `statement:` key that introduces it.
 *
 * @param {string} query - CQL template
 * @param {string} keyspace - quoted keyspace identifier
 * @param {string} [type] - optional CQL type for the `<type>` placeholder
 */
function printStatement(query, keyspace, type) {
    let output = query.replace('<keyspace>', keyspace);
    if (type) {
        output = output.replace('<type>', type);
    }
    for (const line of output.split('\n')) {
        console.log(`        ${line}`);
    }
}

// For every storage group / table pair emit three YAML entries: the
// keyspace, its meta table and its data table. Everything here is
// synchronous, so plain loops are sufficient.
for (const storage of storages) {
    for (const table of Object.keys(tables)) {
        const keyspace = cassID(keyspaceName(storage, table));
        // YAML keys use the keyspace name without the CQL quoting.
        const label = keyspace.replace(/"/g, '');

        console.log(`${label}:`);
        console.log('    statement: |');
        printStatement(qKs, keyspace);
        console.log(`${label}_meta:`);
        console.log('    statement: |');
        printStatement(qMeta, keyspace);
        console.log(`${label}_data:`);
        // Same key as the other entries (was "qKvData statement").
        console.log('    statement: |');
        printStatement(qData, keyspace, tables[table]);
        console.log('\n# -----\n');
    }
}

the output:

1enwiki_T_page__restrictions:
2 statement: |
3 CREATE KEYSPACE IF NOT EXISTS "enwiki_T_page__restrictions" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
4enwiki_T_page__restrictions_meta:
5 statement: |
6 CREATE TABLE IF NOT EXISTS "enwiki_T_page__restrictions".meta (
7 key text PRIMARY KEY,
8 value text
9 ) WITH bloom_filter_fp_chance = 0.1
10 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
11 AND comment = ''
12 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
13 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
14 AND crc_check_chance = 1.0
15 AND dclocal_read_repair_chance = 0.1
16 AND default_time_to_live = 0
17 AND gc_grace_seconds = 864000
18 AND max_index_interval = 2048
19 AND memtable_flush_period_in_ms = 0
20 AND min_index_interval = 128
21 AND read_repair_chance = 0.0
22 AND speculative_retry = '99PERCENTILE';
23enwiki_T_page__restrictions_data:
24 qKvData statement: |
25 CREATE TABLE "enwiki_T_page__restrictions".data (
26 "_domain" text,
27 title text,
28 rev int,
29 page_deleted int static,
30 redirect text,
31 restrictions set<text>,
32 PRIMARY KEY (("_domain", title), rev)
33 ) WITH CLUSTERING ORDER BY (rev DESC)
34 AND bloom_filter_fp_chance = 0.01
35 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
36 AND comment = ''
37 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
38 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
39 AND crc_check_chance = 1.0
40 AND dclocal_read_repair_chance = 0.1
41 AND default_time_to_live = 0
42 AND gc_grace_seconds = 86400
43 AND max_index_interval = 2048
44 AND memtable_flush_period_in_ms = 0
45 AND min_index_interval = 128
46 AND read_repair_chance = 0.0
47 AND speculative_retry = '99PERCENTILE';
48
49# -----
50
51commons_T_page__restrictions:
52 statement: |
53 CREATE KEYSPACE IF NOT EXISTS "commons_T_page__restrictions" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
54commons_T_page__restrictions_meta:
55 statement: |
56 CREATE TABLE IF NOT EXISTS "commons_T_page__restrictions".meta (
57 key text PRIMARY KEY,
58 value text
59 ) WITH bloom_filter_fp_chance = 0.1
60 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
61 AND comment = ''
62 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
63 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
64 AND crc_check_chance = 1.0
65 AND dclocal_read_repair_chance = 0.1
66 AND default_time_to_live = 0
67 AND gc_grace_seconds = 864000
68 AND max_index_interval = 2048
69 AND memtable_flush_period_in_ms = 0
70 AND min_index_interval = 128
71 AND read_repair_chance = 0.0
72 AND speculative_retry = '99PERCENTILE';
73commons_T_page__restrictions_data:
74 qKvData statement: |
75 CREATE TABLE "commons_T_page__restrictions".data (
76 "_domain" text,
77 title text,
78 rev int,
79 page_deleted int static,
80 redirect text,
81 restrictions set<text>,
82 PRIMARY KEY (("_domain", title), rev)
83 ) WITH CLUSTERING ORDER BY (rev DESC)
84 AND bloom_filter_fp_chance = 0.01
85 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
86 AND comment = ''
87 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
88 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
89 AND crc_check_chance = 1.0
90 AND dclocal_read_repair_chance = 0.1
91 AND default_time_to_live = 0
92 AND gc_grace_seconds = 86400
93 AND max_index_interval = 2048
94 AND memtable_flush_period_in_ms = 0
95 AND min_index_interval = 128
96 AND read_repair_chance = 0.0
97 AND speculative_retry = '99PERCENTILE';
98
99# -----
100
101wikipedia_T_page__restrictions:
102 statement: |
103 CREATE KEYSPACE IF NOT EXISTS "wikipedia_T_page__restrictions" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
104wikipedia_T_page__restrictions_meta:
105 statement: |
106 CREATE TABLE IF NOT EXISTS "wikipedia_T_page__restrictions".meta (
107 key text PRIMARY KEY,
108 value text
109 ) WITH bloom_filter_fp_chance = 0.1
110 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
111 AND comment = ''
112 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
113 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
114 AND crc_check_chance = 1.0
115 AND dclocal_read_repair_chance = 0.1
116 AND default_time_to_live = 0
117 AND gc_grace_seconds = 864000
118 AND max_index_interval = 2048
119 AND memtable_flush_period_in_ms = 0
120 AND min_index_interval = 128
121 AND read_repair_chance = 0.0
122 AND speculative_retry = '99PERCENTILE';
123wikipedia_T_page__restrictions_data:
124 qKvData statement: |
125 CREATE TABLE "wikipedia_T_page__restrictions".data (
126 "_domain" text,
127 title text,
128 rev int,
129 page_deleted int static,
130 redirect text,
131 restrictions set<text>,
132 PRIMARY KEY (("_domain", title), rev)
133 ) WITH CLUSTERING ORDER BY (rev DESC)
134 AND bloom_filter_fp_chance = 0.01
135 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
136 AND comment = ''
137 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
138 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
139 AND crc_check_chance = 1.0
140 AND dclocal_read_repair_chance = 0.1
141 AND default_time_to_live = 0
142 AND gc_grace_seconds = 86400
143 AND max_index_interval = 2048
144 AND memtable_flush_period_in_ms = 0
145 AND min_index_interval = 128
146 AND read_repair_chance = 0.0
147 AND speculative_retry = '99PERCENTILE';
148
149# -----
150
151others_T_page__restrictions:
152 statement: |
153 CREATE KEYSPACE IF NOT EXISTS "others_T_page__restrictions" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
154others_T_page__restrictions_meta:
155 statement: |
156 CREATE TABLE IF NOT EXISTS "others_T_page__restrictions".meta (
157 key text PRIMARY KEY,
158 value text
159 ) WITH bloom_filter_fp_chance = 0.1
160 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
161 AND comment = ''
162 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
163 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
164 AND crc_check_chance = 1.0
165 AND dclocal_read_repair_chance = 0.1
166 AND default_time_to_live = 0
167 AND gc_grace_seconds = 864000
168 AND max_index_interval = 2048
169 AND memtable_flush_period_in_ms = 0
170 AND min_index_interval = 128
171 AND read_repair_chance = 0.0
172 AND speculative_retry = '99PERCENTILE';
173others_T_page__restrictions_data:
174 qKvData statement: |
175 CREATE TABLE "others_T_page__restrictions".data (
176 "_domain" text,
177 title text,
178 rev int,
179 page_deleted int static,
180 redirect text,
181 restrictions set<text>,
182 PRIMARY KEY (("_domain", title), rev)
183 ) WITH CLUSTERING ORDER BY (rev DESC)
184 AND bloom_filter_fp_chance = 0.01
185 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
186 AND comment = ''
187 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
188 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
189 AND crc_check_chance = 1.0
190 AND dclocal_read_repair_chance = 0.1
191 AND default_time_to_live = 0
192 AND gc_grace_seconds = 86400
193 AND max_index_interval = 2048
194 AND memtable_flush_period_in_ms = 0
195 AND min_index_interval = 128
196 AND read_repair_chance = 0.0
197 AND speculative_retry = '99PERCENTILE';
198
199# -----

The CQL statements LGTM.

Mentioned in SAL (#wikimedia-operations) [2017-11-08T19:31:07Z] <urandom> Creating page restrictions schema (T179421)

Eevans added a comment.Nov 8 2017, 7:47 PM

The schema has been created.

NOTE: There was one minor issue with the YAML posted above: the key of each `_data` entry reads `qKvData statement` instead of `statement` (see, for example, line 24 here: https://phabricator.wikimedia.org/P6288$24)

Script used to create the YAML for the title_revisions-ng table:

1"use strict";
2
3
4const crypto = require('crypto');
5const P = require('bluebird');
6
7
/**
 * Derive a short, identifier-safe digest of a key.
 *
 * Computes the SHA-1 of `key`, base64-encodes it, replaces the `+` and `/`
 * characters with `_`, and strips the trailing `=` padding.
 *
 * @param {string|Buffer} key - value to hash
 * @returns {string} 27-character string over [A-Za-z0-9_]
 */
function hashKey(key) {
    // crypto.createHash() is the supported factory; invoking the Hash
    // constructor directly is deprecated in Node.js.
    return crypto.createHash('sha1')
        .update(key)
        .digest('base64')
        // Replace [+/] from base64 with _ (illegal in Cassandra)
        .replace(/[+/]/g, '_')
        // Remove base64 padding, has no entropy
        .replace(/=+$/, '');
}
18
19
/**
 * Return the longest leading run of `key` made only of [a-zA-Z0-9_].
 *
 * @param {string} key - candidate identifier
 * @returns {string} the valid leading run, or '' when the first character
 *     is already outside the allowed set (or `key` is empty)
 */
function getValidPrefix(key) {
    const prefixMatch = /^[a-zA-Z0-9_]+/.exec(key);
    return prefixMatch ? prefixMatch[0] : '';
}
28
29
/**
 * Turn an arbitrary key into an identifier over [a-zA-Z0-9_] that is at most
 * `length` characters long.
 *
 * Existing underscores are doubled and dots become single underscores. When
 * the result still contains other characters, or is longer than `length`, a
 * leading part of it is kept and the remainder is filled with characters
 * from hashKey() of the original key.
 *
 * @param {string} key - original key
 * @param {number} length - maximum length of the returned identifier
 * @returns {string} identifier derived from `key`
 */
function makeValidKey(key, length) {
    const escaped = key.replace(/_/g, '__').replace(/\./g, '_');
    // First `n` characters of `str`; fractional counts are truncated and
    // non-positive counts yield '' (same result as the legacy substr(0, n)).
    const take = (str, n) => str.slice(0, Math.max(0, Math.trunc(n)));

    if (!/^[a-zA-Z0-9_]+$/.test(escaped)) {
        // Keep at most two thirds of the budget from the valid leading run
        // and fill the remainder with hash characters.
        const validPrefix = take(getValidPrefix(escaped), length * 2 / 3);
        return validPrefix + take(hashKey(key), length - validPrefix.length);
    }
    if (escaped.length > length) {
        // Too long: two thirds from the key itself, one third from the hash.
        return take(escaped, length * 2 / 3) + take(hashKey(key), length / 3);
    }
    return escaped;
}
44
45
/**
 * Build the keyspace name for a storage group / table pair.
 *
 * The lower-cased, dot-reversed `name` and the `table` are each passed
 * through makeValidKey() and joined with the separator `_T_`. The length
 * budgets are derived from 48 so that prefix + separator + table part never
 * exceeds 48 characters (Cassandra's keyspace-name limit).
 *
 * @param {string} name - storage group or domain name
 * @param {string} table - logical table name
 * @returns {string} unquoted keyspace name
 */
function keyspaceName(name, table) {
    const reversedName = name.toLowerCase().split('.').reverse().join('.');
    // The prefix gets whatever the table name leaves over, but at least 26.
    const prefix = makeValidKey(reversedName, Math.max(26, 48 - table.length - 3));
    // 3-char separator _T_ between the domain part and the table part
    return `${prefix}_T_${makeValidKey(table, 48 - prefix.length - 3)}`;
}
53
54
/**
 * Quote a name as a CQL identifier.
 *
 * The name is wrapped in double quotes and any embedded double quote is
 * doubled. Names consisting only of [a-zA-Z0-9_] contain no quotes, so a
 * single code path covers both cases.
 *
 * @param {string} name - raw identifier
 * @returns {string} double-quoted identifier
 */
function cassID(name) {
    return `"${String(name).replace(/"/g, '""')}"`;
}
62
63const tables = {
64 'title_revisions-ng': 'blob'
65};
66
67const storages = [
68 'enwiki',
69 'commons',
70 'wikipedia',
71 'others'
72];
73
74const qKs = `CREATE KEYSPACE IF NOT EXISTS <keyspace> WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;`;
75
76const qMeta = `CREATE TABLE IF NOT EXISTS <keyspace>.meta (
77 key text PRIMARY KEY,
78 value text
79) WITH bloom_filter_fp_chance = 0.1
80 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
81 AND comment = ''
82 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
83 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
84 AND crc_check_chance = 1.0
85 AND dclocal_read_repair_chance = 0.1
86 AND default_time_to_live = 0
87 AND gc_grace_seconds = 864000
88 AND max_index_interval = 2048
89 AND memtable_flush_period_in_ms = 0
90 AND min_index_interval = 128
91 AND read_repair_chance = 0.0
92 AND speculative_retry = '99PERCENTILE';`;
93
94const qData = `CREATE TABLE <keyspace>.data (
95 "_domain" text,
96 title text,
97 rev int,
98 comment text,
99 namespace int,
100 page_deleted int static,
101 page_id int,
102 redirect boolean,
103 restrictions set<text>,
104 tags set<text>,
105 tid timeuuid,
106 timestamp timestamp,
107 user_id int,
108 user_text text,
109 PRIMARY KEY (("_domain", title), rev)
110) WITH CLUSTERING ORDER BY (rev DESC)
111 AND bloom_filter_fp_chance = 0.01
112 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
113 AND comment = ''
114 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
115 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
116 AND crc_check_chance = 1.0
117 AND dclocal_read_repair_chance = 0.1
118 AND default_time_to_live = 0
119 AND gc_grace_seconds = 86400
120 AND max_index_interval = 2048
121 AND memtable_flush_period_in_ms = 0
122 AND min_index_interval = 128
123 AND read_repair_chance = 0.0
124 AND speculative_retry = '99PERCENTILE';`;
125
126const qDataRevision = `CREATE TABLE IF NOT EXISTS <keyspace>.data (
127 "_domain" text,
128 key text,
129 ts timestamp,
130 rev int,
131 PRIMARY KEY (("_domain", key), ts)
132) WITH CLUSTERING ORDER BY (ts DESC)
133 AND bloom_filter_fp_chance = 0.1
134 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
135 AND comment = ''
136 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
137 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
138 AND crc_check_chance = 1.0
139 AND dclocal_read_repair_chance = 0.1
140 AND default_time_to_live = 864000
141 AND gc_grace_seconds = 864000
142 AND max_index_interval = 2048
143 AND memtable_flush_period_in_ms = 0
144 AND min_index_interval = 128
145 AND read_repair_chance = 0.0
146 AND speculative_retry = '99PERCENTILE';`;
147
148const qDataRender = `CREATE TABLE IF NOT EXISTS <keyspace>.data (
149 "_domain" text,
150 key text,
151 rev int,
152 ts timestamp,
153 tid timeuuid,
154 PRIMARY KEY (("_domain", key), rev, ts)
155) WITH CLUSTERING ORDER BY (rev DESC, ts DESC)
156 AND bloom_filter_fp_chance = 0.1
157 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
158 AND comment = ''
159 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
160 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
161 AND crc_check_chance = 1.0
162 AND dclocal_read_repair_chance = 0.1
163 AND default_time_to_live = 864000
164 AND gc_grace_seconds = 864000
165 AND max_index_interval = 2048
166 AND memtable_flush_period_in_ms = 0
167 AND min_index_interval = 128
168 AND read_repair_chance = 0.0
169 AND speculative_retry = '99PERCENTILE';`;
170
171const qKvData = `CREATE TABLE IF NOT EXISTS <keyspace>.data (
172 "_domain" text,
173 key text,
174 tid timeuuid,
175 headers blob,
176 value <type>,
177 PRIMARY KEY (("_domain", key))
178) WITH bloom_filter_fp_chance = 0.01
179 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
180 AND comment = ''
181 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
182 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
183 AND crc_check_chance = 1.0
184 AND dclocal_read_repair_chance = 0.1
185 AND default_time_to_live = 0
186 AND gc_grace_seconds = 86400
187 AND max_index_interval = 2048
188 AND memtable_flush_period_in_ms = 0
189 AND min_index_interval = 128
190 AND read_repair_chance = 0.0
191 AND speculative_retry = '99PERCENTILE';`;
192
193
/**
 * Print a CQL statement as the body of a YAML block scalar.
 *
 * Substitutes the `<keyspace>` placeholder (and `<type>` when a type is
 * given) and writes every line of the statement indented below its key.
 *
 * NOTE(review): the indent widths of the original script were lost in the
 * paste; any widths work as long as the statement body is indented deeper
 * than the `statement:` key that introduces it.
 *
 * @param {string} query - CQL template
 * @param {string} keyspace - quoted keyspace identifier
 * @param {string} [type] - optional CQL type for the `<type>` placeholder
 */
function printStatement(query, keyspace, type) {
    let output = query.replace('<keyspace>', keyspace);
    if (type) {
        output = output.replace('<type>', type);
    }
    for (const line of output.split('\n')) {
        console.log(`        ${line}`);
    }
}

// For every storage group / table pair emit three YAML entries: the
// keyspace, its meta table and its data table. Everything here is
// synchronous, so plain loops are sufficient.
for (const storage of storages) {
    for (const table of Object.keys(tables)) {
        const keyspace = cassID(keyspaceName(storage, table));
        // YAML keys use the keyspace name without the CQL quoting.
        const label = keyspace.replace(/"/g, '');

        console.log(`${label}:`);
        console.log('    statement: |');
        printStatement(qKs, keyspace);
        console.log(`${label}_meta:`);
        console.log('    statement: |');
        printStatement(qMeta, keyspace);
        console.log(`${label}_data:`);
        // Same key as the other entries (was "qKvData statement").
        console.log('    statement: |');
        printStatement(qData, keyspace, tables[table]);
        console.log('\n# -----\n');
    }
}

Resulting YAML:

1enwiki_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe:
2 statement: |
3 CREATE KEYSPACE IF NOT EXISTS "enwiki_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
4enwiki_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe_meta:
5 statement: |
6 CREATE TABLE IF NOT EXISTS "enwiki_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe".meta (
7 key text PRIMARY KEY,
8 value text
9 ) WITH bloom_filter_fp_chance = 0.1
10 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
11 AND comment = ''
12 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
13 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
14 AND crc_check_chance = 1.0
15 AND dclocal_read_repair_chance = 0.1
16 AND default_time_to_live = 0
17 AND gc_grace_seconds = 864000
18 AND max_index_interval = 2048
19 AND memtable_flush_period_in_ms = 0
20 AND min_index_interval = 128
21 AND read_repair_chance = 0.0
22 AND speculative_retry = '99PERCENTILE';
23enwiki_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe_data:
24 qKvData statement: |
25 CREATE TABLE "enwiki_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe".data (
26 "_domain" text,
27 title text,
28 rev int,
29 comment text,
30 namespace int,
31 page_deleted int static,
32 page_id int,
33 redirect boolean,
34 restrictions set<text>,
35 tags set<text>,
36 tid timeuuid,
37 timestamp timestamp,
38 user_id int,
39 user_text text,
40 PRIMARY KEY (("_domain", title), rev)
41 ) WITH CLUSTERING ORDER BY (rev DESC)
42 AND bloom_filter_fp_chance = 0.01
43 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
44 AND comment = ''
45 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
46 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
47 AND crc_check_chance = 1.0
48 AND dclocal_read_repair_chance = 0.1
49 AND default_time_to_live = 0
50 AND gc_grace_seconds = 86400
51 AND max_index_interval = 2048
52 AND memtable_flush_period_in_ms = 0
53 AND min_index_interval = 128
54 AND read_repair_chance = 0.0
55 AND speculative_retry = '99PERCENTILE';
56
57# -----
58
59commons_T_title__revisions3WsaB42Wia1E_eq_KmoYTH:
60 statement: |
61 CREATE KEYSPACE IF NOT EXISTS "commons_T_title__revisions3WsaB42Wia1E_eq_KmoYTH" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
62commons_T_title__revisions3WsaB42Wia1E_eq_KmoYTH_meta:
63 statement: |
64 CREATE TABLE IF NOT EXISTS "commons_T_title__revisions3WsaB42Wia1E_eq_KmoYTH".meta (
65 key text PRIMARY KEY,
66 value text
67 ) WITH bloom_filter_fp_chance = 0.1
68 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
69 AND comment = ''
70 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
71 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
72 AND crc_check_chance = 1.0
73 AND dclocal_read_repair_chance = 0.1
74 AND default_time_to_live = 0
75 AND gc_grace_seconds = 864000
76 AND max_index_interval = 2048
77 AND memtable_flush_period_in_ms = 0
78 AND min_index_interval = 128
79 AND read_repair_chance = 0.0
80 AND speculative_retry = '99PERCENTILE';
81commons_T_title__revisions3WsaB42Wia1E_eq_KmoYTH_data:
82 qKvData statement: |
83 CREATE TABLE "commons_T_title__revisions3WsaB42Wia1E_eq_KmoYTH".data (
84 "_domain" text,
85 title text,
86 rev int,
87 comment text,
88 namespace int,
89 page_deleted int static,
90 page_id int,
91 redirect boolean,
92 restrictions set<text>,
93 tags set<text>,
94 tid timeuuid,
95 timestamp timestamp,
96 user_id int,
97 user_text text,
98 PRIMARY KEY (("_domain", title), rev)
99 ) WITH CLUSTERING ORDER BY (rev DESC)
100 AND bloom_filter_fp_chance = 0.01
101 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
102 AND comment = ''
103 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
104 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
105 AND crc_check_chance = 1.0
106 AND dclocal_read_repair_chance = 0.1
107 AND default_time_to_live = 0
108 AND gc_grace_seconds = 86400
109 AND max_index_interval = 2048
110 AND memtable_flush_period_in_ms = 0
111 AND min_index_interval = 128
112 AND read_repair_chance = 0.0
113 AND speculative_retry = '99PERCENTILE';
114
115# -----
116
117wikipedia_T_title__revisions3WsaB42Wia1E_eq_KmoY:
118 statement: |
119 CREATE KEYSPACE IF NOT EXISTS "wikipedia_T_title__revisions3WsaB42Wia1E_eq_KmoY" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
120wikipedia_T_title__revisions3WsaB42Wia1E_eq_KmoY_meta:
121 statement: |
122 CREATE TABLE IF NOT EXISTS "wikipedia_T_title__revisions3WsaB42Wia1E_eq_KmoY".meta (
123 key text PRIMARY KEY,
124 value text
125 ) WITH bloom_filter_fp_chance = 0.1
126 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
127 AND comment = ''
128 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
129 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
130 AND crc_check_chance = 1.0
131 AND dclocal_read_repair_chance = 0.1
132 AND default_time_to_live = 0
133 AND gc_grace_seconds = 864000
134 AND max_index_interval = 2048
135 AND memtable_flush_period_in_ms = 0
136 AND min_index_interval = 128
137 AND read_repair_chance = 0.0
138 AND speculative_retry = '99PERCENTILE';
139wikipedia_T_title__revisions3WsaB42Wia1E_eq_KmoY_data:
140 qKvData statement: |
141 CREATE TABLE "wikipedia_T_title__revisions3WsaB42Wia1E_eq_KmoY".data (
142 "_domain" text,
143 title text,
144 rev int,
145 comment text,
146 namespace int,
147 page_deleted int static,
148 page_id int,
149 redirect boolean,
150 restrictions set<text>,
151 tags set<text>,
152 tid timeuuid,
153 timestamp timestamp,
154 user_id int,
155 user_text text,
156 PRIMARY KEY (("_domain", title), rev)
157 ) WITH CLUSTERING ORDER BY (rev DESC)
158 AND bloom_filter_fp_chance = 0.01
159 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
160 AND comment = ''
161 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
162 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
163 AND crc_check_chance = 1.0
164 AND dclocal_read_repair_chance = 0.1
165 AND default_time_to_live = 0
166 AND gc_grace_seconds = 86400
167 AND max_index_interval = 2048
168 AND memtable_flush_period_in_ms = 0
169 AND min_index_interval = 128
170 AND read_repair_chance = 0.0
171 AND speculative_retry = '99PERCENTILE';
172
173# -----
174
175others_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe:
176 statement: |
177 CREATE KEYSPACE IF NOT EXISTS "others_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
178others_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe_meta:
179 statement: |
180 CREATE TABLE IF NOT EXISTS "others_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe".meta (
181 key text PRIMARY KEY,
182 value text
183 ) WITH bloom_filter_fp_chance = 0.1
184 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
185 AND comment = ''
186 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
187 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
188 AND crc_check_chance = 1.0
189 AND dclocal_read_repair_chance = 0.1
190 AND default_time_to_live = 0
191 AND gc_grace_seconds = 864000
192 AND max_index_interval = 2048
193 AND memtable_flush_period_in_ms = 0
194 AND min_index_interval = 128
195 AND read_repair_chance = 0.0
196 AND speculative_retry = '99PERCENTILE';
197others_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe_data:
198 qKvData statement: |
199 CREATE TABLE "others_T_title__revisions3WsaB42Wia1E_eq_KmoYTHe".data (
200 "_domain" text,
201 title text,
202 rev int,
203 comment text,
204 namespace int,
205 page_deleted int static,
206 page_id int,
207 redirect boolean,
208 restrictions set<text>,
209 tags set<text>,
210 tid timeuuid,
211 timestamp timestamp,
212 user_id int,
213 user_text text,
214 PRIMARY KEY (("_domain", title), rev)
215 ) WITH CLUSTERING ORDER BY (rev DESC)
216 AND bloom_filter_fp_chance = 0.01
217 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
218 AND comment = ''
219 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
220 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
221 AND crc_check_chance = 1.0
222 AND dclocal_read_repair_chance = 0.1
223 AND default_time_to_live = 0
224 AND gc_grace_seconds = 86400
225 AND max_index_interval = 2048
226 AND memtable_flush_period_in_ms = 0
227 AND min_index_interval = 128
228 AND read_repair_chance = 0.0
229 AND speculative_retry = '99PERCENTILE';
230
231# -----

Pull request that modifies the table:

https://github.com/wikimedia/restbase/pull/909

Mentioned in SAL (#wikimedia-operations) [2017-11-15T14:37:56Z] <mobrovac> restbase creating Cassandra 3 revision tables on restbase1009 - T179421

I have created the schemas according to the YAML above, except that I used the LZ4Compressor with a 64 KB chunk length, as per our discussion yesterday. No problems were spotted in the Cassandra logs during or after the creation process.

PR #909 has been merged. The plan is to deploy the switch of both revisions and restrictions tomorrow, 2017-11-16.

Given T180568: Aberrant load on instances involved in recent bootstrap, and out of an abundance of caution, I would avoid deploying anything until we have a better understanding of what is going on there.

> Given T180568: Aberrant load on instances involved in recent bootstrap, and out of an abundance of caution, I would avoid deploying anything until we have a better understanding of what is going on there.

Agreed, postponing until then.

Mentioned in SAL (#wikimedia-operations) [2017-12-06T10:39:41Z] <mobrovac@tin> Started deploy [restbase/deploy@b1d7c82]: Use Cass3 for revisions, deprecate trending-edits, fix CX end point - T179421 T180384 T173801

Mentioned in SAL (#wikimedia-operations) [2017-12-06T10:45:43Z] <mobrovac@tin> Finished deploy [restbase/deploy@b1d7c82]: Use Cass3 for revisions, deprecate trending-edits, fix CX end point - T179421 T180384 T173801 (duration: 06m 02s)

mobrovac closed this task as Resolved.Dec 6 2017, 12:57 PM
mobrovac claimed this task.
mobrovac edited projects, added Services (done); removed Services (doing).

The switch has been completed and no problems have been observed. Resolving.