Page MenuHomePhabricator

Migrate mathoid storage from legacy to new strategy
Closed, ResolvedPublic

Description

Migrate the mathoid use-case from legacy storage (Cassandra 2.x) to the new strategy and cluster (Cassandra 3.x).

Normally, we would need to copy over all of the data in order to minimise user impact. However, we are preparing a new release of Mathoid (cf. T172767) which will need a complete round of re-renders, so we can couple these two transitions. The following steps are needed:

  1. extract all of the formulae from Cassandra 2
  2. deploy Mathoid to codfw
  3. set up a switched version of RESTBase in codfw (but without purging)
  4. execute check and render requests for each formula
  5. (optional) copy over the indirection table
  6. fully deploy Mathoid and RESTBase
  7. issue purge requests for all formulae covered in the process

Details

Related Gerrit Patches:
mediawiki/services/restbase/deploy : masterConfig: Remove references for Cassandra 2

Event Timeline

Eevans created this task.Oct 31 2017, 4:59 PM
Eevans added a comment.EditedNov 6 2017, 5:28 PM

The following script outputs a YAML file containing CQL literals for the queries necessary to create these tables.

"use strict";


// Dependencies of the schema generator.
const crypto = require('crypto');
const colors = require('colors/safe');
const P = require('bluebird');
const yargs = require('yargs');

// Shorthand colour helpers taken from colors/safe.
const green = colors.green;
const red = colors.red;
const yellow = colors.yellow;
12
13
/**
 * Derive a short, identifier-safe digest of `key`.
 *
 * The SHA-1 digest is base64-encoded, `+` and `/` are mapped to `_`
 * (those characters are illegal in Cassandra identifiers), and the trailing
 * `=` padding is removed because it carries no entropy.
 *
 * @param {string|Buffer} key - value to hash
 * @returns {string} 27 characters drawn from [A-Za-z0-9_]
 */
function hashKey(key) {
    // crypto.createHash() is the public factory; constructing crypto.Hash
    // directly is deprecated.
    return crypto.createHash('sha1')
        .update(key)
        .digest('base64')
        // Replace [+/] from base64 with _ (illegal in Cassandra)
        .replace(/[+/]/g, '_')
        // Remove base64 padding, has no entropy
        .replace(/=+$/, '');
}
24
25
/**
 * Return the longest leading run of `key` made only of [a-zA-Z0-9_].
 *
 * @param {string} key - candidate identifier
 * @returns {string} the matching prefix, or '' when the first character is
 *   outside that set (or `key` is empty)
 */
function getValidPrefix(key) {
    const prefixMatch = /^[a-zA-Z0-9_]+/.exec(key);
    return prefixMatch ? prefixMatch[0] : '';
}
34
35
/**
 * Turn `key` into an identifier of at most `length` characters drawn from
 * [a-zA-Z0-9_].
 *
 * Existing underscores are doubled and dots become single underscores. If
 * the result still contains other characters, or is longer than `length`,
 * part of it is replaced by a hash of the original key.
 *
 * @param {string} key - original name
 * @param {number} length - maximum length of the returned identifier
 * @returns {string} the escaped (and, if necessary, hashed) identifier
 */
function makeValidKey(key, length) {
    // Double the underscores first so that the '.' -> '_' mapping below
    // cannot collide with an underscore from the input.
    const escaped = key.replace(/_/g, '__')
        .replace(/\./g, '_');
    // Up to two thirds of the budget is kept as a readable prefix.
    const prefixLength = Math.floor(length * 2 / 3);

    if (!/^[a-zA-Z0-9_]+$/.test(escaped)) {
        // Keep the valid leading part and fill the rest of the budget with
        // the hash of the original key.
        const validPrefix = getValidPrefix(escaped).substring(0, prefixLength);
        return validPrefix + hashKey(key).substring(0, length - validPrefix.length);
    }
    if (escaped.length > length) {
        // Too long: readable two thirds plus a one-third hash suffix.
        return escaped.substring(0, prefixLength)
            + hashKey(key).substring(0, Math.floor(length / 3));
    }
    return escaped;
}
50
51
/**
 * Build the keyspace name for a storage group / table pair, in the form
 * `<prefix>_T_<table>`.
 *
 * `name` is lower-cased and its dot-separated parts are reversed before
 * being sanitised; `table` is sanitised into whatever room is left.
 *
 * @param {string} name - storage group or domain name
 * @param {string} table - logical table name
 * @returns {string} the unquoted keyspace name
 */
function keyspaceName(name, table) {
    // Overall length budget; presumably Cassandra's keyspace-name limit.
    const maxLength = 48;
    // Separator between the domain part and the table part.
    const separator = '_T_';

    const reversedName = name.toLowerCase().split('.').reverse().join('.');
    // The prefix always gets at least 26 characters, however long the table
    // name is.
    const prefix = makeValidKey(reversedName,
        Math.max(26, maxLength - table.length - separator.length));
    const tablePart = makeValidKey(table, maxLength - prefix.length - separator.length);
    return `${prefix}${separator}${tablePart}`;
}
59
60
/**
 * Quote `name` as a CQL identifier: wrap it in double quotes and double any
 * embedded double quote.
 *
 * @param {string} name - unquoted identifier
 * @returns {string} the quoted identifier
 */
function cassID(name) {
    // A name made only of [a-zA-Z0-9_] has no quotes to escape, so one code
    // path covers both the plain and the escaped case.
    return `"${name.replace(/"/g, '""')}"`;
}
68
// Logical table name -> CQL type substituted for `<type>` in the data-table
// template.
// NOTE(review): feedback further down in this task asks for `text` values in
// the input, check, mml and svg tables; the types below are the ones that
// were originally posted.
const tables = {
    'mathoid_ng.input': 'blob',
    'mathoid_ng.hash_table': 'text',
    'mathoid_ng.check': 'blob',
    'mathoid_ng.mml': 'blob',
    'mathoid_ng.svg': 'blob',
    'mathoid_ng.png': 'blob'
};
77
// Storage groups to generate keyspaces for; the commented-out groups are
// skipped.
// NOTE(review): a later comment in this task says the Mathoid keyspaces
// belong in the `globaldomain` storage group rather than `others`.
const storages = [
    // 'enwiki',
    // 'commons',
    // 'wikipedia',
    'others'
];
84
// CQL template that creates the keyspace (if missing) with three replicas in
// each of the codfw and eqiad datacenters. `<keyspace>` is a placeholder
// filled in when the statement is printed.
const qKs = `CREATE KEYSPACE IF NOT EXISTS <keyspace> WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;`;
86
// CQL template for the per-keyspace `meta` table: a plain text key/value
// table using leveled compaction and LZ4 compression. `<keyspace>` is a
// placeholder filled in when the statement is printed.
const qMeta = `CREATE TABLE IF NOT EXISTS <keyspace>.meta (
 key text PRIMARY KEY,
 value text
) WITH bloom_filter_fp_chance = 0.1
 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
 AND comment = ''
 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
 AND crc_check_chance = 1.0
 AND dclocal_read_repair_chance = 0.1
 AND default_time_to_live = 0
 AND gc_grace_seconds = 864000
 AND max_index_interval = 2048
 AND memtable_flush_period_in_ms = 0
 AND min_index_interval = 128
 AND read_repair_chance = 0.0
 AND speculative_retry = '99PERCENTILE';`;
104
// CQL template for a revisioned `data` table: rows are clustered by
// (rev, tid) in descending order and compressed with Deflate in 256 KB
// chunks. `<keyspace>` and `<type>` are placeholders.
// NOTE(review): the generation loop in this script as posted does not
// reference this template.
const qData = `CREATE TABLE IF NOT EXISTS <keyspace>.data (
 "_domain" text,
 key text,
 rev int,
 tid timeuuid,
 "content-location" text,
 "content-type" text,
 tags set<text>,
 value <type>,
 PRIMARY KEY (("_domain", key), rev, tid)
) WITH CLUSTERING ORDER BY (rev DESC, tid DESC)
 AND bloom_filter_fp_chance = 0.01
 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
 AND comment = ''
 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
 AND compression = {'chunk_length_in_kb': '256', 'class': 'org.apache.cassandra.io.compress.DeflateCompressor'}
 AND crc_check_chance = 1.0
 AND dclocal_read_repair_chance = 0.1
 AND default_time_to_live = 0
 AND gc_grace_seconds = 86400
 AND max_index_interval = 2048
 AND memtable_flush_period_in_ms = 0
 AND min_index_interval = 128
 AND read_repair_chance = 0.0
 AND speculative_retry = '99PERCENTILE';`;
130
// CQL template for a `data` table that maps a timestamp to a revision per
// ("_domain", key), newest first, with a default TTL of 864000 seconds.
// `<keyspace>` is a placeholder.
// NOTE(review): the generation loop in this script as posted does not
// reference this template.
const qDataRevision = `CREATE TABLE IF NOT EXISTS <keyspace>.data (
 "_domain" text,
 key text,
 ts timestamp,
 rev int,
 PRIMARY KEY (("_domain", key), ts)
) WITH CLUSTERING ORDER BY (ts DESC)
 AND bloom_filter_fp_chance = 0.1
 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
 AND comment = ''
 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
 AND crc_check_chance = 1.0
 AND dclocal_read_repair_chance = 0.1
 AND default_time_to_live = 864000
 AND gc_grace_seconds = 864000
 AND max_index_interval = 2048
 AND memtable_flush_period_in_ms = 0
 AND min_index_interval = 128
 AND read_repair_chance = 0.0
 AND speculative_retry = '99PERCENTILE';`;
152
// CQL template for a `data` table that records a tid per (rev, ts) for each
// ("_domain", key), newest first, with a default TTL of 864000 seconds.
// `<keyspace>` is a placeholder.
// NOTE(review): the generation loop in this script as posted does not
// reference this template.
const qDataRender = `CREATE TABLE IF NOT EXISTS <keyspace>.data (
 "_domain" text,
 key text,
 rev int,
 ts timestamp,
 tid timeuuid,
 PRIMARY KEY (("_domain", key), rev, ts)
) WITH CLUSTERING ORDER BY (rev DESC, ts DESC)
 AND bloom_filter_fp_chance = 0.1
 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
 AND comment = ''
 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
 AND crc_check_chance = 1.0
 AND dclocal_read_repair_chance = 0.1
 AND default_time_to_live = 864000
 AND gc_grace_seconds = 864000
 AND max_index_interval = 2048
 AND memtable_flush_period_in_ms = 0
 AND min_index_interval = 128
 AND read_repair_chance = 0.0
 AND speculative_retry = '99PERCENTILE';`;
175
// CQL template for the key-value `data` table: one row per ("_domain", key)
// holding a tid, headers and a value. `<keyspace>` and `<type>` are
// placeholders; the generation loop passes the per-table type from `tables`.
// NOTE(review): feedback further down in this task asks for the `headers`
// column to be `text`; the column type below is the one originally posted.
const qKvData = `CREATE TABLE IF NOT EXISTS <keyspace>.data (
 "_domain" text,
 key text,
 tid timeuuid,
 headers blob,
 value <type>,
 PRIMARY KEY (("_domain", key))
) WITH bloom_filter_fp_chance = 0.01
 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
 AND comment = ''
 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
 AND crc_check_chance = 1.0
 AND dclocal_read_repair_chance = 0.1
 AND default_time_to_live = 0
 AND gc_grace_seconds = 86400
 AND max_index_interval = 2048
 AND memtable_flush_period_in_ms = 0
 AND min_index_interval = 128
 AND read_repair_chance = 0.0
 AND speculative_retry = '99PERCENTILE';`;
197
198
// Parse the command line; `-h` / `--help` is the only option declared here.
const argv = yargs.usage('Usage: $0 [options]')
    .options('h', { alias: 'help' })
    .argv;

// Show the usage text and exit successfully before any YAML is generated.
if (argv.help) {
    yargs.showHelp();
    process.exit(0);
}
207
208
/**
 * Print one YAML entry of the form
 *
 *     <entryName>:
 *       statement: |
 *         <CQL...>
 *
 * @param {string} entryName - YAML key for the entry
 * @param {string} query - CQL template containing `<keyspace>` and,
 *   optionally, `<type>` placeholders
 * @param {string} keyspace - quoted keyspace identifier
 * @param {string} [type] - CQL type substituted for `<type>` when given
 */
const emitStatement = (entryName, query, keyspace, type) => {
    // split/join substitutes the text literally, without the `$` patterns
    // that String.prototype.replace interprets in its replacement argument.
    let statement = query.split('<keyspace>').join(keyspace);
    if (type) {
        statement = statement.split('<type>').join(type);
    }
    console.log(`${entryName}:`);
    console.log('  statement: |');
    // Block-scalar content must be indented deeper than its `statement` key,
    // otherwise the output is not valid YAML.
    for (const line of statement.split('\n')) {
        console.log(`    ${line}`);
    }
};

// For every storage group and table, emit the keyspace, meta-table and
// data-table statements, followed by a separator comment. Everything here is
// synchronous, so plain loops are enough.
for (const storage of storages) {
    for (const table of Object.keys(tables)) {
        const keyspace = cassID(keyspaceName(storage, table));
        // The YAML keys use the keyspace name without its CQL quoting.
        const entryName = keyspace.replace(/"/g, '');

        emitStatement(entryName, qKs, keyspace);
        emitStatement(`${entryName}_meta`, qMeta, keyspace);
        emitStatement(`${entryName}_data`, qKvData, keyspace, tables[table]);
        console.log('\n# -----\n');
    }
}

The output looks like the following.

1others_T_mathoid__ng_input:
2 statement: |
3 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_input" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
4others_T_mathoid__ng_input_meta:
5 statement: |
6 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_input".meta (
7 key text PRIMARY KEY,
8 value text
9 ) WITH bloom_filter_fp_chance = 0.1
10 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
11 AND comment = ''
12 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
13 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
14 AND crc_check_chance = 1.0
15 AND dclocal_read_repair_chance = 0.1
16 AND default_time_to_live = 0
17 AND gc_grace_seconds = 864000
18 AND max_index_interval = 2048
19 AND memtable_flush_period_in_ms = 0
20 AND min_index_interval = 128
21 AND read_repair_chance = 0.0
22 AND speculative_retry = '99PERCENTILE';
23others_T_mathoid__ng_input_data:
24 statement: |
25 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_input".data (
26 "_domain" text,
27 key text,
28 tid timeuuid,
29 headers blob,
30 value blob,
31 PRIMARY KEY (("_domain", key))
32 ) WITH bloom_filter_fp_chance = 0.01
33 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
34 AND comment = ''
35 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
36 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
37 AND crc_check_chance = 1.0
38 AND dclocal_read_repair_chance = 0.1
39 AND default_time_to_live = 0
40 AND gc_grace_seconds = 86400
41 AND max_index_interval = 2048
42 AND memtable_flush_period_in_ms = 0
43 AND min_index_interval = 128
44 AND read_repair_chance = 0.0
45 AND speculative_retry = '99PERCENTILE';
46
47# -----
48
49others_T_mathoid__ng_hash__table:
50 statement: |
51 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_hash__table" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
52others_T_mathoid__ng_hash__table_meta:
53 statement: |
54 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_hash__table".meta (
55 key text PRIMARY KEY,
56 value text
57 ) WITH bloom_filter_fp_chance = 0.1
58 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
59 AND comment = ''
60 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
61 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
62 AND crc_check_chance = 1.0
63 AND dclocal_read_repair_chance = 0.1
64 AND default_time_to_live = 0
65 AND gc_grace_seconds = 864000
66 AND max_index_interval = 2048
67 AND memtable_flush_period_in_ms = 0
68 AND min_index_interval = 128
69 AND read_repair_chance = 0.0
70 AND speculative_retry = '99PERCENTILE';
71others_T_mathoid__ng_hash__table_data:
72 statement: |
73 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_hash__table".data (
74 "_domain" text,
75 key text,
76 tid timeuuid,
77 headers blob,
78 value text,
79 PRIMARY KEY (("_domain", key))
80 ) WITH bloom_filter_fp_chance = 0.01
81 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
82 AND comment = ''
83 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
84 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
85 AND crc_check_chance = 1.0
86 AND dclocal_read_repair_chance = 0.1
87 AND default_time_to_live = 0
88 AND gc_grace_seconds = 86400
89 AND max_index_interval = 2048
90 AND memtable_flush_period_in_ms = 0
91 AND min_index_interval = 128
92 AND read_repair_chance = 0.0
93 AND speculative_retry = '99PERCENTILE';
94
95# -----
96
97others_T_mathoid__ng_check:
98 statement: |
99 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_check" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
100others_T_mathoid__ng_check_meta:
101 statement: |
102 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_check".meta (
103 key text PRIMARY KEY,
104 value text
105 ) WITH bloom_filter_fp_chance = 0.1
106 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
107 AND comment = ''
108 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
109 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
110 AND crc_check_chance = 1.0
111 AND dclocal_read_repair_chance = 0.1
112 AND default_time_to_live = 0
113 AND gc_grace_seconds = 864000
114 AND max_index_interval = 2048
115 AND memtable_flush_period_in_ms = 0
116 AND min_index_interval = 128
117 AND read_repair_chance = 0.0
118 AND speculative_retry = '99PERCENTILE';
119others_T_mathoid__ng_check_data:
120 statement: |
121 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_check".data (
122 "_domain" text,
123 key text,
124 tid timeuuid,
125 headers blob,
126 value blob,
127 PRIMARY KEY (("_domain", key))
128 ) WITH bloom_filter_fp_chance = 0.01
129 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
130 AND comment = ''
131 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
132 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
133 AND crc_check_chance = 1.0
134 AND dclocal_read_repair_chance = 0.1
135 AND default_time_to_live = 0
136 AND gc_grace_seconds = 86400
137 AND max_index_interval = 2048
138 AND memtable_flush_period_in_ms = 0
139 AND min_index_interval = 128
140 AND read_repair_chance = 0.0
141 AND speculative_retry = '99PERCENTILE';
142
143# -----
144
145others_T_mathoid__ng_mml:
146 statement: |
147 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_mml" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
148others_T_mathoid__ng_mml_meta:
149 statement: |
150 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_mml".meta (
151 key text PRIMARY KEY,
152 value text
153 ) WITH bloom_filter_fp_chance = 0.1
154 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
155 AND comment = ''
156 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
157 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
158 AND crc_check_chance = 1.0
159 AND dclocal_read_repair_chance = 0.1
160 AND default_time_to_live = 0
161 AND gc_grace_seconds = 864000
162 AND max_index_interval = 2048
163 AND memtable_flush_period_in_ms = 0
164 AND min_index_interval = 128
165 AND read_repair_chance = 0.0
166 AND speculative_retry = '99PERCENTILE';
167others_T_mathoid__ng_mml_data:
168 statement: |
169 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_mml".data (
170 "_domain" text,
171 key text,
172 tid timeuuid,
173 headers blob,
174 value blob,
175 PRIMARY KEY (("_domain", key))
176 ) WITH bloom_filter_fp_chance = 0.01
177 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
178 AND comment = ''
179 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
180 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
181 AND crc_check_chance = 1.0
182 AND dclocal_read_repair_chance = 0.1
183 AND default_time_to_live = 0
184 AND gc_grace_seconds = 86400
185 AND max_index_interval = 2048
186 AND memtable_flush_period_in_ms = 0
187 AND min_index_interval = 128
188 AND read_repair_chance = 0.0
189 AND speculative_retry = '99PERCENTILE';
190
191# -----
192
193others_T_mathoid__ng_svg:
194 statement: |
195 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_svg" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
196others_T_mathoid__ng_svg_meta:
197 statement: |
198 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_svg".meta (
199 key text PRIMARY KEY,
200 value text
201 ) WITH bloom_filter_fp_chance = 0.1
202 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
203 AND comment = ''
204 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
205 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
206 AND crc_check_chance = 1.0
207 AND dclocal_read_repair_chance = 0.1
208 AND default_time_to_live = 0
209 AND gc_grace_seconds = 864000
210 AND max_index_interval = 2048
211 AND memtable_flush_period_in_ms = 0
212 AND min_index_interval = 128
213 AND read_repair_chance = 0.0
214 AND speculative_retry = '99PERCENTILE';
215others_T_mathoid__ng_svg_data:
216 statement: |
217 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_svg".data (
218 "_domain" text,
219 key text,
220 tid timeuuid,
221 headers blob,
222 value blob,
223 PRIMARY KEY (("_domain", key))
224 ) WITH bloom_filter_fp_chance = 0.01
225 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
226 AND comment = ''
227 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
228 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
229 AND crc_check_chance = 1.0
230 AND dclocal_read_repair_chance = 0.1
231 AND default_time_to_live = 0
232 AND gc_grace_seconds = 86400
233 AND max_index_interval = 2048
234 AND memtable_flush_period_in_ms = 0
235 AND min_index_interval = 128
236 AND read_repair_chance = 0.0
237 AND speculative_retry = '99PERCENTILE';
238
239# -----
240
241others_T_mathoid__ng_png:
242 statement: |
243 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_png" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
244others_T_mathoid__ng_png_meta:
245 statement: |
246 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_png".meta (
247 key text PRIMARY KEY,
248 value text
249 ) WITH bloom_filter_fp_chance = 0.1
250 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
251 AND comment = ''
252 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
253 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
254 AND crc_check_chance = 1.0
255 AND dclocal_read_repair_chance = 0.1
256 AND default_time_to_live = 0
257 AND gc_grace_seconds = 864000
258 AND max_index_interval = 2048
259 AND memtable_flush_period_in_ms = 0
260 AND min_index_interval = 128
261 AND read_repair_chance = 0.0
262 AND speculative_retry = '99PERCENTILE';
263others_T_mathoid__ng_png_data:
264 statement: |
265 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_png".data (
266 "_domain" text,
267 key text,
268 tid timeuuid,
269 headers blob,
270 value blob,
271 PRIMARY KEY (("_domain", key))
272 ) WITH bloom_filter_fp_chance = 0.01
273 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
274 AND comment = ''
275 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
276 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
277 AND crc_check_chance = 1.0
278 AND dclocal_read_repair_chance = 0.1
279 AND default_time_to_live = 0
280 AND gc_grace_seconds = 86400
281 AND max_index_interval = 2048
282 AND memtable_flush_period_in_ms = 0
283 AND min_index_interval = 128
284 AND read_repair_chance = 0.0
285 AND speculative_retry = '99PERCENTILE';
286
287# -----
288

This output can then be applied using mkschema.

More eyes before applying these changes would be much appreciated!

Things to change:

  • all headers must be of type text
  • value in others_T_mathoid__ng_input must be of type text
  • value in others_T_mathoid__ng_check must be of type text
  • value in others_T_mathoid__ng_mml must be of type text
  • value in others_T_mathoid__ng_svg must be of type text

The rest LGTM.

Eevans added a comment.Nov 8 2017, 7:28 PM

Things to change:

  • all headers must be of type text
  • value in others_T_mathoid__ng_input must be of type text
  • value in others_T_mathoid__ng_check must be of type text
  • value in others_T_mathoid__ng_mml must be of type text
  • value in others_T_mathoid__ng_svg must be of type text

The rest LGTM.

Revised version here (and I will begin applying it RSN):

1others_T_mathoid__ng_input:
2 statement: |
3 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_input" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
4others_T_mathoid__ng_input_meta:
5 statement: |
6 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_input".meta (
7 key text PRIMARY KEY,
8 value text
9 ) WITH bloom_filter_fp_chance = 0.1
10 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
11 AND comment = ''
12 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
13 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
14 AND crc_check_chance = 1.0
15 AND dclocal_read_repair_chance = 0.1
16 AND default_time_to_live = 0
17 AND gc_grace_seconds = 864000
18 AND max_index_interval = 2048
19 AND memtable_flush_period_in_ms = 0
20 AND min_index_interval = 128
21 AND read_repair_chance = 0.0
22 AND speculative_retry = '99PERCENTILE';
23others_T_mathoid__ng_input_data:
24 statement: |
25 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_input".data (
26 "_domain" text,
27 key text,
28 tid timeuuid,
29 headers text,
30 value text,
31 PRIMARY KEY (("_domain", key))
32 ) WITH bloom_filter_fp_chance = 0.01
33 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
34 AND comment = ''
35 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
36 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
37 AND crc_check_chance = 1.0
38 AND dclocal_read_repair_chance = 0.1
39 AND default_time_to_live = 0
40 AND gc_grace_seconds = 86400
41 AND max_index_interval = 2048
42 AND memtable_flush_period_in_ms = 0
43 AND min_index_interval = 128
44 AND read_repair_chance = 0.0
45 AND speculative_retry = '99PERCENTILE';
46
47# -----
48
49others_T_mathoid__ng_hash__table:
50 statement: |
51 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_hash__table" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
52others_T_mathoid__ng_hash__table_meta:
53 statement: |
54 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_hash__table".meta (
55 key text PRIMARY KEY,
56 value text
57 ) WITH bloom_filter_fp_chance = 0.1
58 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
59 AND comment = ''
60 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
61 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
62 AND crc_check_chance = 1.0
63 AND dclocal_read_repair_chance = 0.1
64 AND default_time_to_live = 0
65 AND gc_grace_seconds = 864000
66 AND max_index_interval = 2048
67 AND memtable_flush_period_in_ms = 0
68 AND min_index_interval = 128
69 AND read_repair_chance = 0.0
70 AND speculative_retry = '99PERCENTILE';
71others_T_mathoid__ng_hash__table_data:
72 statement: |
73 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_hash__table".data (
74 "_domain" text,
75 key text,
76 tid timeuuid,
77 headers text,
78 value text,
79 PRIMARY KEY (("_domain", key))
80 ) WITH bloom_filter_fp_chance = 0.01
81 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
82 AND comment = ''
83 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
84 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
85 AND crc_check_chance = 1.0
86 AND dclocal_read_repair_chance = 0.1
87 AND default_time_to_live = 0
88 AND gc_grace_seconds = 86400
89 AND max_index_interval = 2048
90 AND memtable_flush_period_in_ms = 0
91 AND min_index_interval = 128
92 AND read_repair_chance = 0.0
93 AND speculative_retry = '99PERCENTILE';
94
95# -----
96
97others_T_mathoid__ng_check:
98 statement: |
99 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_check" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
100others_T_mathoid__ng_check_meta:
101 statement: |
102 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_check".meta (
103 key text PRIMARY KEY,
104 value text
105 ) WITH bloom_filter_fp_chance = 0.1
106 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
107 AND comment = ''
108 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
109 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
110 AND crc_check_chance = 1.0
111 AND dclocal_read_repair_chance = 0.1
112 AND default_time_to_live = 0
113 AND gc_grace_seconds = 864000
114 AND max_index_interval = 2048
115 AND memtable_flush_period_in_ms = 0
116 AND min_index_interval = 128
117 AND read_repair_chance = 0.0
118 AND speculative_retry = '99PERCENTILE';
119others_T_mathoid__ng_check_data:
120 statement: |
121 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_check".data (
122 "_domain" text,
123 key text,
124 tid timeuuid,
125 headers text,
126 value text,
127 PRIMARY KEY (("_domain", key))
128 ) WITH bloom_filter_fp_chance = 0.01
129 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
130 AND comment = ''
131 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
132 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
133 AND crc_check_chance = 1.0
134 AND dclocal_read_repair_chance = 0.1
135 AND default_time_to_live = 0
136 AND gc_grace_seconds = 86400
137 AND max_index_interval = 2048
138 AND memtable_flush_period_in_ms = 0
139 AND min_index_interval = 128
140 AND read_repair_chance = 0.0
141 AND speculative_retry = '99PERCENTILE';
142
143# -----
144
145others_T_mathoid__ng_mml:
146 statement: |
147 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_mml" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
148others_T_mathoid__ng_mml_meta:
149 statement: |
150 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_mml".meta (
151 key text PRIMARY KEY,
152 value text
153 ) WITH bloom_filter_fp_chance = 0.1
154 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
155 AND comment = ''
156 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
157 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
158 AND crc_check_chance = 1.0
159 AND dclocal_read_repair_chance = 0.1
160 AND default_time_to_live = 0
161 AND gc_grace_seconds = 864000
162 AND max_index_interval = 2048
163 AND memtable_flush_period_in_ms = 0
164 AND min_index_interval = 128
165 AND read_repair_chance = 0.0
166 AND speculative_retry = '99PERCENTILE';
167others_T_mathoid__ng_mml_data:
168 statement: |
169 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_mml".data (
170 "_domain" text,
171 key text,
172 tid timeuuid,
173 headers text,
174 value text,
175 PRIMARY KEY (("_domain", key))
176 ) WITH bloom_filter_fp_chance = 0.01
177 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
178 AND comment = ''
179 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
180 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
181 AND crc_check_chance = 1.0
182 AND dclocal_read_repair_chance = 0.1
183 AND default_time_to_live = 0
184 AND gc_grace_seconds = 86400
185 AND max_index_interval = 2048
186 AND memtable_flush_period_in_ms = 0
187 AND min_index_interval = 128
188 AND read_repair_chance = 0.0
189 AND speculative_retry = '99PERCENTILE';
190
191# -----
192
193others_T_mathoid__ng_svg:
194 statement: |
195 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_svg" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
196others_T_mathoid__ng_svg_meta:
197 statement: |
198 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_svg".meta (
199 key text PRIMARY KEY,
200 value text
201 ) WITH bloom_filter_fp_chance = 0.1
202 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
203 AND comment = ''
204 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
205 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
206 AND crc_check_chance = 1.0
207 AND dclocal_read_repair_chance = 0.1
208 AND default_time_to_live = 0
209 AND gc_grace_seconds = 864000
210 AND max_index_interval = 2048
211 AND memtable_flush_period_in_ms = 0
212 AND min_index_interval = 128
213 AND read_repair_chance = 0.0
214 AND speculative_retry = '99PERCENTILE';
215others_T_mathoid__ng_svg_data:
216 statement: |
217 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_svg".data (
218 "_domain" text,
219 key text,
220 tid timeuuid,
221 headers text,
222 value text,
223 PRIMARY KEY (("_domain", key))
224 ) WITH bloom_filter_fp_chance = 0.01
225 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
226 AND comment = ''
227 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
228 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
229 AND crc_check_chance = 1.0
230 AND dclocal_read_repair_chance = 0.1
231 AND default_time_to_live = 0
232 AND gc_grace_seconds = 86400
233 AND max_index_interval = 2048
234 AND memtable_flush_period_in_ms = 0
235 AND min_index_interval = 128
236 AND read_repair_chance = 0.0
237 AND speculative_retry = '99PERCENTILE';
238
239# -----
240
241others_T_mathoid__ng_png:
242 statement: |
243 CREATE KEYSPACE IF NOT EXISTS "others_T_mathoid__ng_png" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
244others_T_mathoid__ng_png_meta:
245 statement: |
246 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_png".meta (
247 key text PRIMARY KEY,
248 value text
249 ) WITH bloom_filter_fp_chance = 0.1
250 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
251 AND comment = ''
252 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
253 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
254 AND crc_check_chance = 1.0
255 AND dclocal_read_repair_chance = 0.1
256 AND default_time_to_live = 0
257 AND gc_grace_seconds = 864000
258 AND max_index_interval = 2048
259 AND memtable_flush_period_in_ms = 0
260 AND min_index_interval = 128
261 AND read_repair_chance = 0.0
262 AND speculative_retry = '99PERCENTILE';
263others_T_mathoid__ng_png_data:
264 statement: |
265 CREATE TABLE IF NOT EXISTS "others_T_mathoid__ng_png".data (
266 "_domain" text,
267 key text,
268 tid timeuuid,
269 headers text,
270 value blob,
271 PRIMARY KEY (("_domain", key))
272 ) WITH bloom_filter_fp_chance = 0.01
273 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
274 AND comment = ''
275 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
276 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
277 AND crc_check_chance = 1.0
278 AND dclocal_read_repair_chance = 0.1
279 AND default_time_to_live = 0
280 AND gc_grace_seconds = 86400
281 AND max_index_interval = 2048
282 AND memtable_flush_period_in_ms = 0
283 AND min_index_interval = 128
284 AND read_repair_chance = 0.0
285 AND speculative_retry = '99PERCENTILE';
286
287# -----
288

Mentioned in SAL (#wikimedia-operations) [2017-11-08T19:55:41Z] <urandom> Creating mathoid schema (T179419)

Eevans added a comment. Nov 8 2017, 8:18 PM

[ ... ]
The rest LGTM.

Revised version here (and I will begin applying it RSN):

Done.

Uh, I completely missed the fact that all of the Mathoid keyspaces need to go into the globaldomain storage group, not others... I dropped the others_T_mathoid__ng_* keyspaces; we will have to recreate them for the correct group.

Eevans added a comment. Nov 9 2017, 2:55 PM

Uh, I completely missed the fact that all of the Mathoid keyspaces need to go into the globaldomain storage group, not others... I dropped the others_T_mathoid__ng_* keyspaces; we will have to recreate them for the correct group.

Updated version:

1globaldomain_T_mathoid__ng_input:
2 statement: |
3 CREATE KEYSPACE IF NOT EXISTS "globaldomain_T_mathoid__ng_input" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
4globaldomain_T_mathoid__ng_input_meta:
5 statement: |
6 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_input".meta (
7 key text PRIMARY KEY,
8 value text
9 ) WITH bloom_filter_fp_chance = 0.1
10 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
11 AND comment = ''
12 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
13 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
14 AND crc_check_chance = 1.0
15 AND dclocal_read_repair_chance = 0.1
16 AND default_time_to_live = 0
17 AND gc_grace_seconds = 864000
18 AND max_index_interval = 2048
19 AND memtable_flush_period_in_ms = 0
20 AND min_index_interval = 128
21 AND read_repair_chance = 0.0
22 AND speculative_retry = '99PERCENTILE';
23globaldomain_T_mathoid__ng_input_data:
24 statement: |
25 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_input".data (
26 "_domain" text,
27 key text,
28 tid timeuuid,
29 headers text,
30 value text,
31 PRIMARY KEY (("_domain", key))
32 ) WITH bloom_filter_fp_chance = 0.01
33 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
34 AND comment = ''
35 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
36 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
37 AND crc_check_chance = 1.0
38 AND dclocal_read_repair_chance = 0.1
39 AND default_time_to_live = 0
40 AND gc_grace_seconds = 86400
41 AND max_index_interval = 2048
42 AND memtable_flush_period_in_ms = 0
43 AND min_index_interval = 128
44 AND read_repair_chance = 0.0
45 AND speculative_retry = '99PERCENTILE';
46
47# -----
48
49globaldomain_T_mathoid__ng_hash__table:
50 statement: |
51 CREATE KEYSPACE IF NOT EXISTS "globaldomain_T_mathoid__ng_hash__table" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
52globaldomain_T_mathoid__ng_hash__table_meta:
53 statement: |
54 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_hash__table".meta (
55 key text PRIMARY KEY,
56 value text
57 ) WITH bloom_filter_fp_chance = 0.1
58 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
59 AND comment = ''
60 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
61 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
62 AND crc_check_chance = 1.0
63 AND dclocal_read_repair_chance = 0.1
64 AND default_time_to_live = 0
65 AND gc_grace_seconds = 864000
66 AND max_index_interval = 2048
67 AND memtable_flush_period_in_ms = 0
68 AND min_index_interval = 128
69 AND read_repair_chance = 0.0
70 AND speculative_retry = '99PERCENTILE';
71globaldomain_T_mathoid__ng_hash__table_data:
72 statement: |
73 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_hash__table".data (
74 "_domain" text,
75 key text,
76 tid timeuuid,
77 headers text,
78 value text,
79 PRIMARY KEY (("_domain", key))
80 ) WITH bloom_filter_fp_chance = 0.01
81 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
82 AND comment = ''
83 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
84 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
85 AND crc_check_chance = 1.0
86 AND dclocal_read_repair_chance = 0.1
87 AND default_time_to_live = 0
88 AND gc_grace_seconds = 86400
89 AND max_index_interval = 2048
90 AND memtable_flush_period_in_ms = 0
91 AND min_index_interval = 128
92 AND read_repair_chance = 0.0
93 AND speculative_retry = '99PERCENTILE';
94
95# -----
96
97globaldomain_T_mathoid__ng_check:
98 statement: |
99 CREATE KEYSPACE IF NOT EXISTS "globaldomain_T_mathoid__ng_check" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
100globaldomain_T_mathoid__ng_check_meta:
101 statement: |
102 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_check".meta (
103 key text PRIMARY KEY,
104 value text
105 ) WITH bloom_filter_fp_chance = 0.1
106 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
107 AND comment = ''
108 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
109 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
110 AND crc_check_chance = 1.0
111 AND dclocal_read_repair_chance = 0.1
112 AND default_time_to_live = 0
113 AND gc_grace_seconds = 864000
114 AND max_index_interval = 2048
115 AND memtable_flush_period_in_ms = 0
116 AND min_index_interval = 128
117 AND read_repair_chance = 0.0
118 AND speculative_retry = '99PERCENTILE';
119globaldomain_T_mathoid__ng_check_data:
120 statement: |
121 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_check".data (
122 "_domain" text,
123 key text,
124 tid timeuuid,
125 headers text,
126 value text,
127 PRIMARY KEY (("_domain", key))
128 ) WITH bloom_filter_fp_chance = 0.01
129 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
130 AND comment = ''
131 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
132 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
133 AND crc_check_chance = 1.0
134 AND dclocal_read_repair_chance = 0.1
135 AND default_time_to_live = 0
136 AND gc_grace_seconds = 86400
137 AND max_index_interval = 2048
138 AND memtable_flush_period_in_ms = 0
139 AND min_index_interval = 128
140 AND read_repair_chance = 0.0
141 AND speculative_retry = '99PERCENTILE';
142
143# -----
144
145globaldomain_T_mathoid__ng_mml:
146 statement: |
147 CREATE KEYSPACE IF NOT EXISTS "globaldomain_T_mathoid__ng_mml" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
148globaldomain_T_mathoid__ng_mml_meta:
149 statement: |
150 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_mml".meta (
151 key text PRIMARY KEY,
152 value text
153 ) WITH bloom_filter_fp_chance = 0.1
154 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
155 AND comment = ''
156 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
157 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
158 AND crc_check_chance = 1.0
159 AND dclocal_read_repair_chance = 0.1
160 AND default_time_to_live = 0
161 AND gc_grace_seconds = 864000
162 AND max_index_interval = 2048
163 AND memtable_flush_period_in_ms = 0
164 AND min_index_interval = 128
165 AND read_repair_chance = 0.0
166 AND speculative_retry = '99PERCENTILE';
167globaldomain_T_mathoid__ng_mml_data:
168 statement: |
169 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_mml".data (
170 "_domain" text,
171 key text,
172 tid timeuuid,
173 headers text,
174 value text,
175 PRIMARY KEY (("_domain", key))
176 ) WITH bloom_filter_fp_chance = 0.01
177 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
178 AND comment = ''
179 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
180 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
181 AND crc_check_chance = 1.0
182 AND dclocal_read_repair_chance = 0.1
183 AND default_time_to_live = 0
184 AND gc_grace_seconds = 86400
185 AND max_index_interval = 2048
186 AND memtable_flush_period_in_ms = 0
187 AND min_index_interval = 128
188 AND read_repair_chance = 0.0
189 AND speculative_retry = '99PERCENTILE';
190
191# -----
192
193globaldomain_T_mathoid__ng_svg:
194 statement: |
195 CREATE KEYSPACE IF NOT EXISTS "globaldomain_T_mathoid__ng_svg" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
196globaldomain_T_mathoid__ng_svg_meta:
197 statement: |
198 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_svg".meta (
199 key text PRIMARY KEY,
200 value text
201 ) WITH bloom_filter_fp_chance = 0.1
202 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
203 AND comment = ''
204 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
205 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
206 AND crc_check_chance = 1.0
207 AND dclocal_read_repair_chance = 0.1
208 AND default_time_to_live = 0
209 AND gc_grace_seconds = 864000
210 AND max_index_interval = 2048
211 AND memtable_flush_period_in_ms = 0
212 AND min_index_interval = 128
213 AND read_repair_chance = 0.0
214 AND speculative_retry = '99PERCENTILE';
215globaldomain_T_mathoid__ng_svg_data:
216 statement: |
217 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_svg".data (
218 "_domain" text,
219 key text,
220 tid timeuuid,
221 headers text,
222 value text,
223 PRIMARY KEY (("_domain", key))
224 ) WITH bloom_filter_fp_chance = 0.01
225 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
226 AND comment = ''
227 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
228 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
229 AND crc_check_chance = 1.0
230 AND dclocal_read_repair_chance = 0.1
231 AND default_time_to_live = 0
232 AND gc_grace_seconds = 86400
233 AND max_index_interval = 2048
234 AND memtable_flush_period_in_ms = 0
235 AND min_index_interval = 128
236 AND read_repair_chance = 0.0
237 AND speculative_retry = '99PERCENTILE';
238
239# -----
240
241globaldomain_T_mathoid__ng_png:
242 statement: |
243 CREATE KEYSPACE IF NOT EXISTS "globaldomain_T_mathoid__ng_png" WITH replication = {'class': 'NetworkTopologyStrategy', 'codfw': '3', 'eqiad': '3'} AND durable_writes = true;
244globaldomain_T_mathoid__ng_png_meta:
245 statement: |
246 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_png".meta (
247 key text PRIMARY KEY,
248 value text
249 ) WITH bloom_filter_fp_chance = 0.1
250 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
251 AND comment = ''
252 AND compaction = {'class': 'org.apache.cassandra.db.compaction.LeveledCompactionStrategy'}
253 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
254 AND crc_check_chance = 1.0
255 AND dclocal_read_repair_chance = 0.1
256 AND default_time_to_live = 0
257 AND gc_grace_seconds = 864000
258 AND max_index_interval = 2048
259 AND memtable_flush_period_in_ms = 0
260 AND min_index_interval = 128
261 AND read_repair_chance = 0.0
262 AND speculative_retry = '99PERCENTILE';
263globaldomain_T_mathoid__ng_png_data:
264 statement: |
265 CREATE TABLE IF NOT EXISTS "globaldomain_T_mathoid__ng_png".data (
266 "_domain" text,
267 key text,
268 tid timeuuid,
269 headers text,
270 value blob,
271 PRIMARY KEY (("_domain", key))
272 ) WITH bloom_filter_fp_chance = 0.01
273 AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
274 AND comment = ''
275 AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
276 AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
277 AND crc_check_chance = 1.0
278 AND dclocal_read_repair_chance = 0.1
279 AND default_time_to_live = 0
280 AND gc_grace_seconds = 86400
281 AND max_index_interval = 2048
282 AND memtable_flush_period_in_ms = 0
283 AND min_index_interval = 128
284 AND read_repair_chance = 0.0
285 AND speculative_retry = '99PERCENTILE';
286
287# -----
288

Mentioned in SAL (#wikimedia-operations) [2017-11-09T15:12:46Z] <urandom> Creating mathoid schema (T179419)

Mentioned in SAL (#wikimedia-operations) [2017-11-09T16:33:16Z] <urandom> Restarting Cassandra, restbase2005-a.codfw.wmnet (T179419)

Mentioned in SAL (#wikimedia-operations) [2017-11-09T17:14:16Z] <urandom> Restarting Cassandra, restbase2005-b.codfw.wmnet (T179419)

Eevans added a comment. Nov 9 2017, 6:59 PM

The schema has been created.

mobrovac updated the task description. (Show Details)
mobrovac updated the task description. (Show Details)
mobrovac added a subscriber: Physikerwelt.
greg updated the task description. (Show Details) Dec 11 2017, 7:07 PM

Mentioned in SAL (#wikimedia-operations) [2017-12-22T13:37:14Z] <mobrovac> restbase depool restbase2008 for T179419

Mentioned in SAL (#wikimedia-operations) [2017-12-22T14:02:47Z] <mobrovac@tin> Started deploy [mathoid/deploy@7f39b71]: Update Mathoid to v0.7.0 in CODFW only to prefill storage - T179419 T172767

Mentioned in SAL (#wikimedia-operations) [2017-12-22T14:05:44Z] <mobrovac@tin> Finished deploy [mathoid/deploy@7f39b71]: Update Mathoid to v0.7.0 in CODFW only to prefill storage - T179419 T172767 (duration: 02m 57s)

Mentioned in SAL (#wikimedia-operations) [2017-12-22T14:13:35Z] <mobrovac@tin> Started deploy [mathoid/deploy@6c29c09]: Update Mathoid to v0.7.0 in CODFW only to prefill storage, take 2 - T179419 T172767

Mentioned in SAL (#wikimedia-operations) [2017-12-22T14:17:58Z] <mobrovac@tin> Finished deploy [mathoid/deploy@6c29c09]: Update Mathoid to v0.7.0 in CODFW only to prefill storage, take 2 - T179419 T172767 (duration: 04m 23s)

mobrovac updated the task description. (Show Details) Dec 22 2017, 5:10 PM
mobrovac claimed this task. Dec 22 2017, 5:14 PM

The first three steps have been completed. There is a script running under my user in a screen session on restbase2008 that pulls all formulae from Cassandra 2 and issues check and render requests against a local RESTBase instance set up to use Cassandra 3 for storage. The average rendering speed is ~60 seconds for 10k renders, so I expect the script to run for 4 to 5 days.

mobrovac added a comment. Edited Dec 24 2017, 10:32 AM

The script has completed. Over 7 million formulae have been re-rendered with the new Mathoid engine and stored in Cassandra 3. However, there are ~30k formulae for which requests failed, either because they failed the check or couldn't be rendered. I'll investigate a bit further and try to classify them (i.e. see if these are regressions or known issues) before reverting Mathoid in codfw to the previous version (for consistency with eqiad).

Mentioned in SAL (#wikimedia-operations) [2017-12-26T16:31:20Z] <mobrovac@tin> Started deploy [mathoid/deploy@63b2ddc]: Bring back Mathoid in codfw to v0.6.5 in sync with eqiad - T179419 T172767

Mentioned in SAL (#wikimedia-operations) [2017-12-26T16:33:04Z] <mobrovac@tin> Finished deploy [mathoid/deploy@63b2ddc]: Bring back Mathoid in codfw to v0.6.5 in sync with eqiad - T179419 T172767 (duration: 01m 44s)

Of the 31k formulae that failed to render the first time around, 4.5k succeeded on the second attempt. The rest failed because of either T183557 or T183559, both of which will need to be addressed before we can proceed here.

Mathoid in codfw has been reverted to the previous state and is now in sync with eqiad.

mobrovac updated the task description. (Show Details) Dec 26 2017, 4:38 PM

Change 402035 had a related patch set uploaded (by Mobrovac; owner: Mobrovac):
[mediawiki/services/restbase/deploy@master] Config: Remove references for Cassandra 2

https://gerrit.wikimedia.org/r/402035

Change 402035 merged by Mobrovac:
[mediawiki/services/restbase/deploy@master] Config: Remove references for Cassandra 2

https://gerrit.wikimedia.org/r/402035

Mentioned in SAL (#wikimedia-operations) [2018-01-04T12:41:33Z] <mobrovac@tin> Started deploy [restbase/deploy@66b7efe]: Switch Mathoid to Cassandra 3 and drop Cassandra 2 references - T179419

Mentioned in SAL (#wikimedia-operations) [2018-01-04T12:45:38Z] <mobrovac@tin> Finished deploy [restbase/deploy@66b7efe]: Switch Mathoid to Cassandra 3 and drop Cassandra 2 references - T179419 (duration: 04m 05s)

mobrovac closed this task as Resolved. Jan 4 2018, 12:47 PM
mobrovac updated the task description. (Show Details)

Mathoid and RESTBase have been deployed. Let's savour this moment :)