Page MenuHomePhabricator

cassandra vs postgres benchmarker

Authored By
akosiaris
Sep 8 2015, 2:15 PM
Size
6 KB
Referenced Files
None
Subscribers
None

cassandra vs postgres benchmarker

Postgres table
==============
gis=# \d tiles
Table "public.tiles"
Column | Type | Modifiers
--------+--------+-----------
zoom | bigint |
block | bigint |
idx | bigint |
tile | bytea |
Indexes:
"tiles_zoom_block_idx_idx" UNIQUE, btree (zoom, block, idx)
Cassandra table
===============
cqlsh:v2> describe tiles;
CREATE TABLE v2.tiles (
zoom int,
block int,
idx int,
tile blob,
PRIMARY KEY (zoom, block, idx)
) WITH CLUSTERING ORDER BY (block ASC, idx ASC)
AND bloom_filter_fp_chance = 0.01
AND caching = '{"keys":"ALL", "rows_per_partition":"NONE"}'
AND comment = ''
AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'}
AND compression = {'sstable_compression': 'org.apache.cassandra.io.compress.LZ4Compressor'}
AND dclocal_read_repair_chance = 0.1
AND default_time_to_live = 0
AND gc_grace_seconds = 864000
AND max_index_interval = 2048
AND memtable_flush_period_in_ms = 0
AND min_index_interval = 128
AND read_repair_chance = 0.0
AND speculative_retry = '99.0PERCENTILE';
The replication factor for the keyspace is 4, so each of the 4 nodes owns 100% of the data:
nodetool status v2
Datacenter: codfw
=================
Status=Up/Down
|/ State=Normal/Leaving/Joining/Moving
-- Address Load Tokens Owns (effective) Host ID Rack
UN 10.192.0.128 122.99 GB 256 100.0% e33a1800-96d9-4a6d-86d3-e1d270026d4a rack1
UN 10.192.0.129 124.58 GB 256 100.0% f74287ed-8286-459e-956c-e0aed02184c4 rack1
UN 10.192.16.34 110.34 GB 256 100.0% 3e5ff43e-df80-48fe-9302-2a28b1127095 rack1
UN 10.192.16.35 114.19 GB 256 100.0% 1af61427-f70e-4653-bccd-71735e5f78da rack1
Code
====
import psycopg2
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement
import random
from multiprocessing import Process, Value
import time
import math
import os
# Number of benchmark rounds; each round runs one randomly chosen datastore.
REPEATS = 10
# Controls how many random tile lookups are generated at startup.
NUMBER_OF_QUERIES = 360000
# Number of worker processes spawned per benchmark round.
NUMBER_OF_PROCS = 18
# Hosts to query; workers are assigned to them round-robin.
HOSTS = ['maps-test2001.codfw.wmnet',
'maps-test2002.codfw.wmnet',
'maps-test2003.codfw.wmnet',
'maps-test2004.codfw.wmnet']
# Credentials come from the environment; a missing variable raises KeyError
# at import time.
CASSANDRA_USER = os.environ['CASSANDRA_USER']
CASSANDRA_PASS = os.environ['CASSANDRA_PASS']
POSTGRES_USER = os.environ['POSTGRES_USER']
POSTGRES_PASS = os.environ['POSTGRES_PASS']
def benchmark_cassandra_worker(host, queries, ret):
    """Run every lookup in *queries* against Cassandra on *host*.

    Connects to the ``v2`` keyspace, prepares the tile lookup once and
    executes it for each query dict (keys ``zoom``, ``block``, ``idx``).
    Only the execution loop is timed; connecting and preparing are not.

    The measured queries-per-second figure is stored in ``ret.value`` (the
    caller passes a shared ``multiprocessing.Value``) and also returned.
    """
    # Setup cassandra
    auth_provider = PlainTextAuthProvider(
        username=CASSANDRA_USER, password=CASSANDRA_PASS)
    cluster = Cluster([host], auth_provider=auth_provider)
    try:
        session = cluster.connect('v2')
        statement = session.prepare(
            'SELECT zoom, block, idx, tile FROM tiles '
            'WHERE zoom=? '
            'AND block=? '
            'AND idx=?')
        start_time = time.time()
        for query in queries:
            session.execute(
                statement, (query['zoom'], query['block'], query['idx']))
        end_time = time.time()
    finally:
        # Release the driver's connections and threads even when a query
        # fails, mirroring the explicit close() in the PostgreSQL worker.
        cluster.shutdown()
    duration = end_time - start_time
    # A zero duration (e.g. empty query list on a coarse clock) would
    # otherwise raise ZeroDivisionError.
    qps = len(queries) / duration if duration > 0 else 0.0
    ret.value = qps
    return qps
def benchmark_postgresql_worker(host, queries, ret):
    """Run every lookup in *queries* against PostgreSQL on *host*.

    Connects to the ``gis`` database and executes the tile lookup for each
    query dict (keys ``zoom``, ``block``, ``idx``), which psycopg2 binds to
    the named placeholders. Only the execution loop is timed.

    The measured queries-per-second figure is stored in ``ret.value`` (the
    caller passes a shared ``multiprocessing.Value``) and also returned.
    """
    sql = ('SELECT zoom, block, idx, tile FROM tiles '
           'WHERE zoom=%(zoom)s '
           'AND block=%(block)s '
           'AND idx=%(idx)s')
    # Setup postgresql. Keyword arguments instead of a hand-formatted DSN so
    # that credentials containing spaces or quotes are passed correctly.
    conn = psycopg2.connect(
        host=host, dbname='gis', user=POSTGRES_USER, password=POSTGRES_PASS)
    try:
        cur = conn.cursor()
        try:
            start_time = time.time()
            for query in queries:
                cur.execute(sql, query)
            end_time = time.time()
        finally:
            cur.close()
    finally:
        # Always release the connection, even if a query raised.
        conn.close()
    duration = end_time - start_time
    # A zero duration (e.g. empty query list on a coarse clock) would
    # otherwise raise ZeroDivisionError.
    qps = len(queries) / duration if duration > 0 else 0.0
    ret.value = qps
    return qps
def benchmark(func):
    """Run worker *func* in NUMBER_OF_PROCS parallel processes and sum QPS.

    The module-level ``queries`` list is split into NUMBER_OF_PROCS
    contiguous chunks. Each chunk is handed to one process, together with a
    host picked round-robin from HOSTS and a shared double the worker writes
    its queries-per-second figure into.

    Returns the sum of the per-worker QPS values.
    """
    print("Benchmarking %s" % func.__name__)
    total = len(queries)
    processes = []
    for i in range(NUMBER_OF_PROCS):
        host = HOSTS[i % len(HOSTS)]
        # Slice ends are exclusive, so the end bound is the next chunk's
        # start; subtracting 1 here would drop one query per worker.
        start_query = i * total // NUMBER_OF_PROCS
        end_query = (i + 1) * total // NUMBER_OF_PROCS
        v = Value('d', 0)
        p = Process(target=func,
                    args=(host, queries[start_query:end_query], v))
        p.start()
        processes.append({'p': p, 'v': v})
    # Let's wait for everyone to finish
    for proc in processes:
        proc['p'].join()
        if proc['p'].exitcode != 0:
            # The shared value is only written at the end of a worker, so a
            # crashed worker contributes its initial 0 to the total.
            print("Warning: worker %s exited with code %s" % (
                proc['p'].name, proc['p'].exitcode))
    qps = sum(proc['v'].value for proc in processes)
    print("Benchmarking %s: done" % func.__name__)
    return qps
if __name__ == '__main__':
    # First generate all our queries. `queries` must stay a module-level
    # name: benchmark() reads it as a global.
    print("Creating queries")
    # We give every max some extra space so as to have queries that return
    # nothing. idx is an exception as it is set to the max allowed (current
    # cassandra keyspace/table limitation in this specific installation)
    queries = [
        {
            'zoom': random.randrange(0, 20),
            'block': random.randrange(0, 70000),
            'idx': random.randrange(0, 2147483647),
        }
        # range(NUMBER_OF_QUERIES), not range(1, ...), so that exactly
        # NUMBER_OF_QUERIES lookups are generated.
        for _ in range(NUMBER_OF_QUERIES)
    ]
    print("Queries created")
    cassandra_qps_list = []
    postgresql_qps_list = []
    # Run the same query set REPEATS times, picking the datastore at random
    # each round, to statistically remove the effects of pagecache
    for i in range(REPEATS):
        worker = random.choice((
            ('cassandra', benchmark_cassandra_worker),
            ('postgresql', benchmark_postgresql_worker)))
        qps = benchmark(worker[1])
        if worker[0] == 'cassandra':
            cassandra_qps_list.append(qps)
        if worker[0] == 'postgresql':
            postgresql_qps_list.append(qps)
    # The random pick can leave one datastore with no rounds at all; report
    # that instead of dividing by zero.
    if cassandra_qps_list:
        cassandra_qps = sum(cassandra_qps_list) / len(cassandra_qps_list)
        print('Cassandra got: %s QPS' % cassandra_qps)
    else:
        cassandra_qps = None
        print('Cassandra got: no runs in %s repeats' % REPEATS)
    if postgresql_qps_list:
        postgresql_qps = sum(postgresql_qps_list) / len(postgresql_qps_list)
        print('PostgreSQL got: %s QPS' % postgresql_qps)
    else:
        postgresql_qps = None
        print('PostgreSQL got: no runs in %s repeats' % REPEATS)
Preliminary results
===================
python stresstest.py
Creating queries
Queries created
Benchmarking benchmark_cassandra_worker
Benchmarking benchmark_cassandra_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Benchmarking benchmark_cassandra_worker
Benchmarking benchmark_cassandra_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Benchmarking benchmark_cassandra_worker
Benchmarking benchmark_cassandra_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Benchmarking benchmark_cassandra_worker
Benchmarking benchmark_cassandra_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Cassandra got: 4860.36987879 QPS
PostgreSQL got: 53602.9711195 QPS
And a second run:
Cassandra got: 4858.80625565 QPS
PostgreSQL got: 53986.4100861 QPS

File Metadata

Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2570942
Default Alt Text
cassandra vs postgres benchmarker (6 KB)

Event Timeline