Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F2556919
cassandra vs postgres benchmarker
No One
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Authored By
akosiaris
Sep 8 2015, 12:21 PM
2015-09-08 12:21:50 (UTC+0)
Size
6 KB
Referenced Files
None
Subscribers
None
cassandra vs postgres benchmarker
View Options
Postgres table
==============
gis=# \d tiles
Table "public.tiles"
Column | Type | Modifiers
--------+--------+-----------
zoom | bigint |
block | bigint |
idx | bigint |
tile | bytea |
Indexes:
"tiles_zoom_block_idx_idx" UNIQUE, btree (zoom, block, idx)
Cassandra table
===============
cqlsh:v2> describe tiles;
CREATE TABLE v2.tiles (
zoom int,
block int,
idx int,
tile blob,
PRIMARY KEY (zoom, block, idx)
) WITH CLUSTERING ORDER BY (block ASC, idx ASC)
AND bloom_filter_fp_chance = 0.01
AND caching = '{"keys":"ALL", "rows_per_partition":"NONE"}'
AND comment = ''
AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'}
AND compression = {'sstable_compression': 'org.apache.cassandra.io.compress.LZ4Compressor'}
AND dclocal_read_repair_chance = 0.1
AND default_time_to_live = 0
AND gc_grace_seconds = 864000
AND max_index_interval = 2048
AND memtable_flush_period_in_ms = 0
AND min_index_interval = 128
AND read_repair_chance = 0.0
AND speculative_retry = '99.0PERCENTILE';
The replication factor for the keyspace is 4, so each of the four nodes holds a full copy of the data (100% effective ownership):
nodetool status v2
Datacenter: codfw
=================
Status=Up/Down
|/ State=Normal/Leaving/Joining/Moving
-- Address Load Tokens Owns (effective) Host ID Rack
UN 10.192.0.128 122.99 GB 256 100.0% e33a1800-96d9-4a6d-86d3-e1d270026d4a rack1
UN 10.192.0.129 124.58 GB 256 100.0% f74287ed-8286-459e-956c-e0aed02184c4 rack1
UN 10.192.16.34 110.34 GB 256 100.0% 3e5ff43e-df80-48fe-9302-2a28b1127095 rack1
UN 10.192.16.35 114.19 GB 256 100.0% 1af61427-f70e-4653-bccd-71735e5f78da rack1
Code
====
import psycopg2
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement
import random
from multiprocessing import Process, Value
import time
import math
import os
# Number of benchmark rounds run by the __main__ section; each round picks
# one backend (cassandra or postgresql) at random.
REPEATS = 10
# Bound of the query-generation loop in the __main__ section.
NUMBER_OF_QUERIES = 360000
# Number of worker processes benchmark() spawns per round.
NUMBER_OF_PROCS = 18
# Hosts the workers connect to; benchmark() assigns them round-robin
# (HOSTS[i % len(HOSTS)]) to the worker processes.
HOSTS = ['maps-test2001.codfw.wmnet',
'maps-test2002.codfw.wmnet',
'maps-test2003.codfw.wmnet',
'maps-test2004.codfw.wmnet']
# Credentials are read from the environment at import time; a missing
# variable raises KeyError before any benchmark starts.
CASSANDRA_USER = os.environ['CASSANDRA_USER']
CASSANDRA_PASS = os.environ['CASSANDRA_PASS']
POSTGRES_USER = os.environ['POSTGRES_USER']
POSTGRES_PASS = os.environ['POSTGRES_PASS']
def benchmark_cassandra_worker(host, queries, ret):
    """Run ``queries`` sequentially against Cassandra and report the QPS.

    Args:
        host: Contact point handed to the Cassandra ``Cluster``.
        queries: Sequence of CQL query strings to execute one by one.
        ret: Shared object with a writable ``value`` attribute (a
            ``multiprocessing.Value`` when called from ``benchmark``); the
            measured queries-per-second is stored there so the parent
            process can read it.

    Returns:
        The measured queries per second (0.0 if no measurable time elapsed).
    """
    # Setup cassandra
    auth_provider = PlainTextAuthProvider(
        username=CASSANDRA_USER, password=CASSANDRA_PASS)
    cluster = Cluster([host], auth_provider=auth_provider)
    try:
        session = cluster.connect('v2')
        # Only the query loop is timed; connection setup and teardown are not.
        start_time = time.time()
        for query in queries:
            statement = SimpleStatement(query)
            session.execute(statement)
        end_time = time.time()
    finally:
        # Release the driver's connections and threads even when a query
        # fails; the original never shut the cluster down.
        cluster.shutdown()
    duration = end_time - start_time
    # Guard against a zero-length interval (empty query list or a coarse
    # clock) instead of raising ZeroDivisionError.
    qps = len(queries) / duration if duration > 0 else 0.0
    ret.value = qps
    return qps
def benchmark_postgresql_worker(host, queries, ret):
    """Run ``queries`` sequentially against PostgreSQL and report the QPS.

    Args:
        host: PostgreSQL server to connect to (database ``gis``).
        queries: Sequence of SQL query strings to execute one by one.
        ret: Shared object with a writable ``value`` attribute (a
            ``multiprocessing.Value`` when called from ``benchmark``); the
            measured queries-per-second is stored there so the parent
            process can read it.

    Returns:
        The measured queries per second (0.0 if no measurable time elapsed).
    """
    # Setup postgresql. Keyword arguments are used instead of a hand-built
    # DSN string so credentials containing spaces or quotes cannot corrupt
    # the connection string.
    conn = psycopg2.connect(
        host=host, dbname='gis', user=POSTGRES_USER, password=POSTGRES_PASS)
    try:
        cur = conn.cursor()
        try:
            # Only the query loop is timed; setup and teardown are not.
            start_time = time.time()
            for query in queries:
                cur.execute(query)
            end_time = time.time()
        finally:
            cur.close()
    finally:
        # Always release the connection, even when a query fails.
        conn.close()
    duration = end_time - start_time
    # Guard against a zero-length interval (empty query list or a coarse
    # clock) instead of raising ZeroDivisionError.
    qps = len(queries) / duration if duration > 0 else 0.0
    ret.value = qps
    return qps
def benchmark(func):
    """Run ``func`` in NUMBER_OF_PROCS parallel processes and sum their QPS.

    The module-level ``queries`` list (built in the ``__main__`` section) is
    split into NUMBER_OF_PROCS contiguous chunks. Each chunk is handed to its
    own process, and hosts from HOSTS are assigned round-robin.

    Args:
        func: Worker callable with the signature ``func(host, queries, ret)``
            that stores its measured queries-per-second in ``ret.value``.

    Returns:
        The sum of the per-process QPS values.
    """
    print("Benchmarking %s" % func.__name__)
    total = len(queries)
    processes = []
    for i in range(NUMBER_OF_PROCS):
        host = HOSTS[i % len(HOSTS)]
        start_query = i * total // NUMBER_OF_PROCS
        # Slice ends are exclusive, so no "- 1" here: the original dropped
        # the last query of every chunk.
        end_query = (i + 1) * total // NUMBER_OF_PROCS
        v = Value('d', 0)
        p = Process(target=func,
                    args=(host, queries[start_query:end_query], v))
        p.start()
        processes.append({'p': p, 'v': v})
    # Let's wait for everyone to finish
    for proc in processes:
        proc['p'].join()
    # NOTE: a worker that died before setting its value contributes 0.
    qps = 0
    for proc in processes:
        qps = qps + proc['v'].value
    print("Benchmarking %s: done" % func.__name__)
    return qps
# Script entry point: generate the random queries once, then run REPEATS
# benchmark rounds against a randomly chosen backend and print the mean QPS
# observed for each backend.
if __name__ == '__main__':
    # Must stay a module-level name: benchmark() reads it as a global.
    queries = []
    # First generate all our queries
    print("Creating queries")
    # range(NUMBER_OF_QUERIES), not range(1, ...): the original produced one
    # query fewer than configured.
    for i in range(NUMBER_OF_QUERIES):
        # We give every max some extra space so as to have queries that return
        # nothing. idx is an exception as it is set to the max allowed (current
        # cassandra keyspace/table limitation in this specific installation)
        params = {
            'zoom': random.randrange(0, 20),
            'block': random.randrange(0, 70000),
            'idx': random.randrange(0, 2147483647),
        }
        # The continuation lines are deliberately flush-left: they are part
        # of the string literal, so indenting them would change the query text.
        query = 'SELECT zoom, block, idx, tile FROM tiles \
WHERE zoom=%(zoom)s \
AND block=%(block)s \
AND idx=%(idx)s' % params
        queries.append(query)
    print("Queries created")
    cassandra_qps_list = []
    postgresql_qps_list = []
    for i in range(REPEATS):
        # Pick the backend at random each round so the run order does not
        # systematically favour one of them.
        worker = random.choice((
            ('cassandra', benchmark_cassandra_worker),
            ('postgresql', benchmark_postgresql_worker)))
        qps = benchmark(worker[1])
        if worker[0] == 'cassandra':
            cassandra_qps_list.append(qps)
        if worker[0] == 'postgresql':
            postgresql_qps_list.append(qps)
    # With random selection a backend may never be chosen; report that
    # instead of crashing with ZeroDivisionError when averaging.
    if cassandra_qps_list:
        cassandra_qps = sum(cassandra_qps_list) / len(cassandra_qps_list)
        print('Cassandra got: %s QPS' % cassandra_qps)
    else:
        cassandra_qps = None
        print('Cassandra got: no runs (backend was never selected)')
    if postgresql_qps_list:
        postgresql_qps = sum(postgresql_qps_list) / len(postgresql_qps_list)
        print('PostgreSQL got: %s QPS' % postgresql_qps)
    else:
        postgresql_qps = None
        print('PostgreSQL got: no runs (backend was never selected)')
Preliminary results
===================
python stresstest.py
Creating queries
Queries created
Benchmarking benchmark_cassandra_worker
Benchmarking benchmark_cassandra_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Benchmarking benchmark_cassandra_worker
Benchmarking benchmark_cassandra_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Benchmarking benchmark_cassandra_worker
Benchmarking benchmark_cassandra_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Benchmarking benchmark_cassandra_worker
Benchmarking benchmark_cassandra_worker: done
Benchmarking benchmark_postgresql_worker
Benchmarking benchmark_postgresql_worker: done
Cassandra got: 4860.36987879 QPS
PostgreSQL got: 53602.9711195 QPS
File Metadata
Details
Attached
Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2570635
Default Alt Text
cassandra vs postgres benchmarker (6 KB)
Attached To
Mode
P1995 cassandra vs postgres benchmarker
Attached
Detach File
Event Timeline
Log In to Comment