Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F2556822
cassandra vs postgres benchmarker
No One
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Authored By
akosiaris
Sep 8 2015, 11:46 AM
2015-09-08 11:46:56 (UTC+0)
Size
4 KB
Referenced Files
None
Subscribers
None
cassandra vs postgres benchmarker
View Options
Postgres table
==============
gis=# \d tiles
Table "public.tiles"
Column | Type | Modifiers
--------+--------+-----------
zoom | bigint |
block | bigint |
idx | bigint |
tile | bytea |
Indexes:
"tiles_zoom_block_idx_idx" UNIQUE, btree (zoom, block, idx)
Cassandra table
===============
cqlsh:v2> describe tiles;
CREATE TABLE v2.tiles (
zoom int,
block int,
idx int,
tile blob,
PRIMARY KEY (zoom, block, idx)
) WITH CLUSTERING ORDER BY (block ASC, idx ASC)
AND bloom_filter_fp_chance = 0.01
AND caching = '{"keys":"ALL", "rows_per_partition":"NONE"}'
AND comment = ''
AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'}
AND compression = {'sstable_compression': 'org.apache.cassandra.io.compress.LZ4Compressor'}
AND dclocal_read_repair_chance = 0.1
AND default_time_to_live = 0
AND gc_grace_seconds = 864000
AND max_index_interval = 2048
AND memtable_flush_period_in_ms = 0
AND min_index_interval = 128
AND read_repair_chance = 0.0
AND speculative_retry = '99.0PERCENTILE';
Code
====
import psycopg2
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement
import random
from multiprocessing import Process, Value
import time
import math
import os
# Total number of SELECT statements to generate; they are divided between the
# worker processes. NOTE(review): the previous comment here said "10 million
# queries", which does not match the value below (120 thousand) -- confirm
# which one was intended.
NUMBER_OF_QUERIES = 120000
# Number of worker processes started for each benchmark run.
NUMBER_OF_PROCS = 12
# Database hosts; worker i is pointed at HOSTS[i % len(HOSTS)].
HOSTS = ['maps-test2001.codfw.wmnet',
'maps-test2002.codfw.wmnet',
'maps-test2003.codfw.wmnet',
'maps-test2004.codfw.wmnet']
# Credentials are read from the environment at import time; a missing
# variable raises KeyError before any benchmark starts.
CASSANDRA_USER = os.environ['CASSANDRA_USER']
CASSANDRA_PASS = os.environ['CASSANDRA_PASS']
POSTGRES_USER = os.environ['POSTGRES_USER']
POSTGRES_PASS = os.environ['POSTGRES_PASS']
def benchmark_cassandra_worker(host, queries, ret):
    """Execute every query against one Cassandra host and measure throughput.

    Connection setup is not timed; only the loop that executes the
    statements is.

    Args:
        host: Contact point passed to ``Cluster`` for this worker.
        queries: Sequence of CQL query strings, executed sequentially.
        ret: Object whose ``value`` attribute is set to the measured
            queries per second (``benchmark`` passes a
            ``multiprocessing.Value``).

    Returns:
        The measured queries per second, or 0.0 when no measurable time
        elapsed (for example with an empty ``queries``).
    """
    # Setup cassandra
    auth_provider = PlainTextAuthProvider(
        username=CASSANDRA_USER, password=CASSANDRA_PASS)
    cluster = Cluster([host], auth_provider=auth_provider)
    try:
        session = cluster.connect('v2')
        start_time = time.time()
        for query in queries:
            session.execute(SimpleStatement(query))
        end_time = time.time()
    finally:
        # Always shut the cluster down, even when a query fails, mirroring
        # the explicit close() calls in the PostgreSQL worker.
        cluster.shutdown()
    duration = end_time - start_time
    # Guard against a zero interval instead of raising ZeroDivisionError.
    qps = len(queries) / duration if duration > 0 else 0.0
    ret.value = qps
    return qps
def benchmark_postgresql_worker(host, queries, ret):
    """Execute every query against one PostgreSQL host and measure throughput.

    Connection setup is not timed; only the loop that executes the
    statements is.

    Args:
        host: Host name of the PostgreSQL server for this worker.
        queries: Sequence of SQL query strings, executed sequentially.
        ret: Object whose ``value`` attribute is set to the measured
            queries per second (``benchmark`` passes a
            ``multiprocessing.Value``).

    Returns:
        The measured queries per second, or 0.0 when no measurable time
        elapsed (for example with an empty ``queries``).
    """
    # Setup postgresql
    # Keyword parameters avoid hand-building a DSN string, which breaks when
    # the user name or password contains spaces or quotes.
    conn = psycopg2.connect(
        host=host, dbname='gis', user=POSTGRES_USER, password=POSTGRES_PASS)
    try:
        cur = conn.cursor()
        try:
            start_time = time.time()
            for query in queries:
                cur.execute(query)
            end_time = time.time()
        finally:
            cur.close()
    finally:
        # Close the connection even when a query raises.
        conn.close()
    duration = end_time - start_time
    # Guard against a zero interval instead of raising ZeroDivisionError.
    qps = len(queries) / duration if duration > 0 else 0.0
    ret.value = qps
    return qps
def benchmark(func):
    """Run ``func`` in NUMBER_OF_PROCS processes and sum their throughput.

    The module-level ``queries`` list is cut into NUMBER_OF_PROCS contiguous
    chunks; chunk ``i`` is handed to a process targeting
    ``HOSTS[i % len(HOSTS)]``.

    Args:
        func: Worker callable invoked as ``func(host, queries_chunk, ret)``;
            it is expected to store its queries-per-second figure in
            ``ret.value``.

    Returns:
        The sum of the values reported by all workers.
    """
    total = len(queries)
    processes = []
    for i in range(NUMBER_OF_PROCS):
        host = HOSTS[i % len(HOSTS)]
        # Slice ends are exclusive, so the end index must not be reduced by
        # one: doing so would drop the last query of every chunk. Floor
        # division keeps the bounds integral on Python 2 and Python 3.
        start_query = i * total // NUMBER_OF_PROCS
        end_query = (i + 1) * total // NUMBER_OF_PROCS
        v = Value('d', 0)
        p = Process(target=func,
                    args=(host, queries[start_query:end_query], v))
        p.start()
        processes.append({'p': p, 'v': v})
    # Let's wait for everyone to finish
    for proc in processes:
        proc['p'].join()
    return sum(proc['v'].value for proc in processes)
if __name__ == '__main__':
    # Script entry point: build NUMBER_OF_QUERIES random point lookups, run
    # them against Cassandra and then PostgreSQL, and print both totals.
    # ``queries`` is deliberately a module-level name: benchmark() reads it.
    query_template = ('SELECT zoom, block, idx, tile FROM tiles '
                      'WHERE zoom=%(zoom)s '
                      'AND block=%(block)s '
                      'AND idx=%(idx)s')
    queries = []
    # First generate all our queries
    print("Creating queries")
    # range(NUMBER_OF_QUERIES) yields exactly NUMBER_OF_QUERIES statements;
    # starting the range at 1 produced one fewer than configured.
    for _ in range(NUMBER_OF_QUERIES):
        # We give every max some extra space so as to have queries that
        # return nothing. idx is an exception, as it is set to the max
        # allowed.
        params = {
            'zoom': random.randrange(0, 20),
            'block': random.randrange(0, 70000),
            'idx': random.randrange(0, 2147483647),
        }
        queries.append(query_template % params)
    print("Queries created")
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Benchmarking cassandra")
    cassandra_qps = benchmark(benchmark_cassandra_worker)
    print("Benchmarking cassandra: done")
    print("Benchmarking postgres")
    postgresql_qps = benchmark(benchmark_postgresql_worker)
    print("Benchmarking postgres: done")
    print('Cassandra got: %s QPS' % cassandra_qps)
    print('PostgreSQL got: %s QPS' % postgresql_qps)
File Metadata
Details
Attached
Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2570538
Default Alt Text
cassandra vs postgres benchmarker (4 KB)
Attached To
Mode
P1995 cassandra vs postgres benchmarker
Attached
Detach File
Event Timeline
Log In to Comment