Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F2556803
cassandra vs postgres benchmarker
No One
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Authored By
akosiaris
Sep 8 2015, 11:41 AM
2015-09-08 11:41:16 (UTC+0)
Size
3 KB
Referenced Files
None
Subscribers
None
cassandra vs postgres benchmarker
View Options
import psycopg2
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement
import random
from multiprocessing import Process, Value
import time
import math
import os
# Total number of random queries generated for each benchmark run.
NUMBER_OF_QUERIES = 120000 # 120 thousand queries
# Number of worker processes started per benchmark; the generated queries
# are split between them.
NUMBER_OF_PROCS = 12
# Database hosts; worker i connects to HOSTS[i % len(HOSTS)].
HOSTS = ['maps-test2001.codfw.wmnet',
'maps-test2002.codfw.wmnet',
'maps-test2003.codfw.wmnet',
'maps-test2004.codfw.wmnet']
# Credentials are read from the environment at import time; a missing
# variable raises KeyError before any benchmark starts.
CASSANDRA_USER = os.environ['CASSANDRA_USER']
CASSANDRA_PASS = os.environ['CASSANDRA_PASS']
POSTGRES_USER = os.environ['POSTGRES_USER']
POSTGRES_PASS = os.environ['POSTGRES_PASS']
def benchmark_cassandra_worker(host, queries, ret):
    """Run ``queries`` sequentially against one Cassandra host and measure QPS.

    Args:
        host: Contact point handed to the driver's ``Cluster``.
        queries: Sequence of CQL query strings, executed in order.
        ret: Object whose ``value`` attribute receives the measured
            queries-per-second (``benchmark`` passes a
            ``multiprocessing.Value``).

    Returns:
        The measured queries-per-second.
    """
    # Setup cassandra
    auth_provider = PlainTextAuthProvider(
        username=CASSANDRA_USER, password=CASSANDRA_PASS)
    cluster = Cluster([host], auth_provider=auth_provider)
    try:
        session = cluster.connect('v2')
        # Only the query loop is timed; connecting and tearing down are not.
        start_time = time.time()
        for query in queries:
            session.execute(SimpleStatement(query))
        end_time = time.time()
    finally:
        # Always close the cluster's sessions and connections, mirroring the
        # explicit close() calls in the PostgreSQL worker.
        cluster.shutdown()
    duration = end_time - start_time
    qps = len(queries) / duration
    ret.value = qps
    return qps
def benchmark_postgresql_worker(host, queries, ret):
    """Run ``queries`` sequentially against one PostgreSQL host and measure QPS.

    Args:
        host: Hostname of the PostgreSQL server to connect to.
        queries: Sequence of SQL query strings, executed in order.
        ret: Object whose ``value`` attribute receives the measured
            queries-per-second (``benchmark`` passes a
            ``multiprocessing.Value``).

    Returns:
        The measured queries-per-second.
    """
    # Setup postgresql. Keyword arguments are used instead of an interpolated
    # DSN string so credentials containing spaces or quotes are passed intact.
    conn = psycopg2.connect(host=host, dbname='gis',
                            user=POSTGRES_USER, password=POSTGRES_PASS)
    try:
        cur = conn.cursor()
        try:
            # Only the query loop is timed; connecting and closing are not.
            start_time = time.time()
            for query in queries:
                cur.execute(query)
            end_time = time.time()
        finally:
            cur.close()
    finally:
        # Close the connection even when a query raised.
        conn.close()
    duration = end_time - start_time
    qps = len(queries) / duration
    ret.value = qps
    return qps
def benchmark(func):
    """Fan the module-level ``queries`` out to worker processes and sum QPS.

    Starts ``NUMBER_OF_PROCS`` processes, each running ``func`` against one
    of ``HOSTS`` (assigned round-robin) with a contiguous share of the
    module-level ``queries`` list.

    Args:
        func: Worker callable invoked as ``func(host, queries, ret)``; it
            must store its measured queries-per-second in ``ret.value``.

    Returns:
        The sum of the queries-per-second values reported by all workers.
    """
    total = len(queries)
    processes = []
    for i in range(NUMBER_OF_PROCS):
        host = HOSTS[i % len(HOSTS)]
        # Slice ends are exclusive, so the end index is the next worker's
        # start index; subtracting 1 would drop one query per worker.
        # Floor division keeps the indices integral on Python 2 and 3.
        start_query = i * total // NUMBER_OF_PROCS
        end_query = (i + 1) * total // NUMBER_OF_PROCS
        v = Value('d', 0)
        p = Process(target=func,
                    args=(host, queries[start_query:end_query], v))
        p.start()
        processes.append({'p': p, 'v': v})
    # Let's wait for everyone to finish
    for proc in processes:
        proc['p'].join()
    return sum(proc['v'].value for proc in processes)
if __name__ == '__main__':
    # Script entry point: build the random query workload once, then run the
    # same workload against Cassandra and PostgreSQL and print both results.
    # print(...) with a single argument prints the same text on Python 2 and 3.
    queries = []
    # First generate all our queries
    print("Creating queries")
    # range(NUMBER_OF_QUERIES) yields exactly NUMBER_OF_QUERIES queries;
    # starting the range at 1 would generate one fewer than configured.
    for _ in range(NUMBER_OF_QUERIES):
        # We give every max some extra space so as to have queries that
        # return nothing. idx is an exception as it is set to the max allowed
        params = {
            'zoom': random.randrange(0, 20),
            'block': random.randrange(0, 70000),
            'idx': random.randrange(0, 2147483647),
        }
        queries.append(
            'SELECT zoom, block, idx, tile FROM tiles '
            'WHERE zoom=%(zoom)s '
            'AND block=%(block)s '
            'AND idx=%(idx)s' % params)
    print("Queries created")
    print("Benchmarking cassandra")
    cassandra_qps = benchmark(benchmark_cassandra_worker)
    print("Benchmarking cassandra: done")
    print("Benchmarking postgres")
    postgresql_qps = benchmark(benchmark_postgresql_worker)
    print("Benchmarking postgres: done")
    print('Cassandra got: %s QPS' % cassandra_qps)
    print('PostgreSQL got: %s QPS' % postgresql_qps)
File Metadata
Details
Attached
Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2570519
Default Alt Text
cassandra vs postgres benchmarker (3 KB)
Attached To
Mode
P1995 cassandra vs postgres benchmarker
Attached
Detach File
Event Timeline
Log In to Comment