Changeset View
Changeset View
Standalone View
Standalone View
scap/cdblib.py
Show All 12 Lines | |||||
rather than djb_hash() for a tidy speedup. | rather than djb_hash() for a tidy speedup. | ||||
.. note:: | .. note:: | ||||
Minor alterations made to comply with PEP8 style check and to remove | Minor alterations made to comply with PEP8 style check and to remove | ||||
attempt to import C implementation of djb_hash. -- bd808, 2014-03-04 | attempt to import C implementation of djb_hash. -- bd808, 2014-03-04 | ||||
""" | """ | ||||
from __future__ import absolute_import | from __future__ import absolute_import | ||||
import six | |||||
from itertools import chain | from itertools import chain | ||||
from _struct import Struct | from _struct import Struct | ||||
def py_djb_hash(s):
    u"""
    Return the value of DJB's hash function for the given 8-bit string.

    Text input is encoded as UTF-8 before hashing; byte-oriented input
    (``bytes``, ``bytearray``, ...) is hashed unchanged.

    >>> py_djb_hash('')
    5381
    >>> py_djb_hash('\x01')
    177572
    >>> py_djb_hash('€')
    193278953
    >>> py_djb_hash(b'a')
    177604

    :param s: text or byte string to hash
    :returns: the hash as an int masked to 32 bits
    """
    # Only text needs encoding.  On Python 2 ``str`` *is* ``bytes`` and is
    # left untouched; on Python 3 ``bytes`` has no ``encode`` method, so
    # calling it unconditionally would raise AttributeError.
    if not isinstance(s, bytes) and hasattr(s, 'encode'):
        s = s.encode('utf-8')

    h = 5381
    # Iterating a bytearray yields ints on both Python 2 and Python 3, so no
    # per-character ord()/byte2int conversion is required.
    for c in bytearray(s):
        h = (((h << 5) + h) ^ c) & 0xffffffff
    return h
# 2014-03-04 bd808: removed try block for importing C hash implementation
DJB_HASH = py_djb_hash

# One compiled struct for a pair of little-endian unsigned 32-bit integers;
# its bound methods serve as the module's reader and writer helpers.
_LE4_PAIR = Struct('<LL')
READ_2_LE4 = _LE4_PAIR.unpack
WRITE_2_LE4 = _LE4_PAIR.pack
Show All 10 Lines | class Reader(object): | ||||
def __init__(self, data, hashfn=DJB_HASH):
    """
    Create an instance reading from a sequence and hash keys using hashfn.

    >>> Reader(data='')
    Traceback (most recent call last):
    ...
    OSError: CDB too small
    >>> Reader(data='a' * 2048) #doctest: +ELLIPSIS
    <scap.cdblib.Reader object at 0x...>

    :param data: CDB contents.  Text is encoded as UTF-8; byte-oriented
        buffers are used unchanged.
    :param hashfn: callable used to hash keys
    :raises OSError: if data is shorter than the 2048-byte index
    """
    # Only text needs encoding.  Encoding unconditionally fails on binary
    # input: ``bytes`` has no ``encode`` on Python 3, and a Python 2 byte
    # string holding non-ASCII bytes raises UnicodeDecodeError.
    if not isinstance(data, bytes) and hasattr(data, 'encode'):
        data = data.encode('utf-8')

    if len(data) < 2048:
        raise OSError('CDB too small')

    self.data = data
    self.hashfn = hashfn
    # The first 2048 bytes are read as 256 pairs of little-endian uint32.
    self.index = [READ_2_LE4(data[i:i + 8]) for i in range(0, 2048, 8)]
    self.table_start = min(p[0] for p in self.index)
    # Assume load factor is 0.5 like official CDB.
    self.length = sum(p[1] >> 1 for p in self.index)
Show All 31 Lines | def __init__(self, fp, hashfn=DJB_HASH): | ||||
>>> import tempfile | >>> import tempfile | ||||
>>> temp_fp = tempfile.TemporaryFile() | >>> temp_fp = tempfile.TemporaryFile() | ||||
>>> Writer(fp=temp_fp, hashfn=py_djb_hash) #doctest: +ELLIPSIS | >>> Writer(fp=temp_fp, hashfn=py_djb_hash) #doctest: +ELLIPSIS | ||||
<scap.cdblib.Writer object at 0x...> | <scap.cdblib.Writer object at 0x...> | ||||
""" | """ | ||||
self.fp = fp | self.fp = fp | ||||
self.hashfn = hashfn | self.hashfn = hashfn | ||||
fp.write('\x00' * 2048) | fp.write(b'\x00' * 2048) | ||||
self._unordered = [[] for i in range(256)] | self._unordered = [[] for i in range(256)] | ||||
def put(self, key, value=''): | def put(self, key, value=''): | ||||
"""Write a string key/value pair to the output file.""" | """Write a string key/value pair to the output file.""" | ||||
assert isinstance(key, str) and isinstance(value, str) | assert isinstance(key, str) and isinstance(value, str) | ||||
pos = self.fp.tell() | pos = self.fp.tell() | ||||
self.fp.write(WRITE_2_LE4(len(key), len(value))) | self.fp.write(WRITE_2_LE4(len(key), len(value))) | ||||
Show All 28 Lines |
Content licensed under Creative Commons Attribution-ShareAlike 3.0 (CC-BY-SA) unless otherwise noted; code licensed under GNU General Public License (GPL) or other open source licenses. By using this site, you agree to the Terms of Use, Privacy Policy, and Code of Conduct. · Wikimedia Foundation · Privacy Policy · Code of Conduct · Terms of Use · Disclaimer · CC-BY-SA · GPL