#!/usr/bin/env python
"""Submit a user-defined compaction covering already-compacted windows.

The script runs the inspector jar (``SSUTIL_JAR``) against a table data
directory, which reports the data files grouped by window start time (in
milliseconds).  Every window except the newest one that consists of exactly
one data file is collected, and the resulting files are handed to the
``user-compaction`` command of the Creole CLI (``CREOLE_JAR``) over JMX.
The JMX port is looked up from the instance definitions found in
``INSTANCES_DIR``.
"""

import datetime
import logging
import json
import os
import subprocess
import sys

import yaml


CREOLE_JAR = "/home/eevans/creole-cli-1.0.0-SNAPSHOT-jar-with-dependencies.jar"
SSUTIL_JAR = "/home/eevans/twcs-inspector-1.0.0-SNAPSHOT.jar"

# Directory holding one ``*.yaml`` definition per Cassandra instance.
INSTANCES_DIR = "/etc/cassandra-instances.d"

# Inherited by the java child processes started below.
# NOTE(review): presumably this selects JSON output from the inspector jar,
# whose stdout is parsed with json.loads() in main() -- confirm.
os.environ["FORMAT"] = "json"

logging.basicConfig(level=logging.DEBUG)


def _is_within(path, parent):
    """Return True if *path* is *parent* itself or lies underneath it.

    The comparison is done on whole path components, so ``/srv/a`` is not
    considered a parent of ``/srv/ab``.
    """
    normalized_path = os.path.normpath(path) + os.sep
    # rstrip() keeps the root directory ("/") from turning into "//".
    normalized_parent = os.path.normpath(parent).rstrip(os.sep) + os.sep
    return normalized_path.startswith(normalized_parent)


def _ms_to_date(ms):
    """Format a millisecond epoch timestamp as a local ``YYYY-MM-DD`` date."""
    return datetime.datetime.fromtimestamp(int(ms) / 1e3).strftime("%Y-%m-%d")


def discover_instance_jmx(directory):
    """Return the JMX port of the instance whose data lives in *directory*.

    Each ``*.yaml`` file in ``INSTANCES_DIR`` is read; the first one listing
    a ``data_file_directories`` entry that contains *directory* wins and its
    ``jmx_port`` value is returned.  Returns -1 when no instance matches.

    Raises:
        KeyError: if the matching instance file has no ``jmx_port`` entry.
    """
    for name in os.listdir(INSTANCES_DIR):
        if not name.endswith(".yaml"):
            continue
        with open(os.path.join(INSTANCES_DIR, name)) as f:
            # safe_load() never constructs arbitrary Python objects and, unlike
            # a bare yaml.load(f), keeps working on PyYAML releases where the
            # Loader argument is mandatory.  An empty file yields None.
            config = yaml.safe_load(f) or {}
        for data_directory in config.get("data_file_directories") or ():
            if _is_within(directory, data_directory):
                return config["jmx_port"]
    return -1


def setup(directory):
    """Placeholder for pre-flight checks; currently does nothing."""
    # TODO: validate jars are present
    # TODO: validate existence, readability, of directory supplied
    # TODO: lock, so that we do not trample upon another long-running instance
    pass


def main():
    """Select compaction candidates and submit them; return the exit status.

    Returns 0 when a compaction was submitted successfully or when there was
    nothing worth compacting, and 1 when there are too few windows, the
    instance could not be identified, or the compaction command failed.
    Exits directly with status 1 on a usage error.
    """
    if len(sys.argv) != 2:
        sys.stderr.write("Usage: {} <directory>\n".format(sys.argv[0]))
        sys.exit(1)

    directory = sys.argv[1]
    setup(directory)

    tables = json.loads(subprocess.check_output(["java", "-jar", SSUTIL_JAR, directory]))

    if len(tables) <= 2:
        logging.warning("too few windows to begin a new compaction")
        return 1

    # Keys are window start times in milliseconds, delivered as JSON object
    # keys (strings); order them numerically rather than lexicographically.
    windows = sorted(tables, key=int)
    data_files = []

    for candidate in windows[:-1]:
        # This should be all but the most current window, each of which should
        # have been compacted down to a single table.  If that is not the case
        # (perhaps the compaction is on-going), then exclude the window for
        # this iteration.
        if len(tables[candidate]) > 1:
            logging.warning(
                "window starting %s hasn't been fully compacted, stopping here...",
                _ms_to_date(candidate))
            # Do not consider any later windows, either...
            break

        table = tables[candidate][0]
        logging.debug(
            "adding window starting %s, data file %s (%s percent droppable)",
            _ms_to_date(candidate),
            table["dataFileName"],
            table["estimatedDroppableTombstones"] * 100)
        data_files.append(os.path.join(directory, table["dataFileName"]))

    if len(data_files) < 2:
        logging.warning("not enough candidates for compaction, exiting...")
        return 0

    jmx_port = discover_instance_jmx(directory)
    if jmx_port < 0:
        logging.error("Unable to detect instance from directory")
        return 1

    command = ["java", "-jar", CREOLE_JAR, "-H", "localhost", "-P", str(jmx_port), "user-compaction"]
    status = subprocess.call(command + data_files)
    if status != 0:
        # Surface the failure instead of reporting success unconditionally.
        logging.error("user-compaction exited with status %s", status)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())