Skip to content

Commit

Permalink
First working version of clickhouse-plantuml
Browse files Browse the repository at this point in the history
  • Loading branch information
Felixoid committed Apr 22, 2020
1 parent d478462 commit 8f447cc
Show file tree
Hide file tree
Showing 12 changed files with 745 additions and 12 deletions.
13 changes: 1 addition & 12 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -175,18 +175,7 @@

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright (C) 2020 Mikhail f. Shiryaev

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# PlantUML generator for ClickHouse tables

This is a very early version of a diagram generator. It parses the `system.tables` table and produces [PlantUML](https://plantuml.com) diagram source. Here's an example of a produced diagram:

![example](./example.png)

## Usage

```bash
python setup.py install
clickhouse-plantuml
```
14 changes: 14 additions & 0 deletions clickhouse_plantuml/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env python

# License: Apache-2.0
# Copyright (C) 2020 Mikhail f. Shiryaev

from .client import Client
from .column import Column
from .table import Table
from .tables import Tables


# Names re-exported as the public API of the package
__all__ = ['Client', 'Column', 'Table', 'Tables']

# Package version as a tuple of integers
VERSION = (0, 1)
150 changes: 150 additions & 0 deletions clickhouse_plantuml/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/usr/bin/env python

# License: Apache-2.0
# Copyright (C) 2020 Mikhail f. Shiryaev

"""
The script accepts ClickHouse credentials, databases and tables, and produces
the PlantUML schema description. Optionally it could invoke `plantuml` and
create the graphical output.
"""

import logging
import sys

from argparse import (
ArgumentParser, ArgumentDefaultsHelpFormatter, Namespace, FileType
)
from hashlib import sha1
from os.path import basename, isfile
from pprint import pformat
from subprocess import Popen, PIPE

from . import Client, Tables
from .plantuml import plantuml

# Every record is prefixed with its level and the emitting file:line; the
# message itself starts on the following line
logging.basicConfig(
format='%(levelname)-8s [%(filename)s:%(lineno)d]:\n%(message)s'
)
# Module-level logger, its level is set in main() from the verbosity flag
logger = logging.getLogger(__name__)


def parse_args() -> Namespace:
    """
    Parse the command line arguments of the script.

    Returns:
        The populated Namespace. `databases` falls back to `['default']`
        when no `-d/--database` is given, `text_output` is an opened text
        stream (stdout by default) and `diagram_output` is either None or
        a file path.
    """
    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatter,
        # `description`, not `usage`: the latter replaces the generated
        # synopsis of the options with this free text
        description="Gets the info about tables' schemas from ClickHouse "
        'database and generates PlantUML diagram source code.'
    )
    parser.add_argument(
        '-v', '--verbose', action='count', default=0,
        help='set the script verbosity, could be used multiple times',
    )

    clickhouse = parser.add_argument_group('ClickHouse parameters')
    clickhouse.add_argument(
        '--host', default='localhost', help='ClickHouse server hostname',
    )
    clickhouse.add_argument(
        '--port', default=9000, type=int, help='ClickHouse server port',
    )
    clickhouse.add_argument(
        '-u', '--user', default='default', help='ClickHouse username',
    )
    clickhouse.add_argument(
        '-p', '--password', default='', help='ClickHouse password',
    )
    clickhouse.add_argument(
        '-d', '--database', dest='databases', action='append', default=[],
        help='databases to describe. If omitted, `default` database is used',
    )
    clickhouse.add_argument(
        '-t', '--table', action='append', dest='tables', default=[],
        help='tables whitelist to describe. If set, only mentioned tables '
        'will be queried from the server',
    )

    # Named `*_group` to not shadow the imported `plantuml` function
    plantuml_group = parser.add_argument_group('PlantUml parameters')
    plantuml_group.add_argument(
        '-P', '--run-plantuml', action='store_true', dest='run_plantuml',
        help='if set, plantuml binary will be executed',
    )
    plantuml_group.add_argument(
        '-F', '--plantuml-format', choices=[
            'png', 'svg', 'eps', 'pdf', 'vdx', 'xmi', 'scxml', 'html',
            'txt', 'utxt', 'latex', 'latex:nopreamble',
        ],
        default='png', help='PlantUML output format',
    )
    plantuml_group.add_argument(
        '--plantuml-arguments', default='',
        help='additional parameters to pass into plantuml command'
    )

    diagram = parser.add_argument_group('diagram parameters')
    diagram.add_argument(
        '-o', '--text-output', type=FileType('w'), default='-',
        help='file to write a generated diagram source',
    )
    diagram.add_argument(
        # A plain path on purpose: run_plantuml() passes the value to
        # open(..., 'bw'), which does not accept the text stream produced
        # by FileType. FileType would also create the file at parse time.
        '-O', '--diagram-output',
        help='file to write a generated diagram. If `--text-output` is set, '
        'the default name is calculated as `filename_without_extension`.'
        '`plantuml-format`. If omitted, the default name is sha1 hexdigest '
        'out of diagram content.',
    )

    args = parser.parse_args()
    args.databases = args.databases or ['default']
    return args


def run_plantuml(args: Namespace, diagram: str):
    """
    Render `diagram` with the `plantuml` binary and save the result.

    The output file is `args.diagram_output`. When it is not set, the name
    is derived and stored back into `args.diagram_output`:

    * the diagram source went to stdout: `<sha1 of the source>.<format>`;
      if such a file already exists, plantuml is not executed at all
    * otherwise: the base name of the text output file with its extension
      replaced by the plantuml format

    Args:
        args: parsed command line arguments; `diagram_output`,
            `text_output`, `plantuml_format` and `plantuml_arguments`
            are read
        diagram: PlantUML source code of the diagram
    """
    # Local import: `splitext` is not among the module-level imports
    from os.path import splitext

    diagram_bin = diagram.encode('UTF-8')

    output = args.diagram_output
    if output is not None and not isinstance(output, str):
        # An already opened stream (e.g. produced by argparse.FileType)
        # can't be passed to open(); only its name is needed here
        if output is not sys.stdout:
            output.close()
        output = output.name

    if output is None:
        if args.text_output == sys.stdout:
            output = '{}.{}'.format(
                sha1(diagram_bin).hexdigest(), args.plantuml_format
            )
            if isfile(output):
                # The name is a digest of the content, so an existing file
                # was rendered from the same source
                args.diagram_output = output
                logger.info('File exists, do not run plantuml')
                return
        else:
            source_name = basename(args.text_output.name)
            output = '{}.{}'.format(
                splitext(source_name)[0], args.plantuml_format
            )
            if output == source_name:
                # Never overwrite the diagram source with its rendering
                output = '{}.{}'.format(source_name, args.plantuml_format)

    args.diagram_output = output
    logger.debug(args.diagram_output)

    command = ['plantuml', '-p', '-t' + args.plantuml_format]
    command.extend(args.plantuml_arguments.split())
    proc = Popen(command, stdout=PIPE, stdin=PIPE)
    # communicate() feeds stdin and drains stdout concurrently; a bare
    # stdin.write() may block forever once the pipe buffers are full
    rendered = proc.communicate(diagram_bin)[0]
    if proc.returncode != 0:
        logger.warning('plantuml exited with code %d', proc.returncode)

    with open(output, 'bw') as out:
        out.write(rendered)


def main():
    """
    Entry point of the script: queries the tables from ClickHouse, writes
    the PlantUML source of the diagram and optionally renders it.

    Exits with code 2 when no tables are found for the given parameters.
    """
    args = parse_args()

    # Each `-v` moves one step along the tuple, three and more mean DEBUG
    levels_by_verbosity = (
        logging.CRITICAL, logging.WARN, logging.INFO, logging.DEBUG,
    )
    logger.setLevel(levels_by_verbosity[min(args.verbose, 3)])
    logger.debug('Arguments are {}'.format(pformat(vars(args))))

    connection = Client(
        host=args.host, port=args.port, user=args.user,
        password=args.password,
    )
    tables = Tables(connection, args.databases, args.tables)
    table_names = [str(table) for table in tables]
    logger.debug('Tables are: {}'.format(pformat(table_names)))
    if not tables:
        logger.critical('There are no tables with given parameters')
        sys.exit(2)

    first_table_columns = [vars(column) for column in tables[0].columns]
    logger.debug('Columns of the first table are {}'.format(
        pformat(first_table_columns)
    ))

    diagram = plantuml(tables)
    text_output = args.text_output
    text_output.write(diagram)
    # stdout is left opened, a regular file is closed as soon as written
    if text_output != sys.stdout:
        text_output.close()

    if not args.run_plantuml:
        return
    run_plantuml(args, diagram)


if __name__ == '__main__':
main()
26 changes: 26 additions & 0 deletions clickhouse_plantuml/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env python

# License: Apache-2.0
# Copyright (C) 2020 Mikhail f. Shiryaev

from clickhouse_driver import Client as OriginalClient # type: ignore


class Client(OriginalClient):
    """
    clickhouse_driver.Client extended with methods returning every row as
    a dictionary instead of a plain sequence of values
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def execute_dict(self, *args, **kwargs):
        """
        Runs `execute` with the given arguments and returns a list with
        one dict per row. The keys are the first elements of the column
        descriptions returned along with the rows.
        """
        # Forced, since the column descriptions are needed for the keys
        kwargs['with_column_types'] = True
        rows, columns = self.execute(*args, **kwargs)
        keys = [column[0] for column in columns]
        return [
            {keys[position]: value for position, value in enumerate(row)}
            for row in rows
        ]

    def execute_iter_dict(self, *args, **kwargs):
        """
        Generator flavour of `execute_dict`: yields the row dicts one by
        one. The query is executed on the first iteration.
        """
        kwargs['with_column_types'] = True
        rows, columns = self.execute(*args, **kwargs)
        for row in rows:
            yield {
                columns[position][0]: value
                for position, value in enumerate(row)
            }
35 changes: 35 additions & 0 deletions clickhouse_plantuml/column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env python

# License: Apache-2.0
# Copyright (C) 2020 Mikhail f. Shiryaev


class Column:
    """
    Plain container for the attributes of a single ClickHouse column
    """

    def __init__(self, database: str, table: str, name: str, type: str,
                 default_kind: str, default_expression: str, comment: str,
                 compression_codec: str,
                 is_in_partition_key: bool, is_in_sorting_key: bool,
                 is_in_primary_key: bool, is_in_sampling_key: bool):
        # Where the column belongs to and what it is
        self.database, self.table = database, table
        self.name, self.type = name, type
        # Default value, comment and codec
        self.default_kind, self.default_expression = (
            default_kind, default_expression
        )
        self.comment, self.compression_codec = comment, compression_codec
        # Flags of membership in the table keys
        self.is_in_partition_key, self.is_in_sorting_key = (
            is_in_partition_key, is_in_sorting_key
        )
        self.is_in_primary_key, self.is_in_sampling_key = (
            is_in_primary_key, is_in_sampling_key
        )

    @property
    def db_table(self):
        """
        Name of the owning table in the `database.table` form
        """
        return '%s.%s' % (self.database, self.table)

    def __str__(self):
        """
        A column is printed as its bare name
        """
        return self.name
111 changes: 111 additions & 0 deletions clickhouse_plantuml/plantuml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/env python

# License: Apache-2.0
# Copyright (C) 2020 Mikhail f. Shiryaev

from . import Tables, Column
from typing import List


def plantuml(tables: Tables):
    """
    Returns the complete PlantUML document for `tables`: the header, the
    generated tables and the footer concatenated in this order
    """
    sections = (plantuml_header(), gen_tables(tables), plantuml_footer())
    return ''.join(sections)


def plantuml_header():
    """
    Returns the opening part of the PlantUML document: the `@startuml`
    line, one macro per kind of table and the common display settings.
    The text ends with an empty line.
    """
    # Credits
    # https://www.red-gate.com/simple-talk/sql/sql-tools/automatically-creating-uml-database-diagrams-for-sql-server/
    return (
        '@startuml\n'
        '!define Table(x) class x << (T,mistyrose) >>\n'
        '!define View(x) class x << (V,lightblue) >>\n'
        '!define MaterializedView(x) class x << (m,orange) >>\n'
        '!define Distributed(x) class x << (D,violet) >>\n'
        '\n'
        'hide empty methods\n'
        'hide stereotypes\n'
        'skinparam classarrowcolor gray\n'
        '\n'
    )


def gen_tables(tables: Tables):
    """
    Builds the PlantUML source for every table of the Tables object: the
    class-like definition followed by the arrows to and from the other
    tables which are present in `tables.as_dict`
    """
    chunks = []
    for table in tables:
        # The primary key is shown only when it differs from the sorting one
        if table.sorting_key != table.primary_key:
            table_keys = ['partition', 'sorting', 'primary', 'sampling']
        else:
            table_keys = ['partition', 'sorting', 'sampling']

        # Opening line, the macro depends on the engine of the table
        chunks.append(
            '{}({})'.format(table_macros(table.engine), str(table)) + ' {\n'
        )

        chunks.append('ENGINE=**{}**\n'.format(table.engine))
        if table.engine_config:
            chunks.append('..engine config..\n')
            chunks.extend(
                '{}: {}\n'.format(k, v) for k, v in table.engine_config
            )

        # Not every table object has the replication parameters
        if hasattr(table, 'replication_config'):
            chunks.append('..replication..\n')
            chunks.extend(
                '{}: {}\n'.format(k, v) for k, v in table.replication_config
            )

        # One line per column: name, type and the signs of the keys
        chunks.append('==columns==\n')
        for column in table.columns:
            chunks.append('{}: {}'.format(column.name, column.type))
            chunks.append(column_keys(column, table_keys))
            chunks.append('\n')

        # Sections with the expressions of the non-empty keys
        for key in table_keys:
            key_string = getattr(table, '{}_key'.format(key))
            if not key_string:
                continue
            chunks.append(
                '..{}{} key..\n{}\n'.format(key_sign(key), key, key_string)
            )

        chunks.append('}\n')

        # Arrows are drawn only between the tables known to `tables`
        chunks.extend(
            '{} -|> {}\n'.format(str(table), dependency)
            for dependency in table.dependencies
            if dependency in tables.as_dict
        )
        chunks.extend(
            '{} -|> {}\n'.format(dependant, str(table))
            for dependant in table.rev_dependencies
            if dependant in tables.as_dict
        )

        chunks.append('\n')

    return ''.join(chunks)


def plantuml_footer():
    """
    Returns the closing `@enduml` line of the PlantUML document, without
    a trailing newline
    """
    footer = '@enduml'
    return footer


def table_macros(table_type: str):
    """
    Returns the name of the macro to draw a table with the given engine:
    the engine itself for the ones having a dedicated macro, `Table` for
    everything else
    """
    dedicated = ('MaterializedView', 'View', 'Distributed')
    return table_type if table_type in dedicated else 'Table'


def key_sign(key: str) -> str:
    """
    Returns the icon markup (`<&...>` tag wrapped into `<size:15>`) for the
    given kind of key, or an empty string for an unknown kind
    """
    icons = {
        'partition': 'list-rich',
        'sorting': 'signal',
        'primary': 'key',
        'sampling': 'collapse-down',
    }
    icon = icons.get(key)
    if icon is None:
        return ''
    return '<size:15><&{}></size>'.format(icon)


def column_keys(column: Column, table_keys: List[str]) -> str:
    """
    Returns the signs of the keys from `table_keys` the column is a part
    of, each one prefixed with a space, in the order of `table_keys`.
    The membership is read from the `is_in_<key>_key` attributes.
    """
    return ''.join(
        ' {}'.format(key_sign(key))
        for key in table_keys
        if getattr(column, 'is_in_{}_key'.format(key))
    )
Loading

0 comments on commit 8f447cc

Please sign in to comment.