From e85c3b9d7b3c7091f1b42e03908fd646a4035e77 Mon Sep 17 00:00:00 2001 From: Maximilian Schulz <maximilian.schulz@hs-hannover.de> Date: Wed, 2 Apr 2014 13:26:10 +0200 Subject: [PATCH] [TASK] Introduces config dictionary The config dictionary now acts as the central place for case-specific definitions. You can now set the identifier of an object if it is referenceable, change the display type for selected objects, or define the children for !!list types. --- hshetl/editor.py | 127 ++++++++++++++++++++++++++++++++++++++----- hshetl/entities.py | 2 +- hshetl/extractors.py | 6 +- hshetl/loaders.py | 16 +++++- 4 files changed, 133 insertions(+), 18 deletions(-) diff --git a/hshetl/editor.py b/hshetl/editor.py index 3333abc..b0d7adb 100644 --- a/hshetl/editor.py +++ b/hshetl/editor.py @@ -5,6 +5,52 @@ import json import re from hshetl.exc import * + +config = { + "TYPE_MAPPING": { + "string" : "!!str", + "unicode" : "!!str", + "int" : "!!int", + "bool" : "!!bool", + "boolean" : "!!bool", + "list" : "!!list", + "dict" : "!!map" + }, + "TYPE_DISPLAY_MAPPING": { + "!!str" : "textfield", + "!!int" : "textfield", + "!!bool" : "checkbox", + "!!list" : "csv", + "!!map" : "table" + }, + "SPECIFIC_TYPE_DISPLAY_MAPPING": { + "ExtractionJob.collision_handling": { + "display" : "select", "str_choices": [ + "BREAKALL", + "BREAKJOIN", + "BREAKCONTAINER", + "BREAKNEVER" + ] + }, + "JobList.jobs": "list" + }, + "IDENTIFIERS": { + #"SQLiteConnector": "name" + }, + "CHILDREN": { + "JobList.jobs": ["!bash", "!sqlquery", "!plsqlquery", "!extract", "!transform", "!inspect", "!load", "!sync", "!copy"] + }, + "REFERENCABLE": [ + "Entity", + "FileConnector", + "LdapConnector", + "OracleConnector", + "MySQLConnector", + "SQLiteConnector", + "PostgreSQLConnector" + ] + } + def find_subclasses(cls): result = [] for subcls in cls.__subclasses__(): @@ -15,8 +61,24 @@ def find_subclasses(cls): return result def apply_type_mapping(possible_types): - # TODO: implement logic... 
- return possible_types + mapped_types = [] + for p_type in possible_types: + if p_type in config["TYPE_MAPPING"].keys(): + mapped_types.append(config["TYPE_MAPPING"][p_type]) + else: + if p_type.startswith("hshetl."): + cls = eval(p_type) + elif p_type.endswith("Connector"): + cls = eval("hshetl.connectors." + p_type) + elif p_type.endswith("Extractor"): + cls = eval("hshetl.extractors." + p_type) + elif p_type.endswith("Loader"): + cls = eval("hshetl.loaders." + p_type) + else: + continue + if hasattr(cls, "yaml_tag"): + mapped_types.append(cls.yaml_tag) + return mapped_types def itersubclasses(cls, _seen = None): if not isinstance(cls, type): @@ -36,7 +98,7 @@ def itersubclasses(cls, _seen = None): def resolve_param_type(cls, property): possible_types = [] - pattern = ":type " + property + ": (:class:\`\.?(.*)`)?(\\w*)(( or )?(\\w+)?)?" + pattern = ":type " + property + ": ((:class:\`(.*)`)|(\w*))(( or )?(\\w+)?)*" result = re.search(pattern, cls.__doc__) if result == None: for parent_cls in cls.__mro__: @@ -50,25 +112,62 @@ def resolve_param_type(cls, property): if param_type == "" or param_type == None or " " in param_type or ":" in param_type: continue if "." in param_type: - for cls in itersubclasses(eval(param_type)): - if cls.__name__ not in possible_types: - possible_types.append(cls.__name__) + for clas in itersubclasses(eval(param_type)): + if clas.__name__ not in possible_types: + possible_types.append(clas.__name__) + if possible_types == []: + possible_types.append(param_type) else: - possible_types.append(param_type) + if param_type not in possible_types: + possible_types.append(param_type) return apply_type_mapping(possible_types) +def resolve_display_type(sup_type, property, cls_name): + if config["SPECIFIC_TYPE_DISPLAY_MAPPING"].has_key(cls_name + "." + property): + return config["SPECIFIC_TYPE_DISPLAY_MAPPING"][cls_name + "." 
+ property] + elif config["TYPE_DISPLAY_MAPPING"].has_key(sup_type): + return config["TYPE_DISPLAY_MAPPING"][sup_type] + elif sup_type[1] != "!": + return "inline" + else: + return "textfield" + def json_encode_class(cls): arg_resolver = hshetl.ConfigurationArgumentMatcher() props, required = arg_resolver.get_constructor_arguments(cls) - cfg = {"yaml_tag": cls.yaml_tag, - "verbose_name": cls.__name__, - "properties": {}} + if cls.__name__ in config["REFERENCABLE"]: + cfg = {"yaml_tag": cls.yaml_tag, + "identifier": config["IDENTIFIERS"].get(cls.__name__, "name"), + "verbose_name": cls.__name__, + "properties": {}} + else: + cfg = {"yaml_tag": cls.yaml_tag, + "verbose_name": cls.__name__, + "properties": {}} for prop in props: supported_types = resolve_param_type(cls, prop) - cfg["properties"][prop] = {"type": supported_types, - "form": {"display": "textfield"}} - if prop in required: - cfg["properties"][prop]["defaults"] = "foo" + param_list = [] + for s_type in supported_types: + children = None + display_type = resolve_display_type(s_type, prop, cls.__name__) + if s_type == "!!list": + children = config["CHILDREN"][cls.__name__ + "." + prop] if config["CHILDREN"].has_key(cls.__name__ + "." + prop) else None + + if children == None: + if isinstance(display_type, dict): + param_list.append({"type": s_type, "form": display_type, "defaults": "foo"}) + else: + param_list.append({"type": s_type, "form": {"display": display_type}, "defaults": "foo"}) + else: + if isinstance(display_type, dict): + param_list.append({"type": s_type, "children": children, "form": display_type, "defaults": "foo"}) + else: + param_list.append({"type": s_type, "children": children, "form": {"display": display_type}, "defaults": "foo"}) + try: + cfg["properties"][prop] = param_list if len(param_list) > 1 else param_list[0] + except Exception: + #this is not supposed to be here in the final version. 
+ cfg["properties"][prop] = {"type" : None, "form": {"display": "textfield"}, "defaults": "foo"} return cfg def dump_schema(): diff --git a/hshetl/entities.py b/hshetl/entities.py index 27ed6ca..ba3a422 100644 --- a/hshetl/entities.py +++ b/hshetl/entities.py @@ -503,7 +503,7 @@ class Result(object): :param entity: the entity to be representated :param source: system to fetch data from - :type entity: :class:`Entity` + :type entity: :class:`hshetl.entities.Entity` :type source: string A Result describes what records between the diff --git a/hshetl/extractors.py b/hshetl/extractors.py index ef77cc1..974ebab 100644 --- a/hshetl/extractors.py +++ b/hshetl/extractors.py @@ -44,7 +44,7 @@ class AbstractExtractor(object): Constructor :param connector: connector instance - :type connector: :class:`.AbstractExtractor` + :type connector: :class:`hshetl.connectors.AbstractConnector` ''' @@ -96,7 +96,7 @@ class SqlAlchemyExtractor(AbstractExtractor): :param query: query that fetches data from the database :param **kwargs: accepts parameters from :class:`.AbstractExtractor`. :type query: string - :type connector: :class:`.AbstractExtractor` + :type connector: :class:`hshetl.connectors.OracleConnector` or :class:`hshetl.connectors.MySQLConnector` or :class:`hshetl.connectors.SQLiteConnector` or :class:`hshetl.connectors.PostgreSQLConnector` YAML definition sample: @@ -156,6 +156,7 @@ class LdapExtractor(AbstractExtractor): :type ldap_filter: string or None :type attributes: list :type page_size: int or None + :type connector: :class:`hshetl.connectors.LdapConnector` YAML definition sample: @@ -283,6 +284,7 @@ class CsvExtractor(AbstractExtractor): :param dialect: CSV dialect to be use for CSV style :param **kwargs: Accepts parameters from :class:`.AbstractExtractor`. 
:type dialect: :class:`hshetl.Dialect` + :type connector: :class:`hshetl.connectors.FileConnector` YAML definition sample: diff --git a/hshetl/loaders.py b/hshetl/loaders.py index 4aba5e1..325b207 100644 --- a/hshetl/loaders.py +++ b/hshetl/loaders.py @@ -30,7 +30,7 @@ from uuid import uuid4 as uuid from ldap import modlist from sqlalchemy import Table, MetaData from hshetl import yamlify, NameResolver, Dialect -from connectors import AbstractConnector, connector_repository +from connectors import AbstractConnector, FileConnector, SqlAlchemyConnector, LdapConnector, connector_repository from hshetl.exc import LoaderException, ConfigurationException @@ -114,6 +114,7 @@ class CsvLoader(AbstractLoader): :param dialect: The CSV dialect that will be used for CSV style. :param **kwargs: Accepts parameters from :class:`.AbstractExtractor`. :type dialect: :class:`hshetl.Dialect` + :type connector: :class:`hshetl.connectors.FileConnector` YAML definition sample: @@ -135,6 +136,9 @@ class CsvLoader(AbstractLoader): self.dialect = dialect self._resolve_dialect() + def can_execute(self, connector): + '''Defines which connector can be handled by this extractor.''' + return isinstance(connector, FileConnector) def _update(self, data = []): '''This loader can not update records. Therefore only empty data is allowed''' @@ -186,6 +190,7 @@ class SqlAlchemyLoader(AbstractLoader): :param table_name: The name of the table in which to write the data. :param **kwargs: Accepts parameters from :class:`.AbstractLoader`. 
:type table_name: string + :type connector: :class:`hshetl.connectors.OracleConnector` or :class:`hshetl.connectors.MySQLConnector` or :class:`hshetl.connectors.SQLiteConnector` or :class:`hshetl.connectors.PostgreSQLConnector` YAML definition sample: @@ -205,6 +210,10 @@ class SqlAlchemyLoader(AbstractLoader): self.table_name = table_name '''The name of the table where the records will be load.''' + def can_execute(self, connector): + '''Defines which connector can be handled by this extractor.''' + return isinstance(connector, SqlAlchemyConnector) + def _execute(self, result): '''Executes the loading of data. Distinguishes between update, insert and delete''' self.table = Table(self.table_name, @@ -279,6 +288,7 @@ class LdapLoader(AbstractLoader): :type rdn: string :type base: string :type objectClass: string + :type connector: :class:`hshetl.connectors.LdapConnector` YAML definition sample: @@ -304,6 +314,10 @@ class LdapLoader(AbstractLoader): self.objectClass = objectClass '''The objectClass of new inserted records.''' + def can_execute(self, connector): + '''Defines which connector can be handled by this extractor.''' + return isinstance(connector, LdapConnector) + def _execute(self, result): '''Executes the loading of data. Distinguishes between update, insert and delete.''' logging.debug('Loads data: ' + self.__class__.__name__) -- GitLab