diff --git a/hshetl/cli.py b/hshetl/cli.py index 8d7ed6daec713cc803b327c7e162e5d94f238025..d6ad428e018ec47f16d14f599ff4f4f18e6a2731 100644 --- a/hshetl/cli.py +++ b/hshetl/cli.py @@ -175,13 +175,6 @@ class Configuration(object): 'jobs.' + str(len(self._jobs) + 1)) job_config['interactive'] = self.interactive job_config['path'] = self.path - if job_config['type'] == 'synchronization' or job_config['type'] == 'transformation': - if type(job_config['entity']['join']) is str: - job_config['entity']['join'] = [job_config['entity']['join']] - if type(job_config['target']['identifier']) is str: - job_config['target']['identifier'] = [job_config['target']['identifier']] - if type(job_config['source']['identifier']) is str: - job_config['source']['identifier'] = [job_config['source']['identifier']] self._jobs.append(job_config) def _merge(self, configuration, defaults, required, path=''): @@ -354,12 +347,12 @@ class SynchronizationJob(AbstractJob): if isinstance(target_loader, connections.SqlAlchemyLoader): query = self._configuration['target']['query'] configuration['table'] = self._resolve_table_from_query(query) - configuration['id_column'] = self._check_empty_identifiers(self._configuration['target']['identifier'], 'target') + configuration['id_column'] = self._resolve_identifier_fields(self._configuration['target']['identifier'], 'target') elif isinstance(target_loader, connections.LdapLoader): for key in ('rdn', 'objectClass'): if key in self._configuration['target']['query']: configuration[key] = self._configuration['target']['query'][key] - configuration['identifier'] = self._check_empty_identifiers(self._configuration['target']['identifier'], 'target') + configuration['identifier'] = self._resolve_identifier_fields(self._configuration['target']['identifier'], 'target') if configuration['identifier'] != ['dn']: raise ConfigurationException('You need to specify "dn" as identifier for ldap targets!') configuration['execute'] = self._configuration['execute'] @@ -367,22 +360,24 @@ class SynchronizationJob(AbstractJob): def _get_entity_configuration(self): '''Return a dictionary with configuration for entity repository; assign with ** operator.''' - return {'join_fields': self._check_empty_identifiers(self._configuration['entity']['join'], 'join'), - 'target_fields': self._check_empty_identifiers(self._configuration['target']['identifier'], 'target'), - 'source_fields': self._check_empty_identifiers(self._configuration['source']['identifier'], 'source'), + return {'join_fields': self._resolve_identifier_fields(self._configuration['entity']['join'], 'join'), + 'target_fields': self._resolve_identifier_fields(self._configuration['target']['identifier'], 'target'), + 'source_fields': self._resolve_identifier_fields(self._configuration['source']['identifier'], 'source'), 'properties': self._configuration['entity']['properties'], 'mapping': self._configuration['mapping'], 'source_datetime_format': self._configuration['source']['datetime_format'], 'target_datetime_format': self._configuration['target']['datetime_format'], - 'source_collision_handling': self._get(self._configuration['source'], 'collision_handling', 'BREAK'), - 'target_collision_handling': self._get(self._configuration['target'], 'collision_handling', 'BREAK')} + 'source_collision_handling': self._configuration['source'].get('collision_handling', 'BREAK'), + 'target_collision_handling': self._configuration['target'].get('collision_handling', 'BREAK')} - def _check_empty_identifiers(self, value, system): + def _resolve_identifier_fields(self, value, system): ''' If given list is empty, return all fields instead. Cannot be done in config, because it relies on the mapping, which is built in the sync-job ''' + if type(value) == str: + return [value] if len(value) != 0: return value elif system == 'join': @@ -390,12 +385,6 @@ class SynchronizationJob(AbstractJob): else: return self._configuration['mapping'][system].keys() - def _get(self, config, key, default_value): - if config.has_key(key): - return config[key] - else: - default_value - def _resolve_table_from_query(self, query): '''Resolves the table name from within the configured query''' if isinstance(query, str): return self._resolve_table_from_query_string(query) diff --git a/hshetl/connections.py b/hshetl/connections.py index 213e983d388ce9482def04a61c8decdde2cef4f9..93f503a19cf03f131a8dececee3412a3aec57aab 100644 --- a/hshetl/connections.py +++ b/hshetl/connections.py @@ -26,7 +26,6 @@ from exc import ConfigurationException from exc import UnknownStorageInterfaceException from exc import DuplicateNameException from exc import UnableToConnectException -from exc import InvalidCallOrderException from exc import ExtractorException import logging import re