From e191182ce72fcd0c6241ba2e1f0bb8a094289a50 Mon Sep 17 00:00:00 2001 From: Dennis Ahrens <dennis.ahrens@hs-hannover.de> Date: Thu, 2 Oct 2014 16:02:10 +0200 Subject: [PATCH] [TASK] Better logging messages. For productive usage the logs should give more information when running in the background without using inspection. --- hshetl/entities.py | 15 +++++++++------ hshetl/extractors.py | 9 +++++++++ hshetl/jobs.py | 1 + hshetl/loaders.py | 15 ++++++++++++--- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/hshetl/entities.py b/hshetl/entities.py index 74f7678..e571d22 100644 --- a/hshetl/entities.py +++ b/hshetl/entities.py @@ -90,7 +90,7 @@ class Entity(AbstractRepository): logging.info('Load {} records into the entity {}.'.format(len(data), self.name)) for raw_record in data: self._load_record(raw_record, container) - logging.info('Load of {} records into the entity {} done with avarage load time.'.format(len(data), self.name)) + logging.info('Load of {} records into the entity {} done.'.format(len(data), self.name)) def _load_record(self, raw_record, container): '''Adds a record to the entity repository.''' @@ -226,7 +226,7 @@ class Container(list): msg = 'Collision in %s system detected! Aborting! %s found twice.' % (self.name, repr(record)) raise DuplicatedSystemIdException(msg) else: - msg = 'Collision in %s container detected! Ignoring collision. %s found twice and added to collision.' % (self.name, repr(record)) + msg = 'Collision in %s container detected! Ignoring collision.\n%s' % (self.name, record) logging.warn(msg) self.collisions.append(record) @@ -238,7 +238,7 @@ class Container(list): msg = 'Collision in %s system detected! Aborting! %s found twice.' % (self.name, repr(record)) raise DuplicatedJoinIdException(msg) else: - msg = 'Collision in %s container detected! Ignoring collision. %s found twice and added to collision.' % (self.name, repr(record)) + msg = 'Collision in %s container detected! Ignoring collision.\n%s' % (self.name, record) logging.warn(msg) self.join_collisions.append(record) @@ -498,10 +498,13 @@ class Record(object): return result def __unicode__(self): - o = 'Record:\n' + o = u'<Record:' + containers = [] for name, container in self.containers.iteritems(): - o += '{}: id: {} properties: {}\n'.format(name, str(self.get_container_identifier(container)), str(self.to_dict(container = container, remove_system_identifiers = False))) - return o + container_id = self.get_container_identifier(container) + if container_id != None: + containers.append(u'container: {}: id: {} properties: {}'.format(name, str(container_id), str(self.to_dict(container = container, remove_system_identifiers = False)))) + return o + u'\n'.join(containers) + u'>' class EntityRepository(AbstractRepository): diff --git a/hshetl/extractors.py b/hshetl/extractors.py index 287751f..a5bcdc0 100644 --- a/hshetl/extractors.py +++ b/hshetl/extractors.py @@ -85,6 +85,9 @@ class AbstractExtractor(object): '''Executes the extraction of data. Implementation specific.''' raise NotImplementedError + def __unicode__(self): + return u'<AbstractExtractor with connector: %s>' % self.connector.name + @yamlify class SqlAlchemyExtractor(AbstractExtractor): @@ -136,6 +139,9 @@ class SqlAlchemyExtractor(AbstractExtractor): return 'SELECT ' + query['select'] + ' FROM ' + query['from'] + ' WHERE ' + query['where'] raise ConfigurationException('The query must be a string or a dictionary containing an key sql that has a string value or a dict containing the keys select, from and where. Encountered {}'.format(type(query))) + def __unicode__(self): + return u'<SqlAlchemyExtractor with connector: %s, query: %s>' % (self.connector.name, self.query) + @yamlify class LdapExtractor(AbstractExtractor): @@ -276,6 +282,9 @@ class LdapExtractor(AbstractExtractor): else: raise ExtractorException('Warning: LdapServer ignores RFC 2696 control.') return raw_result + def __unicode__(self): + return u'<LdapExtractor with connector: %s, base: %s, scope: %s, filter: %s>' % (((self.connector.name,) + self._merge_connector_configuration())[:-1] + (self.ldap_filter,)) + @yamlify class CsvExtractor(AbstractExtractor): diff --git a/hshetl/jobs.py b/hshetl/jobs.py index dbead83..ae95ba1 100644 --- a/hshetl/jobs.py +++ b/hshetl/jobs.py @@ -671,6 +671,7 @@ class LoadJob(EntityJob, ConnectorJob): def _execute(self): '''Load data from the repository into a storage interface.''' result = Result(entity = self.entity, source = self.source) + logging.info(unicode(result)) self.loader.execute(result) diff --git a/hshetl/loaders.py b/hshetl/loaders.py index bc2450c..204d6b8 100644 --- a/hshetl/loaders.py +++ b/hshetl/loaders.py @@ -239,7 +239,7 @@ class SqlAlchemyLoader(AbstractLoader): for record in data: values.append(record.to_dict(remove_system_identifiers = False)) statement = self.table.insert() - logging.debug('Executing {} with {} '.format(str(statement), str(values))) + logging.info('Executing {} with {} '.format(str(statement), str(values))) connection.execute(statement, values) def _update(self, connection, data): @@ -247,7 +247,7 @@ class SqlAlchemyLoader(AbstractLoader): for record in data: statement = self.table.update(values = record.to_dict()) statement = self._add_where_constraints(statement, record) - logging.debug('executing sql update query: ' + str(statement)) + logging.info('Executing sql update query: ' + str(statement)) connection.execute(statement) @@ -256,7 +256,7 @@ class SqlAlchemyLoader(AbstractLoader): for record in data: statement = self.table.delete() statement = self._add_where_constraints(statement, record) - logging.debug('executing sql delete query: ' + str(statement)) + logging.info('Executing sql delete query: ' + str(statement)) connection.execute(statement) def _add_where_constraints(self, statement, record): @@ -330,6 +330,12 @@ class LdapLoader(AbstractLoader): with self.connector as connection: for action in self.operations: getattr(self, '_' + action)(connection, getattr(result, action)) + logging.info('%s data sets inserted (%s), %s data sets updated (%s) , %s data sets deleted (%s).' % (str(len(result.insert)), + 'Done!' if 'insert' in self.operations else 'Skipped!', + str(len(result.update)), + 'Done!' if 'update' in self.operations else 'Skipped!', + str(len(result.delete)), + 'Done!' if 'delete' in self.operations else 'Skipped!')) def _update(self, connection, data): '''Updates every dataset one after another.''' @@ -341,6 +347,7 @@ class LdapLoader(AbstractLoader): attributes = modlist.modifyModlist(old_entity_dict, entity_dict) try: connection.modify_s(dn, attributes) + logging.info('Modified dn {} with {} '.format(dn, str(attributes))) except Exception, e: logging.warn('Update failed for dn \'' + dn + '\': ' + str(e)) @@ -352,6 +359,7 @@ class LdapLoader(AbstractLoader): attributes = modlist.addModlist(entity_dict) try: connection.add_s(dn, attributes) + logging.info('Created dn {} with {} '.format(dn, str(attributes))) except Exception, e: logging.warn('Insert failed for dn \'' + dn + '\': ' + str(e)) @@ -361,6 +369,7 @@ class LdapLoader(AbstractLoader): dn = self._get_dn(record.get_container_identifier(), record.to_dict()) try: connection.delete_s(dn) + logging.info('Deleted dn {}'.format(dn)) except Exception, e: logging.warn('Delete failed for dn \'' + dn + '\': ' + str(e)) -- GitLab