diff --git a/hshetl/__init__.py b/hshetl/__init__.py index d7c43e00df9034f35dc9faadaafb4dfe6ca5a66c..70242da67a3e9cb23903c7d512b4bfbda4e1fb9e 100644 --- a/hshetl/__init__.py +++ b/hshetl/__init__.py @@ -289,7 +289,7 @@ class Dialect(csv.Dialect): :type delimiter: string :type doublequote: bool - :type escapechar: string or False + :type escapechar: mixed :type lineterminator: string :type quotechar: string :type skipinitialspace: bool @@ -314,9 +314,9 @@ class Dialect(csv.Dialect): yaml_tag = u'!dialect' def __init__(self, - delimiter=',', + delimiter = ',', doublequote = True, - escapechar = False, + escapechar = None, lineterminator = '\n', quotechar = '"', skipinitialspace = False, diff --git a/hshetl/connectors.py b/hshetl/connectors.py index d563a71cadc2e442ccab1e230e6a0fea4f68eb35..767ecb862a84472e8f414347569a8df8c34d5f8e 100644 --- a/hshetl/connectors.py +++ b/hshetl/connectors.py @@ -193,7 +193,7 @@ class LdapConnector(AbstractConnector): yaml_tag = u'!ldap' '''Use this tag inside your YAML configuration, to define this connector.''' - def __init__(self, uri, base = None, scope = 'subtree', page_size = False, start_tls = True, **kwargs): + def __init__(self, uri, base = None, scope = 'subtree', page_size = False, start_tls = True, options = {}, **kwargs): '''Initializes the LdapStorageInterface. Internally creates a storage_interface with the ldap module. @@ -218,11 +218,7 @@ class LdapConnector(AbstractConnector): self.security_level = 'none' else: self.security_level = 'tls' - self.options = {} - for k, v in kwargs.iteritems(): - if k in ('name', 'encoding'): continue - self.options[getattr(ldap, k)] = v - del kwargs[k] + self.options = options super(LdapConnector, self).__init__(uri = uri, **kwargs) def _connect(self): @@ -230,7 +226,7 @@ class LdapConnector(AbstractConnector): self.connection = ldap.initialize(self.uri) for option, value in self.options.iteritems(): logging.debug('Set option ' + repr(option) + ' = ' + repr(value)) - self.connection.set_option(option, value) + self.connection.set_option(getattr(ldap, option), value) if self.security_level == 'tls': logging.debug('Connecting with TLS') self.connection.start_tls_s() diff --git a/hshetl/entities.py b/hshetl/entities.py index 59f174e79b39227a74f2907c00e861d2f9771ec1..ed3627fd0010cdb31613d7c6182d9f2f6c250498 100644 --- a/hshetl/entities.py +++ b/hshetl/entities.py @@ -287,10 +287,11 @@ class PropertyConverter(object): encoding = self._detect_encoding(value) logging.debug('Detected raw value ' + repr(value) + 'with encoding: ' + encoding) if encoding not in ('Unknown', 'utf-8'): + logging.debug('decode... encode') raw_value = value.decode(encoding) return unicode(raw_value) elif encoding == 'utf-8': - return unicode(value) + return unicode(value, 'utf-8') else: raise ValueError('hshetl - Encoding trouble.') diff --git a/hshetl/extractors.py b/hshetl/extractors.py index 33a4e0fd9bc3a348dc865ac9db6b1e7e174defb9..0a7122516da2a3c070aa97affbdef629d7ff8bd8 100644 --- a/hshetl/extractors.py +++ b/hshetl/extractors.py @@ -204,7 +204,9 @@ class LdapExtractor(AbstractExtractor): if dn == None: continue row = {'dn': dn} for key, value in entry.items(): - if len(value) is 1: value = value[0].decode(self.connector.encoding) + logging.debug('%s %s' % (key, value)) + value = map(lambda v: unicode(v.decode(self.connector.encoding)), value) + if len(value) is 1: value = value[0] row[key] = value result.append(row) return result @@ -324,7 +326,7 @@ class CsvExtractor(AbstractExtractor): result = [] try: with self.connector as connection: - reader = csv.DictReader(connection, dialect=self.dialect_name) + reader = csv.DictReader(connection, dialect = self.dialect_name) for row in reader: for key, value in row.iteritems(): row[key] = value.decode(self.connector.encoding) diff --git a/hshetl/loaders.py b/hshetl/loaders.py index 325b20739a04b0ebcd4eec81408d771b4da9ce4e..bfb37807f33c3591ff6a75908ccc46f2462f65d1 100644 --- a/hshetl/loaders.py +++ b/hshetl/loaders.py @@ -119,7 +119,7 @@ class CsvLoader(AbstractLoader): YAML definition sample: .. code-block:: yaml - + !csvloader connector: myfile dialect: !dialect @@ -206,6 +206,7 @@ class SqlAlchemyLoader(AbstractLoader): '''Use this tag inside your YAML configuration, to define this loader.''' def __init__(self, table_name, **kwargs): + logging.debug('SqlAlchemyLoader constructor arguments: %s' % kwargs) super(SqlAlchemyLoader, self).__init__(**kwargs) self.table_name = table_name '''The name of the table where the records will be load.''' @@ -216,6 +217,7 @@ class SqlAlchemyLoader(AbstractLoader): def _execute(self, result): '''Executes the loading of data. Distinguishes between update, insert and delete''' + logging.debug('Will execute the following operations: %s' % self.operations) self.table = Table(self.table_name, MetaData(), autoload_with = self.connector.engine, @@ -224,9 +226,12 @@ class SqlAlchemyLoader(AbstractLoader): with self.connector as connection: for action in self.operations: getattr(self, '_' + action)(connection, getattr(result, action)) - logging.info(str(len(result.insert)) + ' data sets inserted, ' - + str(len(result.update)) + ' data sets updated, ' - + str(len(result.delete)) + ' data sets deleted.') + logging.info('%s data sets inserted (%s), %s data sets updated (%s) , %s data sets deleted (%s).' % (str(len(result.insert)), + 'Done!' if 'insert' in self.operations else 'Skipped!', + str(len(result.update)), + 'Done!' if 'update' in self.operations else 'Skipped!', + str(len(result.delete)), + 'Done!' if 'delete' in self.operations else 'Skipped!')) def _insert(self, connection, data): '''Creates a sql insert statement using sqlalchemy and executes it on the connection.'''