diff --git a/hshetl/__init__.py b/hshetl/__init__.py index 70242da67a3e9cb23903c7d512b4bfbda4e1fb9e..73baee20d96d9737fa95a3e1837a46ea8802c7fc 100644 --- a/hshetl/__init__.py +++ b/hshetl/__init__.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- ''' [h]och[s]chule[h]annover[e]xtract[t]ranform[l]oad is designed to create, inspect, log and launch automated synchronizations between several types of data sources. @@ -11,7 +12,7 @@ import functools import inspect import csv from collections import OrderedDict -from exc import NotMatchingYAMLTagException, NotMatchingArgumentsException, ConfigurationException, UnknownNameReferenceException, DuplicateNameException, NameResolverException +from exc import NotMatchingYAMLTagException, NotMatchingArgumentsException, ConfigurationException, UnknownNameReferenceException, DuplicateNameException def yamlify(cls): diff --git a/hshetl/cli.py b/hshetl/cli.py index 091419da903e84dcc39a102ae6edac12c888780d..402963c6f0337ee69d205d8a5f7bbf3c88ae21e0 100644 --- a/hshetl/cli.py +++ b/hshetl/cli.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- '''This module is the command line interface of this application. It's responsibility is to parse the command line arguments and the YAML configuration. diff --git a/hshetl/connectors.py b/hshetl/connectors.py index 767ecb862a84472e8f414347569a8df8c34d5f8e..342c13047fc3a055faa9836b7f93f3c77bee02b0 100644 --- a/hshetl/connectors.py +++ b/hshetl/connectors.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- ''' This module handles the abstraction to different data sources. diff --git a/hshetl/entities.py b/hshetl/entities.py index ed3627fd0010cdb31613d7c6182d9f2f6c250498..5a5543774c678f5f5f09d85208a5daf53eb4e9e7 100644 --- a/hshetl/entities.py +++ b/hshetl/entities.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- '''This module handles the construction, merge and comparison of entities. The tool used entities to describe connected data that should be read and diff --git a/hshetl/exc.py b/hshetl/exc.py index af0ef6e972172bbf8d780b6b3dc425f03ce763b7..099f83a8b8caa7e16952fb3d856acfe0a781a1e4 100644 --- a/hshetl/exc.py +++ b/hshetl/exc.py @@ -1,4 +1,5 @@ -'''This module defines all special exceptions that were raised in this module.''' +# -*- coding: utf-8 -*- +'''This module defines all special exceptions that were raised in this package.''' class ConfigurationException(Exception): '''Raised if something is wrong with the given configuration.''' pass @@ -14,11 +15,8 @@ class NotMatchingYAMLTagException(Exception): pass -class NameResolverException(Exception): - pass - - class JobException(Exception): + '''Raised if something with the job itself is wrong on execution.''' pass @@ -57,35 +55,26 @@ class TransformerException(Exception): class UnknownPropertyException(Exception): + '''Raised if a property should be set, that is not known.''' pass class ContainerPropertiesException(Exception): + '''Raised if container is added to a record with different properties.''' pass class DuplicatedJoinIdException(Exception): + '''Raised if a record gets a join id that is already used by another record in the same entity.''' pass class DuplicatedSystemIdException(Exception): + '''Raised if a record gets a system id that is already used in another record in the same entity that belongs to this source.''' pass class DataTypeConversionException(Exception): + '''Raised if a value can not be converted to wanted type.''' pass - -class InvalidCallOrderException(Exception): - pass - - -class UnexpectedEnvironmentException(Exception): - pass - - -class EscapeDatetimeException(Exception): - pass - -class DocStringMissesTypeException(Exception): - pass diff --git a/hshetl/extractors.py b/hshetl/extractors.py index e7be613ada44ab724116816f0262ab02958a54a3..287751f05f3a0865a418fc74c0dd91cd03ab4514 100644 --- a/hshetl/extractors.py +++ b/hshetl/extractors.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- '''This module encapsulates the extraction of data. It completely depends on :doc:`connectors`, because a connector diff --git a/hshetl/loaders.py b/hshetl/loaders.py index bfb37807f33c3591ff6a75908ccc46f2462f65d1..bc2450c26780aa313896f3dc1cc2f349b3f2ac00 100644 --- a/hshetl/loaders.py +++ b/hshetl/loaders.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- '''This module encapsulates the loading of data into data sources. It completely depends on :doc:`connectors` and :doc:`entities`, @@ -24,7 +25,6 @@ A loader inside of a YAML job definition with more parameters: ''' from __future__ import print_function import logging -import codecs import csv from uuid import uuid4 as uuid from ldap import modlist @@ -153,7 +153,6 @@ class CsvLoader(AbstractLoader): logging.info('Writing header: ' + str(header)) with self.connector as connection: - # if self.connector.encoding: connection.write(codecs.BOM_UTF8) writer = csv.writer(connection, self.dialect_name) writer.writerow(header) for record in data: diff --git a/hshetl/test/functional/__init__.py b/hshetl/test/functional/__init__.py index 4ac3dac269c2e538a2967d76f744d6404da106e0..529a0defbf65006cfe9e0d7814a0f49f2d6bcb42 100644 --- a/hshetl/test/functional/__init__.py +++ b/hshetl/test/functional/__init__.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import unittest import sys import os diff --git a/hshetl/test/functional/test_copy.py b/hshetl/test/functional/test_copy.py index d1e8f10f58ad6834b77adc956cdabf09b3860235..b922c52e9fd184dd5a842f6d0723f7b5b5b48c86 100644 --- a/hshetl/test/functional/test_copy.py +++ b/hshetl/test/functional/test_copy.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from hshetl.test.functional import TestBaseClass from hshetl.cli import Controller import sys diff --git a/hshetl/test/functional/test_copy_faker2file.py b/hshetl/test/functional/test_copy_faker2file.py index 0392669e5588c254c09ff9a173f2b3f9c808a955..1ee80ec3a2d72ac147b96eaaa1c056a1b93915c3 100644 --- a/hshetl/test/functional/test_copy_faker2file.py +++ b/hshetl/test/functional/test_copy_faker2file.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from hshetl.test.functional import TestBaseClass from hshetl.cli import Controller import sys diff --git a/hshetl/test/functional/test_copy_file2file.py b/hshetl/test/functional/test_copy_file2file.py index 05e0a32a9b40384a4b85a0517df6c07034b69e55..ae7fdaf950f5d39c2343749de3363e46ac5b58de 100644 --- a/hshetl/test/functional/test_copy_file2file.py +++ b/hshetl/test/functional/test_copy_file2file.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from hshetl.test.functional import TestBaseClass from hshetl.cli import Controller import sys diff --git a/hshetl/test/functional/test_eetl_file2sqliteWithMultipleIdentifiers.py b/hshetl/test/functional/test_eetl_file2sqliteWithMultipleIdentifiers.py index 6a528af946d120a1beb1a02cf9b202fcc5f57e4c..929c99e7eaee2bbd2284c5c160affa608fe2507f 100644 --- a/hshetl/test/functional/test_eetl_file2sqliteWithMultipleIdentifiers.py +++ b/hshetl/test/functional/test_eetl_file2sqliteWithMultipleIdentifiers.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from hshetl.test.functional import TestBaseClass from hshetl.cli import Controller import sys diff --git a/hshetl/test/functional/test_query_sqlquery2sqlite.py b/hshetl/test/functional/test_query_sqlquery2sqlite.py index a3262a381110159827303194f5259472f41994d5..312c7ab0423a8096407df35ab207011dba010a80 100644 --- a/hshetl/test/functional/test_query_sqlquery2sqlite.py +++ b/hshetl/test/functional/test_query_sqlquery2sqlite.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from hshetl.test.functional import TestBaseClass from hshetl.cli import Controller import sys diff --git a/hshetl/test/functional/test_sync_file2sqlite.py b/hshetl/test/functional/test_sync_file2sqlite.py index 6efaa1618d5da66764432fc7e3d88037717f4cf7..8303de2b5644fcbe66dde62be4e4ccd0a5399893 100644 --- a/hshetl/test/functional/test_sync_file2sqlite.py +++ b/hshetl/test/functional/test_sync_file2sqlite.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from hshetl.test.functional import TestBaseClass from hshetl.cli import Controller import sys diff --git a/hshetl/test/unit/__init__.py b/hshetl/test/unit/__init__.py index ee0aaa35cf9705851b0af4defedf132d27fc5da7..f9c6e6e98e2cb622d0a01689f5ae1b40624e1e3e 100644 --- a/hshetl/test/unit/__init__.py +++ b/hshetl/test/unit/__init__.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# -*- coding: utf-8 -*- from mock import Mock, call from hshetl import connectors @@ -7,10 +7,6 @@ fixtures = { [{'Name': u'ipsum', 'Email': u'Suspendisse.aliquet.sem@aliquetmolestie.com', 'Tel': u'(0818) 98639043', 'Title': u'erat.'}, {'Name': u'velit', 'Email': u'Vivamus.rhoncus.Donec@molestiedapibus.com', 'Tel': u'(057) 44995396', u'Title': 'sagittis'}, {'Name': u'velit', 'Email': u'varius.et.euismod@ipsum.ca', 'Tel': u'(039668) 676480', 'Title': u'nunc'}], - 'extractor_test_extractions_expected_result_mapped': - [{'foo': 'ipsum', 'Email': 'Suspendisse.aliquet.sem@aliquetmolestie.com', 'bar': '(0818) 98639043', 'Title': 'erat.'}, - {'foo': 'velit', 'Email': 'Vivamus.rhoncus.Donec@molestiedapibus.com', 'bar': '(057) 44995396', 'Title': 'sagittis'}, - {'foo': 'velit', 'Email': 'varius.et.euismod@ipsum.ca', 'bar': '(039668) 676480', 'Title': 'nunc'}], 'extractor_sql_alchemy_test_extraction_keys': ['Name', 'Email', 'Tel', 'Title'], 'extractor_sql_alchemy_test_extraction_values': @@ -25,24 +21,6 @@ fixtures = { ['cn=erat,ou=foo,o=bar', {'dn': 'cn=erat,ou=foo,o=bar', 'Name': ['velit'], 'Email': ['varius.et.euismod@ipsum.ca'], 'Tel': ['(039668) 676480'], 'Title': ['nunc']}] ], - 'example_synchronization_configuration': - {'synchronization': - {'source': {'connection': 'example_mysql', 'identifier': 'id', 'mapping': {'uid': 'id'}, 'query': 'select * from tx_fhhpersonen_domain_model_person'}, - 'target': {'connection': 'example_file', 'identifier': 'id', 'mapping': {'id': 'id'}, 'query': {}}, - 'entity': {'join': 'username', 'properties': {'username': 'string', 'id': 'int', 'lastname': 'string', 'firstname': 'string', 'email': 'string', 'telephone': 'string'}}} - }, - 'example_userdata_dictionary_1': - [{'id': 1, 'username': 'userna', 'firstname': 'user', 'lastname': 'name', 'email': 'user.name@test.com'}, - {'id': 2, 'username': 'userfo', 'firstname': 'user', 'lastname': 'foo', 'email': 'user.foo@test.com'}, - {'id': 3, 'username': 'userba', 'firstname': 'user', 'lastname': 'bar', 'email': 'user.bar@test.com'}, - {'id': 4, 'username': 'userdu', 'firstname': 'user', 'lastname': 'dummy', 'email': 'user.dummy@test.com'}, - ], - 'example_userdata_dictionary_2': - [{'id': 42, 'username': 'userna', 'firstname': 'user', 'lastname': 'name', 'telephone': '0123/010101'}, - {'id': 1337, 'username': 'userfo', 'firstname': 'user', 'lastname': 'foo', 'telephone': '0123/020202'}, - {'id': 815, 'username': 'userba', 'firstname': 'user', 'lastname': 'bar', 'telephone': '0123/030303'}, - {'id': 666, 'username': 'userdu', 'firstname': 'user', 'lastname': 'dummy', 'telephone': '0123/040404'}, - ], 'loader_csv_sample_file_content': 'Title,Tel,Name,Email\nerat.,123,ipsum,Suspendisse.aliquet.sem@aliquetmolestie.com\nsagittis,1234,velit,Vivamus.rhoncus.Donec@molestiedapibus.com\nnunc,5678,velit,varius.et.euismod@ipsum.ca\n''', 'loader_test_loading': [{'Name': u'ipsum', 'Email': u'Suspendisse.aliquet.sem@aliquetmolestie.com', 'Tel': 123, 'Title': u'erat.'}, @@ -56,125 +34,4 @@ fixtures = { [{'sn' : 'ipsum', 'cn' : 'erat', 'mail' : 'Suspendisse.aliquet.sem@aliquetmolestie.com', 'telephoneNumber' : '2456'}, {'sn' : 'velit', 'cn' : 'sagittis', 'mail' : 'Vivamus.rhoncus.Donec@molestiedapibus.co.uk', 'telephoneNumber' : '1234'}, {'sn' : 'velitt', 'cn' : 'foo', 'mail' : 'varius.et.euismod@ipsum.ca', 'telephoneNumber' : '5678'}], - 'config_non_valid_yaml': ''' -logging: - severity: DEBUG - file: Foo -''', - 'config_sample_yaml': ''' -logging: - severity: DEBUG - -connection: - localOracle: - uri: 'oracle://hshinfo:oracle@localhost:1521/xe' - localMySQL: - uri: 'mysql://root:root@localhost:3306/t3_fhh' - eDirectory: - uri: 'ldap://ldap.rz.fh-hannover.de' - extract: - base: 'ou=foo,o=bar' - scope: onelevel - sampleFile: - uri: 'file:///home/ahrensde/Work/fhhInfo.xml' - extract: - type: csv - load: - type: csv - -jobs: - - - type: synchronization - entity: - join: 'username' - properties: - id: 'string' - username: 'string' - name: 'string' - forename: 'string' - source: - connection: localOracle - query: 'select * from PERSON where rownum' - identifier: 'id' # This property represents these objects in the system, e.g. for relation mapping in the system - mapping: - id: 'identifier' - target: - connection: localMySQL - query: - from: 'tx_fhhpersonen_domain_model_person' - select: '*' - where: 'foo = ´bar´' - sql: 'select * from tx_fhhpersonen_domain_model_person' - identifier: 'uid' - mapping: - uid: 'identifier' -''', - 'minimal_valid_config_parsed': { - 'connection': { - 'from_source': { - 'uri': 'mysql://foo:bar@localhost:3306/foobar' - }, - 'into_source': { - 'uri': 'sqlite:///:memory:' - }, - }, - 'synchronization': { - 'entity': { - 'join': 'foo', - 'properties': { - 'bar': 'string' - } - }, - 'source': { - 'connection': 'from_source', - 'identifier': 'id' - }, - 'target': { - 'connection': 'into_source', - 'identifier': 'num' - } - } - }, - 'transform_job_config': { - 'description': 'fake transformation', - 'interactive': False, - 'type': 'transformation', - 'execute': ['insert', 'update', 'delete'], - 'entity': { - 'join': ['foo'], - 'properties': { - 'foo': 'string', - 'bar': 'int', - 'batz': 'bool'}}, - 'source': { - 'connection': 'from_source', - 'datetime_format': { - 'default_datetime_format': '%d.%m.%Y' - }, - 'identifier': 'foo', - 'query': 'SELECT * FROM foo', - 'mapping': {}, - 'transform_buffer_table': None}, - 'target': { - 'connection': 'into_source', - 'datetime_format': { - 'default_datetime_format': '%d.%m.%Y' - }, - 'identifier': 'bar', - 'query': 'SELECT * FROM foo', - 'mapping': {}, - 'transform_buffer_table': None}, - 'preparation': [ - {'type': 'sqlquery', - 'description': 'fake sql query job', - 'sql': 'SELECT * FROM foo'}], - 'preprocessing': [ - {'type': 'plsqlquery', - 'description': 'fake plsql query job', - 'sql': 'DROP TABLE foo'}], - 'postprocessing': [ - {'type': 'bash', - 'description': 'fake bash job', - 'command': 'ls /'}] - } } diff --git a/hshetl/test/unit/test_cli.py b/hshetl/test/unit/test_cli.py index 69b5d814d73d324187339a18b8b4fbb19271e36e..705a38ed1f3bf965e566d6a34c42370c56d9efc4 100644 --- a/hshetl/test/unit/test_cli.py +++ b/hshetl/test/unit/test_cli.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import unittest import sys import os diff --git a/hshetl/test/unit/test_connectors.py b/hshetl/test/unit/test_connectors.py index 5626f7eb2271336558e14d2045ea3ecb4e395b58..e57db3aabd621c9097ae7a1f02690f6cc74ed159 100644 --- a/hshetl/test/unit/test_connectors.py +++ b/hshetl/test/unit/test_connectors.py @@ -1,12 +1,9 @@ +# -*- coding: utf-8 -*- import unittest -from mock import Mock, call -from hshetl import connectors, extractors, loaders, entities -from hshetl.test import unit as test -import fake_filesystem +from mock import Mock +from hshetl import connectors import ldap import sqlalchemy -from sqlalchemy.engine import base as sqlalchemybase -import logging class TestManager(unittest.TestCase): diff --git a/hshetl/test/unit/test_entities.py b/hshetl/test/unit/test_entities.py index 8616235c7c8c1ae17b3f032bc20d3638c9afdaeb..4687bd156f5a3a8b01a51c15abfc9fe5bb975441 100644 --- a/hshetl/test/unit/test_entities.py +++ b/hshetl/test/unit/test_entities.py @@ -1,6 +1,5 @@ -# coding=utf-8 +# -*- coding: utf-8 -*- import unittest -import datetime from mock import Mock, call from hshetl.entities import PropertyConverter, Entity, Container, Record, COLLISION_HANDLING_BREAKNEVER, COLLISION_HANDLING_BREAKALL, COLLISION_HANDLING_BREAKJOIN, COLLISION_HANDLING_BREAKCONTAINER from hshetl.exc import UnknownNameReferenceException, ConfigurationException, DuplicatedSystemIdException, DuplicatedJoinIdException, ContainerPropertiesException, DataTypeConversionException diff --git a/hshetl/test/unit/test_extractors.py b/hshetl/test/unit/test_extractors.py index a0de53b112611677e98c35ca2e7cd594b47bcff3..86b60f9f19144a9ba38358c723006ac3df8f3703 100644 --- a/hshetl/test/unit/test_extractors.py +++ b/hshetl/test/unit/test_extractors.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import unittest import os from mock import Mock diff --git a/hshetl/test/unit/test_jobs.py b/hshetl/test/unit/test_jobs.py index bed47a6871ea5bcec77d1b51a0c40e5e343e831e..9b907505ee48de2579f6c552d52578afbbcf51e9 100644 --- a/hshetl/test/unit/test_jobs.py +++ b/hshetl/test/unit/test_jobs.py @@ -1,10 +1,10 @@ +# -*- coding: utf-8 -*- import unittest import fake_filesystem from mock import Mock, patch -from hshetl import entities, connectors, transformers -from hshetl.exc import ConfigurationException, UnknownNameReferenceException, JobException, \ - NotMatchingArgumentsException -from hshetl.jobs import Job, JobList, EntityJob, ConnectorJob, BashJob, QueryJob, ExtractionJob, TransformerJob, LoadJob, SqlQueryJob, PlSqlQueryJob +from hshetl import entities, connectors +from hshetl.exc import ConfigurationException, UnknownNameReferenceException, JobException +from hshetl.jobs import Job, JobList, EntityJob, ConnectorJob, BashJob, QueryJob, ExtractionJob, SqlQueryJob, PlSqlQueryJob from hshetl.extractors import AbstractExtractor class TestJobs(unittest.TestCase): diff --git a/hshetl/test/unit/test_loaders.py b/hshetl/test/unit/test_loaders.py index 1ef30af1b3d3a3deff1f54e5f9ae46e6f2f5b81f..c63cc4dccb98137dc389240199c29e72aabed686 100644 --- a/hshetl/test/unit/test_loaders.py +++ b/hshetl/test/unit/test_loaders.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import unittest from mock import Mock, call, patch from hshetl import loaders, connectors, entities diff --git a/hshetl/transformers.py b/hshetl/transformers.py index bdd7b5c6c26bedfe2165a3f3d5198f0764444767..e1228404902dec66001e4ecb821b9bb960e68b7a 100644 --- a/hshetl/transformers.py +++ b/hshetl/transformers.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- ''' Transformers handle operations on the result of :doc:`extractors` or other transformers - e.g. the comparision of two data sources is a transform operation.