diff --git a/docs/examples/copy_faker2file/config.yml b/docs/examples/copy_faker2file/config.yml
index 76a95ebfc20ce2fdadbe3599fc1f1fd53530156a..9efefb39683a725d751e547bb9b94d5257746e25 100644
--- a/docs/examples/copy_faker2file/config.yml
+++ b/docs/examples/copy_faker2file/config.yml
@@ -19,3 +19,6 @@ jobs:
connector: !file
mode: w+
uri: 'file://./fakeperson.csv'
+ dialect: !dialect
+ lineterminator: "\n"
+ quoting: 0
\ No newline at end of file
diff --git a/docs/examples/copy_file2file/config.yml b/docs/examples/copy_file2file/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..335f9b634d0dc1d57f8322a59b8e96df47ae5d8d
--- /dev/null
+++ b/docs/examples/copy_file2file/config.yml
@@ -0,0 +1,40 @@
+---
+connectors:
+ -
+ !file
+ name: inputFile
+ uri: 'file://./data/input.csv'
+ encoding: 'utf-8'
+ -
+ !file
+ name: outputFile
+ uri: 'file://./output.csv'
+ mode: w+
+---
+entities:
+ -
+ !entity
+ name: person
+ properties:
+ surname: 'string'
+ name: 'string'
+ md5: 'string'
+---
+jobs:
+ -
+ !copy
+ entity: person
+ source:
+ extractor: !csvextractor
+ connector: inputFile
+ dialect: !dialect
+ delimiter: ':'
+ lineterminator: "\n"
+ quoting: 1
+ target:
+ loader: !csvloader
+ connector: outputFile
+ dialect: !dialect
+ delimiter: ','
+ lineterminator: "\n"
+ quoting: 0
\ No newline at end of file
diff --git a/docs/examples/copy_file2file/data/expected_output.csv b/docs/examples/copy_file2file/data/expected_output.csv
new file mode 100644
index 0000000000000000000000000000000000000000..da8d62cbd9c29ff9515ba070be0f129c333ed7f4
--- /dev/null
+++ b/docs/examples/copy_file2file/data/expected_output.csv
@@ -0,0 +1,101 @@
+surname,name,md5
+Corkery,Jack,a58bdfbcbcffac8dc6874d0b9b12332c
+Purdy,Moses,f4b65103b4348e0edcf29e1cc89b80d4
+Schmidt,Kacey,000534c2af80dcbfc86a3a6ce4114bd4
+Kling,Alyson,fa70e25665a1dcbbffdf15210e7f7919
+West,Etha,eb205d543e087790aef4f6c67dbd3c41
+Runte,Connie,22de2c6a03a850c1114b7abac1b58e88
+Dickens,Fae,49d67e6fbbe037d2eb872b00f948a243
+Windler,Cortez,5b5fc22d532afbf5666b1aa2e06e24a0
+Lueilwitz,Stephanie,278aba733fd2538d249af23f491e1408
+Crooks,Lyla,34df319ae5078c623e946331efd39613
+Abshire,Golden,e1d1c4aa6493f4e03a7c3da441607e2b
+Mann,Clay,383dae9f9e84e22c08bf1876ec2d5f34
+Volkman,Dave,d8f9bff13180516d5c855f81d1aee1a6
+Nienow,Sterling,d769d497f2dddbda623541d2654e22c3
+Lubowitz,Vivien,5954f5a39463a1537992e18c434dff6d
+Raynor,Opal,87237b65aed44f8923a0f88a1cbf7293
+Green,Lamont,929a7d0a4bd4d8a0cf3333f982df6913
+Heaney,Ruby,46e54e2808b2703d3b7fef4f2b32ef35
+Kshlerin,Noemy,05d7bccb695d34c32ca1d6c57248f452
+Toy,Maci,24b0ebc4f2211b46584f88805f2d73b0
+Stamm,Sylvia,63b8ec77a11b1fdef2ebad03390d0449
+Runolfsson,Jonathan,9711e4233a5ec0829f18565518eec660
+McCullough,Caitlyn,d5426f6bf7d631c7c6878cd291dbcd00
+Stoltenberg,Casimir,8bcb701ccfbba3879755842dddd20713
+Purdy,Bo,7b0c94073b15ef36a8f484b8a7018eeb
+Mertz,Dahlia,e28a885e766a45d33cad2651e33c7c9c
+Mraz,Mac,2b4b5ee8ef339da174400ae03964f596
+Mante,Estella,dd10d779fb9f4b87e5c7a4f44cce3a3c
+Buckridge,Clay,9c88404db46c4783ee2a7ff886acc2e1
+Reichel,Corene,327f72f579a4bcaf0c7fe497f8547505
+Weber,Gregoria,742df07ecd1093e0857d5198cec56116
+Mosciski,Magdalen,2dea2861226eb7db026d5b9168958abc
+Littel,May,f5b5965cd5f502bfd1069a676c2fb315
+Schmeler,Savion,69683238030d211737dfe5ba68619081
+Donnelly,Delpha,ddabbe379f9e7f8f70da8777415c39f8
+Connelly,Lucio,9426741c677091d3f3d0938465121eea
+Bergstrom,Tyshawn,4b04f7196b71ff9d71a8953c1fee33a1
+Lind,Jaren,ee400c603fd1b8bcdac2d4c4f777b040
+Klein,Clementine,79904e1168fa4949d080862b61da45ec
+Fritsch,Burnice,93270a9e12d5ab4cf377c4aacf399ff4
+Jenkins,Tom,6282403a66445bc9794cec6c6e97ddb4
+Wuckert,Gregorio,42b9b3806dd77f1404cbab764fd7e1dc
+Quitzon,Clarissa,2cf487a510eab609dd7b09583a080582
+Hudson,Donna,0ee2034f98c05dfa6097ec557696e0a1
+Gutmann,Trent,4d4f6ae782a04b6f46761230e7602215
+Luettgen,Darion,b8a1e21cc37eb6fc756d4c6993a3a321
+Block,Eliane,f6cd4c2a7532f01e111cac76e23534e8
+Muller,Rozella,8d12d10037e26a135d879c567a176448
+Herman,Margie,5a2efccfee605d2a479cee9fe58ec099
+Rempel,Christina,8435faf7c096742ca19283c7a0bcf093
+Bahringer,Lia,e4e04a5fd33bf1cbe4d81abd7d0b5303
+Schmidt,Bud,fe29989f5b9bbd0c3eee17f298c3db47
+Abernathy,Murray,a8b2f7e7325d498b0701c153320d52ac
+Labadie,Chance,763279919c64c3c184d616dbbe636dbc
+Kihn,Craig,984ab5d01a96e86b832d0816d5ae6b9c
+Spinka,Katherine,f022834ace9100a2ea6e80ee15c3c078
+Muller,Loraine,edc2fab54280754aa617ebcfd7623658
+McDermott,Krystina,fc8727640a55a38ae0aec5a5d12405ea
+Gusikowski,Sally,75ba9c800724ca29fb354b05e1b1821c
+Ernser,Glenda,11835f321233f1057e95094b6b571183
+Quigley,Sabrina,a4fa9f325ddffc7ba9754cd2cc43ad6e
+Beahan,Ila,f9925e696406e07a66c6db00db4465ba
+Crist,Bennett,baea14e62ac3738eebe57d9d6ffdf648
+Muller,Austin,aab3f778e052623d5368a361b10a027a
+Wintheiser,Brooklyn,9f4fd24c761d091aa2ec6b3d8cc964e1
+McCullough,Cecil,1efdbe65338d162a129f335ac9cb7ca7
+Rippin,Genesis,5102f849dd69094b1ce898e1c1d2caef
+Braun,Robb,d58ed4a07e4d0f9c7f1e85346e375653
+Buckridge,Taylor,298088519c3a144ac632ac0a4f96f5ed
+Volkman,Earline,1a294690883cc30b31ef8c490cf96eb3
+Satterfield,Faustino,685151fc6da4a0e42b622075783d189a
+Ankunding,Jody,c50e4a093af50c2c3bdc5a021e42580a
+Cassin,London,0031960bcb021f9802f4b977afb9de36
+Krajcik,Johnathon,11d5a432eafdcf3d864a614b101a5362
+Beahan,Stevie,af6c944f3beab59524c9a573a7ab8c07
+McGlynn,Aletha,e40055946d8b214fe43f691292be5212
+Windler,Obie,0613db2754bf924189f23d7c574bdef1
+Batz,Kody,f4d9decf44266c5302f67ff6bdc63cb4
+Yost,Adella,408b3f76b9b7dfc9d11ee406d9387e8e
+Thompson,Blanche,2b6af6358ba1bc3a8641f374319e0d90
+Haag,Shannon,05372e8ae82a0d4c787738d1ac6772db
+Watsica,Claudie,f47b376d7935c11be5f9ab7be80f5141
+Heaney,Nya,54bfdcd6cd4f17ec69b59481b58e675c
+Weber,Mervin,6e346e7558dbe83c3af835b6ce83a0cc
+Haley,Conner,19cbbdb21e61eb5550d1cc74801645a1
+O'Reilly,Imogene,e9690b84774251c14014f4aa390d48fe
+Armstrong,Sylvan,459631981a96ff6cd490c78fb951e718
+Roberts,Rosalinda,e8bf489c2f5fe285de626c024f156553
+Lindgren,Wellington,0348bbd0f013b2aed90106c431655c45
+Kunde,Hermann,ca59c4ee04587702bd8496bf6da9f8be
+Dietrich,Chesley,ee36f44db1abfc8a14aa220c93f6053d
+Emmerich,Eldred,9743ca1cdd40b5697f3f0f52b91c8ec7
+White,Devan,c0f62e869f6d4e26a61542bdeea9204c
+Gleason,Russell,6e001b6dd1c043b172048114776a06a7
+Olson,Augusta,a2a7aca03a83df3144ea6ea553af0eef
+Dach,Lewis,712cba7f187e5e5b8811519b9c7422f5
+Rutherford,Josh,de66c30e84e202292b5015d65b3a192a
+Bahringer,Roel,4437b8896a7e0aa9b240544ccf0116a5
+Kunze,Xzavier,592783b0b3aa7bb7daece121c1d18d66
+Hermann,Tod,fd452a8ff12cbe2919925b4100625717
diff --git a/docs/examples/copy_file2file/data/input.csv b/docs/examples/copy_file2file/data/input.csv
new file mode 100644
index 0000000000000000000000000000000000000000..690cfecb2d33ef118d50147f30487a24593513d8
--- /dev/null
+++ b/docs/examples/copy_file2file/data/input.csv
@@ -0,0 +1,101 @@
+"surname":"name":"md5"
+"Corkery":"Jack":"a58bdfbcbcffac8dc6874d0b9b12332c"
+"Purdy":"Moses":"f4b65103b4348e0edcf29e1cc89b80d4"
+"Schmidt":"Kacey":"000534c2af80dcbfc86a3a6ce4114bd4"
+"Kling":"Alyson":"fa70e25665a1dcbbffdf15210e7f7919"
+"West":"Etha":"eb205d543e087790aef4f6c67dbd3c41"
+"Runte":"Connie":"22de2c6a03a850c1114b7abac1b58e88"
+"Dickens":"Fae":"49d67e6fbbe037d2eb872b00f948a243"
+"Windler":"Cortez":"5b5fc22d532afbf5666b1aa2e06e24a0"
+"Lueilwitz":"Stephanie":"278aba733fd2538d249af23f491e1408"
+"Crooks":"Lyla":"34df319ae5078c623e946331efd39613"
+"Abshire":"Golden":"e1d1c4aa6493f4e03a7c3da441607e2b"
+"Mann":"Clay":"383dae9f9e84e22c08bf1876ec2d5f34"
+"Volkman":"Dave":"d8f9bff13180516d5c855f81d1aee1a6"
+"Nienow":"Sterling":"d769d497f2dddbda623541d2654e22c3"
+"Lubowitz":"Vivien":"5954f5a39463a1537992e18c434dff6d"
+"Raynor":"Opal":"87237b65aed44f8923a0f88a1cbf7293"
+"Green":"Lamont":"929a7d0a4bd4d8a0cf3333f982df6913"
+"Heaney":"Ruby":"46e54e2808b2703d3b7fef4f2b32ef35"
+"Kshlerin":"Noemy":"05d7bccb695d34c32ca1d6c57248f452"
+"Toy":"Maci":"24b0ebc4f2211b46584f88805f2d73b0"
+"Stamm":"Sylvia":"63b8ec77a11b1fdef2ebad03390d0449"
+"Runolfsson":"Jonathan":"9711e4233a5ec0829f18565518eec660"
+"McCullough":"Caitlyn":"d5426f6bf7d631c7c6878cd291dbcd00"
+"Stoltenberg":"Casimir":"8bcb701ccfbba3879755842dddd20713"
+"Purdy":"Bo":"7b0c94073b15ef36a8f484b8a7018eeb"
+"Mertz":"Dahlia":"e28a885e766a45d33cad2651e33c7c9c"
+"Mraz":"Mac":"2b4b5ee8ef339da174400ae03964f596"
+"Mante":"Estella":"dd10d779fb9f4b87e5c7a4f44cce3a3c"
+"Buckridge":"Clay":"9c88404db46c4783ee2a7ff886acc2e1"
+"Reichel":"Corene":"327f72f579a4bcaf0c7fe497f8547505"
+"Weber":"Gregoria":"742df07ecd1093e0857d5198cec56116"
+"Mosciski":"Magdalen":"2dea2861226eb7db026d5b9168958abc"
+"Littel":"May":"f5b5965cd5f502bfd1069a676c2fb315"
+"Schmeler":"Savion":"69683238030d211737dfe5ba68619081"
+"Donnelly":"Delpha":"ddabbe379f9e7f8f70da8777415c39f8"
+"Connelly":"Lucio":"9426741c677091d3f3d0938465121eea"
+"Bergstrom":"Tyshawn":"4b04f7196b71ff9d71a8953c1fee33a1"
+"Lind":"Jaren":"ee400c603fd1b8bcdac2d4c4f777b040"
+"Klein":"Clementine":"79904e1168fa4949d080862b61da45ec"
+"Fritsch":"Burnice":"93270a9e12d5ab4cf377c4aacf399ff4"
+"Jenkins":"Tom":"6282403a66445bc9794cec6c6e97ddb4"
+"Wuckert":"Gregorio":"42b9b3806dd77f1404cbab764fd7e1dc"
+"Quitzon":"Clarissa":"2cf487a510eab609dd7b09583a080582"
+"Hudson":"Donna":"0ee2034f98c05dfa6097ec557696e0a1"
+"Gutmann":"Trent":"4d4f6ae782a04b6f46761230e7602215"
+"Luettgen":"Darion":"b8a1e21cc37eb6fc756d4c6993a3a321"
+"Block":"Eliane":"f6cd4c2a7532f01e111cac76e23534e8"
+"Muller":"Rozella":"8d12d10037e26a135d879c567a176448"
+"Herman":"Margie":"5a2efccfee605d2a479cee9fe58ec099"
+"Rempel":"Christina":"8435faf7c096742ca19283c7a0bcf093"
+"Bahringer":"Lia":"e4e04a5fd33bf1cbe4d81abd7d0b5303"
+"Schmidt":"Bud":"fe29989f5b9bbd0c3eee17f298c3db47"
+"Abernathy":"Murray":"a8b2f7e7325d498b0701c153320d52ac"
+"Labadie":"Chance":"763279919c64c3c184d616dbbe636dbc"
+"Kihn":"Craig":"984ab5d01a96e86b832d0816d5ae6b9c"
+"Spinka":"Katherine":"f022834ace9100a2ea6e80ee15c3c078"
+"Muller":"Loraine":"edc2fab54280754aa617ebcfd7623658"
+"McDermott":"Krystina":"fc8727640a55a38ae0aec5a5d12405ea"
+"Gusikowski":"Sally":"75ba9c800724ca29fb354b05e1b1821c"
+"Ernser":"Glenda":"11835f321233f1057e95094b6b571183"
+"Quigley":"Sabrina":"a4fa9f325ddffc7ba9754cd2cc43ad6e"
+"Beahan":"Ila":"f9925e696406e07a66c6db00db4465ba"
+"Crist":"Bennett":"baea14e62ac3738eebe57d9d6ffdf648"
+"Muller":"Austin":"aab3f778e052623d5368a361b10a027a"
+"Wintheiser":"Brooklyn":"9f4fd24c761d091aa2ec6b3d8cc964e1"
+"McCullough":"Cecil":"1efdbe65338d162a129f335ac9cb7ca7"
+"Rippin":"Genesis":"5102f849dd69094b1ce898e1c1d2caef"
+"Braun":"Robb":"d58ed4a07e4d0f9c7f1e85346e375653"
+"Buckridge":"Taylor":"298088519c3a144ac632ac0a4f96f5ed"
+"Volkman":"Earline":"1a294690883cc30b31ef8c490cf96eb3"
+"Satterfield":"Faustino":"685151fc6da4a0e42b622075783d189a"
+"Ankunding":"Jody":"c50e4a093af50c2c3bdc5a021e42580a"
+"Cassin":"London":"0031960bcb021f9802f4b977afb9de36"
+"Krajcik":"Johnathon":"11d5a432eafdcf3d864a614b101a5362"
+"Beahan":"Stevie":"af6c944f3beab59524c9a573a7ab8c07"
+"McGlynn":"Aletha":"e40055946d8b214fe43f691292be5212"
+"Windler":"Obie":"0613db2754bf924189f23d7c574bdef1"
+"Batz":"Kody":"f4d9decf44266c5302f67ff6bdc63cb4"
+"Yost":"Adella":"408b3f76b9b7dfc9d11ee406d9387e8e"
+"Thompson":"Blanche":"2b6af6358ba1bc3a8641f374319e0d90"
+"Haag":"Shannon":"05372e8ae82a0d4c787738d1ac6772db"
+"Watsica":"Claudie":"f47b376d7935c11be5f9ab7be80f5141"
+"Heaney":"Nya":"54bfdcd6cd4f17ec69b59481b58e675c"
+"Weber":"Mervin":"6e346e7558dbe83c3af835b6ce83a0cc"
+"Haley":"Conner":"19cbbdb21e61eb5550d1cc74801645a1"
+"O'Reilly":"Imogene":"e9690b84774251c14014f4aa390d48fe"
+"Armstrong":"Sylvan":"459631981a96ff6cd490c78fb951e718"
+"Roberts":"Rosalinda":"e8bf489c2f5fe285de626c024f156553"
+"Lindgren":"Wellington":"0348bbd0f013b2aed90106c431655c45"
+"Kunde":"Hermann":"ca59c4ee04587702bd8496bf6da9f8be"
+"Dietrich":"Chesley":"ee36f44db1abfc8a14aa220c93f6053d"
+"Emmerich":"Eldred":"9743ca1cdd40b5697f3f0f52b91c8ec7"
+"White":"Devan":"c0f62e869f6d4e26a61542bdeea9204c"
+"Gleason":"Russell":"6e001b6dd1c043b172048114776a06a7"
+"Olson":"Augusta":"a2a7aca03a83df3144ea6ea553af0eef"
+"Dach":"Lewis":"712cba7f187e5e5b8811519b9c7422f5"
+"Rutherford":"Josh":"de66c30e84e202292b5015d65b3a192a"
+"Bahringer":"Roel":"4437b8896a7e0aa9b240544ccf0116a5"
+"Kunze":"Xzavier":"592783b0b3aa7bb7daece121c1d18d66"
+"Hermann":"Tod":"fd452a8ff12cbe2919925b4100625717"
diff --git a/docs/examples/copy_sqlite2file/config.yml b/docs/examples/copy_sqlite2file/config.yml
index bd1537a304c9337127b1e313dae3a89f48fd1f84..187183026b99479b8f98f39713767a16f496957e 100644
--- a/docs/examples/copy_sqlite2file/config.yml
+++ b/docs/examples/copy_sqlite2file/config.yml
@@ -17,3 +17,7 @@ jobs:
connector: !file
mode: w+
uri: 'file://./person.csv'
+ dialect: !dialect
+ delimiter: ','
+ lineterminator: "\n"
+ quoting: 0
diff --git a/docs/source/index.rst b/docs/source/index.rst
index c2845da8326b52b2293ee6f956025b5bab3504fb..7b9f9f11b388b52b4d0593ad626f65c67a5c6462 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -20,5 +20,6 @@ hshetl documentation
extractors
transformers
loaders
+ utilities
develop
diagramms
diff --git a/docs/source/utilities.rst b/docs/source/utilities.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f81e067649596b78a258af8645c1584502425be7
--- /dev/null
+++ b/docs/source/utilities.rst
@@ -0,0 +1,4 @@
+utilities
+=========
+
+.. autoclass:: hshetl.Dialect
\ No newline at end of file
diff --git a/hshetl/__init__.py b/hshetl/__init__.py
index 1da01490c1b2eb523d489bd96504fd832de39871..c37e00465eb7438057a614c44deb0ea1a5d743d5 100644
--- a/hshetl/__init__.py
+++ b/hshetl/__init__.py
@@ -9,6 +9,7 @@ import yaml
import logging
import functools
import inspect
+import csv
from collections import OrderedDict
from exc import NotMatchingYAMLTagException, NotMatchingArgumentsException, ConfigurationException, UnknownNameReferenceException, DuplicateNameException, NameResolverException
@@ -243,6 +244,63 @@ class AbstractRepository(OrderedDict, object):
return 'AbtractRepository: ' + super(AbstractRepository, self).__repr__()
+@yamlify
+class Dialect(csv.Dialect):
+    '''CSV formatting rules that can be declared in the YAML configuration.
+
+    Every constructor argument is stored under the attribute of the same
+    name. Only ``lineterminator`` is converted with ``str`` before it is
+    stored; all other values are kept exactly as given.
+
+    .. seealso::
+        http://docs.python.org/2/library/csv.html#dialects-and-formatting-parameters
+
+    :param str delimiter:
+        Default ','
+    :param boolean doublequote:
+        Default True
+    :param str escapechar:
+        Default None
+    :param str lineterminator:
+        Default ``\\n``
+    :param str quotechar:
+        Default '"'
+    :param boolean skipinitialspace:
+        Default False
+    :param int quoting:
+        Default 0
+
+    Example yaml configuration:
+
+    .. code-block:: yaml
+
+        loader: !csvloader
+            connector: outputFile
+            dialect: !dialect
+                delimiter: ','
+                lineterminator: "\\n"
+                quoting: 0
+
+    .. note::
+        Escape sequences such as ``\\n`` must be written in double quotes.
+
+    '''
+
+    yaml_tag = u'!dialect'
+
+    def __init__(self, delimiter=',', doublequote=True, escapechar=None,
+                 lineterminator='\n', quotechar='"', skipinitialspace=False,
+                 quoting=0):
+        # Quoting behaviour.
+        self.quoting = quoting
+        self.quotechar = quotechar
+        self.doublequote = doublequote
+        self.escapechar = escapechar
+        # Field and record separation.
+        self.delimiter = delimiter
+        self.skipinitialspace = skipinitialspace
+        self.lineterminator = str(lineterminator)
+
+
import entities
from entities import *
import connectors
diff --git a/hshetl/extractors.py b/hshetl/extractors.py
index 43d5458bb82e3fd468298bbb4e52e64e989be112..bffc6e5fc0ad2246ee252789b5f8762b7272b5e6 100644
--- a/hshetl/extractors.py
+++ b/hshetl/extractors.py
@@ -23,8 +23,9 @@ An extractor inside of a YAML job definition with more parameters:
'''
import logging
import csv
+from uuid import uuid4 as uuid
import ldap
-from hshetl import yamlify, NameResolver
+from hshetl import yamlify, NameResolver, Dialect
from faker import Factory
from exc import ExtractorException, ConfigurationException
from ldap.controls import SimplePagedResultsControl
@@ -271,8 +272,8 @@ class CsvExtractor(AbstractExtractor):
Construction:
- :param str delimiter:
- The delimiter used in the csv.
+ :param hshetl.Dialect dialect:
+ The CSV dialect that will be used to parse the CSV data.
:param dict ``**kwargs``:
Accepts parameters from :class:`.AbstractExtractor`.
@@ -282,22 +283,24 @@ class CsvExtractor(AbstractExtractor):
!csvextractor
connector: myfile
- delimiter: ','
+ dialect: !dialect
+ lineterminator: "\\n"
+ quoting: 0
'''
yaml_tag = u'!csvextractor'
'''Use this tag inside your YAML configuration, to define this extractor.'''
- def __init__(self, delimiter = ',', *args, **kwargs):
+ def __init__(self, dialect = Dialect(), *args, **kwargs):
'''Initializes the CsvExtrator and cares about the given configuration.
You can define the delimiter in the configuration.
'''
super(CsvExtractor, self).__init__(*args, **kwargs)
- self.delimiter = delimiter
- '''The delimiter used in your CSV data.'''
+ self.dialect = dialect
+ self._resolve_dialect()
def can_execute(self, connector):
'''Defines which connector can be handled by this extractor.'''
@@ -312,7 +315,7 @@ class CsvExtractor(AbstractExtractor):
result = []
try:
with self.connector as connection:
- reader = csv.DictReader(connection, delimiter = self.delimiter)
+ reader = csv.DictReader(connection, dialect=self.dialect_name)
for row in reader:
for key, value in row.iteritems():
row[key] = value.decode(self.connector.encoding)
@@ -321,6 +324,14 @@ class CsvExtractor(AbstractExtractor):
logging.warn('Can not open file {}, this may be ok, if you will write into this file.'.format(self.connector.path))
return result
+    def _resolve_dialect(self):
+        '''Determine the dialect name that is handed to the csv reader.
+
+        A string in ``self.dialect`` is taken as a dialect name (for example
+        ``excel``) and used as is. Any other value is registered with the
+        csv module under a freshly generated UUID name.
+        '''
+        if isinstance(self.dialect, str):
+            # Without this assignment ``dialect_name`` stays undefined and
+            # reading the file fails with an AttributeError.
+            self.dialect_name = self.dialect
+        else:
+            self.dialect_name = unicode(uuid())
+            csv.register_dialect(self.dialect_name, self.dialect)
+
+
@yamlify
class FakerExtractor(AbstractExtractor):
diff --git a/hshetl/loaders.py b/hshetl/loaders.py
index 65e413b3ff94c55f440d86a0aec2da5a364a79c6..3e8cddf61d50662da2348f237582f17034c29342 100644
--- a/hshetl/loaders.py
+++ b/hshetl/loaders.py
@@ -25,9 +25,11 @@ A loader inside of a YAML job definition with more parameters:
from __future__ import print_function
import logging
import codecs
+import csv
+from uuid import uuid4 as uuid
from ldap import modlist
from sqlalchemy import Table, MetaData
-from hshetl import yamlify, NameResolver
+from hshetl import yamlify, NameResolver, Dialect
from connectors import AbstractConnector, connector_repository
from hshetl.exc import LoaderException, ConfigurationException
@@ -105,28 +107,31 @@ class CsvLoader(AbstractLoader):
Construction:
- :param str delimiter:
- The delimiter that will be used in the csv.
+ :param hshetl.Dialect dialect:
+ The CSV dialect that will be used to format the CSV output.
:param dict ``**kwargs``:
Accepts parameters from :class:`.AbstractExtractor`.
YAML definition sample:
.. code-block:: yaml
-
+
!csvloader
connector: myfile
- delimiter: ','
+ dialect: !dialect
+ lineterminator: "\\n"
+ quoting: 0
'''
yaml_tag = u'!csvloader'
'''Use this tag inside your YAML configuration, to define this loader.'''
- def __init__(self, delimiter = ',', **kwargs):
+ def __init__(self, dialect = Dialect(), **kwargs):
super(CsvLoader, self).__init__(**kwargs)
- self.delimiter = delimiter
- '''The delimiter used to separate columns in the CSV.'''
+ self.dialect = dialect
+ self._resolve_dialect()
+
def _update(self, data = []):
'''This loader can not update records. Therefore only empty data is allowed'''
@@ -137,28 +142,35 @@ class CsvLoader(AbstractLoader):
'''Writes data in csv format'''
if data == []: logging.info('Attempting to write nothing into the file: ' + self.connector.path); return
else: logging.info('Attempting to write ' + repr(len(data)) + ' records into the : ' + self.connector.path)
- header = self.delimiter.join(data[0].to_dict().iterkeys())
- logging.info('Writing header: ' + header)
+ header = data[0].to_dict().keys()
+ logging.info('Writing header: ' + str(header))
+
with self.connector as connection:
# if self.connector.encoding: connection.write(codecs.BOM_UTF8)
- print(header.encode(self.connector.encoding), file = connection)
+ writer = csv.writer(connection, self.dialect_name)
+ writer.writerow(header)
for record in data:
- line = u''
+ line = []
for prop in record.to_dict().itervalues():
if type(prop) == list:
- line += '|'.join(map(lambda x: str(x), prop))
+ line.append(u'|'.join(map(lambda x: str(x), prop)))
else:
- line += unicode(prop)
- line += self.delimiter
- line = line[:-1]
- logging.debug('Writing line: ' + line)
- print(line.encode(self.connector.encoding), file = connection)
+ line.append(unicode(prop))
+ logging.debug('Writing line: ' + str(line))
+ writer.writerow(line)
def _delete(self, data = []):
'''This loader can not delete records in the target. Therefore only empty data is allowed'''
if data == []: return
raise LoaderException('Deletion is not available in the CsvLoader')
+    def _resolve_dialect(self):
+        '''Determine the dialect name that is handed to the csv writer.
+
+        A string in ``self.dialect`` is taken as a dialect name (for example
+        ``excel``) and used as is. Any other value is registered with the
+        csv module under a freshly generated UUID name.
+        '''
+        if isinstance(self.dialect, str):
+            # Without this assignment ``dialect_name`` stays undefined and
+            # writing the file fails with an AttributeError.
+            self.dialect_name = self.dialect
+        else:
+            self.dialect_name = unicode(uuid())
+            csv.register_dialect(self.dialect_name, self.dialect)
+
@yamlify
class SqlAlchemyLoader(AbstractLoader):
diff --git a/hshetl/test/functional/test_copy_faker2file.py b/hshetl/test/functional/test_copy_faker2file.py
index 4cd87e8adcbf6a61f8cac42cabd7aec0583ce4de..0392669e5588c254c09ff9a173f2b3f9c808a955 100644
--- a/hshetl/test/functional/test_copy_faker2file.py
+++ b/hshetl/test/functional/test_copy_faker2file.py
@@ -15,4 +15,4 @@ class TestFakerCopyJob(TestBaseClass):
def test_job(self):
Controller().configure().execute()
- self.assertTrue(filecmp.cmp(self.test_working_directory + 'data/expected_fakeperson.csv', self.test_working_directory + 'fakeperson.csv'))
\ No newline at end of file
+ self.assertTrue(filecmp.cmp(self.test_working_directory + 'data/expected_fakeperson.csv', self.test_working_directory + 'fakeperson.csv'))
diff --git a/hshetl/test/functional/test_copy_file2file.py b/hshetl/test/functional/test_copy_file2file.py
new file mode 100644
index 0000000000000000000000000000000000000000..05e0a32a9b40384a4b85a0517df6c07034b69e55
--- /dev/null
+++ b/hshetl/test/functional/test_copy_file2file.py
@@ -0,0 +1,18 @@
+from hshetl.test.functional import TestBaseClass
+from hshetl.cli import Controller
+import sys
+import filecmp
+
+
+class TestCopyFileJob(TestBaseClass):
+    '''Functional test for the ``copy_file2file`` example.'''
+
+    example_dir_name = 'copy_file2file'
+
+    def setUp(self):
+        '''Run the base set-up, then set the command line arguments to
+        ``hshetl -d <test working directory>``.'''
+        super(TestCopyFileJob, self).setUp()
+        sys.argv = ['hshetl', '-d', self.test_working_directory]
+
+    def test_job(self):
+        '''Execute the configured job and compare ``output.csv`` with
+        ``data/expected_output.csv``.'''
+        Controller().configure().execute()
+        expected_path = self.test_working_directory + 'data/expected_output.csv'
+        actual_path = self.test_working_directory + 'output.csv'
+        self.assertTrue(filecmp.cmp(expected_path, actual_path))
diff --git a/hshetl/test/unit/test_extractors.py b/hshetl/test/unit/test_extractors.py
index 55d061d7b3a6c4dbc4967d35dbc6eeef9743adb7..a0de53b112611677e98c35ca2e7cd594b47bcff3 100644
--- a/hshetl/test/unit/test_extractors.py
+++ b/hshetl/test/unit/test_extractors.py
@@ -1,7 +1,7 @@
import unittest
import os
from mock import Mock
-from hshetl import extractors, connectors
+from hshetl import extractors, connectors, Dialect
from hshetl.test import unit as test
from hshetl.exc import ExtractorException, ConfigurationException
from sqlalchemy.engine import base as sqlalchemybase
@@ -30,7 +30,7 @@ class TestCsvExtractor(unittest.TestCase):
file_connector.connection = open(os.path.dirname(__file__) + '/../fixture/short_sample_semicolon.csv', 'r')
file_connector.__enter__ = Mock(return_value = file_connector.connection)
file_connector.__exit__ = Mock()
- csv_extractor = extractors.CsvExtractor(delimiter = ';', connector = file_connector)
+ csv_extractor = extractors.CsvExtractor(dialect = Dialect(delimiter = ';'), connector = file_connector)
result = csv_extractor.execute()
self.assertEqual(result, test.fixtures['extractor_test_extractions_expected_result'])