Hi,
I'm trying to do data anonymization using faker package in Nifi for which I'm
using executescript processor.
The code references are from
http://go.databricks.com/hubfs/notebooks/blogs/Healthcare%20PII%20anonymization/Healthcare%20PII%20anonymization%20example.html
I have added my code here.
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
import unicodecsv as csv
from faker import Factory
from collections import defaultdict
import io
class TransformCallback(StreamCallback):
def _init_(self):
pass
def process(self,inputStream,outputStream):
inputdata = IOUtils.toString(inputStream,StandardCharsets.ISO_8859_1)
text = csv.DictReader(io.StringIO(inputdata))
output=[]
#text = inputdata.decode('utf-8')
faker = Factory.create()
names = defaultdict(faker.name)
emails = defaultdict(faker.email)
ssns = defaultdict(faker.ssn)
phone_numbers = defaultdict(faker.phone_number)
for row in text:
row["name"] = names[row["name"]]
row["email"] = emails[row["email"]]
row["ssn"] = ssns[row["ssn"]]
row["phone_number"] = phone_numbers[row["phone_number"]]
outputStream.write(text)
flowFile = session.get()
if flowFile != None:
flowFile = session.write(flowFile,TransformCallback())
session.transfer(flowFile, REL_SUCCESS)
session.commit()
I'm stuck with an error "TypeError:write():1st arg cant be coerced to
int,byte[] at <script> at line number 40"
Please give me some suggestions on how should I improve.I'm quite new to python
and executescript processor.
Thanks,
Vyshali