Nischay created SPARK-20339:
-------------------------------
Summary: Issue in regex_replace in Apache Spark Java
Key: SPARK-20339
URL: https://issues.apache.org/jira/browse/SPARK-20339
Project: Spark
Issue Type: Question
Components: Java API, Spark Core, SQL
Affects Versions: 2.1.0
Reporter: Nischay
We are currently facing couple of issues<br>
1. "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator"
grows beyond 64 KB".<br>
2. "java.lang.StackOverflowError"
The first issue is reported as a Major bug in Jira of Apache spark
https://issues.apache.org/jira/browse/SPARK-18492
We got these issues by the following program. We are trying to replace the
Manufacturer name by its equivalent alternate name,
These issues occur only when we have Huge number of alternate names to replace,
for small number of replacements it works with no issues.<br><br>
`dataFileContent=dataFileContent.withColumn("ManufacturerSource",
regexp_replace(col("ManufacturerSource"),str,manufacturerNames.get(str).toString()));`
Kindly suggest us an alternative method or a solution to go around this problem.
Hashtable manufacturerNames = new Hashtable();
Enumeration names;
String str;
double bal;
manufacturerNames.put("Allen","Apex Tool Group");
manufacturerNames.put("Armstrong","Apex Tool Group");
manufacturerNames.put("Campbell","Apex Tool Group");
manufacturerNames.put("Lubriplate","Apex Tool Group");
manufacturerNames.put("Delta","Apex Tool Group");
manufacturerNames.put("Gearwrench","Apex Tool Group");
manufacturerNames.put("H.K. Porter","Apex Tool
Group");
manufacturerNames.put("Jacobs","Apex Tool Group");
manufacturerNames.put("Jobox","Apex Tool Group");
manufacturerNames.put("Lufkin","Apex Tool Group");
manufacturerNames.put("Nicholson","Apex Tool Group");
manufacturerNames.put("Plumb","Apex Tool Group");
manufacturerNames.put("Wiss","Apex Tool Group");
manufacturerNames.put("Covert","Apex Tool Group");
manufacturerNames.put("Apex-Geta","Apex Tool Group");
manufacturerNames.put("Dotco-Airetool","Apex Tool
Group");
manufacturerNames.put("Apex","Apex Tool Group");
manufacturerNames.put("Cleco","Apex Tool Group");
manufacturerNames.put("Dotco","Apex Tool Group");
manufacturerNames.put("Erem","Apex Tool Group");
manufacturerNames.put("Master Power","Apex Tool
Group");
manufacturerNames.put("Recoules Quackenbush","Apex
Tool Group");
manufacturerNames.put("Apex-Utica","Apex Tool Group");
manufacturerNames.put("Weller","Apex Tool Group");
manufacturerNames.put("Xcelite","Apex Tool Group");
manufacturerNames.put("JET","JPW Industries");
manufacturerNames.put("Powermatic","JPW Industries");
manufacturerNames.put("Wilton","JPW Industries");
manufacturerNames.put("Black+Decker","StanleyBlack &
Decker");
manufacturerNames.put("BlackhawkBy
Proto","StanleyBlack & Decker");
manufacturerNames.put("Bostitch","StanleyBlack &
Decker");
manufacturerNames.put("Cribmaster","StanleyBlack &
Decker");
manufacturerNames.put("DeWALT","StanleyBlack &
Decker");
manufacturerNames.put("Expert (Hand Tools &
Accessories); Expert (Wrenches)","StanleyBlack & Decker");
manufacturerNames.put("Facom","StanleyBlack &
Decker");
manufacturerNames.put("Mac","StanleyBlack & Decker");
manufacturerNames.put("Lista","StanleyBlack &
Decker");
manufacturerNames.put("Porter-Cable","StanleyBlack &
Decker");
manufacturerNames.put("Powers","StanleyBlack &
Decker");
manufacturerNames.put("Proto","StanleyBlack &
Decker");
manufacturerNames.put("Stanley","StanleyBlack &
Decker");
manufacturerNames.put("Vidmar","StanleyBlack &
Decker");
manufacturerNames.put("Abell-Howe","Columbus
McKinnon");
manufacturerNames.put("Budgit Hoists","Columbus
McKinnon");
manufacturerNames.put("Cady Lifters","Columbus
McKinnon");
manufacturerNames.put("Chester Hoist","Columbus
McKinnon");
manufacturerNames.put("CM","Columbus McKinnon");
manufacturerNames.put("Coffing Hoists","Columbus
McKinnon");
manufacturerNames.put("Duff Norton","Columbus
McKinnon");
manufacturerNames.put("Little Mule","Columbus
McKinnon");
manufacturerNames.put("Shaw-Box","Columbus McKinnon");
manufacturerNames.put("WECO","Columbus McKinnon");
manufacturerNames.put("Yale Hoist","Columbus
McKinnon");
manufacturerNames.put("Dymo","Newell-Rubbermaid");
manufacturerNames.put("Hilmor","Newell-Rubbermaid");
manufacturerNames.put("Irwin
Hanson","Newell-Rubbermaid");
manufacturerNames.put("Irwin
Marathon","Newell-Rubbermaid");
manufacturerNames.put("Irwin
Marples","Newell-Rubbermaid");
manufacturerNames.put("Irwin
Speedbor","Newell-Rubbermaid");
manufacturerNames.put("Irwin
Straight-Line","Newell-Rubbermaid");
manufacturerNames.put("Irwin
Quick-Grip","Newell-Rubbermaid");
manufacturerNames.put("Irwin
Unibit","Newell-Rubbermaid");
manufacturerNames.put("Irwin
Vise-Grip","Newell-Rubbermaid");
manufacturerNames.put("Lenox","Newell-Rubbermaid");
manufacturerNames.put("Rubbermaid Commercial
Products","Newell-Rubbermaid");
manufacturerNames.put("Sharpie","Newell-Rubbermaid");
manufacturerNames.put("Accu-Lube","ITW Pro Brands
(Itw Fluids North America)");
manufacturerNames.put("ITW","ITW Pro Brands (Itw
Fluids North America)");
manufacturerNames.put("Rustlick","ITW Pro Brands (Itw
Fluids North America)");
manufacturerNames.put("Rustlick B","ITW Pro Brands
(Itw Fluids North America)");
manufacturerNames.put("Dymon","ITW Pro Brands (Itw
Fluids North America)");
manufacturerNames.put("DYKEM","ITW Pro Brands (Itw
Fluids North America)");
manufacturerNames.put("SCRUBS","ITW Pro Brands (Itw
Fluids North America)");
manufacturerNames.put("Spray Nine","ITW Pro Brands
(Itw Fluids North America)");
manufacturerNames.put("LPS","ITW Pro Brands (Itw
Fluids North America)");
manufacturerNames.put("SafeTap","ITW Pro Brands (Itw
Fluids North America)");
manufacturerNames.put("Sertun","ITW Pro Brands (Itw
Fluids North America)");
manufacturerNames.put("Atlantic Mills","ITW Pro
Brands (Itw Fluids North America)");
manufacturerNames.put("Devcon","ITW Polymers
Adhesives");
manufacturerNames.put("Permatex","ITW Polymers
Adhesives");
manufacturerNames.put("Plexus","ITW Polymers
Adhesives");
manufacturerNames.put("Spraycore","ITW Polymers
Adhesives");
manufacturerNames.put("Norton","Norton Saint Gobain");
manufacturerNames.put("Merit","Norton Saint Gobain");
manufacturerNames.put("Carborundum","Norton Saint
Gobain");
manufacturerNames.put("Winter","Norton Saint Gobain");
manufacturerNames.put("3M","3M");
manufacturerNames.put("Standard Abrasives","3M");
manufacturerNames.put("ABS","ABS Import Tools");
manufacturerNames.put("Accuform","Accuform Mfg");
manufacturerNames.put("Acme","Acme United");
manufacturerNames.put("TREDS","Advantage Products");
manufacturerNames.put("Radnor","Airgas");
manufacturerNames.put("Ajax","Ajax Tool Works");
manufacturerNames.put("Akers","Akers Industries");
manufacturerNames.put("Aldon","Aldon");
manufacturerNames.put("Gator","Ali Industries");
manufacturerNames.put("Allegro","Allegro Industries");
manufacturerNames.put("Alliance","Alliance
Mercantile");
manufacturerNames.put("Viking","Alliance Mercantile");
manufacturerNames.put("Alta","Alta Industries");
manufacturerNames.put("Ammex","Ammex");
manufacturerNames.put("Ampco","Ampco Safety Tools");
manufacturerNames.put("Enforcer","Amrep");
manufacturerNames.put("Anchor","Anchor Brand");
manufacturerNames.put("Anchor Chemical","Anchor
Chemical");
manufacturerNames.put("Anderson Brush","Anderson
Products");
manufacturerNames.put("Ansell","Ansell");
manufacturerNames.put("Ansell","Ansell");
manufacturerNames.put("Ansell","Ansell");
manufacturerNames.put("Apache Mills","Apache Mills");
manufacturerNames.put("Allen","Apex Tool Group");
manufacturerNames.put("Allen","Apex Tool Group");
manufacturerNames.put("Apex","Apex Tool Group");
manufacturerNames.put("Weller","Apex Tool Group");
manufacturerNames.put("Xcelite","Apex Tool Group");
manufacturerNames.put("ARC","Arc Abrasives");
manufacturerNames.put("Armstrong","Armstrong
Industrial Hand Tools");
manufacturerNames.put("Ashburn","Ashburn Chemical
Technologies");
manufacturerNames.put("Atlantic Safety","Atlantic
Safety Products");
manufacturerNames.put("Baffin","Baffin");
manufacturerNames.put("Bausch + Lomb","Bausch &
Lomb");
manufacturerNames.put("Bayco","Bayco Products");
manufacturerNames.put("Bayer","Bayer HealthCare");
manufacturerNames.put("Bee Line","Bee Line
Abrasives");
manufacturerNames.put("Berne","Berne Apparel");
manufacturerNames.put("Berry","Berry Plastics");
manufacturerNames.put("Nashua","Berry Plastics");
manufacturerNames.put("Bessey","Bessey Tools");
manufacturerNames.put("Best Sanitizers","Best
Sanitizers");
manufacturerNames.put("Boardwalk","Boardwalk");
manufacturerNames.put("BDG","Bob Dale Gloves &
Imports");
manufacturerNames.put("Bondhus","Bondhus");
manufacturerNames.put("Boss","Boss Manufacturing");
manufacturerNames.put("Never-Seez","Bostik");
manufacturerNames.put("Bradley","Bradley");
manufacturerNames.put("Brady","Brady Worldwide");
manufacturerNames.put("Bright Star","Koehler Lighting
Products");
manufacturerNames.put("BBI","Brighton-Best
International");
manufacturerNames.put("BBI","Brighton-Best
International");
manufacturerNames.put("Brunner & Lay","Brunner &
Lay");
manufacturerNames.put("BRM","Brush Research Mfg");
manufacturerNames.put("Buehler","Buehler");
manufacturerNames.put("Buffalo","Buffalo Industries");
manufacturerNames.put("C.H.Hanson","C.H.Hanson");
manufacturerNames.put("C.H.Hanson","C.H.Hanson");
manufacturerNames.put("C.S. Osborne","C.S. Osborne");
manufacturerNames.put("CGW","Camel Grinding Wheels");
manufacturerNames.put("CGW","Camel Grinding Wheels");
manufacturerNames.put("CGW","Camel Grinding Wheels");
manufacturerNames.put("DBI-SALA","Capital Safety");
manufacturerNames.put("Carborundum","Carborundum
Abrasives");
manufacturerNames.put("Carborundum","Carborundum
Abrasives");
manufacturerNames.put("Carhartt","Carhartt");
manufacturerNames.put("Carolina","Carolina Glove");
manufacturerNames.put("Carr Lane","Carr Lane Mfg");
manufacturerNames.put("CCL","CCL Security Products");
manufacturerNames.put("Central","Central Tools");
manufacturerNames.put("Certified Safety","Certified
Safety Mfg");
manufacturerNames.put("Channellock","Channellock");
manufacturerNames.put("Chase","Chase Ergonomics");
manufacturerNames.put("CHECKERS","Checkers Industrial
Safety Products");
manufacturerNames.put("CP","Chicago Pneumatic");
manufacturerNames.put("CP","Chicago Pneumatic");
manufacturerNames.put("CPA","Chicago Protective
Apparel");
manufacturerNames.put("Choctaw-Kaul","Choctaw - Kaul
Distribution");
manufacturerNames.put("Climax","Climax Metal
Products");
manufacturerNames.put("CMC Rescue","CMC Rescue");
manufacturerNames.put("Coastal Wipers","Coastal
Wipers");
manufacturerNames.put("Codet","Codet");
manufacturerNames.put("Comfort","Comfort Clothing &
Gloves");
manufacturerNames.put("Comfort Eye
Protection","Comfort Eye Protection");
manufacturerNames.put("CEP","Complete Enviromental
Products");
manufacturerNames.put("Condor","Condor Protective
Gear");
manufacturerNames.put("Cordova","Cordova Safety
Products");
manufacturerNames.put("Cortina","Cortina");
manufacturerNames.put("COX","COX North America");
manufacturerNames.put("Craftsman","Craftsman");
manufacturerNames.put("Craftsman","Craftsman");
manufacturerNames.put("Craftsman","Craftsman");
manufacturerNames.put("Cratex","Cratex Mfg");
manufacturerNames.put("CRC","CRC Industries");
manufacturerNames.put("CrossFire","CrossFire Safety
Eyewear");
manufacturerNames.put("Crown-Mats","Crown Matting
Technologies");
manufacturerNames.put("CS Unitec","CS Unitec");
manufacturerNames.put("CLC","Custom Leather Craft");
manufacturerNames.put("DAP","DAP Products");
manufacturerNames.put("DascoPro","Dasco Pro Tools");
manufacturerNames.put("DascoPro","Dasco Pro Tools");
manufacturerNames.put("Degil","Degil Safety
Products");
manufacturerNames.put("DenTec","Dentec Safety
Specialists");
manufacturerNames.put("Dorcy","Dorcy International");
manufacturerNames.put("Dormer Pramet","Dormer
Pramet");
manufacturerNames.put("Precision Dormer","Dormer
Pramet");
manufacturerNames.put("Dräger","Draeger Safety");
manufacturerNames.put("Dupont","Dupont Protective
Apparel");
manufacturerNames.put("Duracell","Duracell");
manufacturerNames.put("Dynabrade","Dynabrade");
manufacturerNames.put("Eagle","Eagle Manufacturing");
manufacturerNames.put("Bussmann","Eaton");
manufacturerNames.put("Bullard","ED Bullard");
manufacturerNames.put("Eklind","Eklind Tool Company");
manufacturerNames.put("Eklind","Eklind Tool Company");
manufacturerNames.put("Elk River","Elk River");
manufacturerNames.put("Elmers","Elmers Products");
manufacturerNames.put("Elvex","Elvex");
manufacturerNames.put("RIDGID","Emerson Electric");
manufacturerNames.put("Empire","Empire Level");
manufacturerNames.put("Emuge","Emuge");
manufacturerNames.put("Encon","Encon Safety
Products");
manufacturerNames.put("Energizer","Energizer
Holdings");
manufacturerNames.put("ERB","ERB Industries");
manufacturerNames.put("Ergodyne","Ergodyne");
manufacturerNames.put("ESCA","ESCA Tech");
manufacturerNames.put("ESP","ESP");
manufacturerNames.put("Estwing","Estwing Mfg");
manufacturerNames.put("Estwing","Estwing Mfg");
manufacturerNames.put("Everett","Everett Industries");
manufacturerNames.put("EXTECH","Extech Instruments");
manufacturerNames.put("Falcon Abrasive","Falcon
Abrasive");
manufacturerNames.put("Falcon Abrasive","Falcon
Abrasive");
manufacturerNames.put("Falcon Abrasive","Falcon
Abrasive");
manufacturerNames.put("FallTech","FallTech");
manufacturerNames.put("Fibre-Metal By
Honeywell","Honeywell Safety Products");
manufacturerNames.put("First Aid Only","First Aid
Only");
manufacturerNames.put("FlexOvit","Flexovit");
manufacturerNames.put("FlexOvit","Flexovit");
manufacturerNames.put("Fluke","Fluke");
manufacturerNames.put("Forcefield","Forcefield");
manufacturerNames.put("Titebond","Franklin
International");
manufacturerNames.put("Franmar","Franmar");
manufacturerNames.put("Fuller","Fuller");
manufacturerNames.put("Gants Laurentide","Gants
Laurentide");
manufacturerNames.put("Gardner-Gibson","Gardner-Gibson");
manufacturerNames.put("Garland","Garland Mfg");
manufacturerNames.put("Garr Tool","Garr Tool");
manufacturerNames.put("Gates","Gates");
manufacturerNames.put("Gateway Safety","Gateway
Safety");
manufacturerNames.put("Gatorade","Gatorade");
manufacturerNames.put("Gedore","Gedore Tools");
manufacturerNames.put("General","General Tools &
Instruments");
manufacturerNames.put("Generic","Generic");
manufacturerNames.put("GF","GF Protection");
manufacturerNames.put("Global Glove","Global Glove &
Safety Mfg");
manufacturerNames.put("Global Glove","Global Glove &
Safety Mfg");
manufacturerNames.put("Gorilla","Gorilla Glue");
manufacturerNames.put("Lutz","Gorilla Glue");
manufacturerNames.put("GreatNeck","GreatNeck Tools");
manufacturerNames.put("GreatNeck","GreatNeck Tools");
manufacturerNames.put("Greenfield
Industries","Greenfield Industries");
manufacturerNames.put("Greenlee","Greenlee Textron");
manufacturerNames.put("Greenlee","Greenlee Textron");
manufacturerNames.put("GP","Grey Pneumatic");
manufacturerNames.put("Grobet","Grobet");
manufacturerNames.put("Guardian","Guardian
Equipment");
manufacturerNames.put("Guard-Line","Guard-Line");
manufacturerNames.put("Guhring","Guhring");
manufacturerNames.put("Harley-Davidson","Harley-Davidson Safety Wear");
manufacturerNames.put("Harris","Harris Industries");
manufacturerNames.put("Hart Health","Hart Health");
manufacturerNames.put("Haws","Haws");
manufacturerNames.put("Helly-Hansen","Helly-Hansen");
manufacturerNames.put("Henkel","Henkel");
manufacturerNames.put("Hermes","Hermes Abrasives");
manufacturerNames.put("HexArmor","Hex Armor");
manufacturerNames.put("Honeywell","Honeywell Safety
Products");
manufacturerNames.put("Hyde","Hyde Tools");
manufacturerNames.put("Hygiena","Hygiena");
manufacturerNames.put("Hygrade","Hygrade/LDF
Industries");
manufacturerNames.put("Hy-Tech","Hy-Tech Machine");
manufacturerNames.put("HyTest","HyTest Safety
Footwear");
manufacturerNames.put("Ideal","Ideal Industries");
manufacturerNames.put("Honest Abe","Illinois Glove");
manufacturerNames.put("ITW","ITW Global Brands");
manufacturerNames.put("Impacto","Impacto Protective
Products");
manufacturerNames.put("Impacto","Impacto Protective
Products");
manufacturerNames.put("EnviroGuard","International
Enviroguard Systems");
manufacturerNames.put("ipg","Intertape Polymer
Group");
manufacturerNames.put("ironClad","Ironclad
Performance Wear");
manufacturerNames.put("ironClad","Ironclad
Performance Wear");
manufacturerNames.put("ironClad","Ironclad
Performance Wear");
manufacturerNames.put("Ironwear","Ironwear Rainwear");
manufacturerNames.put("Irwin","Irwin Tools");
manufacturerNames.put("Irwin","Irwin Tools");
manufacturerNames.put("Iscar","Iscar");
manufacturerNames.put("Devcon","ITW Devcon");
manufacturerNames.put("ITW","ITW Pro Brands");
manufacturerNames.put("ITW","ITW Pro Brands");
manufacturerNames.put("J&M","J&M Innovative
Products");
manufacturerNames.put("JBC","JBC Safety Plastic");
manufacturerNames.put("Jet Lube","Jet Lube");
manufacturerNames.put("Jisco","Jisco Abrasives");
manufacturerNames.put("Tillman","John Tillman");
manufacturerNames.put("Johnson & Johnson","Johnson &
Johnson Services");
manufacturerNames.put("Jomac","Wells Lamont
Industrial");
manufacturerNames.put("JPW","JPW Industries");
manufacturerNames.put("Steelman","JS Products");
manufacturerNames.put("Justrite","Justrite Mfg");
manufacturerNames.put("Justrite","Justrite Mfg");
manufacturerNames.put("Kaba","Kaba Ilco");
manufacturerNames.put("Kasco","Kasco Abrasives");
manufacturerNames.put("HotMax","Kdar");
manufacturerNames.put("Keystone","Keystone Adjustable
Cap");
manufacturerNames.put("Kimberly
Clark*","Kimberly-Clark Professional");
manufacturerNames.put("Kimberly
Clark*","Kimberly-Clark Professional");
manufacturerNames.put("Kimberly
Clark*","Kimberly-Clark Professional");
manufacturerNames.put("Kinco","Kinco International");
manufacturerNames.put("Kinco","Kinco International");
manufacturerNames.put("KIRCHHOFF","KIRCHHOFF Group");
manufacturerNames.put("WITTE","KIRCHHOFF Group");
manufacturerNames.put("Klein","Klein Tools");
manufacturerNames.put("Klingspor","Klingspor
Abrasives");
manufacturerNames.put("Komelon","Komelon");
manufacturerNames.put("Kool Mist","Kool Mist");
manufacturerNames.put("Krylon","Krylon Products
Group");
manufacturerNames.put("Krylon","Krylon Products
Group");
manufacturerNames.put("Mirka","KWH Mirka");
manufacturerNames.put("Starrett","L.S. Starrett");
manufacturerNames.put("LA-CO","LA-CO Industries");
manufacturerNames.put("Markal","LA-CO Industries");
manufacturerNames.put("LaCrosse","LaCrosse Footwear");
manufacturerNames.put("Lakeland","Lakeland
Industries");
manufacturerNames.put("A & E","Lang Tools");
manufacturerNames.put("LAPCO FR","Lapco Mfg");
manufacturerNames.put("Leatherman","Leatherman Tool
Group");
manufacturerNames.put("Lenox","Lenox");
manufacturerNames.put("Lenox","Lenox");
manufacturerNames.put("LFS","LFS Glove & Safety");
manufacturerNames.put("Liberty Glove","Liberty
Glove");
manufacturerNames.put("Lincoln Electric","Lincoln
Electric");
manufacturerNames.put("Lixie","Lixie");
manufacturerNames.put("LP Royer","LP Royer");
manufacturerNames.put("Lubriplate","Lubriplate
Lubricants");
manufacturerNames.put("Lyndex-Nikken","Lyndex-Nikken");
manufacturerNames.put("M.A. Ford","M.A. Ford");
manufacturerNames.put("MAGLITE","Mag Instrument");
manufacturerNames.put("Magid","Magid Glove");
manufacturerNames.put("Majestic Glove","Majestic
Glove");
manufacturerNames.put("Makita","Makita");
manufacturerNames.put("Makita","Makita");
manufacturerNames.put("Malt","Malt Industries");
manufacturerNames.put("Mapa","Mapa Professional");
manufacturerNames.put("Martin","Martin Sprocket &
Gear");
manufacturerNames.put("Martin","Martin Sprocket &
Gear");
manufacturerNames.put("Master Chemical","Master
Chemical Corp");
manufacturerNames.put("Master Lock","Master Lock");
manufacturerNames.put("MaxCraft","MaxCraft");
manufacturerNames.put("Mayhew","Mayhew Steel
Products");
manufacturerNames.put("Mayhew","Mayhew Steel
Products");
manufacturerNames.put("Workhorse","McCordick Glove &
Safety");
manufacturerNames.put("MCR Safety","MCR Safety");
manufacturerNames.put("River City","MCR Safety");
manufacturerNames.put("U.S. Safety","MCR Safety");
manufacturerNames.put("Mechanix Wear","Mechanix
Wear");
manufacturerNames.put("Medique","Medique Products");
manufacturerNames.put("Merit","Norton Saint Gobain");
manufacturerNames.put("Merit","Norton Saint Gobain");
manufacturerNames.put("Merit","Norton Saint Gobain");
manufacturerNames.put("metabo","Metabo");
manufacturerNames.put("Microflex","Ansell");
manufacturerNames.put("Milwaukee","Milwaukee Tool");
manufacturerNames.put("ML Kishigo","ML Kishigo");
manufacturerNames.put("Moldex","Moldex");
manufacturerNames.put("Moldex","Moldex");
manufacturerNames.put("MORSE","Morse Cutting Tools");
manufacturerNames.put("MRO Solutions","MRO
Solutions");
manufacturerNames.put("MSA","MSA Safety");
manufacturerNames.put("Nasco","Nasco Industries");
manufacturerNames.put("NMC","National Marker");
manufacturerNames.put("NSA","National Safety
Apparel");
manufacturerNames.put("Neese","Neese Industries");
manufacturerNames.put("PIG","New Pig");
manufacturerNames.put("Newborn","Newborn Brothers");
manufacturerNames.put("Newell-Rubbermaid","Newell-Rubbermaid");
manufacturerNames.put("NGK BERYLCO","NGK Metals");
manufacturerNames.put("Nite Ize","Nite Ize");
manufacturerNames.put("Nitto Kohki","Nitto Kohki");
manufacturerNames.put("North By Honeywell","Honeywell
Safety Products");
manufacturerNames.put("North Star Glove","North Star
Glove");
manufacturerNames.put("Norton","Norton Saint Gobain");
manufacturerNames.put("Norton","Norton Saint Gobain");
manufacturerNames.put("Norton","Norton Saint Gobain");
manufacturerNames.put("OateySCS","OateySCS");
manufacturerNames.put("Oberon","Oberon");
manufacturerNames.put("MiraCool","OccuNomix
International");
manufacturerNames.put("Olympia","Olympia Tools");
manufacturerNames.put("Omar","Omar");
manufacturerNames.put("Onguard","Ansell");
manufacturerNames.put("ORR Safety","ORR Safety");
manufacturerNames.put("Osborn","Osborn");
manufacturerNames.put("OSG","OSG");
manufacturerNames.put("OSG","OSG");
manufacturerNames.put("OSG","OSG");
manufacturerNames.put("Ossian","Ossian");
manufacturerNames.put("Pac-Kit","Acme United");
manufacturerNames.put("Panduit","Panduit");
manufacturerNames.put("Paulson","Paulson Mfg");
manufacturerNames.put("Pearl","Pearl Abrasive");
manufacturerNames.put("Pelican","Pelican Products");
manufacturerNames.put("Petzl","Petzl");
manufacturerNames.put("Pferd","Pferd");
manufacturerNames.put("PONY","Pony Tools");
manufacturerNames.put("PONY","Pony Tools");
manufacturerNames.put("PONY","Pony Tools");
manufacturerNames.put("Pratt-Read","Pratt-Read
Tools");
manufacturerNames.put("Precision
Abrasives","Precision Abrasives");
manufacturerNames.put("PTA","Premium Tool &
Abrasives");
manufacturerNames.put("Presco","Presco");
manufacturerNames.put("Caiman","Primax Mfg &
Trading");
manufacturerNames.put("Primax","Primax Mfg &
Trading");
manufacturerNames.put("bouton","Protective Industrial
Products");
manufacturerNames.put("PIP","Protective Industrial
Products");
manufacturerNames.put("Pyramex","Pyramex Safety
Products");
manufacturerNames.put("QRP","QRP Gloves &
Fingercots");
manufacturerNames.put("Quality Import","Quality
Import");
manufacturerNames.put("Quest","Quest Environmental &
Safety Products");
manufacturerNames.put("Radiac","Radiac Abrasives");
manufacturerNames.put("Radians","Radians");
manufacturerNames.put("RSC","Radiator Specialty");
manufacturerNames.put("Rawhyde Frontier","Rawhyde
Frontier");
manufacturerNames.put("RectorSeal","RectorSeal");
manufacturerNames.put("Red Devil","Red Devil");
manufacturerNames.put("Red Kap","Red Kap Industries");
manufacturerNames.put("Reed","Reed Mfg");
manufacturerNames.put("Reed","Reed Mfg");
manufacturerNames.put("Revco","Revco");
manufacturerNames.put("Rex-Cut","Rex-Cut Abrasives");
manufacturerNames.put("RIDGID","Ridgid Tool");
manufacturerNames.put("RIDGID","Ridgid Tool");
manufacturerNames.put("RIDGID","Ridgid Tool");
manufacturerNames.put("RIDGID","Ridgid Tool");
manufacturerNames.put("RI","Rigid Industries");
manufacturerNames.put("Ringers","Ringers Gloves");
manufacturerNames.put("Dremel","Robert Bosch");
manufacturerNames.put("Bosch","Robert Bosch");
manufacturerNames.put("Bosch","Robert Bosch");
manufacturerNames.put("Round House","Round House");
manufacturerNames.put("rpb","RPB Safety");
manufacturerNames.put("Rust-Oleum","Rust-Oleum");
manufacturerNames.put("Rutland","Rutland Fire Clay");
manufacturerNames.put("Safco","Safco Products");
manufacturerNames.put("Safecross","Safecross (First
Aid Products)");
manufacturerNames.put("Saf-Tech","Saf-Tech");
manufacturerNames.put("Saf-T-Glove","Saf-T-Glove");
manufacturerNames.put("Salisbury By
Honeywell","Honeywell Safety Products");
manufacturerNames.put("San Jamar","San Jamar");
manufacturerNames.put("SAS","SAS Safety");
manufacturerNames.put("Sashco","Sashco");
manufacturerNames.put("SF","Seal Fast");
manufacturerNames.put("SG","Seattle Glove");
manufacturerNames.put("Sellstrom","Sellstrom");
manufacturerNames.put("Sellstrom","Sellstrom");
manufacturerNames.put("Sempermed","Sempermed");
manufacturerNames.put("Serim","Serim Research");
manufacturerNames.put("Seymour Midwest","Seymour
Midwest");
manufacturerNames.put("Seymour","Seymour Of
Sycamore");
manufacturerNames.put("SGS","Boss Manufacturing");
manufacturerNames.put("Showa Best","Showa Best
Glove");
manufacturerNames.put("Showa Best","Showa Best
Glove");
manufacturerNames.put("Shurtape","Shurtape
Technologies");
manufacturerNames.put("Shurtape","Shurtape
Technologies");
manufacturerNames.put("Sia","Sia Abrasives
Industries");
manufacturerNames.put("Simonds","Simonds
International");
manufacturerNames.put("SK","SK Hand Tool");
manufacturerNames.put("Bahco","Snap-On Industrial");
manufacturerNames.put("Williams","Snap-On
Industrial");
manufacturerNames.put("Snap-on","Snap-On Industrial");
manufacturerNames.put("Snap-on","Snap-On Industrial");
manufacturerNames.put("Snap-on","Snap-On Industrial");
manufacturerNames.put("Snap-on","Snap-On Industrial");
manufacturerNames.put("Williams","Snap-On
Industrial");
manufacturerNames.put("Soudal Accumetric","Soudal
Accumetric");
manufacturerNames.put("SPEAKMAN","Speakman");
manufacturerNames.put("Rayovac","Spectrum Brands");
manufacturerNames.put("SpillTech","SpillTech
Environmental");
manufacturerNames.put("Stabilicer","Stabilicer");
manufacturerNames.put("Stanco","Stanco Mfg");
manufacturerNames.put("Stanco","Stanco Mfg");
manufacturerNames.put("Standard Safety","Standard
Safety Equipment Company");
manufacturerNames.put("Standard Safety","Standard
Safety Equipment Company");
// Show all balances in hash table.
names = manufacturerNames.keys();
Dataset<Row> dataFileContent =
sqlContext.load("com.databricks.spark.csv", options);
while(names.hasMoreElements()) {
str = (String) names.nextElement();
dataFileContent=dataFileContent.withColumn("ManufacturerSource",
regexp_replace(col("ManufacturerSource"),str,manufacturerNames.get(str).toString()));
}
dataFileContent.show();
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]