This is an automated email from the git hooks/post-receive script. fsfs pushed a commit to annotated tag release/0.09 in repository libhtml-scrubber-perl.
commit c425d7eb10a539ee6bf50ce5e4120e6ce5c0f060 Author: Nigel Metheringham <[email protected]> Date: Sun Mar 27 21:15:14 2011 +0100 Fixed windows line endings --- Changes | 88 +-- LICENSE | 766 +++++++++++++------------- MANIFEST | 30 +- MANIFEST.SKIP | 58 +- META.yml | 26 +- Makefile.PL | 44 +- README | 52 +- Scrubber.pm | 1520 +++++++++++++++++++++++++-------------------------- t/01_use.t | 24 +- t/02_basic.t | 302 +++++----- t/03_more.t | 78 +-- t/04_style_script.t | 38 +- t/05_pi_comment.t | 38 +- t/06_scrub_file.t | 100 ++-- t/07_booleans.t | 114 ++-- 15 files changed, 1639 insertions(+), 1639 deletions(-) diff --git a/Changes b/Changes index 81ac1c2..6f4ddb7 100644 --- a/Changes +++ b/Changes @@ -1,44 +1,44 @@ -Revision history for Perl extension HTML::Scrubber. - -0.08 Thu Apr 1 14:14:38 2004 - - removed test which relied on stuff that changed in HTML-Parser-3.36 - <https://rt.cpan.org/Ticket/Display.html?id=5472> - -0.07 Thu Mar 18 06:21:38 2004 - - allow for boolean attributes (thanks b10m) - - which is why now attribute order is followed (attrseq) - repeated elements get squashed (see 07_booleans.t for details). - -0.06 Sun Nov 2 01:26:42 2003 - - fixed more typos - - added t\06_scrub_file.t (that part was broken, now fixed) - -0.05 Thu Oct 30 23:27:37 2003 - - fixed up various typos in tests ... - - bumped up version number ;( - -0.04 Wed Oct 29 18:35:08 2003 - - added missing lc in a few places (and got rid of for @_) - - fixed (and improved) optimizations (stupid typo) - - added DESTROY to break circular reference (I lost my TODO, so i forgot) - - added more pod (allow deny ...) - - improved test suite - - added LICENSE file - - added script/style functions (nice) - -0.03 Mon Jul 21 07:32:10 2003 - - perltidy ;) - - closed http://rt.cpan.org/NoAuth/Bug.html?id=2969 - now escape spurious >< in text - - updated test.pl - -0.02 Fri Apr 18 14:12:02 2003 - - finished TODO, settled on API - - created a cpan worthy distribution and uploaded to CPAN - -0.01 Thu Apr 17 20:34:11 2003 - - original version; created by h2xs 1.21 with options - -AX HTML::Scrubber - - wrote initial version and released at - http://perlmonks.org/index.pl?node_id=251427 - +Revision history for Perl extension HTML::Scrubber. + +0.08 Thu Apr 1 14:14:38 2004 + - removed test which relied on stuff that changed in HTML-Parser-3.36 + <https://rt.cpan.org/Ticket/Display.html?id=5472> + +0.07 Thu Mar 18 06:21:38 2004 + - allow for boolean attributes (thanks b10m) + - which is why now attribute order is followed (attrseq) + repeated elements get squashed (see 07_booleans.t for details). + +0.06 Sun Nov 2 01:26:42 2003 + - fixed more typos + - added t\06_scrub_file.t (that part was broken, now fixed) + +0.05 Thu Oct 30 23:27:37 2003 + - fixed up various typos in tests ... + - bumped up version number ;( + +0.04 Wed Oct 29 18:35:08 2003 + - added missing lc in a few places (and got rid of for @_) + - fixed (and improved) optimizations (stupid typo) + - added DESTROY to break circular reference (I lost my TODO, so i forgot) + - added more pod (allow deny ...) + - improved test suite + - added LICENSE file + - added script/style functions (nice) + +0.03 Mon Jul 21 07:32:10 2003 + - perltidy ;) + - closed http://rt.cpan.org/NoAuth/Bug.html?id=2969 + now escape spurious >< in text + - updated test.pl + +0.02 Fri Apr 18 14:12:02 2003 + - finished TODO, settled on API + - created a cpan worthy distribution and uploaded to CPAN + +0.01 Thu Apr 17 20:34:11 2003 + - original version; created by h2xs 1.21 with options + -AX HTML::Scrubber + - wrote initial version and released at + http://perlmonks.org/index.pl?node_id=251427 + diff --git a/LICENSE b/LICENSE index 9bb6486..9d0305b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,383 +1,383 @@ -Terms of Perl itself - -a) the GNU General Public License as published by the Free - Software Foundation; either version 1, or (at your option) any - later version, or -b) the "Artistic License" - ---------------------------------------------------------------------------- - -The General Public License (GPL) -Version 2, June 1991 - -Copyright (C) 1989, 1991 Free Software Foundation, Inc. 675 Mass Ave, -Cambridge, MA 02139, USA. Everyone is permitted to copy and distribute -verbatim copies of this license document, but changing it is not allowed. - -Preamble - -The licenses for most software are designed to take away your freedom to share -and change it. By contrast, the GNU General Public License is intended to -guarantee your freedom to share and change free software--to make sure the -software is free for all its users. This General Public License applies to most of -the Free Software Foundation's software and to any other program whose -authors commit to using it. (Some other Free Software Foundation software is -covered by the GNU Library General Public License instead.) You can apply it to -your programs, too. - -When we speak of free software, we are referring to freedom, not price. Our -General Public Licenses are designed to make sure that you have the freedom -to distribute copies of free software (and charge for this service if you wish), that -you receive source code or can get it if you want it, that you can change the -software or use pieces of it in new free programs; and that you know you can do -these things. - -To protect your rights, we need to make restrictions that forbid anyone to deny -you these rights or to ask you to surrender the rights. These restrictions -translate to certain responsibilities for you if you distribute copies of the -software, or if you modify it. - -For example, if you distribute copies of such a program, whether gratis or for a -fee, you must give the recipients all the rights that you have. You must make -sure that they, too, receive or can get the source code. And you must show -them these terms so they know their rights. - -We protect your rights with two steps: (1) copyright the software, and (2) offer -you this license which gives you legal permission to copy, distribute and/or -modify the software. - -Also, for each author's protection and ours, we want to make certain that -everyone understands that there is no warranty for this free software. If the -software is modified by someone else and passed on, we want its recipients to -know that what they have is not the original, so that any problems introduced by -others will not reflect on the original authors' reputations. - -Finally, any free program is threatened constantly by software patents. We wish -to avoid the danger that redistributors of a free program will individually obtain -patent licenses, in effect making the program proprietary. To prevent this, we -have made it clear that any patent must be licensed for everyone's free use or -not licensed at all. - -The precise terms and conditions for copying, distribution and modification -follow. - -GNU GENERAL PUBLIC LICENSE -TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND -MODIFICATION - -0. This License applies to any program or other work which contains a notice -placed by the copyright holder saying it may be distributed under the terms of -this General Public License. The "Program", below, refers to any such program -or work, and a "work based on the Program" means either the Program or any -derivative work under copyright law: that is to say, a work containing the -Program or a portion of it, either verbatim or with modifications and/or translated -into another language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not covered by -this License; they are outside its scope. The act of running the Program is not -restricted, and the output from the Program is covered only if its contents -constitute a work based on the Program (independent of having been made by -running the Program). Whether that is true depends on what the Program does. - -1. You may copy and distribute verbatim copies of the Program's source code as -you receive it, in any medium, provided that you conspicuously and appropriately -publish on each copy an appropriate copyright notice and disclaimer of warranty; -keep intact all the notices that refer to this License and to the absence of any -warranty; and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and you may at -your option offer warranty protection in exchange for a fee. - -2. You may modify your copy or copies of the Program or any portion of it, thus -forming a work based on the Program, and copy and distribute such -modifications or work under the terms of Section 1 above, provided that you also -meet all of these conditions: - -a) You must cause the modified files to carry prominent notices stating that you -changed the files and the date of any change. - -b) You must cause any work that you distribute or publish, that in whole or in -part contains or is derived from the Program or any part thereof, to be licensed -as a whole at no charge to all third parties under the terms of this License. - -c) If the modified program normally reads commands interactively when run, you -must cause it, when started running for such interactive use in the most ordinary -way, to print or display an announcement including an appropriate copyright -notice and a notice that there is no warranty (or else, saying that you provide a -warranty) and that users may redistribute the program under these conditions, -and telling the user how to view a copy of this License. (Exception: if the -Program itself is interactive but does not normally print such an announcement, -your work based on the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If identifiable -sections of that work are not derived from the Program, and can be reasonably -considered independent and separate works in themselves, then this License, -and its terms, do not apply to those sections when you distribute them as -separate works. But when you distribute the same sections as part of a whole -which is a work based on the Program, the distribution of the whole must be on -the terms of this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest your rights to -work written entirely by you; rather, the intent is to exercise the right to control -the distribution of derivative or collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program with the -Program (or with a work based on the Program) on a volume of a storage or -distribution medium does not bring the other work under the scope of this -License. - -3. You may copy and distribute the Program (or a work based on it, under -Section 2) in object code or executable form under the terms of Sections 1 and 2 -above provided that you also do one of the following: - -a) Accompany it with the complete corresponding machine-readable source -code, which must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange; or, - -b) Accompany it with a written offer, valid for at least three years, to give any -third party, for a charge no more than your cost of physically performing source -distribution, a complete machine-readable copy of the corresponding source -code, to be distributed under the terms of Sections 1 and 2 above on a medium -customarily used for software interchange; or, - -c) Accompany it with the information you received as to the offer to distribute -corresponding source code. (This alternative is allowed only for noncommercial -distribution and only if you received the program in object code or executable -form with such an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for making -modifications to it. For an executable work, complete source code means all the -source code for all modules it contains, plus any associated interface definition -files, plus the scripts used to control compilation and installation of the -executable. However, as a special exception, the source code distributed need -not include anything that is normally distributed (in either source or binary form) -with the major components (compiler, kernel, and so on) of the operating system -on which the executable runs, unless that component itself accompanies the -executable. - -If distribution of executable or object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the source -code from the same place counts as distribution of the source code, even though -third parties are not compelled to copy the source along with the object code. - -4. You may not copy, modify, sublicense, or distribute the Program except as -expressly provided under this License. Any attempt otherwise to copy, modify, -sublicense or distribute the Program is void, and will automatically terminate -your rights under this License. However, parties who have received copies, or -rights, from you under this License will not have their licenses terminated so long -as such parties remain in full compliance. - -5. You are not required to accept this License, since you have not signed it. -However, nothing else grants you permission to modify or distribute the Program -or its derivative works. These actions are prohibited by law if you do not accept -this License. Therefore, by modifying or distributing the Program (or any work -based on the Program), you indicate your acceptance of this License to do so, -and all its terms and conditions for copying, distributing or modifying the -Program or works based on it. - -6. Each time you redistribute the Program (or any work based on the Program), -the recipient automatically receives a license from the original licensor to copy, -distribute or modify the Program subject to these terms and conditions. You -may not impose any further restrictions on the recipients' exercise of the rights -granted herein. You are not responsible for enforcing compliance by third parties -to this License. - -7. If, as a consequence of a court judgment or allegation of patent infringement -or for any other reason (not limited to patent issues), conditions are imposed on -you (whether by court order, agreement or otherwise) that contradict the -conditions of this License, they do not excuse you from the conditions of this -License. If you cannot distribute so as to satisfy simultaneously your obligations -under this License and any other pertinent obligations, then as a consequence -you may not distribute the Program at all. For example, if a patent license would -not permit royalty-free redistribution of the Program by all those who receive -copies directly or indirectly through you, then the only way you could satisfy -both it and this License would be to refrain entirely from distribution of the -Program. - -If any portion of this section is held invalid or unenforceable under any particular -circumstance, the balance of the section is intended to apply and the section as -a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any patents or other -property right claims or to contest validity of any such claims; this section has -the sole purpose of protecting the integrity of the free software distribution -system, which is implemented by public license practices. Many people have -made generous contributions to the wide range of software distributed through -that system in reliance on consistent application of that system; it is up to the -author/donor to decide if he or she is willing to distribute software through any -other system and a licensee cannot impose that choice. - -This section is intended to make thoroughly clear what is believed to be a -consequence of the rest of this License. - -8. If the distribution and/or use of the Program is restricted in certain countries -either by patents or by copyrighted interfaces, the original copyright holder who -places the Program under this License may add an explicit geographical -distribution limitation excluding those countries, so that distribution is permitted -only in or among countries not thus excluded. In such case, this License -incorporates the limitation as if written in the body of this License. - -9. The Free Software Foundation may publish revised and/or new versions of the -General Public License from time to time. Such new versions will be similar in -spirit to the present version, but may differ in detail to address new problems or -concerns. - -Each version is given a distinguishing version number. If the Program specifies a -version number of this License which applies to it and "any later version", you -have the option of following the terms and conditions either of that version or of -any later version published by the Free Software Foundation. If the Program does -not specify a version number of this License, you may choose any version ever -published by the Free Software Foundation. - -10. If you wish to incorporate parts of the Program into other free programs -whose distribution conditions are different, write to the author to ask for -permission. For software which is copyrighted by the Free Software Foundation, -write to the Free Software Foundation; we sometimes make exceptions for this. -Our decision will be guided by the two goals of preserving the free status of all -derivatives of our free software and of promoting the sharing and reuse of -software generally. - -NO WARRANTY - -11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS -NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE -COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM -"AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR -IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE -ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, -YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR -CORRECTION. - -12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED -TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY -WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS -PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES -ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM -(INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY -OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS -BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - -END OF TERMS AND CONDITIONS - - ---------------------------------------------------------------------------- - -The Artistic License - -Preamble - -The intent of this document is to state the conditions under which a Package -may be copied, such that the Copyright Holder maintains some semblance of -artistic control over the development of the package, while giving the users of the -package the right to use and distribute the Package in a more-or-less customary -fashion, plus the right to make reasonable modifications. - -Definitions: - -- "Package" refers to the collection of files distributed by the Copyright - Holder, and derivatives of that collection of files created through textual - modification. -- "Standard Version" refers to such a Package if it has not been modified, - or has been modified in accordance with the wishes of the Copyright - Holder. -- "Copyright Holder" is whoever is named in the copyright or copyrights for - the package. -- "You" is you, if you're thinking about copying or distributing this Package. -- "Reasonable copying fee" is whatever you can justify on the basis of - media cost, duplication charges, time of people involved, and so on. (You - will not be required to justify it to the Copyright Holder, but only to the - computing community at large as a market that must bear the fee.) -- "Freely Available" means that no fee is charged for the item itself, though - there may be fees involved in handling the item. It also means that - recipients of the item may redistribute it under the same conditions they - received it. - -1. You may make and give away verbatim copies of the source form of the -Standard Version of this Package without restriction, provided that you duplicate -all of the original copyright notices and associated disclaimers. - -2. You may apply bug fixes, portability fixes and other modifications derived from -the Public Domain or from the Copyright Holder. A Package modified in such a -way shall still be considered the Standard Version. - -3. You may otherwise modify your copy of this Package in any way, provided -that you insert a prominent notice in each changed file stating how and when -you changed that file, and provided that you do at least ONE of the following: - - a) place your modifications in the Public Domain or otherwise - make them Freely Available, such as by posting said modifications - to Usenet or an equivalent medium, or placing the modifications on - a major archive site such as ftp.uu.net, or by allowing the - Copyright Holder to include your modifications in the Standard - Version of the Package. - - b) use the modified Package only within your corporation or - organization. - - c) rename any non-standard executables so the names do not - conflict with standard executables, which must also be provided, - and provide a separate manual page for each non-standard - executable that clearly documents how it differs from the Standard - Version. - - d) make other distribution arrangements with the Copyright Holder. - -4. You may distribute the programs of this Package in object code or executable -form, provided that you do at least ONE of the following: - - a) distribute a Standard Version of the executables and library - files, together with instructions (in the manual page or equivalent) - on where to get the Standard Version. - - b) accompany the distribution with the machine-readable source of - the Package with your modifications. - - c) accompany any non-standard executables with their - corresponding Standard Version executables, giving the - non-standard executables non-standard names, and clearly - documenting the differences in manual pages (or equivalent), - together with instructions on where to get the Standard Version. - - d) make other distribution arrangements with the Copyright Holder. - -5. You may charge a reasonable copying fee for any distribution of this Package. -You may charge any fee you choose for support of this Package. You may not -charge a fee for this Package itself. However, you may distribute this Package in -aggregate with other (possibly commercial) programs as part of a larger -(possibly commercial) software distribution provided that you do not advertise -this Package as a product of your own. - -6. The scripts and library files supplied as input to or produced as output from -the programs of this Package do not automatically fall under the copyright of this -Package, but belong to whomever generated them, and may be sold -commercially, and may be aggregated with this Package. - -7. C or perl subroutines supplied by you and linked into this Package shall not -be considered part of this Package. - -8. Aggregation of this Package with a commercial distribution is always permitted -provided that the use of this Package is embedded; that is, when no overt attempt -is made to make this Package's interfaces visible to the end user of the -commercial distribution. Such use shall not be construed as a distribution of -this Package. - -9. The name of the Copyright Holder may not be used to endorse or promote -products derived from this software without specific prior written permission. - -10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED -WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR -PURPOSE. - -The End - - +Terms of Perl itself + +a) the GNU General Public License as published by the Free + Software Foundation; either version 1, or (at your option) any + later version, or +b) the "Artistic License" + +--------------------------------------------------------------------------- + +The General Public License (GPL) +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. 675 Mass Ave, +Cambridge, MA 02139, USA. Everyone is permitted to copy and distribute +verbatim copies of this license document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to share +and change it. By contrast, the GNU General Public License is intended to +guarantee your freedom to share and change free software--to make sure the +software is free for all its users. This General Public License applies to most of +the Free Software Foundation's software and to any other program whose +authors commit to using it. (Some other Free Software Foundation software is +covered by the GNU Library General Public License instead.) You can apply it to +your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for this service if you wish), that +you receive source code or can get it if you want it, that you can change the +software or use pieces of it in new free programs; and that you know you can do +these things. + +To protect your rights, we need to make restrictions that forbid anyone to deny +you these rights or to ask you to surrender the rights. These restrictions +translate to certain responsibilities for you if you distribute copies of the +software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or for a +fee, you must give the recipients all the rights that you have. You must make +sure that they, too, receive or can get the source code. And you must show +them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) offer +you this license which gives you legal permission to copy, distribute and/or +modify the software. + +Also, for each author's protection and ours, we want to make certain that +everyone understands that there is no warranty for this free software. If the +software is modified by someone else and passed on, we want its recipients to +know that what they have is not the original, so that any problems introduced by +others will not reflect on the original authors' reputations. + +Finally, any free program is threatened constantly by software patents. We wish +to avoid the danger that redistributors of a free program will individually obtain +patent licenses, in effect making the program proprietary. To prevent this, we +have made it clear that any patent must be licensed for everyone's free use or +not licensed at all. + +The precise terms and conditions for copying, distribution and modification +follow. + +GNU GENERAL PUBLIC LICENSE +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND +MODIFICATION + +0. This License applies to any program or other work which contains a notice +placed by the copyright holder saying it may be distributed under the terms of +this General Public License. The "Program", below, refers to any such program +or work, and a "work based on the Program" means either the Program or any +derivative work under copyright law: that is to say, a work containing the +Program or a portion of it, either verbatim or with modifications and/or translated +into another language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not covered by +this License; they are outside its scope. The act of running the Program is not +restricted, and the output from the Program is covered only if its contents +constitute a work based on the Program (independent of having been made by +running the Program). Whether that is true depends on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code as +you receive it, in any medium, provided that you conspicuously and appropriately +publish on each copy an appropriate copyright notice and disclaimer of warranty; +keep intact all the notices that refer to this License and to the absence of any +warranty; and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and you may at +your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, thus +forming a work based on the Program, and copy and distribute such +modifications or work under the terms of Section 1 above, provided that you also +meet all of these conditions: + +a) You must cause the modified files to carry prominent notices stating that you +changed the files and the date of any change. + +b) You must cause any work that you distribute or publish, that in whole or in +part contains or is derived from the Program or any part thereof, to be licensed +as a whole at no charge to all third parties under the terms of this License. + +c) If the modified program normally reads commands interactively when run, you +must cause it, when started running for such interactive use in the most ordinary +way, to print or display an announcement including an appropriate copyright +notice and a notice that there is no warranty (or else, saying that you provide a +warranty) and that users may redistribute the program under these conditions, +and telling the user how to view a copy of this License. (Exception: if the +Program itself is interactive but does not normally print such an announcement, +your work based on the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If identifiable +sections of that work are not derived from the Program, and can be reasonably +considered independent and separate works in themselves, then this License, +and its terms, do not apply to those sections when you distribute them as +separate works. But when you distribute the same sections as part of a whole +which is a work based on the Program, the distribution of the whole must be on +the terms of this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your rights to +work written entirely by you; rather, the intent is to exercise the right to control +the distribution of derivative or collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program with the +Program (or with a work based on the Program) on a volume of a storage or +distribution medium does not bring the other work under the scope of this +License. + +3. You may copy and distribute the Program (or a work based on it, under +Section 2) in object code or executable form under the terms of Sections 1 and 2 +above provided that you also do one of the following: + +a) Accompany it with the complete corresponding machine-readable source +code, which must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange; or, + +b) Accompany it with a written offer, valid for at least three years, to give any +third party, for a charge no more than your cost of physically performing source +distribution, a complete machine-readable copy of the corresponding source +code, to be distributed under the terms of Sections 1 and 2 above on a medium +customarily used for software interchange; or, + +c) Accompany it with the information you received as to the offer to distribute +corresponding source code. (This alternative is allowed only for noncommercial +distribution and only if you received the program in object code or executable +form with such an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for making +modifications to it. For an executable work, complete source code means all the +source code for all modules it contains, plus any associated interface definition +files, plus the scripts used to control compilation and installation of the +executable. However, as a special exception, the source code distributed need +not include anything that is normally distributed (in either source or binary form) +with the major components (compiler, kernel, and so on) of the operating system +on which the executable runs, unless that component itself accompanies the +executable. + +If distribution of executable or object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the source +code from the same place counts as distribution of the source code, even though +third parties are not compelled to copy the source along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as +expressly provided under this License. Any attempt otherwise to copy, modify, +sublicense or distribute the Program is void, and will automatically terminate +your rights under this License. However, parties who have received copies, or +rights, from you under this License will not have their licenses terminated so long +as such parties remain in full compliance. + +5. You are not required to accept this License, since you have not signed it. +However, nothing else grants you permission to modify or distribute the Program +or its derivative works. These actions are prohibited by law if you do not accept +this License. Therefore, by modifying or distributing the Program (or any work +based on the Program), you indicate your acceptance of this License to do so, +and all its terms and conditions for copying, distributing or modifying the +Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), +the recipient automatically receives a license from the original licensor to copy, +distribute or modify the Program subject to these terms and conditions. You +may not impose any further restrictions on the recipients' exercise of the rights +granted herein. You are not responsible for enforcing compliance by third parties +to this License. + +7. If, as a consequence of a court judgment or allegation of patent infringement +or for any other reason (not limited to patent issues), conditions are imposed on +you (whether by court order, agreement or otherwise) that contradict the +conditions of this License, they do not excuse you from the conditions of this +License. If you cannot distribute so as to satisfy simultaneously your obligations +under this License and any other pertinent obligations, then as a consequence +you may not distribute the Program at all. For example, if a patent license would +not permit royalty-free redistribution of the Program by all those who receive +copies directly or indirectly through you, then the only way you could satisfy +both it and this License would be to refrain entirely from distribution of the +Program. + +If any portion of this section is held invalid or unenforceable under any particular +circumstance, the balance of the section is intended to apply and the section as +a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any patents or other +property right claims or to contest validity of any such claims; this section has +the sole purpose of protecting the integrity of the free software distribution +system, which is implemented by public license practices. Many people have +made generous contributions to the wide range of software distributed through +that system in reliance on consistent application of that system; it is up to the +author/donor to decide if he or she is willing to distribute software through any +other system and a licensee cannot impose that choice. + +This section is intended to make thoroughly clear what is believed to be a +consequence of the rest of this License. + +8. If the distribution and/or use of the Program is restricted in certain countries +either by patents or by copyrighted interfaces, the original copyright holder who +places the Program under this License may add an explicit geographical +distribution limitation excluding those countries, so that distribution is permitted +only in or among countries not thus excluded. In such case, this License +incorporates the limitation as if written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of the +General Public License from time to time. Such new versions will be similar in +spirit to the present version, but may differ in detail to address new problems or +concerns. + +Each version is given a distinguishing version number. If the Program specifies a +version number of this License which applies to it and "any later version", you +have the option of following the terms and conditions either of that version or of +any later version published by the Free Software Foundation. If the Program does +not specify a version number of this License, you may choose any version ever +published by the Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs +whose distribution conditions are different, write to the author to ask for +permission. For software which is copyrighted by the Free Software Foundation, +write to the Free Software Foundation; we sometimes make exceptions for this. +Our decision will be guided by the two goals of preserving the free status of all +derivatives of our free software and of promoting the sharing and reuse of +software generally. + +NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS +NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE +COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM +"AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR +IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE +ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, +YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED +TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY +WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS +PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM +(INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY +OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS +BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + + +--------------------------------------------------------------------------- + +The Artistic License + +Preamble + +The intent of this document is to state the conditions under which a Package +may be copied, such that the Copyright Holder maintains some semblance of +artistic control over the development of the package, while giving the users of the +package the right to use and distribute the Package in a more-or-less customary +fashion, plus the right to make reasonable modifications. + +Definitions: + +- "Package" refers to the collection of files distributed by the Copyright + Holder, and derivatives of that collection of files created through textual + modification. +- "Standard Version" refers to such a Package if it has not been modified, + or has been modified in accordance with the wishes of the Copyright + Holder. +- "Copyright Holder" is whoever is named in the copyright or copyrights for + the package. +- "You" is you, if you're thinking about copying or distributing this Package. +- "Reasonable copying fee" is whatever you can justify on the basis of + media cost, duplication charges, time of people involved, and so on. (You + will not be required to justify it to the Copyright Holder, but only to the + computing community at large as a market that must bear the fee.) +- "Freely Available" means that no fee is charged for the item itself, though + there may be fees involved in handling the item. It also means that + recipients of the item may redistribute it under the same conditions they + received it. + +1. You may make and give away verbatim copies of the source form of the +Standard Version of this Package without restriction, provided that you duplicate +all of the original copyright notices and associated disclaimers. + +2. You may apply bug fixes, portability fixes and other modifications derived from +the Public Domain or from the Copyright Holder. A Package modified in such a +way shall still be considered the Standard Version. + +3. You may otherwise modify your copy of this Package in any way, provided +that you insert a prominent notice in each changed file stating how and when +you changed that file, and provided that you do at least ONE of the following: + + a) place your modifications in the Public Domain or otherwise + make them Freely Available, such as by posting said modifications + to Usenet or an equivalent medium, or placing the modifications on + a major archive site such as ftp.uu.net, or by allowing the + Copyright Holder to include your modifications in the Standard + Version of the Package. + + b) use the modified Package only within your corporation or + organization. + + c) rename any non-standard executables so the names do not + conflict with standard executables, which must also be provided, + and provide a separate manual page for each non-standard + executable that clearly documents how it differs from the Standard + Version. + + d) make other distribution arrangements with the Copyright Holder. + +4. You may distribute the programs of this Package in object code or executable +form, provided that you do at least ONE of the following: + + a) distribute a Standard Version of the executables and library + files, together with instructions (in the manual page or equivalent) + on where to get the Standard Version. + + b) accompany the distribution with the machine-readable source of + the Package with your modifications. + + c) accompany any non-standard executables with their + corresponding Standard Version executables, giving the + non-standard executables non-standard names, and clearly + documenting the differences in manual pages (or equivalent), + together with instructions on where to get the Standard Version. + + d) make other distribution arrangements with the Copyright Holder. + +5. You may charge a reasonable copying fee for any distribution of this Package. +You may charge any fee you choose for support of this Package. You may not +charge a fee for this Package itself. However, you may distribute this Package in +aggregate with other (possibly commercial) programs as part of a larger +(possibly commercial) software distribution provided that you do not advertise +this Package as a product of your own. + +6. The scripts and library files supplied as input to or produced as output from +the programs of this Package do not automatically fall under the copyright of this +Package, but belong to whomever generated them, and may be sold +commercially, and may be aggregated with this Package. + +7. C or perl subroutines supplied by you and linked into this Package shall not +be considered part of this Package. + +8. Aggregation of this Package with a commercial distribution is always permitted +provided that the use of this Package is embedded; that is, when no overt attempt +is made to make this Package's interfaces visible to the end user of the +commercial distribution. Such use shall not be construed as a distribution of +this Package. + +9. The name of the Copyright Holder may not be used to endorse or promote +products derived from this software without specific prior written permission. + +10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED +WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR +PURPOSE. + +The End + + diff --git a/MANIFEST b/MANIFEST index beda636..74008fc 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,15 +1,15 @@ -Changes -LICENSE -Makefile.PL -MANIFEST -MANIFEST.SKIP -README -Scrubber.pm -t/01_use.t -t/02_basic.t -t/03_more.t -t/04_style_script.t -t/05_pi_comment.t -t/06_scrub_file.t -t/07_booleans.t -META.yml Module meta-data (added by MakeMaker) +Changes +LICENSE +Makefile.PL +MANIFEST +MANIFEST.SKIP +README +Scrubber.pm +t/01_use.t +t/02_basic.t +t/03_more.t +t/04_style_script.t +t/05_pi_comment.t +t/06_scrub_file.t +t/07_booleans.t +META.yml Module meta-data (added by MakeMaker) diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP index 9abcdc4..f16ac2a 100644 --- a/MANIFEST.SKIP +++ b/MANIFEST.SKIP @@ -1,30 +1,30 @@ -^MANIFEST\. -^Makefile$ -^blib/ -^MakeMaker-\d -^pm_to_blib$ -\.def$ -\.bs$ -\.o$ -\.obj$ -\.def$ -\.old$ -\.c$ -\.lib$ -\.exe$ -\.la$ -\.a$ -\.lnk$ -\.lai$ -\.lo$ -\.log$ -\.i$ -\.s$ -\.tar$ -\.gz$ -\.zip$ -\.htm$ -\.html$ -\.xsc$ -^pod2htm +^MANIFEST\. +^Makefile$ +^blib/ +^MakeMaker-\d +^pm_to_blib$ +\.def$ +\.bs$ +\.o$ +\.obj$ +\.def$ +\.old$ +\.c$ +\.lib$ +\.exe$ +\.la$ +\.a$ +\.lnk$ +\.lai$ +\.lo$ +\.log$ +\.i$ +\.s$ +\.tar$ +\.gz$ +\.zip$ +\.htm$ +\.html$ +\.xsc$ +^pod2htm \.t$ \ No newline at end of file diff --git a/META.yml b/META.yml index d2ddbc7..4350ead 100644 --- a/META.yml +++ b/META.yml @@ -1,13 +1,13 @@ -# http://module-build.sourceforge.net/META-spec.html -#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# -name: HTML-Scrubber -version: 0.08 -version_from: Scrubber.pm -installdirs: site -requires: - HTML::Parser: 3 - Test: 0 - Test::More: 0 - -distribution_type: module -generated_by: ExtUtils::MakeMaker version 6.21 +# http://module-build.sourceforge.net/META-spec.html +#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# +name: HTML-Scrubber +version: 0.08 +version_from: Scrubber.pm +installdirs: site +requires: + HTML::Parser: 3 + Test: 0 + Test::More: 0 + +distribution_type: module +generated_by: ExtUtils::MakeMaker version 6.21 diff --git a/Makefile.PL b/Makefile.PL index 2ea6e49..be5b5e8 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -1,22 +1,22 @@ -use ExtUtils::MakeMaker; -# See lib/ExtUtils/MakeMaker.pm for details of how to influence -# the contents of the Makefile that is written. -WriteMakefile( - 'NAME' => 'HTML::Scrubber', - 'VERSION_FROM' => 'Scrubber.pm', # finds $VERSION - 'PREREQ_FATAL' => 1, - 'PREREQ_PM' => { - 'HTML::Parser' => 3, - 'Test' => 0, - 'Test::More' => 0, - }, # e.g., Module::Name => 1.1 - ($] >= 5.005 ? ## Add these new keywords supported since 5.005 - (ABSTRACT_FROM => 'Scrubber.pm', # retrieve abstract from module - AUTHOR => 'D. H. aka PodMaster') : ()), -); - -__END__ -perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake disttest -nmake dist TAR=ptar -chmod 7777 *.gz -perl -le" `cpan-upload $_` for( (sort glob q,*.gz,)[-1]) " +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. +WriteMakefile( + 'NAME' => 'HTML::Scrubber', + 'VERSION_FROM' => 'Scrubber.pm', # finds $VERSION + 'PREREQ_FATAL' => 1, + 'PREREQ_PM' => { + 'HTML::Parser' => 3, + 'Test' => 0, + 'Test::More' => 0, + }, # e.g., Module::Name => 1.1 + ($] >= 5.005 ? ## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'Scrubber.pm', # retrieve abstract from module + AUTHOR => 'D. H. aka PodMaster') : ()), +); + +__END__ +perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake disttest +nmake dist TAR=ptar +chmod 7777 *.gz +perl -le" `cpan-upload $_` for( (sort glob q,*.gz,)[-1]) " diff --git a/README b/README index 5232f41..9255bc7 100644 --- a/README +++ b/README @@ -1,26 +1,26 @@ -HTML/Scrubber -========================== - -INSTALLATION - -To install this module type the following: - - perl Makefile.PL - make - make test - make install - -DEPENDENCIES - -This module requires these other modules and libraries: - - HTML::Parser - -COPYRIGHT AND LICENCE - -Copyright (C) 2003 D.H. aka PodMaster - -This library is free software; you can redistribute it and/or modify -it under the same terms as Perl itself. -The LICENSE file contains the full text of the license. - +HTML/Scrubber +========================== + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires these other modules and libraries: + + HTML::Parser + +COPYRIGHT AND LICENCE + +Copyright (C) 2003 D.H. aka PodMaster + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. +The LICENSE file contains the full text of the license. + diff --git a/Scrubber.pm b/Scrubber.pm index a6d990c..c287393 100644 --- a/Scrubber.pm +++ b/Scrubber.pm @@ -1,760 +1,760 @@ - -=head1 NAME - -HTML::Scrubber - Perl extension for scrubbing/sanitizing html - -=head1 SYNOPSIS - -=for example begin - - #!/usr/bin/perl -w - use HTML::Scrubber; - use strict; - # - my $html = q[ - <style type="text/css"> BAD { background: #666; color: #666;} </style> - <script language="javascript"> alert("Hello, I am EVIL!"); </script> - <HR> - a => <a href=1>link </a> - br => <br> - b => <B> bold </B> - u => <U> UNDERLINE </U> - ]; - # - my $scrubber = HTML::Scrubber->new( allow => [ qw[ p b i u hr br ] ] ); # - # - print $scrubber->scrub($html); # - # - $scrubber->deny( qw[ p b i u hr br ] ); # - # - print $scrubber->scrub($html); # - # - - -=for example end - -=head1 DESCRIPTION - -If you wanna "scrub" or "sanitize" html input -in a reliable an flexible fashion, -then this module is for you. - -I wasn't satisfied with HTML::Sanitizer because it is -based on HTML::TreeBuilder, -so I thought I'd write something similar -that works directly with HTML::Parser. - -=head1 METHODS - -First a note on documentation: just study the L<EXAMPLE|"EXAMPLE"> below. -It's all the documentation you could need - -Also, be sure to read all the comments as well as -L<How does it work?|"How does it work?">. - -If you're new to perl, good luck to you. - -=cut - -package HTML::Scrubber; -use HTML::Parser(); -use HTML::Entities; -use vars qw[ $VERSION @_scrub @_scrub_fh ]; -use strict; - -$VERSION = '0.08'; - -# my my my my, these here to prevent foolishness like -# http://perlmonks.org/index.pl?node_id=251127#Stealing+Lexicals -(@_scrub )= ( \&_scrub, "self, event, tagname, attr, attrseq, text"); -(@_scrub_fh )= ( \&_scrub_fh, "self, event, tagname, attr, attrseq, text"); - -sub new { - my $package = shift; - my $p = HTML::Parser->new( - api_version => 3, - default_h => \@_scrub, - marked_sections => 0, - strict_comment => 0, - unbroken_text => 1, - case_sensitive => 0, - boolean_attribute_value => undef, - ); - - my $self = { - _p => $p, - _rules => { - '*' => 0, - }, - _comment => 0, - _process => 0, - _r => "", - _optimize => 1, - _script => 0, - _style => 0, - }; - - $p->{"\0_s"} = bless $self, $package; - - return $self unless @_; - - my(%args)= @_; - - for my $f( qw[ default allow deny rules process comment ] ) { - next unless exists $args{$f}; - if( ref $args{$f} ) { - $self->$f( @{ $args{$f} } ) ; - } else { - $self->$f( $args{$f} ) ; - } - } - - return $self; -} - -=head2 comment - - warn "comments are ", $p->comment ? 'allowed' : 'not allowed'; - $p->comment(0); # off by default - -=cut - -sub comment { - return - $_[0]->{_comment} - if @_ == 1; - $_[0]->{_comment} = $_[1]; - return; -} - -=head2 process - - warn "process instructions are ", $p->process ? 'allowed' : 'not allowed'; - $p->process(0); # off by default - -=cut - - -sub process { - return - $_[0]->{_process} - if @_ == 1; - $_[0]->{_process} = $_[1]; - return; -} - - -=head2 script - - warn "script tags (and everything in between) are supressed" - if $p->script; # off by default - $p->script( 0 || 1 ); - -B<**> Please note that this is implemented -using HTML::Parser's ignore_elements function, -so if C<script> is set to true, -all script tags encountered will be validated like all other tags. - -=cut - -sub script { - return - $_[0]->{_script} - if @_ == 1; - $_[0]->{_script} = $_[1]; - return; -} - -=head2 style - - warn "style tags (and everything in between) are supressed" - if $p->style; # off by default - $p->style( 0 || 1 ); - -B<**> Please note that this is implemented -using HTML::Parser's ignore_elements function, -so if C<style> is set to true, -all style tags encountered will be validated like all other tags. - -=cut - -sub style { - return - $_[0]->{_style} - if @_ == 1; - $_[0]->{_style} = $_[1]; - return; -} - -=head2 allow - - $p->allow(qw[ t a g s ]); - -=cut - -sub allow { - my $self = shift; - for my $k(@_){ - $self->{_rules}{lc $k}=1; - } - $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse - - return; -} - - -=head2 deny - - $p->deny(qw[ t a g s ]); - -=cut - -sub deny { - my $self = shift; - - for my $k(@_){ - $self->{_rules}{lc $k} = 0; - } - - $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse - - return; -} - -=head2 rules - - $p->rules( - img => { - src => qr{^(?!http://)}i, # only relative image links allowed - alt => 1, # alt attribute allowed - '*' => 0, # deny all other attributes - }, - b => 1, - ... - ); - -=cut - -sub rules{ - my $self = shift; - my(%rules)= @_; - for my $k(keys %rules) { - $self->{_rules}{lc $k} = $rules{$k}; - } - - $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse - - return; -} - -=head2 default - - print "default is ", $p->default(); - $p->default(1); # allow tags by default - $p->default( - undef, # don't change - { # default attribute rules - '*' => 1, # allow attributes by default - } - ); - -=cut - -sub default { - return - $_[0]->{_rules}{'*'} - if @_ == 1; - - $_[0]->{_rules}{'*'} = $_[1] if defined $_[1]; - $_[0]->{_rules}{'_'} = $_[2] if defined $_[2] and ref $_[2]; - $_[0]->{_optimize} = 1; # each time a rule changes, reoptimize when parse - - return; -} - -=head2 scrub_file - - $html = $scrubber->scrub_file('foo.html'); ## returns giant string - die "Eeek $!" unless defined $html; ## opening foo.html may have failed - $scrubber->scrub_file('foo.html', 'new.html') or die "Eeek $!"; - $scrubber->scrub_file('foo.html', *STDOUT) - or die "Eeek $!" - if fileno STDOUT; - -=cut - -sub scrub_file { - if(@_ > 2){ - return unless defined $_[0]->_out($_[2]); - } else { - $_[0]->{_p}->handler( default => @_scrub ); - } - - $_[0]->_optimize() ;#if $_[0]->{_optimize}; - - $_[0]->{_p}->parse_file($_[1]); - - return delete $_[0]->{_r} unless exists $_[0]->{_out}; - delete $_[0]->{_out}; - return 1; -} - -=head2 scrub - - print $scrubber->scrub($html); ## returns giant string - $scrubber->scrub($html, 'new.html') or die "Eeek $!"; - $scrubber->scrub($html', *STDOUT) - or die "Eeek $!" - if fileno STDOUT; - - -=cut - -sub scrub { - if(@_ > 2){ - return unless defined $_[0]->_out($_[2]); - } else { - $_[0]->{_p}->handler( default => @_scrub ); - } - - $_[0]->_optimize();# if $_[0]->{_optimize}; - - $_[0]->{_p}->parse($_[1]); - $_[0]->{_p}->eof(); - - return delete $_[0]->{_r} unless exists $_[0]->{_out}; - delete $_[0]->{_out}; - return 1; -} - - -=for comment _out - $scrubber->_out(*STDOUT) if fileno STDOUT; - $scrubber->_out('foo.html') or die "eeek $!"; - -=cut - -sub _out { - my($self, $o ) = @_; - - unless( ref $o and ref \$o ne 'GLOB') { - local *F; - open F, ">$o" or return undef; - binmode F; - $self->{_out} = *F; - } else { - $self->{_out} = $o; - } - - $self->{_p}->handler( default => @_scrub_fh ); - - return 1; -} - - -=for comment _validate -Uses $self->{_rules} to do attribute validation. -Takes tag, rule('_' || $tag), attrref. - -=cut - -sub _validate { - my($s, $t, $r, $a, $as) = @_; - return "<$t>" unless %$a; - - $r = $s->{_rules}->{$r}; - my %f; - - for my $k( keys %$a ) { - if( exists $r->{$k} ) { - if( ref $r->{$k} || length($r->{$k}) > 1 ) { - $f{$k} = $a->{$k} if $a->{$k} =~ m{$r->{$k}}; - } elsif( $r->{$k} ) { - $f{$k} = $a->{$k}; - } - } elsif( exists $r->{'*'} and $r->{'*'} ) { - $f{$k} = $a->{$k}; - } - } - - if( %f ){ - my %seen; - return "<$t $r>" - if $r = join ' ', - map { - defined $f{$_} - ? qq[$_="].encode_entities($f{$_}).q["] - : $_; # boolean attribute (TODO?) - } grep { - exists $f{$_} and !$seen{$_}++; - } @$as; - } - - return "<$t>"; -} - -=for comment _scrub_fh -I<default> handler, does the scrubbing if we're scrubbing out to a file. - -=cut - -sub _scrub_fh { - my( $p, $e, $t, $a, $as, $text ) = @_; - my $s = $p->{"\0_s"} ; - - if ( $e eq 'start' ) - { - if( exists $s->{_rules}->{$t} ) # is there a specific rule - { - if( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;) - { - print - {$s->{_out}} - $s->_validate($t, $t, $a, $as); - } - elsif( $s->{_rules}->{$t} ) # validate using default attribute rule - { - print - {$s->{_out}} - $s->_validate($t, '_', $a, $as); - } - } - elsif( $s->{_rules}->{'*'} ) # default allow tags - { - print - {$s->{_out}} - $s->_validate($t, '_', $a, $as); - } - } - elsif ( $e eq 'end' ) - { - if( exists $s->{_rules}->{$t} ) - { - print - {$s->{_out}} - "</$t>" - if $s->{_rules}->{$t}; - - } - elsif( $s->{_rules}->{'*'} ) - { - - print {$s->{_out}} "</$t>"; - } - } - elsif ( $e eq 'comment' ) - { - print - {$s->{_out}} - $text - if $s->{_comment}; - } - elsif ( $e eq 'process' ) - { - print - {$s->{_out}} - $text - if $s->{_process}; - } - elsif ( $e eq 'text' or $e eq 'default') - { - $text =~ s/</</g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch - $text =~ s/>/>/g; - - print - {$s->{_out}} - $text; - } -} - -=for comment _scrub -I<default> handler, does the scrubbing if we're returning a giant string. - -=cut - -sub _scrub { - my( $p, $e, $t, $a, $as, $text ) = @_; - my $s = $p->{"\0_s"} ; - - if ( $e eq 'start' ) - { - if( exists $s->{_rules}->{$t} ) # is there a specific rule - { - if( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;) - { - $s->{_r} .= $s->_validate($t, $t, $a, $as); - } - elsif( $s->{_rules}->{$t} ) # validate using default attribute rule - { - $s->{_r} .= $s->_validate($t, '_', $a, $as); - } - } - elsif( $s->{_rules}->{'*'} ) # default allow tags - { - $s->{_r} .= $s->_validate($t, '_', $a, $as); - } - } - elsif ( $e eq 'end' ) - { - if( exists $s->{_rules}->{$t} ) - { - $s->{_r} .= "</$t>" if $s->{_rules}->{$t}; - } - elsif( $s->{_rules}->{'*'} ) - { - $s->{_r} .= "</$t>"; - } - } - elsif ( $e eq 'comment' ) - { - $s->{_r} .= $text if $s->{_comment}; - } - elsif ( $e eq 'process' ) - { - $s->{_r} .= $text if $s->{_process}; - } - elsif ( $e eq 'text' or $e eq 'default') - { - $text =~ s/</</g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch - $text =~ s/>/>/g; - - $s->{_r} .= $text; - } - elsif ( $e eq 'start_document' ) - { - $s->{_r} = ""; - } -} - -sub _optimize { - my($self) = @_; - - my( @ignore_elements ) = grep { not $self->{"_$_"} } qw(script style); - $self->{_p}->ignore_elements(@ignore_elements); # if @ is empty, we reset ;) - - return unless $self->{_optimize}; -#sub allow -# return unless $self->{_optimize}; # till I figure it out (huh) - - if( $self->{_rules}{'*'} ){ # default allow - $self->{_p}->report_tags(); # so clear it - } else { - - my(@reports) = - grep { # report only tags we want - $self->{_rules}{$_} - } keys %{ - $self->{_rules} - }; - - $self->{_p}->report_tags( # default deny, so optimize - @reports - ) if @reports; - } - -# sub deny -# return unless $self->{_optimize}; # till I figure it out (huh) - my(@ignores)= - grep { - not $self->{_rules}{$_} - } grep { - $_ ne '*' - } keys %{ - $self->{_rules} - }; - - $self->{_p}->ignore_tags( # always ignore stuff we don't want - @ignores - ) if @ignores; - - $self->{_optimize}=0; - return; -} - - -sub DESTROY { - delete $_[0]->{_p}->{"\0_s"}; # break circular reference -} -1; - -#print sprintf q[ '%-12s => %s,], "$_'", $h{$_} for sort keys %h;# perl! -#perl -ne"chomp;print $_;print qq'\t\t# test ', ++$a if /ok\(/;print $/" test.pl >test2.pl -#perl -ne"chomp;print $_;if( /ok\(/ ){s/\#test \d+$//;print qq'\t\t# test ', ++$a }print $/" test.pl >test2.pl -#perl -ne"chomp;if(/ok\(/){s/# test .*$//;print$_,qq'\t\t# test ',++$a}else{print$_}print$/" test.pl >test2.pl - -=head1 How does it work? - -When a tag is encountered, HTML::Scrubber -allows/denies the tag using the explicit rule if one exists. - -If no explicit rule exists, Scrubber applies the default rule. - -If an explicit rule exists, -but it's a simple rule(1), -the default attribute rule is applied. - -=head2 EXAMPLE - -=for example begin - - #!/usr/bin/perl -w - use HTML::Scrubber; - use strict; - # - my @allow = qw[ br hr b a ]; - # - my @rules = ( - script => 0, - img => { - src => qr{^(?!http://)}i, # only relative image links allowed - alt => 1, # alt attribute allowed - '*' => 0, # deny all other attributes - }, - ); - # - my @default = ( - 0 => # default rule, deny all tags - { - '*' => 1, # default rule, allow all attributes - 'href' => qr{^(?!(?:java)?script)}i, - 'src' => qr{^(?!(?:java)?script)}i, - # If your perl doesn't have qr - # just use a string with length greater than 1 - 'cite' => '(?i-xsm:^(?!(?:java)?script))', - 'language' => 0, - 'name' => 1, # could be sneaky, but hey ;) - 'onblur' => 0, - 'onchange' => 0, - 'onclick' => 0, - 'ondblclick' => 0, - 'onerror' => 0, - 'onfocus' => 0, - 'onkeydown' => 0, - 'onkeypress' => 0, - 'onkeyup' => 0, - 'onload' => 0, - 'onmousedown' => 0, - 'onmousemove' => 0, - 'onmouseout' => 0, - 'onmouseover' => 0, - 'onmouseup' => 0, - 'onreset' => 0, - 'onselect' => 0, - 'onsubmit' => 0, - 'onunload' => 0, - 'src' => 0, - 'type' => 0, - } - ); - # - my $scrubber = HTML::Scrubber->new(); - $scrubber->allow( @allow ); - $scrubber->rules( @rules ); # key/value pairs - $scrubber->default( @default ); - $scrubber->comment(1); # 1 allow, 0 deny - # - ## preferred way to create the same object - $scrubber = HTML::Scrubber->new( - allow => \@allow, - rules => \@rules, - default => \@default, - comment => 1, - process => 0, - ); - # - require Data::Dumper,die Data::Dumper::Dumper($scrubber) if @ARGV; - # - my $it = q[ - <?php echo(" EVIL EVIL EVIL "); ?> <!-- asdf --> - <hr> - <I FAKE="attribute" > IN ITALICS WITH FAKE="attribute" </I><br> - <B> IN BOLD </B><br> - <A NAME="evil"> - <A HREF="javascript:alert('die die die');">HREF=JAVA <!></A> - <br> - <A HREF="image/bigone.jpg" ONMOUSEOVER="alert('die die die');"> - <IMG SRC="image/smallone.jpg" ALT="ONMOUSEOVER JAVASCRIPT"> - </A> - </A> <br> - ]; - # - print "#original text",$/, $it, $/; - print - "#scrubbed text (default ", - $scrubber->default(), # no arguments returns the current value - " comment ", - $scrubber->comment(), - " process ", - $scrubber->process(), - " )", - $/, - $scrubber->scrub($it), - $/; - # - $scrubber->default(1); # allow all tags by default - $scrubber->comment(0); # deny comments - # - print - "#scrubbed text (default ", - $scrubber->default(), - " comment ", - $scrubber->comment(), - " process ", - $scrubber->process(), - " )", - $/, - $scrubber->scrub($it), - $/; - # - $scrubber->process(1); # allow process instructions (dangerous) - $default[0] = 1; # allow all tags by default - $default[1]->{'*'} = 0; # deny all attributes by default - $scrubber->default(@default); # set the default again - # - print - "#scrubbed text (default ", - $scrubber->default(), - " comment ", - $scrubber->comment(), - " process ", - $scrubber->process(), - " )", - $/, - $scrubber->scrub($it), - $/; - -=for example end - - -=head2 FUN - -If you have Test::Inline (and you've installed HTML::Scrubber), try - - pod2test Scrubber.pm >scrubber.t - perl scrubber.t - -=head1 SEE ALSO - -L<HTML::Parser>, L<Test::Inline>, L<HTML::Sanitizer>. - -=head1 BUGS/SUGGESTIONS/ETC - -Please use -https://rt.cpan.org/NoAuth/Bugs.html?Dist=HTML-Scrubber -to report I<bugs>/additions/etc -or send mail to <bug-HTML-Scrubber#rt.cpan.org>. - -=head1 AUTHOR - -D. H. (PodMaster) - -=head1 LICENSE - -Copyright (c) 2003-2004 by D.H. (PodMaster). All rights reserved. - -This module is free software; -you can redistribute it and/or modify it under -the same terms as Perl itself. -The LICENSE file contains the full text of the license. - -=cut + +=head1 NAME + +HTML::Scrubber - Perl extension for scrubbing/sanitizing html + +=head1 SYNOPSIS + +=for example begin + + #!/usr/bin/perl -w + use HTML::Scrubber; + use strict; + # + my $html = q[ + <style type="text/css"> BAD { background: #666; color: #666;} </style> + <script language="javascript"> alert("Hello, I am EVIL!"); </script> + <HR> + a => <a href=1>link </a> + br => <br> + b => <B> bold </B> + u => <U> UNDERLINE </U> + ]; + # + my $scrubber = HTML::Scrubber->new( allow => [ qw[ p b i u hr br ] ] ); # + # + print $scrubber->scrub($html); # + # + $scrubber->deny( qw[ p b i u hr br ] ); # + # + print $scrubber->scrub($html); # + # + + +=for example end + +=head1 DESCRIPTION + +If you wanna "scrub" or "sanitize" html input +in a reliable an flexible fashion, +then this module is for you. + +I wasn't satisfied with HTML::Sanitizer because it is +based on HTML::TreeBuilder, +so I thought I'd write something similar +that works directly with HTML::Parser. + +=head1 METHODS + +First a note on documentation: just study the L<EXAMPLE|"EXAMPLE"> below. +It's all the documentation you could need + +Also, be sure to read all the comments as well as +L<How does it work?|"How does it work?">. + +If you're new to perl, good luck to you. + +=cut + +package HTML::Scrubber; +use HTML::Parser(); +use HTML::Entities; +use vars qw[ $VERSION @_scrub @_scrub_fh ]; +use strict; + +$VERSION = '0.08'; + +# my my my my, these here to prevent foolishness like +# http://perlmonks.org/index.pl?node_id=251127#Stealing+Lexicals +(@_scrub )= ( \&_scrub, "self, event, tagname, attr, attrseq, text"); +(@_scrub_fh )= ( \&_scrub_fh, "self, event, tagname, attr, attrseq, text"); + +sub new { + my $package = shift; + my $p = HTML::Parser->new( + api_version => 3, + default_h => \@_scrub, + marked_sections => 0, + strict_comment => 0, + unbroken_text => 1, + case_sensitive => 0, + boolean_attribute_value => undef, + ); + + my $self = { + _p => $p, + _rules => { + '*' => 0, + }, + _comment => 0, + _process => 0, + _r => "", + _optimize => 1, + _script => 0, + _style => 0, + }; + + $p->{"\0_s"} = bless $self, $package; + + return $self unless @_; + + my(%args)= @_; + + for my $f( qw[ default allow deny rules process comment ] ) { + next unless exists $args{$f}; + if( ref $args{$f} ) { + $self->$f( @{ $args{$f} } ) ; + } else { + $self->$f( $args{$f} ) ; + } + } + + return $self; +} + +=head2 comment + + warn "comments are ", $p->comment ? 'allowed' : 'not allowed'; + $p->comment(0); # off by default + +=cut + +sub comment { + return + $_[0]->{_comment} + if @_ == 1; + $_[0]->{_comment} = $_[1]; + return; +} + +=head2 process + + warn "process instructions are ", $p->process ? 'allowed' : 'not allowed'; + $p->process(0); # off by default + +=cut + + +sub process { + return + $_[0]->{_process} + if @_ == 1; + $_[0]->{_process} = $_[1]; + return; +} + + +=head2 script + + warn "script tags (and everything in between) are supressed" + if $p->script; # off by default + $p->script( 0 || 1 ); + +B<**> Please note that this is implemented +using HTML::Parser's ignore_elements function, +so if C<script> is set to true, +all script tags encountered will be validated like all other tags. + +=cut + +sub script { + return + $_[0]->{_script} + if @_ == 1; + $_[0]->{_script} = $_[1]; + return; +} + +=head2 style + + warn "style tags (and everything in between) are supressed" + if $p->style; # off by default + $p->style( 0 || 1 ); + +B<**> Please note that this is implemented +using HTML::Parser's ignore_elements function, +so if C<style> is set to true, +all style tags encountered will be validated like all other tags. + +=cut + +sub style { + return + $_[0]->{_style} + if @_ == 1; + $_[0]->{_style} = $_[1]; + return; +} + +=head2 allow + + $p->allow(qw[ t a g s ]); + +=cut + +sub allow { + my $self = shift; + for my $k(@_){ + $self->{_rules}{lc $k}=1; + } + $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse + + return; +} + + +=head2 deny + + $p->deny(qw[ t a g s ]); + +=cut + +sub deny { + my $self = shift; + + for my $k(@_){ + $self->{_rules}{lc $k} = 0; + } + + $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse + + return; +} + +=head2 rules + + $p->rules( + img => { + src => qr{^(?!http://)}i, # only relative image links allowed + alt => 1, # alt attribute allowed + '*' => 0, # deny all other attributes + }, + b => 1, + ... + ); + +=cut + +sub rules{ + my $self = shift; + my(%rules)= @_; + for my $k(keys %rules) { + $self->{_rules}{lc $k} = $rules{$k}; + } + + $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse + + return; +} + +=head2 default + + print "default is ", $p->default(); + $p->default(1); # allow tags by default + $p->default( + undef, # don't change + { # default attribute rules + '*' => 1, # allow attributes by default + } + ); + +=cut + +sub default { + return + $_[0]->{_rules}{'*'} + if @_ == 1; + + $_[0]->{_rules}{'*'} = $_[1] if defined $_[1]; + $_[0]->{_rules}{'_'} = $_[2] if defined $_[2] and ref $_[2]; + $_[0]->{_optimize} = 1; # each time a rule changes, reoptimize when parse + + return; +} + +=head2 scrub_file + + $html = $scrubber->scrub_file('foo.html'); ## returns giant string + die "Eeek $!" unless defined $html; ## opening foo.html may have failed + $scrubber->scrub_file('foo.html', 'new.html') or die "Eeek $!"; + $scrubber->scrub_file('foo.html', *STDOUT) + or die "Eeek $!" + if fileno STDOUT; + +=cut + +sub scrub_file { + if(@_ > 2){ + return unless defined $_[0]->_out($_[2]); + } else { + $_[0]->{_p}->handler( default => @_scrub ); + } + + $_[0]->_optimize() ;#if $_[0]->{_optimize}; + + $_[0]->{_p}->parse_file($_[1]); + + return delete $_[0]->{_r} unless exists $_[0]->{_out}; + delete $_[0]->{_out}; + return 1; +} + +=head2 scrub + + print $scrubber->scrub($html); ## returns giant string + $scrubber->scrub($html, 'new.html') or die "Eeek $!"; + $scrubber->scrub($html', *STDOUT) + or die "Eeek $!" + if fileno STDOUT; + + +=cut + +sub scrub { + if(@_ > 2){ + return unless defined $_[0]->_out($_[2]); + } else { + $_[0]->{_p}->handler( default => @_scrub ); + } + + $_[0]->_optimize();# if $_[0]->{_optimize}; + + $_[0]->{_p}->parse($_[1]); + $_[0]->{_p}->eof(); + + return delete $_[0]->{_r} unless exists $_[0]->{_out}; + delete $_[0]->{_out}; + return 1; +} + + +=for comment _out + $scrubber->_out(*STDOUT) if fileno STDOUT; + $scrubber->_out('foo.html') or die "eeek $!"; + +=cut + +sub _out { + my($self, $o ) = @_; + + unless( ref $o and ref \$o ne 'GLOB') { + local *F; + open F, ">$o" or return undef; + binmode F; + $self->{_out} = *F; + } else { + $self->{_out} = $o; + } + + $self->{_p}->handler( default => @_scrub_fh ); + + return 1; +} + + +=for comment _validate +Uses $self->{_rules} to do attribute validation. +Takes tag, rule('_' || $tag), attrref. + +=cut + +sub _validate { + my($s, $t, $r, $a, $as) = @_; + return "<$t>" unless %$a; + + $r = $s->{_rules}->{$r}; + my %f; + + for my $k( keys %$a ) { + if( exists $r->{$k} ) { + if( ref $r->{$k} || length($r->{$k}) > 1 ) { + $f{$k} = $a->{$k} if $a->{$k} =~ m{$r->{$k}}; + } elsif( $r->{$k} ) { + $f{$k} = $a->{$k}; + } + } elsif( exists $r->{'*'} and $r->{'*'} ) { + $f{$k} = $a->{$k}; + } + } + + if( %f ){ + my %seen; + return "<$t $r>" + if $r = join ' ', + map { + defined $f{$_} + ? qq[$_="].encode_entities($f{$_}).q["] + : $_; # boolean attribute (TODO?) + } grep { + exists $f{$_} and !$seen{$_}++; + } @$as; + } + + return "<$t>"; +} + +=for comment _scrub_fh +I<default> handler, does the scrubbing if we're scrubbing out to a file. + +=cut + +sub _scrub_fh { + my( $p, $e, $t, $a, $as, $text ) = @_; + my $s = $p->{"\0_s"} ; + + if ( $e eq 'start' ) + { + if( exists $s->{_rules}->{$t} ) # is there a specific rule + { + if( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;) + { + print + {$s->{_out}} + $s->_validate($t, $t, $a, $as); + } + elsif( $s->{_rules}->{$t} ) # validate using default attribute rule + { + print + {$s->{_out}} + $s->_validate($t, '_', $a, $as); + } + } + elsif( $s->{_rules}->{'*'} ) # default allow tags + { + print + {$s->{_out}} + $s->_validate($t, '_', $a, $as); + } + } + elsif ( $e eq 'end' ) + { + if( exists $s->{_rules}->{$t} ) + { + print + {$s->{_out}} + "</$t>" + if $s->{_rules}->{$t}; + + } + elsif( $s->{_rules}->{'*'} ) + { + + print {$s->{_out}} "</$t>"; + } + } + elsif ( $e eq 'comment' ) + { + print + {$s->{_out}} + $text + if $s->{_comment}; + } + elsif ( $e eq 'process' ) + { + print + {$s->{_out}} + $text + if $s->{_process}; + } + elsif ( $e eq 'text' or $e eq 'default') + { + $text =~ s/</</g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch + $text =~ s/>/>/g; + + print + {$s->{_out}} + $text; + } +} + +=for comment _scrub +I<default> handler, does the scrubbing if we're returning a giant string. + +=cut + +sub _scrub { + my( $p, $e, $t, $a, $as, $text ) = @_; + my $s = $p->{"\0_s"} ; + + if ( $e eq 'start' ) + { + if( exists $s->{_rules}->{$t} ) # is there a specific rule + { + if( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;) + { + $s->{_r} .= $s->_validate($t, $t, $a, $as); + } + elsif( $s->{_rules}->{$t} ) # validate using default attribute rule + { + $s->{_r} .= $s->_validate($t, '_', $a, $as); + } + } + elsif( $s->{_rules}->{'*'} ) # default allow tags + { + $s->{_r} .= $s->_validate($t, '_', $a, $as); + } + } + elsif ( $e eq 'end' ) + { + if( exists $s->{_rules}->{$t} ) + { + $s->{_r} .= "</$t>" if $s->{_rules}->{$t}; + } + elsif( $s->{_rules}->{'*'} ) + { + $s->{_r} .= "</$t>"; + } + } + elsif ( $e eq 'comment' ) + { + $s->{_r} .= $text if $s->{_comment}; + } + elsif ( $e eq 'process' ) + { + $s->{_r} .= $text if $s->{_process}; + } + elsif ( $e eq 'text' or $e eq 'default') + { + $text =~ s/</</g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch + $text =~ s/>/>/g; + + $s->{_r} .= $text; + } + elsif ( $e eq 'start_document' ) + { + $s->{_r} = ""; + } +} + +sub _optimize { + my($self) = @_; + + my( @ignore_elements ) = grep { not $self->{"_$_"} } qw(script style); + $self->{_p}->ignore_elements(@ignore_elements); # if @ is empty, we reset ;) + + return unless $self->{_optimize}; +#sub allow +# return unless $self->{_optimize}; # till I figure it out (huh) + + if( $self->{_rules}{'*'} ){ # default allow + $self->{_p}->report_tags(); # so clear it + } else { + + my(@reports) = + grep { # report only tags we want + $self->{_rules}{$_} + } keys %{ + $self->{_rules} + }; + + $self->{_p}->report_tags( # default deny, so optimize + @reports + ) if @reports; + } + +# sub deny +# return unless $self->{_optimize}; # till I figure it out (huh) + my(@ignores)= + grep { + not $self->{_rules}{$_} + } grep { + $_ ne '*' + } keys %{ + $self->{_rules} + }; + + $self->{_p}->ignore_tags( # always ignore stuff we don't want + @ignores + ) if @ignores; + + $self->{_optimize}=0; + return; +} + + +sub DESTROY { + delete $_[0]->{_p}->{"\0_s"}; # break circular reference +} +1; + +#print sprintf q[ '%-12s => %s,], "$_'", $h{$_} for sort keys %h;# perl! +#perl -ne"chomp;print $_;print qq'\t\t# test ', ++$a if /ok\(/;print $/" test.pl >test2.pl +#perl -ne"chomp;print $_;if( /ok\(/ ){s/\#test \d+$//;print qq'\t\t# test ', ++$a }print $/" test.pl >test2.pl +#perl -ne"chomp;if(/ok\(/){s/# test .*$//;print$_,qq'\t\t# test ',++$a}else{print$_}print$/" test.pl >test2.pl + +=head1 How does it work? + +When a tag is encountered, HTML::Scrubber +allows/denies the tag using the explicit rule if one exists. + +If no explicit rule exists, Scrubber applies the default rule. + +If an explicit rule exists, +but it's a simple rule(1), +the default attribute rule is applied. + +=head2 EXAMPLE + +=for example begin + + #!/usr/bin/perl -w + use HTML::Scrubber; + use strict; + # + my @allow = qw[ br hr b a ]; + # + my @rules = ( + script => 0, + img => { + src => qr{^(?!http://)}i, # only relative image links allowed + alt => 1, # alt attribute allowed + '*' => 0, # deny all other attributes + }, + ); + # + my @default = ( + 0 => # default rule, deny all tags + { + '*' => 1, # default rule, allow all attributes + 'href' => qr{^(?!(?:java)?script)}i, + 'src' => qr{^(?!(?:java)?script)}i, + # If your perl doesn't have qr + # just use a string with length greater than 1 + 'cite' => '(?i-xsm:^(?!(?:java)?script))', + 'language' => 0, + 'name' => 1, # could be sneaky, but hey ;) + 'onblur' => 0, + 'onchange' => 0, + 'onclick' => 0, + 'ondblclick' => 0, + 'onerror' => 0, + 'onfocus' => 0, + 'onkeydown' => 0, + 'onkeypress' => 0, + 'onkeyup' => 0, + 'onload' => 0, + 'onmousedown' => 0, + 'onmousemove' => 0, + 'onmouseout' => 0, + 'onmouseover' => 0, + 'onmouseup' => 0, + 'onreset' => 0, + 'onselect' => 0, + 'onsubmit' => 0, + 'onunload' => 0, + 'src' => 0, + 'type' => 0, + } + ); + # + my $scrubber = HTML::Scrubber->new(); + $scrubber->allow( @allow ); + $scrubber->rules( @rules ); # key/value pairs + $scrubber->default( @default ); + $scrubber->comment(1); # 1 allow, 0 deny + # + ## preferred way to create the same object + $scrubber = HTML::Scrubber->new( + allow => \@allow, + rules => \@rules, + default => \@default, + comment => 1, + process => 0, + ); + # + require Data::Dumper,die Data::Dumper::Dumper($scrubber) if @ARGV; + # + my $it = q[ + <?php echo(" EVIL EVIL EVIL "); ?> <!-- asdf --> + <hr> + <I FAKE="attribute" > IN ITALICS WITH FAKE="attribute" </I><br> + <B> IN BOLD </B><br> + <A NAME="evil"> + <A HREF="javascript:alert('die die die');">HREF=JAVA <!></A> + <br> + <A HREF="image/bigone.jpg" ONMOUSEOVER="alert('die die die');"> + <IMG SRC="image/smallone.jpg" ALT="ONMOUSEOVER JAVASCRIPT"> + </A> + </A> <br> + ]; + # + print "#original text",$/, $it, $/; + print + "#scrubbed text (default ", + $scrubber->default(), # no arguments returns the current value + " comment ", + $scrubber->comment(), + " process ", + $scrubber->process(), + " )", + $/, + $scrubber->scrub($it), + $/; + # + $scrubber->default(1); # allow all tags by default + $scrubber->comment(0); # deny comments + # + print + "#scrubbed text (default ", + $scrubber->default(), + " comment ", + $scrubber->comment(), + " process ", + $scrubber->process(), + " )", + $/, + $scrubber->scrub($it), + $/; + # + $scrubber->process(1); # allow process instructions (dangerous) + $default[0] = 1; # allow all tags by default + $default[1]->{'*'} = 0; # deny all attributes by default + $scrubber->default(@default); # set the default again + # + print + "#scrubbed text (default ", + $scrubber->default(), + " comment ", + $scrubber->comment(), + " process ", + $scrubber->process(), + " )", + $/, + $scrubber->scrub($it), + $/; + +=for example end + + +=head2 FUN + +If you have Test::Inline (and you've installed HTML::Scrubber), try + + pod2test Scrubber.pm >scrubber.t + perl scrubber.t + +=head1 SEE ALSO + +L<HTML::Parser>, L<Test::Inline>, L<HTML::Sanitizer>. + +=head1 BUGS/SUGGESTIONS/ETC + +Please use +https://rt.cpan.org/NoAuth/Bugs.html?Dist=HTML-Scrubber +to report I<bugs>/additions/etc +or send mail to <bug-HTML-Scrubber#rt.cpan.org>. + +=head1 AUTHOR + +D. H. (PodMaster) + +=head1 LICENSE + +Copyright (c) 2003-2004 by D.H. (PodMaster). All rights reserved. + +This module is free software; +you can redistribute it and/or modify it under +the same terms as Perl itself. +The LICENSE file contains the full text of the license. + +=cut diff --git a/t/01_use.t b/t/01_use.t index 76d61fd..af0a185 100644 --- a/t/01_use.t +++ b/t/01_use.t @@ -1,12 +1,12 @@ -# Before `make install' is performed this script should be runnable with -# `make test'. After `make install' it should work as `perl test.pl' - -######################### - -# change 'tests => 1' to 'tests => '; - -use Test; -BEGIN { plan tests => 1 }; -use HTML::Scrubber; -ok(1); - +# Before `make install' is performed this script should be runnable with +# `make test'. After `make install' it should work as `perl test.pl' + +######################### + +# change 'tests => 1' to 'tests => '; + +use Test; +BEGIN { plan tests => 1 }; +use HTML::Scrubber; +ok(1); + diff --git a/t/02_basic.t b/t/02_basic.t index 8662b25..1aead3d 100644 --- a/t/02_basic.t +++ b/t/02_basic.t @@ -1,151 +1,151 @@ -# Before `make install' is performed this script should be runnable with -# `make test'. After `make install' it should work as `perl test.pl' - -######################### - -# change 'tests => 1' to 'tests => last_test_to_print'; - -use Test; -BEGIN { plan tests => 77 }; -use HTML::Scrubber; -ok(1); # If we made it this far, we're ok. # test 1 - -######################### - -# Insert your test code below, the Test module is use()ed here so read -# its man page ( perldoc Test ) for help writing this test script. - - -my $html = q[ - <script>//blah</script> - <HR Align="left"> - <B> bold < - <U> underlined - <I> - <A href='#"'> LINK </A> - </I> - </U> - </B> - <!-- comments --> -]; - -my $scrubber = HTML::Scrubber->new(); - -ok( $scrubber ); # test 2 -ok( ! $scrubber->default() ); # test 3 -ok( ! $scrubber->comment() ); # test 4 -ok( ! $scrubber->process() ); # test 5 -ok( ! $scrubber->allow( qw[ p b i u hr br ] ) ); # test 6 - -$scrubber = $scrubber->scrub($html); - -ok( $scrubber ); # test 7 -ok( $scrubber !~ /href/i ); # test 8 -ok( $scrubber !~ /Align/i ); # test 9 -ok( $scrubber !~ /\Q<!--\E/ ); # test 10 -ok( $scrubber =~ /bold </ ); # test 11 - -$scrubber = HTML::Scrubber->new( deny => [ qw[ p b i u hr br ] ] ); - -ok( $scrubber ); # test 12 -ok( ! $scrubber->default() ); # test 13 -ok( ! $scrubber->comment() ); # test 14 -ok( ! $scrubber->process() ); # test 15 - -$scrubber = $scrubber->scrub($html); - -ok( $scrubber ); # test 16 -ok( $scrubber !~ /[><]/ ); # test 17 -ok( $scrubber !~ /href/i ); # test 18 -ok( $scrubber !~ /Align/i ); # test 19 -ok( $scrubber !~ /\Q<!--\E/ ); # test 20 -ok( $scrubber =~ /bold </ ); # test 21 - -$scrubber = HTML::Scrubber->new( default => [ 0 ] ); - -ok( $scrubber ); # test 22 -ok( ! $scrubber->default() ); # test 23 -ok( ! $scrubber->comment() ); # test 24 -ok( ! $scrubber->process() ); # test 25 - -$scrubber = $scrubber->scrub($html); - -ok( $scrubber ); # test 26 -ok( $scrubber !~ /[><]/ ); # test 27 -ok( $scrubber !~ /href/i ); # test 28 -ok( $scrubber !~ /Align/i ); # test 29 -ok( $scrubber !~ /\Q<!--\E/ ); # test 30 -ok( $scrubber =~ /bold </ ); # test 31 - -$scrubber = HTML::Scrubber->new( default => [ 1 ] ); - -ok( $scrubber ); # test 32 -ok( $scrubber->default() ); # test 33 -ok( ! $scrubber->comment() ); # test 34 -ok( ! $scrubber->process() ); # test 35 - -#use Data::Dumper;die Dumper( [ $scrubber, $scrubber->scrub($html) ]); - -$scrubber = $scrubber->scrub($html); - - -ok( $scrubber ); # test 36 -ok( $scrubber =~ /[><]/ ); # test 37 -ok( $scrubber !~ /href/i ); # test 38 -ok( $scrubber !~ /Align/i ); # test 39 -ok( $scrubber !~ /\Q<!--\E/ ); # test 40 -ok( $scrubber =~ /bold </ ); # test 41 - -$scrubber = HTML::Scrubber->new( default => [ 1 ] ); - -ok( $scrubber ); # test 42 -ok( $scrubber->default() ); # test 43 -ok( ! $scrubber->comment() ); # test 44 -ok( ! $scrubber->process() ); # test 45 -ok( ! $scrubber->comment(1) ); # test 46 - -$scrubber = $scrubber->scrub($html); - -ok( $scrubber ); # test 47 -ok( $scrubber =~ /[><]/ ); # test 48 -ok( $scrubber !~ /href/i ); # test 49 -ok( $scrubber !~ /Align/i ); # test 50 -ok( $scrubber =~ /\Q<!--\E/ ); # test 51 -ok( $scrubber =~ /bold </ ); # test 52 - - -$scrubber = HTML::Scrubber->new( default => [ 1 => { align => 1, '*' => 0 } ] ); - -ok( $scrubber ); # test 53 -ok( $scrubber->default() ); # test 54 -ok( ! $scrubber->comment() ); # test 55 -ok( ! $scrubber->process() ); # test 56 -ok( ! $scrubber->comment(1) ); # test 57 - -$scrubber = $scrubber->scrub($html); - -ok( $scrubber ); # test 58 -ok( $scrubber =~ /[><]/ ); # test 59 -ok( $scrubber !~ /href/i ); # test 60 -ok( $scrubber =~ /Align/i ); # test 61 -ok( $scrubber =~ /\Q<!--\E/ ); # test 62 -ok( $scrubber =~ /"left"/ ); # test 63 -ok( $scrubber =~ /bold </ ); # test 64 - -$scrubber = HTML::Scrubber->new( default => [ 1 => { align => 0, '*' => 1 } ] ); - -ok( $scrubber ); # test 65 -ok( $scrubber->default() ); # test 66 -ok( ! $scrubber->comment() ); # test 67 -ok( ! $scrubber->process() ); # test 68 -ok( ! $scrubber->comment(1) ); # test 69 -$scrubber = $scrubber->scrub($html); - -ok( $scrubber ); # test 70 -ok( $scrubber =~ /[><]/ ); # test 71 -ok( $scrubber =~ /href/i ); # test 72 -ok( $scrubber !~ /Align/i ); # test 73 -ok( $scrubber =~ /\Q<!--\E/ ); # test 74 -ok( $scrubber =~ /\Q"\E/ ); # test 75 -ok( $scrubber =~ /\#/ ); # test 76 -ok( $scrubber =~ /bold </ ); # test 77 +# Before `make install' is performed this script should be runnable with +# `make test'. After `make install' it should work as `perl test.pl' + +######################### + +# change 'tests => 1' to 'tests => last_test_to_print'; + +use Test; +BEGIN { plan tests => 77 }; +use HTML::Scrubber; +ok(1); # If we made it this far, we're ok. # test 1 + +######################### + +# Insert your test code below, the Test module is use()ed here so read +# its man page ( perldoc Test ) for help writing this test script. + + +my $html = q[ + <script>//blah</script> + <HR Align="left"> + <B> bold < + <U> underlined + <I> + <A href='#"'> LINK </A> + </I> + </U> + </B> + <!-- comments --> +]; + +my $scrubber = HTML::Scrubber->new(); + +ok( $scrubber ); # test 2 +ok( ! $scrubber->default() ); # test 3 +ok( ! $scrubber->comment() ); # test 4 +ok( ! $scrubber->process() ); # test 5 +ok( ! $scrubber->allow( qw[ p b i u hr br ] ) ); # test 6 + +$scrubber = $scrubber->scrub($html); + +ok( $scrubber ); # test 7 +ok( $scrubber !~ /href/i ); # test 8 +ok( $scrubber !~ /Align/i ); # test 9 +ok( $scrubber !~ /\Q<!--\E/ ); # test 10 +ok( $scrubber =~ /bold </ ); # test 11 + +$scrubber = HTML::Scrubber->new( deny => [ qw[ p b i u hr br ] ] ); + +ok( $scrubber ); # test 12 +ok( ! $scrubber->default() ); # test 13 +ok( ! $scrubber->comment() ); # test 14 +ok( ! $scrubber->process() ); # test 15 + +$scrubber = $scrubber->scrub($html); + +ok( $scrubber ); # test 16 +ok( $scrubber !~ /[><]/ ); # test 17 +ok( $scrubber !~ /href/i ); # test 18 +ok( $scrubber !~ /Align/i ); # test 19 +ok( $scrubber !~ /\Q<!--\E/ ); # test 20 +ok( $scrubber =~ /bold </ ); # test 21 + +$scrubber = HTML::Scrubber->new( default => [ 0 ] ); + +ok( $scrubber ); # test 22 +ok( ! $scrubber->default() ); # test 23 +ok( ! $scrubber->comment() ); # test 24 +ok( ! $scrubber->process() ); # test 25 + +$scrubber = $scrubber->scrub($html); + +ok( $scrubber ); # test 26 +ok( $scrubber !~ /[><]/ ); # test 27 +ok( $scrubber !~ /href/i ); # test 28 +ok( $scrubber !~ /Align/i ); # test 29 +ok( $scrubber !~ /\Q<!--\E/ ); # test 30 +ok( $scrubber =~ /bold </ ); # test 31 + +$scrubber = HTML::Scrubber->new( default => [ 1 ] ); + +ok( $scrubber ); # test 32 +ok( $scrubber->default() ); # test 33 +ok( ! $scrubber->comment() ); # test 34 +ok( ! $scrubber->process() ); # test 35 + +#use Data::Dumper;die Dumper( [ $scrubber, $scrubber->scrub($html) ]); + +$scrubber = $scrubber->scrub($html); + + +ok( $scrubber ); # test 36 +ok( $scrubber =~ /[><]/ ); # test 37 +ok( $scrubber !~ /href/i ); # test 38 +ok( $scrubber !~ /Align/i ); # test 39 +ok( $scrubber !~ /\Q<!--\E/ ); # test 40 +ok( $scrubber =~ /bold </ ); # test 41 + +$scrubber = HTML::Scrubber->new( default => [ 1 ] ); + +ok( $scrubber ); # test 42 +ok( $scrubber->default() ); # test 43 +ok( ! $scrubber->comment() ); # test 44 +ok( ! $scrubber->process() ); # test 45 +ok( ! $scrubber->comment(1) ); # test 46 + +$scrubber = $scrubber->scrub($html); + +ok( $scrubber ); # test 47 +ok( $scrubber =~ /[><]/ ); # test 48 +ok( $scrubber !~ /href/i ); # test 49 +ok( $scrubber !~ /Align/i ); # test 50 +ok( $scrubber =~ /\Q<!--\E/ ); # test 51 +ok( $scrubber =~ /bold </ ); # test 52 + + +$scrubber = HTML::Scrubber->new( default => [ 1 => { align => 1, '*' => 0 } ] ); + +ok( $scrubber ); # test 53 +ok( $scrubber->default() ); # test 54 +ok( ! $scrubber->comment() ); # test 55 +ok( ! $scrubber->process() ); # test 56 +ok( ! $scrubber->comment(1) ); # test 57 + +$scrubber = $scrubber->scrub($html); + +ok( $scrubber ); # test 58 +ok( $scrubber =~ /[><]/ ); # test 59 +ok( $scrubber !~ /href/i ); # test 60 +ok( $scrubber =~ /Align/i ); # test 61 +ok( $scrubber =~ /\Q<!--\E/ ); # test 62 +ok( $scrubber =~ /"left"/ ); # test 63 +ok( $scrubber =~ /bold </ ); # test 64 + +$scrubber = HTML::Scrubber->new( default => [ 1 => { align => 0, '*' => 1 } ] ); + +ok( $scrubber ); # test 65 +ok( $scrubber->default() ); # test 66 +ok( ! $scrubber->comment() ); # test 67 +ok( ! $scrubber->process() ); # test 68 +ok( ! $scrubber->comment(1) ); # test 69 +$scrubber = $scrubber->scrub($html); + +ok( $scrubber ); # test 70 +ok( $scrubber =~ /[><]/ ); # test 71 +ok( $scrubber =~ /href/i ); # test 72 +ok( $scrubber !~ /Align/i ); # test 73 +ok( $scrubber =~ /\Q<!--\E/ ); # test 74 +ok( $scrubber =~ /\Q"\E/ ); # test 75 +ok( $scrubber =~ /\#/ ); # test 76 +ok( $scrubber =~ /bold </ ); # test 77 diff --git a/t/03_more.t b/t/03_more.t index 8142645..68c60cd 100644 --- a/t/03_more.t +++ b/t/03_more.t @@ -1,6 +1,6 @@ -# perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake test -# cpan-upload -mailto [email protected] -verbose -user podmaster HTML-Scrubber-0.04.tar.gz - +# perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake test +# cpan-upload -mailto [email protected] -verbose -user podmaster HTML-Scrubber-0.04.tar.gz + use strict; use Test::More tests => 7; @@ -8,39 +8,39 @@ BEGIN { $^W = 1 } use_ok( 'HTML::Scrubber' ); -my $s = HTML::Scrubber->new; -my $html = q[<a href=1>link </a><br><B> bold </B><U> UNDERLINE </U>]; - -isa_ok($s, 'HTML::Scrubber'); - -$s->rules( 'font' => { face => 1 } ); - -is( $s->scrub('<font face="gothic">'), '<font face="gothic">', 'font face gothic' ); - -$s->allow(qw[ U ]); - -#use Data::Dumper;warn $/,Dumper($s); - -is( $s->scrub($html), q[link bold <u> UNDERLINE </u>],'only U'); - -$s->allow(qw[ B U ]); - -#use Data::Dumper;warn $/,Dumper($s); - -is( $s->scrub($html), q[link <b> bold </b><u> UNDERLINE </u>],'B and U'); - -$s->allow(qw[ A B ]); -$s->deny('U'); -$s->default(0,{ '*'=> 1}); - -#use Data::Dumper;warn $/,Dumper($s); - -is( $s->scrub($html), q[<a href="1">link </a><b> bold </b> UNDERLINE ],'A and B'); - -$s = HTML::Scrubber->new( - default => [ 1, { '*' => 1 } ] -); - -is( $s->scrub($html), q[<a href="1">link </a><br><b> bold </b><u> UNDERLINE </u>], 'A B U and BR'); - -#use Data::Dumper;warn $/,Dumper($s); +my $s = HTML::Scrubber->new; +my $html = q[<a href=1>link </a><br><B> bold </B><U> UNDERLINE </U>]; + +isa_ok($s, 'HTML::Scrubber'); + +$s->rules( 'font' => { face => 1 } ); + +is( $s->scrub('<font face="gothic">'), '<font face="gothic">', 'font face gothic' ); + +$s->allow(qw[ U ]); + +#use Data::Dumper;warn $/,Dumper($s); + +is( $s->scrub($html), q[link bold <u> UNDERLINE </u>],'only U'); + +$s->allow(qw[ B U ]); + +#use Data::Dumper;warn $/,Dumper($s); + +is( $s->scrub($html), q[link <b> bold </b><u> UNDERLINE </u>],'B and U'); + +$s->allow(qw[ A B ]); +$s->deny('U'); +$s->default(0,{ '*'=> 1}); + +#use Data::Dumper;warn $/,Dumper($s); + +is( $s->scrub($html), q[<a href="1">link </a><b> bold </b> UNDERLINE ],'A and B'); + +$s = HTML::Scrubber->new( + default => [ 1, { '*' => 1 } ] +); + +is( $s->scrub($html), q[<a href="1">link </a><br><b> bold </b><u> UNDERLINE </u>], 'A B U and BR'); + +#use Data::Dumper;warn $/,Dumper($s); diff --git a/t/04_style_script.t b/t/04_style_script.t index 4b3eaaa..e9d6dd3 100644 --- a/t/04_style_script.t +++ b/t/04_style_script.t @@ -1,4 +1,4 @@ -# perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake test +# perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake test use strict; use Test::More tests => 9; @@ -6,22 +6,22 @@ BEGIN { $^W = 1 } use_ok( 'HTML::Scrubber' ); -my $s = HTML::Scrubber->new; -my $html = q[start <style>in the style</style> middle <script>in the script</script> end]; - -isa_ok($s, 'HTML::Scrubber'); - -is( $s->script, 0, 'script off by default'); -is( $s->style, 0, 'style off by default'); -is( $s->scrub($html), 'start middle end', 'default (no style no script)'); - - -$s->script(1); -is( $s->script, 1, 'script on'); -is( $s->scrub($html), 'start middle in the script end', 'script off'); - - - -$s->style(1); -is( $s->style, 1, 'style on'); +my $s = HTML::Scrubber->new; +my $html = q[start <style>in the style</style> middle <script>in the script</script> end]; + +isa_ok($s, 'HTML::Scrubber'); + +is( $s->script, 0, 'script off by default'); +is( $s->style, 0, 'style off by default'); +is( $s->scrub($html), 'start middle end', 'default (no style no script)'); + + +$s->script(1); +is( $s->script, 1, 'script on'); +is( $s->scrub($html), 'start middle in the script end', 'script off'); + + + +$s->style(1); +is( $s->style, 1, 'style on'); is( $s->scrub($html), 'start in the style middle in the script end', 'style off and script off'); \ No newline at end of file diff --git a/t/05_pi_comment.t b/t/05_pi_comment.t index f300ab7..f90d419 100644 --- a/t/05_pi_comment.t +++ b/t/05_pi_comment.t @@ -1,4 +1,4 @@ -# perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake test +# perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake test use strict; use Test::More tests => 9; @@ -6,22 +6,22 @@ BEGIN { $^W = 1 } use_ok( 'HTML::Scrubber' ); -my $s = HTML::Scrubber->new; -my $html = q[start <!--comment--> mid1 <?html pi> mid2 <?xml pi?> end]; - -isa_ok($s, 'HTML::Scrubber'); - -is( $s->comment, 0, 'comment off by default'); -is( $s->process, 0, 'process off by default'); -is( $s->scrub($html), 'start mid1 mid2 end'); - - -$s->comment(1); -is( $s->comment, 1, 'comment on'); -is( $s->scrub($html), 'start <!--comment--> mid1 mid2 end', 'comment on'); - - - -$s->process(1); -is( $s->process, 1, 'process on'); +my $s = HTML::Scrubber->new; +my $html = q[start <!--comment--> mid1 <?html pi> mid2 <?xml pi?> end]; + +isa_ok($s, 'HTML::Scrubber'); + +is( $s->comment, 0, 'comment off by default'); +is( $s->process, 0, 'process off by default'); +is( $s->scrub($html), 'start mid1 mid2 end'); + + +$s->comment(1); +is( $s->comment, 1, 'comment on'); +is( $s->scrub($html), 'start <!--comment--> mid1 mid2 end', 'comment on'); + + + +$s->process(1); +is( $s->process, 1, 'process on'); is( $s->scrub($html), 'start <!--comment--> mid1 <?html pi> mid2 <?xml pi?> end', 'process on'); \ No newline at end of file diff --git a/t/06_scrub_file.t b/t/06_scrub_file.t index 8c0cba8..75faa6a 100644 --- a/t/06_scrub_file.t +++ b/t/06_scrub_file.t @@ -1,57 +1,57 @@ -# perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake test +# perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake test -use strict; +use strict; use File::Spec; use Test::More tests => 10; BEGIN { $^W = 1 } use_ok( 'HTML::Scrubber' ); -my $s = HTML::Scrubber->new; -my $html = q[<html><body><p>hi<br>start <!--comment--> mid1 <?html pi> mid2 <?xml pi?> end</body></html>]; - - isa_ok($s, 'HTML::Scrubber'); - -my $tmpdir = File::Spec->tmpdir(); - -SKIP: { - skip "no writable temporary directory found", 6 - unless length $tmpdir - and -d $tmpdir; - - my $tmpfile = File::Spec->catfile($tmpdir,"html-scrubber.test.html"); - my $r = $s->scrub($html,$tmpfile); - $r = "Error: \$@=$@ \$!=$!" unless $r; - is($r, 1, "scrub(\$html,\$tmpfile=$tmpfile)"); - -# use Data::Dumper;die Dumper($s); - - local *FILIS; - open FILIS, "+>$tmpfile" or die "can't write to $tmpfile"; - - $r = $s->scrub($html,\*FILIS); - $r = "Error: \$@=$@ \$!=$!" unless $r; - - is($r, 1, q[scrub($html,\*FILIS)]); - - seek *FILIS,0,0; - $r = join '', readline *FILIS; - is($r,"histart mid1 mid2 end","FILIS has the right stuff"); - is(close(FILIS),1,q[close(FILIS)]); - - $r = $s->scrub_file($tmpfile,"$tmpfile.html"); - $r = "Error: \$@=$@ \$!=$!" unless $r; - - is($r, 1, qq[scrub_file(\$tmpfile,"\$tmpfile.html"=$tmpfile.html)]); - - open FILIS, "+>$tmpfile.html" or die "can't write to $tmpfile"; - $r = $s->scrub_file($tmpfile,\*FILIS); - $r = "Error: \$@=$@ \$!=$!" unless $r; - - is($r, 1, q[scrub_file($tmpfile,\*FILIS)]); - seek *FILIS,0,0; - $r = join '', readline *FILIS; - is($r,"histart mid1 mid2 end","FILIS has the right stuff"); - is(close(FILIS),1,q[close(FILIS)]); - -}; +my $s = HTML::Scrubber->new; +my $html = q[<html><body><p>hi<br>start <!--comment--> mid1 <?html pi> mid2 <?xml pi?> end</body></html>]; + + isa_ok($s, 'HTML::Scrubber'); + +my $tmpdir = File::Spec->tmpdir(); + +SKIP: { + skip "no writable temporary directory found", 6 + unless length $tmpdir + and -d $tmpdir; + + my $tmpfile = File::Spec->catfile($tmpdir,"html-scrubber.test.html"); + my $r = $s->scrub($html,$tmpfile); + $r = "Error: \$@=$@ \$!=$!" unless $r; + is($r, 1, "scrub(\$html,\$tmpfile=$tmpfile)"); + +# use Data::Dumper;die Dumper($s); + + local *FILIS; + open FILIS, "+>$tmpfile" or die "can't write to $tmpfile"; + + $r = $s->scrub($html,\*FILIS); + $r = "Error: \$@=$@ \$!=$!" unless $r; + + is($r, 1, q[scrub($html,\*FILIS)]); + + seek *FILIS,0,0; + $r = join '', readline *FILIS; + is($r,"histart mid1 mid2 end","FILIS has the right stuff"); + is(close(FILIS),1,q[close(FILIS)]); + + $r = $s->scrub_file($tmpfile,"$tmpfile.html"); + $r = "Error: \$@=$@ \$!=$!" unless $r; + + is($r, 1, qq[scrub_file(\$tmpfile,"\$tmpfile.html"=$tmpfile.html)]); + + open FILIS, "+>$tmpfile.html" or die "can't write to $tmpfile"; + $r = $s->scrub_file($tmpfile,\*FILIS); + $r = "Error: \$@=$@ \$!=$!" unless $r; + + is($r, 1, q[scrub_file($tmpfile,\*FILIS)]); + seek *FILIS,0,0; + $r = join '', readline *FILIS; + is($r,"histart mid1 mid2 end","FILIS has the right stuff"); + is(close(FILIS),1,q[close(FILIS)]); + +}; diff --git a/t/07_booleans.t b/t/07_booleans.t index d006e64..6a5a640 100644 --- a/t/07_booleans.t +++ b/t/07_booleans.t @@ -1,73 +1,73 @@ -# 07_booleans.t +# 07_booleans.t -use strict; +use strict; use File::Spec; use Test::More tests => 9; BEGIN { $^W = 1 } -use_ok( 'HTML::Scrubber' ); - -use HTML::Scrubber; -my @allow = qw[ br hr b a option button th ]; -my $scrubber = HTML::Scrubber->new(); -$scrubber->allow( @allow ); -$scrubber->default( - undef, # don't change - { # default attribute rules - '/' => 1, # '/' ia boolean (stand-alone) attribute - 'pie' => 1, - 'selected' => 1, - 'disabled' => 1, - 'nowrap' => 1, - } -); - -ok( $scrubber, "got scrubber"); - -test( -q~<br> hi <br /> <a href= >~, -q~<br> hi <br /> <a>~, +use_ok( 'HTML::Scrubber' ); + +use HTML::Scrubber; +my @allow = qw[ br hr b a option button th ]; +my $scrubber = HTML::Scrubber->new(); +$scrubber->allow( @allow ); +$scrubber->default( + undef, # don't change + { # default attribute rules + '/' => 1, # '/' ia boolean (stand-alone) attribute + 'pie' => 1, + 'selected' => 1, + 'disabled' => 1, + 'nowrap' => 1, + } +); + +ok( $scrubber, "got scrubber"); + +test( +q~<br> hi <br /> <a href= >~, +q~<br> hi <br /> <a>~, "br /"); - - -test( -q~<option selected> flicka <a href=>~, -q~<option selected> flicka <a>~, + + +test( +q~<option selected> flicka <a href=>~, +q~<option selected> flicka <a>~, "selected"); - -test( -q~<button name="flicka" Disabled > the flicker </button>~, -q~<button disabled> the flicker </button>~, + +test( +q~<button name="flicka" Disabled > the flicker </button>~, +q~<button disabled> the flicker </button>~, "disabled"); - - -test( -q~<button disabled > dd </button>~, -q~<button disabled> dd </button>~, + + +test( +q~<button disabled > dd </button>~, +q~<button disabled> dd </button>~, "dd"); - - -test( -q~<a disabled pie=6> | </a>~, -q~<a disabled pie="6"> | </a>~, + + +test( +q~<a disabled pie=6> | </a>~, +q~<a disabled pie="6"> | </a>~, "pie"); - - -test( -q~<a selected disabled selected pie pie pie disabled /> | </a>~, -q~<a selected disabled pie /> | </a>~, + + +test( +q~<a selected disabled selected pie pie pie disabled /> | </a>~, +q~<a selected disabled pie /> | </a>~, "selected pie"); - - -#dependent on version of HTML::Parser, after 0.36 1st is returned (ie pie) -#test(q~<br pie pie=4>~, q~<br pie="4">~, 'repeated mixed'); -test( q~<th nowrap=nowrap>~, -q~<th nowrap="nowrap">~, + +#dependent on version of HTML::Parser, after 0.36 1st is returned (ie pie) +#test(q~<br pie pie=4>~, q~<br pie="4">~, 'repeated mixed'); + +test( q~<th nowrap=nowrap>~, +q~<th nowrap="nowrap">~, "th nowrap=nowrap"); - - - + + + sub test { my ($in, $out, $name) = @_; -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libhtml-scrubber-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits
