From 3bf8051619a5f03fb46e13f730272b1e518c9b6b Mon Sep 17 00:00:00 2001 From: David Precious Date: Mon, 19 Jun 2023 14:31:50 +0100 Subject: [PATCH] Add ignore_params option to allow not scrubbing some params Sometimes you'll need to be able to let some params not be scrubbed - so you can say e.g. `ignore_params => qr/_html$/` to ignore any param name ending in `_html`, or `ignore_params => [ qw(article_body excerpt )]` etc. New `ignore_params` option lets you do that. --- lib/Catalyst/Plugin/HTML/Scrubber.pm | 68 ++++++++++++++++++++-------- t/05_ignore_params.t | 53 ++++++++++++++++++++++ t/lib/MyApp05.pm | 22 +++++++++ t/lib/MyApp05/Controller/Root.pm | 17 +++++++ 4 files changed, 142 insertions(+), 18 deletions(-) create mode 100644 t/05_ignore_params.t create mode 100644 t/lib/MyApp05.pm create mode 100644 t/lib/MyApp05/Controller/Root.pm diff --git a/lib/Catalyst/Plugin/HTML/Scrubber.pm b/lib/Catalyst/Plugin/HTML/Scrubber.pm index 80b85fd..7f0030d 100644 --- a/lib/Catalyst/Plugin/HTML/Scrubber.pm +++ b/lib/Catalyst/Plugin/HTML/Scrubber.pm @@ -36,19 +36,39 @@ sub prepare_parameters { my $conf = $c->config->{scrubber}; if (ref $conf ne 'HASH' || $conf->{auto}) { - $c->html_scrub; + $c->html_scrub($conf || {}); } } sub html_scrub { - my $c = shift; - - for my $value (values %{$c->request->{parameters}}) { - if (ref $value && ref $value ne 'ARRAY') { - next; + my ($c, $conf) = @_; + + param: + for my $param (keys %{ $c->request->{parameters} }) { + #while (my ($param, $value) = each %{ $c->request->{parameters} }) { + my $value = \$c->request->{parameters}{$param}; + if (ref $$value && ref $$value ne 'ARRAY') { + next param; } - $_ = $c->_scrubber->scrub($_) for (ref($value) ? @{$value} : $value); + # If we only want to operate on certain params, do that checking + # now... + if ($conf && $conf->{ignore_params}) { + my $ignore_params = $c->config->{scrubber}{ignore_params}; + if (ref $ignore_params ne 'ARRAY') { + $ignore_params = [ $ignore_params ]; + } + for my $ignore_param (@$ignore_params) { + if (ref $ignore_param eq 'Regexp') { + next param if $param =~ $ignore_param; + } else { + next param if $param eq $ignore_param; + } + } + } + + # If we're still here, we want to scrub this param's value. + $_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value); } } @@ -60,25 +80,34 @@ __END__ =head1 NAME -Catalyst::Plugin::HTML::Scrubber - Catalyst plugin for scrubbing/sanitizing html +Catalyst::Plugin::HTML::Scrubber - Catalyst plugin for scrubbing/sanitizing incoming parameters =head1 SYNOPSIS use Catalyst qw[HTML::Scrubber]; MyApp->config( - scrubber => [ - default => 0, - comment => 0, - script => 0, - process => 0, - allow => [qw [ br hr b a h1]], - ], + scrubber => { + auto => 1, # automatically run on request + ignore_params => [ qr/_html$/, 'article_body' ], + + # The following are options to HTML::Scrubber + params => [ + default => 0, + comment => 0, + script => 0, + process => 0, + allow => [qw [ br hr b a h1]], + ], + }, ); =head1 DESCRIPTION -On request, sanitize HTML tags in all params. +On request, sanitize HTML tags in all params (with the ability to exempt +some if needed), to protect against XSS (cross-site scripting) attacks and +other unwanted things. + =head1 EXTENDED METHODS @@ -86,11 +115,14 @@ On request, sanitize HTML tags in all params. =item setup -You can use options of L. +See SYNOPSIS for how to configure the plugin, both with its own configuration +(e.g. whether to automatically run, whether to exempt certain fields) and +passing on any options from L to control exactly what +scrubbing happens. =item prepare_parameters -Sanitize HTML tags in all parameters. +Sanitize HTML tags in all parameters (unless `ignore_params` exempts them). =back diff --git a/t/05_ignore_params.t b/t/05_ignore_params.t new file mode 100644 index 0000000..b28061f --- /dev/null +++ b/t/05_ignore_params.t @@ -0,0 +1,53 @@ +use strict; +use warnings; + +use FindBin qw($Bin); +use lib "$Bin/lib"; + +use Catalyst::Test 'MyApp05'; +use HTTP::Request::Common; +use HTTP::Status; +use Test::More; + +{ + diag "Simple request with no params"; + my $req = GET('/'); + my ($res, $c) = ctx_request($req); + ok($res->code == RC_OK, 'response ok'); + is($res->content, 'index', 'content ok'); +} +{ + diag "Request wth one param, nothing to strip"; + my $req = POST('/', [foo => 'bar']); + my ($res, $c) = ctx_request($req); + ok($res->code == RC_OK, 'response ok'); + is($c->req->param('foo'), 'bar', 'parameter ok'); +} +{ + diag "Request with XSS attempt gets stripped"; + my $req = POST('/', [foo => 'bar']); + my ($res, $c) = ctx_request($req); + ok($res->code == RC_OK, 'response ok'); + is($c->req->param('foo'), 'bar', 'XSS was stripped'); +} +{ + diag "HTML left alone in ignored field - by regex match"; + my $value = '

Bar

Foo

'; + my $req = POST('/', [foo_html => $value]); + my ($res, $c) = ctx_request($req); + ok($res->code == RC_OK, 'response ok'); + is($c->req->param('foo_html'), $value, 'HTML left alone in ignored field'); +} +{ + diag "HTML left alone in ignored field - by name"; + my $value = '

Bar

Foo

'; + my $req = POST('/', [ignored_param => $value]); + my ($res, $c) = ctx_request($req); + ok($res->code == RC_OK, 'response ok'); + is($c->req->param('ignored_param'), $value, 'HTML left alone in ignored field'); +} + + + +done_testing(); + diff --git a/t/lib/MyApp05.pm b/t/lib/MyApp05.pm new file mode 100644 index 0000000..ffac01c --- /dev/null +++ b/t/lib/MyApp05.pm @@ -0,0 +1,22 @@ +package MyApp05; + +use Moose; +use namespace::autoclean; + +use Catalyst qw/HTML::Scrubber/; + +extends 'Catalyst'; + +__PACKAGE__->config( + name => 'MyApp03', + scrubber => { + ignore_params => [ + qr/_html$/, + 'ignored_param', + ], + }, +); +__PACKAGE__->setup(); + +1; + diff --git a/t/lib/MyApp05/Controller/Root.pm b/t/lib/MyApp05/Controller/Root.pm new file mode 100644 index 0000000..2a1feeb --- /dev/null +++ b/t/lib/MyApp05/Controller/Root.pm @@ -0,0 +1,17 @@ +package MyApp05::Controller::Root; + +use Moose; +use namespace::autoclean; + +BEGIN { extends 'Catalyst::Controller'; } + +__PACKAGE__->config(namespace => ''); + +sub index : Path : Args(0) { + my ($self, $c) = @_; + + $c->res->body('index'); +} + +1; +