#!/usr/bin/perl -wT
#
# URLblocked.cgi - explain to the user that the URL is blocked and by which rule set.
#
# Currently the error messages support
# en (English),
# de (German),
# pl (Polish),
# sv (Swedish),
# it (Italian),
# pt (Portuguese),
# fr (French),
# tr (Turkish),
# nl (Dutch).
# You can add a language yourself: search for all occurrences of "NEWLANGUAGE"
# and add your language text.
use strict;
use Socket;
# This CGI script uses fastcgi and also requires Fcgid configured in Apache.
# Comment out the next line and the line with "while (new CGI::Fast)" (approximately at line 423)
# to revert this script to a regular CGI script.
### use CGI::Fast;
# Content-type classes.  getContentType() returns one of these for a blocked
# URL and the request handler uses it to choose which response to send.
use constant {
CT_IMAGE => 1,    # images; answered with Content-Type: image/png
CT_JAVA => 2,     # scripts; answered with Content-Type: application/x-javascript
CT_HTML => 3,     # web pages; also the class returned when no rule matches
CT_XML => 4,      # answered with Content-Type: text/xml
CT_CSS => 5,      # answered with Content-Type: text/css
CT_TEXT => 6,     # answered with Content-Type: text/plain
CT_JSON => 7,     # answered with Content-Type: application/json
CT_STREAM => 8,   # downloads and media streams
CT_204 => 9       # answered with "Status: 204 No Content"
};
use vars qw( $admin $clientaddr $clientname $clientuser $clientgroup $category $targetgroup );
use vars qw( $color $size $mode $textcolor $bgcolor $titlesize $textsize $httpcode $url $origurl );
use vars qw( $ufdbhost $ufdbscripturi $ufdbredirscripturi $ufdbsni $ufdbservername $ufdbrequesturi $ufdbrefurl );
use vars qw( $escaped_ufdbrequesturi $escaped_url );
use vars qw( @day @month @languages $lang $protocol $address $port $path );
# Per-request state.  These are the package globals declared with "use vars";
# session_reinit() and init() give them their working values.
local ( $admin, $clientaddr, $clientname, $clientuser, $clientgroup, $targetgroup );
local ( $color, $size, $mode, $textcolor, $bgcolor, $titlesize, $textsize );
local ( $httpcode, $url, $origurl );
local ( $ufdbhost, $ufdbscripturi, $ufdbredirscripturi, $ufdbsni,
        $ufdbservername, $ufdbrequesturi, $ufdbrefurl );
local ( $lang, $protocol, $address, $port, $path );

# Day and month names used to format the HTTP "Expires" header.
local @day = qw( Sunday Monday Tuesday Wednesday Thursday Friday Saturday );
local @month = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );

# Supported languages, one "code (Name)," entry each.  getPreferedLanguage()
# only looks at the part before the first whitespace, i.e. the code.
local @languages = (
    "de (German),",
    "nl (Dutch),",
    "pl (Polish),",
    "sv (Swedish),",
    "es (Spanish),",
    "it (Italian),",
    "pt (Portuguese),",
    "fr (French),",
    "tr (Turkish),",
    "NEW (NEWLANGUAGE),",
    "en (English),",
);

my $html_comment = "\n";

# Forward declarations; the prototypes must match the definitions below.
sub init();
sub session_reinit();
sub getPreferedLanguage(@);
sub parseURL($);
sub parseQuery($);
#
# Put the per-request globals back to their defaults.
# Called at the start of every request, before init().
#
sub session_reinit ()
{
    # Who asked and what was asked for: not known yet.
    ( $admin, $clientaddr, $clientname, $clientuser, $clientgroup, $targetgroup )
        = ('unknown') x 6;
    ( $url, $origurl, $lang ) = ('unknown') x 3;

    # Presentation defaults and the HTTP code.
    ( $color, $size, $mode, $httpcode ) = ( 'orange', 'normal', 'default', '200' );

    $ufdbrequesturi = '';
}
#
# Fill the per-request globals from the CGI environment:
# the preferred language and the fields of the query string.
#
sub init ()
{
    # The language is determined first; parseQuery() writes $lang to its debug log.
    $lang = getPreferedLanguage( @languages );

    my @fields = parseQuery( $ENV{"QUERY_STRING"} );
    ( $httpcode, $admin, $clientaddr, $clientname,
      $clientuser, $clientgroup, $category, $url ) = @fields;
}
#
# Find the first supported language of the client.
#
# Arguments: the list of supported languages, each entry written as
#            "code (Name),"; only the text before the first whitespace
#            (the code) is used.
# Reads:     $ENV{HTTP_ACCEPT_LANGUAGE}.
# Returns:   the code of the first language in the header that is supported,
#            or 'en' when the header is missing or nothing matches.
#
# The header entries are tried in the order in which the client sent them;
# q-values are stripped and not used for sorting.  Region subtags are
# ignored ("nl-BE" matches "nl") and the comparison is case-insensitive.
#
sub getPreferedLanguage (@)
{
    my @supported = @_;

    # Reduce every "code (Name)," entry to its bare code.
    my @codes = map { my $c = $_; $c =~ s/\s.*//; $c } @supported;

    my $accept = $ENV{"HTTP_ACCEPT_LANGUAGE"};
    my @clientLanguages = defined($accept) ? split(/\s*,\s*/, $accept) : ();

    ### NO! push(@clientLanguages,$supported[0]);
    for my $tag (@clientLanguages)
    {
        my $wanted = lc($tag);
        $wanted =~ s/^\s+//;
        $wanted =~ s/[\s;].*//;     # drop parameters such as ";q=0.8"
        $wanted =~ s/-.*//;         # drop the region subtag
        next if ($wanted eq '');

        for my $code (@codes)
        {
            # Return our own spelling of the code, not the client's string.
            return($code) if ($wanted eq lc($code));
        }
    }
    return 'en';      # default language is 'en'
}
#
# Parse the query string of the request.
#
# Argument: the raw query string (may be undef).
# Returns:  the list
#             ( httpcode, admin, clientaddr, clientname,
#               clientuser, clientgroup, category, url )
#           where url is shortened to 120 characters plus '...' if longer.
#
# Side effects on package globals:
#   - $ufdbservername, $ufdbhost and $ufdbrequesturi are reset to '' first;
#   - recognised keys that are not part of the return list (color, size,
#     mode and the ufdb* keys) are stored in the globals of the same name;
#   - $escaped_url / $escaped_ufdbrequesturi keep the still percent-encoded
#     value of the 'url' / 'ufdbrequesturi' key;
#   - $origurl receives the untruncated URL;
#   - $ufdbrefurl is set from $ENV{HTTP_REFERER};
#   - $targetgroup is set from $ENV{HTTP_X_BLOCKED_CATEGORY} when present.
# $ENV{HTTP_X_BLOCKED_URL}, when present, overrides the URL from the query.
# Every call appends some request details to a debug file.
#
sub parseQuery ($)
{
    my $query = shift;

    my $admin = 'The system administrator.';
    my $clientaddr = '';
    my $clientname = '';
    my $clientuser = '';
    my $clientgroup = '';
    my $category = '';
    my $httpcode = '200';
    my $url = 'undefined';

    $ufdbservername = '';
    $ufdbhost = '';
    $ufdbrequesturi = '';

    # Alternative key names and the name they are stored under.
    my %alias_of = (
        source      => 'clientgroup',
        srcclass    => 'clientgroup',
        clientident => 'clientuser',
        targetgroup => 'category',
        targetclass => 'category',
    );

    # Where the value of each key is stored: the lexicals above for the
    # returned fields, the package globals for everything else.
    my %slot_of = (
        admin              => \$admin,
        clientaddr         => \$clientaddr,
        clientname         => \$clientname,
        clientuser         => \$clientuser,
        clientgroup        => \$clientgroup,
        category           => \$category,
        httpcode           => \$httpcode,
        url                => \$url,
        color              => \$color,
        size               => \$size,
        mode               => \$mode,
        ufdbhost           => \$ufdbhost,
        ufdbscripturi      => \$ufdbscripturi,
        ufdbredirscripturi => \$ufdbredirscripturi,
        ufdbsni            => \$ufdbsni,
        ufdbservername     => \$ufdbservername,
        ufdbrequesturi     => \$ufdbrequesturi,
        ufdbrefurl         => \$ufdbrefurl,
    );

    if (defined($query))
    {
        # Consume one key="value" or key=value pair per iteration.
        while ($query =~ /^\&?([^\&=]+)=\"([^\"]*)\"(.*)/ ||
               $query =~ /^\&?([^\&=]+)=([^\&=]*)(.*)/)
        {
            my $key = $1;
            my $value = $2;
            $query = $3;

            # Only a missing or empty value becomes '??'; "0" is a real value.
            $value = '??' if (!defined($value) || $value eq '');

            if ($key =~ /^(admin|clientaddr|clientname|clientuser|clientident|clientgroup|category|targetgroup|color|size|source|srcclass|targetclass|mode|httpcode|ufdbhost|ufdbscripturi|ufdbredirscripturi|ufdbsni|ufdbservername|ufdbrequesturi|ufdbrefurl|url)$/)
            {
                $escaped_ufdbrequesturi = $value if ($key eq 'ufdbrequesturi');
                $escaped_url = $value if ($key eq 'url');

                # Decode a fixed set of percent-escapes.
                # NOTE(review): %27 is the code of an apostrophe but is turned
                # into '?', and %3C is deleted instead of decoded.  Both are
                # kept as found; confirm whether this is intentional.
                $value =~ s/%20/ /g;
                $value =~ s/%22/"/g;
                $value =~ s/%23/#/g;
                $value =~ s/%24/\$/g;
                $value =~ s/%26/\&/g;
                $value =~ s/%27/\?/g;
                $value =~ s/%2B/\+/ig;
                $value =~ s/%2C/,/ig;
                $value =~ s/%2F/\//ig;
                $value =~ s/%3A/:/ig;
                $value =~ s/%3B/;/ig;
                $value =~ s/%3C//ig;
                $value =~ s/%3F/?/ig;
                $value =~ s/%40/\@/ig;
                $value =~ s/%5C/\\/ig;
                $value =~ s/%25/%/g;            # must be last

                my $name = exists($alias_of{$key}) ? $alias_of{$key} : $key;
                my $slot = $slot_of{$name};
                ${$slot} = $value if (defined($slot));
            }

            # If the unparsed remainder starts with "url=", take everything
            # after it verbatim (no percent-decoding) as the URL and stop.
            if ($query =~ /^url=(.*)/)
            {
                $url = $1;
                last;
            }
        }
    }

    # Request headers take precedence over the query string.
    $url = $ENV{'HTTP_X_BLOCKED_URL'} if (defined($ENV{'HTTP_X_BLOCKED_URL'}));
    $targetgroup = $ENV{'HTTP_X_BLOCKED_CATEGORY'} if (defined($ENV{'HTTP_X_BLOCKED_CATEGORY'}));

    # No URL given: rebuild one from the server name (or host) and request URI.
    if ($url eq 'undefined')
    {
        if ($ufdbservername ne '')
        {
            $url = 'https://' . $ufdbservername;
        }
        elsif ($ufdbhost ne '')
        {
            $url = 'https://' . $ufdbhost;
        }
        if ($url ne 'undefined' && $ufdbrequesturi ne '')
        {
            $ufdbrequesturi =~ s/\?.*//;
            $url .= $ufdbrequesturi;
        }
    }

    $origurl = $url;
    $url = substr( $url, 0, 120 ) . '...' if (length($url) > 120);

    $ufdbrefurl = $ENV{'HTTP_REFERER'};

    if (1)
    {
        my $dbgf = '/tmp/debug-cgibin-cgi';     # '/local/websites/logs/cgidebug';

        # Logging is best effort: when the file cannot be opened, skip it.
        if (open( my $dbg, '>>', $dbgf ))
        {
            print $dbg "\n\nurl = $url\n";
            print $dbg "lang = $lang\n";
            foreach my $key ('HTTP_HOST', 'HTTP_REFERER', 'HTTP_USER_AGENT', 'REMOTE_ADDR', 'REQUEST_METHOD',
                             'HTTP_ACCEPT_CHARSET', 'HTTP_ACCEPT_LANGUAGE', 'HTTP_CONTENT_TYPE', 'QUERY_STRING') {
                my $value = $ENV{$key};
                $value = 'undefined' if !defined($value);
                print $dbg "$key = $value\n";
            }
            if (0)
            {
                print $dbg ":\n";
                foreach my $key (sort(keys(%ENV))) {
                    print $dbg "$key = $ENV{$key}\n";
                }
            }
            close( $dbg );
        }
    }

    return ( $httpcode, $admin, $clientaddr, $clientname, $clientuser, $clientgroup, $category, $url );
}
#
# Split a URL of the form  protocol://address[:port][path]  into its parts.
#
# Argument: the URL.
# Returns:  the list ( protocol, address, port, path ).  The port, when
#           present, still carries its leading ':' (e.g. ":8080").
#           All four elements are empty strings when the URL is undefined
#           or does not have this form.
#
sub parseURL ($)
{
    my $url = shift;
    my ( $protocol, $address, $port, $path ) = ( "", "", "", "" );

    # Use the captures only after a successful match; after a failed match
    # $1..$4 still hold whatever an earlier match left in them.
    if (defined($url) && $url =~ /^([^\/:]+):\/\/([^\/:]+)(:\d*)?(.*)/)
    {
        $protocol = $1;
        $address = $2;
        $port = $3 if (defined($3));
        $path = $4;
    }

    return ( $protocol, $address, $port, $path );
}
#
# Guess which kind of content a client expects from a URL.
#
# Argument: the URL (an undefined value is treated as an empty string).
# Returns:  one of the CT_* constants.
#
# The file name suffix of the path is checked first.  Without a known
# suffix an ordered list of host and path rules is tried; the first rule
# that matches wins, so the order of the statements matters.
# CT_HTML is returned when nothing matches.
#
sub getContentType( $ )
{
    my $url = shift;
    my $suffix;
    my $path;

    $url = '' unless (defined($url));

    # Drop parameters/query and the scheme; what is left is host + path.
    $url =~ s/[;\?\&].*//;
    $url =~ s/^(ftp|http|https):\/\///;

    # $path is empty or starts with '/'.
    $path = $url;
    $path =~ s/^[^\/]*//;

    # Everything from the last '.' in the path onwards.
    $suffix = $path;
    $suffix =~ s/.*\././;

    return CT_IMAGE  if ($suffix =~ /\.(bmp|gif|ico|jpg|jpeg|jpe|png|webp|tiff)$/i);
    return CT_CSS    if ($suffix =~ /\.css$/i);
    return CT_JSON   if ($suffix =~ /\.json$/i);
    return CT_JAVA   if ($suffix =~ /\.(js|jar)$/i);
    return CT_TEXT   if ($suffix =~ /\.(csv|txt)$/i);
    return CT_HTML   if ($suffix =~ /\.(htm|html|dhtml|shtml)$/i);
    return CT_XML    if ($suffix =~ /\.(xml|sxml|rss)$/i);
    return CT_STREAM if ($suffix =~ /\.(bin|bz2|cab|class|dat|doc|gz|h264|mp3|mpg|mpeg|msi|mst|ppt|pdf|rar|tar|ttf|xls|zip|ogv|divx|xvid|qt|ra|ram|rv|wmv|avi|mov|swf|mp4|mv4|flv)$/i);

    # no suffix found, now we start with the guesswork
    return CT_HTML   if ($path eq '/' || $path eq '');
    return CT_IMAGE  if ($url =~ /^googleadservices\.com\/pagead\/conversion\// );
    return CT_TEXT   if ($url =~ /^googleads\.g\.doubleclick\.net\/pagead\/ads\// );
    return CT_JAVA   if ($url =~ /^pubads\.g\.doubleclick\.net\/pagead\/ads\// );
    return CT_JAVA   if ($url eq 'a.analytics.yahoo.com/fpc.pl' );
    return CT_IMAGE  if ($url eq 'a.analytics.yahoo.com/p.pl' );
    return CT_IMAGE  if ($url eq 'ping.chartbeat.net/ping' );
    if ($url =~ /^www\.youtube\.com/)
    {
        return CT_STREAM if ($path =~ /^\/cp\// || $path =~ /^\/p\// ||
                             $path =~ /^\/v\// || $path =~ /^\/videoplayback/ );
    }
    return CT_204    if ($url =~ /^s\.youtube\.com\/api\/stats\//);
    return CT_IMAGE  if ($url =~ /^b\.scorecardresearch\.com\// );
    return CT_JAVA   if ($url =~ /\.doubleclick\.net\/adj\// ||
                         $url =~ /\.doubleclick\.net\/pfadj\// );
    return CT_IMAGE  if ($url =~ /\.doubleclick\.net\/imp/ );
    return CT_IMAGE  if ($url =~ /\.tradedoubler\.com\/imp/ );
    if ($url =~ /^view\.atdmt\.com\//)
    {
        return CT_IMAGE if ($path =~ /^\/action\// );
        return CT_JAVA  if ($path =~ /^\/jview\// );
    }
    return CT_JAVA   if ($url eq 'static.ak.connect.facebook.com/connect.php' );
    return CT_IMAGE  if ($url eq 'secure-us.imrworldwide.com/cgi-bin/m' );
    return CT_IMAGE  if ($url =~ /ftjcfx\.com\/image-/ );
    return CT_IMAGE  if ($url =~ /lduhtrp\.net\/image-/ );
    return CT_IMAGE  if ($url =~ /img\.pheedo\.com\/img\.phdo/ );
    if ($path =~ /\/realmedia\/ads\//i )
    {
        return CT_JAVA  if ($path =~ /\/adstream_jx/ || $path =~ /\/adstream_mjx/ );
        return CT_IMAGE if ($path =~ /\/adstream_lx/ || $path =~ /\/adstream_nx/ );
        return CT_IMAGE if ($path =~ /\/ads\/cap\.cgi/ );
    }
    return CT_JAVA   if ($url =~ /overture\.com\/ls_js_/ );
    return CT_IMAGE  if ($path =~ /\/scripts\/beacon\.dll/ || $path =~ /\/scripts\/beacon2\.dll/ );
    return CT_IMAGE  if ($url eq 'rtd.tubemogul.com/upi/');
    return CT_JAVA   if ($path =~ /\/javascript\// || $path =~ /\/ajaxpro\// );
    return CT_JAVA   if ($path =~ /\/js\.php$/ || $path =~ /\/javascript\.php$/ );
    return CT_CSS    if ($path =~ /\/css\.php$/ );
    return CT_IMAGE  if ($path =~ /\/image\.php$/ || $path =~ /\/image\.php\// );
    return CT_JAVA   if ($path =~ /\/js\.ng\// || $path =~ /\/js\// );
    return CT_JAVA   if ($path =~ /\/scripts\// || $path =~ /\/script\// );
    return CT_XML    if ($url =~ /^xml\./ );
    if ($path =~ /\/b\/ss\// )
    {
        return CT_IMAGE if ($path =~ /\/FAS/i || $path =~ /\/H\./i || $path =~ /\/G\./i );
    }
    return CT_JAVA   if ($url =~ /\.channel\.facebook\.com\/x\// );
    return CT_TEXT   if ($url =~ /\.channel\.facebook\.com\/p/ );
    return CT_IMAGE  if ($url eq 'www.facebook.com/fr/u.php' );
    return CT_IMAGE  if ($url eq 'pixel.mathtag.com/event/img' );
    return CT_JAVA   if ($url eq 'pixel.mathtag.com/event/js' );
    return CT_IMAGE  if ($url eq 'x.bidswitch.net/ul_cb/sync' );
    return CT_XML    if ($path =~ /\/xml-rpc/ );
    # A non-empty $path always starts with '/', so the comparison needs the
    # leading slash to be able to match at all.
    return CT_STREAM if ($path eq '/open/1');
    return CT_IMAGE  if ($url =~ /^pixel\./ || $path =~ /\/pixel$/ );
    return CT_TEXT   if ($url =~ /heatmap/ );
    return CT_204    if ($url eq 'analytics.livestream.com/track');

    return CT_HTML;
}
# comment out the next line if fastcgi is not configured
### while (new CGI::Fast)
{
my $time = time;
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
my $root;
my $buffer;
session_reinit();
init();
# the default is english
my $forbidden = 'no access';
my $title = $forbidden;
my $explanation_prefix = 'URL blocked because it is';
my $explanation_suffix = '';
my $go_back = 'back';
my $more_info = 'More information about ufdbGuard is here.';
my $contentType = getContentType( $origurl );
$contentType = CT_204 if ($httpcode eq '204');
if ($contentType == CT_IMAGE)
{
print "Content-Type: image/png\n";
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime( $time + 180 );
printf "Expires: %s, %02d %s %04d %02d:%02d:%02d GMT\n",
$day[$wday], $mday, $month[$mon], $year+1900, $hour, $min, $sec;
print "\n";
my $imgfile;
if ($category eq 'ads')
{
$imgfile = "transparent.png";
$imgfile = "no-ads.png" if ($mode eq 'noads');
$imgfile = "smallcross.png" if ($mode eq 'cross');
$imgfile = "square.png" if ($mode eq 'square');
}
else
{
if ($mode eq 'cross') {
$imgfile = "smallcross.png" if ($mode eq 'cross');
}
elsif ($mode eq 'square') {
$imgfile = "square.png" if ($mode eq 'square');
}
elsif ($mode eq 'simple-red' || $mode eq 'transparent' || $mode eq 'transparant') {
$imgfile = "transparent.png";
}
else {
$imgfile = "forbidden-normal-" . $lang . ".png";
}
}
$root = $ENV{'DOCUMENT_ROOT'};
open( BLOCKEDPNG, "$root/images/$imgfile" ) || print "failed to open $root/images/$imgfile\n";
print $buffer while (read (BLOCKEDPNG,$buffer,8192));
close( BLOCKEDPNG );
}
elsif ($contentType == CT_204)
{
print "Status: 204 No Content\n";
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime( $time + 180 );
print "Content-Type: text/plain\n";
printf "Expires: %s, %02d %s %04d %02d:%02d:%02d GMT\n",
$day[$wday], $mday, $month[$mon], $year+1900, $hour, $min, $sec;
printf "X-blocked-category: %s\n", $category;
printf "X-blocked-URL: %s\n", $url;
print "\n";
}
elsif ($contentType == CT_STREAM)
{
if (1)
{
print "Status: 204 no content\n";
print "Content-Type: text/plain\n";
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime( $time + 180 );
printf "Expires: %s, %02d %s %04d %02d:%02d:%02d GMT\n",
$day[$wday], $mday, $month[$mon], $year+1900, $hour, $min, $sec;
printf "X-blocked-category: %s\n", $category;
printf "X-blocked-URL: %s\n", $url;
}
else
{
print "Content-Type: application/octet-stream\n";
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime( $time + 180 );
printf "Expires: %s, %02d %s %04d %02d:%02d:%02d GMT\n",
$day[$wday], $mday, $month[$mon], $year+1900, $hour, $min, $sec;
}
print "\n";
}
elsif ($contentType == CT_JAVA)
{
print "Content-Type: application/x-javascript\n";
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime( $time + 180 );
printf "Expires: %s, %02d %s %04d %02d:%02d:%02d GMT\n",
$day[$wday], $mday, $month[$mon], $year+1900, $hour, $min, $sec;
print "\n";
print "\n";
}
elsif ($contentType == CT_JSON)
{
print "Content-Type: application/json\n";
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime( $time + 180 );
printf "Expires: %s, %02d %s %04d %02d:%02d:%02d GMT\n",
$day[$wday], $mday, $month[$mon], $year+1900, $hour, $min, $sec;
print "\n";
print "\n";
}
elsif ($contentType == CT_CSS)
{
print "Content-Type: text/css\n";
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime( $time + 180 );
printf "Expires: %s, %02d %s %04d %02d:%02d:%02d GMT\n",
$day[$wday], $mday, $month[$mon], $year+1900, $hour, $min, $sec;
print "\n";
print "\n";
}
elsif ($contentType == CT_TEXT)
{
print "Content-Type: text/plain\n";
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime( $time + 180 );
printf "Expires: %s, %02d %s %04d %02d:%02d:%02d GMT\n",
$day[$wday], $mday, $month[$mon], $year+1900, $hour, $min, $sec;
print "\n";
print "\n";
}
elsif ($contentType == CT_XML)
{
print "Content-Type: text/xml\n";
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime( $time + 180 );
printf "Expires: %s, %02d %s %04d %02d:%02d:%02d GMT\n",
$day[$wday], $mday, $month[$mon], $year+1900, $hour, $min, $sec;
print "\n";
print "\n";
print "
\n";
print "https://blockedhttps.urlfilterdb.com is used by ufdbGuard
\n";
print "to display messages about blocked URLs.
\n";
print "
\n";
print "$forbidden
$category\n";
print "why is this URL blocked?\n";
print "
\n";
print "$go_back.
\n";
print "$admin\n";
print "
\n";
print "$more_info\n";
print "
\n