Dodane skrypty do sprawdzania open proxy
A => beaubot/bot/proxy/AddressChecker.pm +82 -0
@@ 0,0 1,82 @@ 
+package AddressChecker;
+
+use strict;
+use warnings;
+use Socket;
+use Log::Any;
+
+my $logger = Log::Any::get_logger;
+
+my @hostnames = (
+	{
+		'pattern' => qr/(?:ppp|adsl)\.tpnet\.pl$/,
+		'type'    => 'dynamic',
+		'block'   => '12 hours',
+	},
+	{
+		'pattern' => qr/^(?:dial|dynamic)-.+\.dialog\.net\.pl$/,
+		'type'    => 'dynamic',
+	},
+	{
+		'pattern' => qr/\.dynamic\.t-mobile\.pl$/,
+		'type'    => 'dynamic',
+	},
+	{
+		'pattern' => qr/^user-\d+-\d+-\d+-\d+\.play-internet\.pl$/,
+		'type'    => 'dynamic',
+	},
+	{
+		'pattern' => qr/\.adsl\.inetia\.pl$/,
+		'type'    => 'dynamic',
+		'block'   => '12 hours',
+	},
+	{
+		'pattern' => qr/\.gprs.*\.plus(?:gsm)?\.pl$/,
+		'type'    => 'dynamic',
+	},
+	{
+		'pattern' => qr/-gprs.+\.centertel\.pl$/,
+		'type'    => 'dynamic',
+	},
+	{
+		'pattern' => qr/dynamic\.chello\.pl$/,
+		'type'    => 'dynamic',
+		'block'   => '4 months',
+	},
+	{
+		'pattern' => qr/dynamic/,
+		'type'    => 'dynamic',
+		'block'   => '2 months',
+	},
+);
+
+sub check {
+	my $address = shift;
+	$logger->debug("Checking $address");
+	my $result = {};
+	my $info;
+	my $hostname = gethostbyaddr( inet_aton($address), AF_INET );
+
+	if ( defined $hostname ) {
+		$result->{hostname} = $hostname;
+		$logger->debug("The address $address resolves to $hostname");
+		foreach my $entry (@hostnames) {
+			if ( $hostname =~ $entry->{pattern} ) {
+				$info = $entry;
+				last;
+			}
+		}
+	}
+
+	if ( defined $info ) {
+		$result->{type}  = $info->{type};
+		$result->{block} = $info->{block};
+	}
+	else {
+		$result->{type}  = 'static';
+		$result->{block} = '6 months';
+	}
+	return $result;
+}
+
+1;

          
A => beaubot/bot/proxy/ProxyDatabase.pm +225 -0
@@ 0,0 1,225 @@ 
+package ProxyDatabase;
+
+use strict;
+use warnings;
+use Log::Any;
+use DBI;
+use DateTime;
+use DateTime::Format::Strptime;
+
+my $logger = Log::Any->get_logger;
+
+my $dateFormat = new DateTime::Format::Strptime(
+	pattern   => '%Y-%m-%dT%TZ',
+	time_zone => 'UTC',
+	on_error  => 'croak',
+);
+
+sub new {
+	my $caller = shift;
+	my $class = ref($caller) || $caller;
+
+	my $this = {
+		'dbh'  => undef,
+		'file' => undef,
+		@_,
+	};
+	bless $this, $class;
+
+	die "A path to a database is missing\n"
+	  unless defined $this->{file};
+
+	$this->{dbh} = DBI->connect( "dbi:SQLite:dbname=$this->{file}", "", "", { RaiseError => 1, PrintError => 0, sqlite_use_immediate_transaction => 1 } );
+
+	return $this;
+}
+
+# ------------------------------------------------------------------------------
+
+sub begin {
+	my $this = shift;
+	$this->{dbh}->begin_work;
+}
+
+sub commit {
+	my $this = shift;
+	$this->{dbh}->commit;
+}
+
+# ------------------------------------------------------------------------------
+
+sub getSetting {
+	my $this = shift;
+	my $name = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('SELECT value FROM settings WHERE name = ?');
+	$sth->execute($name);
+	my ($value) = $sth->fetchrow_array();
+	$sth->finish;
+	return $value;
+}
+
+sub setSetting {
+	my $this  = shift;
+	my $name  = shift;
+	my $value = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('INSERT OR REPLACE INTO settings(name, value) VALUES(?, ?)');
+	$sth->execute( $name, $value );
+}
+
+# ------------------------------------------------------------------------------
+
+sub insertOrIgnoreProxy {
+	my $this    = shift;
+	my $address = shift;
+	my $source  = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('INSERT OR IGNORE INTO proxies (proxy_address, proxy_added, proxy_source) VALUES (?, ?, ?)');
+	$sth->execute( $address, $dateFormat->format_datetime( DateTime->now ), $source );
+}
+
+sub fetchProxiesToCheck {
+	my $this  = shift;
+	my $since = shift;
+	my $limit = shift || 50;
+
+	my $sth = $this->{dbh}->prepare_cached( "
+	SELECT proxy_id, proxy_address
+	FROM proxies
+	WHERE proxy_checked IS NULL OR proxy_checked < ?
+	ORDER BY proxy_checked, proxy_id DESC
+	LIMIT $limit
+" );
+	$sth->execute( $dateFormat->format_datetime($since) );
+
+	my @list;
+
+	while ( my $row = $sth->fetchrow_hashref ) {
+		push @list, $row;
+	}
+	return @list;
+}
+
+# ------------------------------------------------------------------------------
+
+sub insertCheckResult {
+	my $this    = shift;
+	my $proxy   = shift;
+	my $status  = shift;
+	my $type    = shift;
+	my $address = shift;
+
+	my $timestamp = $dateFormat->format_datetime( DateTime->now );
+	my $insertSth = $this->{dbh}->prepare_cached('INSERT INTO checks (check_proxy, check_timestamp, check_status, check_type, check_address) VALUES (?, ?, ?, ?, ?)');
+	$insertSth->execute( $proxy, $timestamp, $status, $type, $address );
+	my $updateSth = $this->{dbh}->prepare_cached('UPDATE proxies SET proxy_checked = ? WHERE proxy_id = ?');
+	$updateSth->execute( $timestamp, $proxy );
+}
+
+# ------------------------------------------------------------------------------
+
+sub insertOrIgnoreBlock {
+	my $this   = shift;
+	my $target = shift;
+	my $start  = shift;
+	my $expiry = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('INSERT OR IGNORE INTO blocks (block_address, block_start, block_expiry) VALUES (?, ?, ?)');
+	$sth->execute( $target, $dateFormat->format_datetime($start), $expiry );
+}
+
+sub removeBlock {
+	my $this    = shift;
+	my $blockId = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('DELETE FROM blocks WHERE block_id = ?');
+	$sth->execute($blockId);
+}
+
+sub fetchBlocks {
+	my $this    = shift;
+	my $blockId = shift;
+	my $limit   = shift || 50;
+
+	my $sth = $this->{dbh}->prepare_cached("SELECT block_id AS id, block_address AS address, block_start AS start, block_expiry AS expiry FROM blocks ORDER BY block_id ASC LIMIT $limit");
+	$sth->execute;
+
+	my @list;
+	while ( my $row = $sth->fetchrow_hashref ) {
+		$row->{start} = $dateFormat->parse_datetime( $row->{start} );
+		push @list, $row;
+	}
+	return @list;
+}
+
+# ------------------------------------------------------------------------------
+
+my @sessionChars = ( 'A' .. 'Z', 'a' .. 'z', '0' .. '9' );
+
+sub generateSessionId {
+	my $result = '';
+
+	for ( my $i = 0 ; $i < 32 ; $i++ ) {
+		$result .= $sessionChars[ int( rand( scalar @sessionChars ) ) ];
+	}
+	return $result;
+}
+
+sub createSession {
+	my $this = shift;
+
+	my $sth   = $this->{dbh}->prepare_cached('INSERT INTO sessions (session_id) VALUES (?)');
+	my $count = 0;
+	while ( $count < 100 ) {
+		my $sessionId = generateSessionId();
+		eval {    #
+			$sth->execute($sessionId);
+		};
+		my $error = $@;
+		if ($error) {
+			die $error
+			  unless $error =~ /column session_id is not unique/;
+		}
+		else {
+			return $sessionId;
+		}
+		$count++;
+	}
+	die "Unable to generate sessionId\n";
+}
+
+sub destroySession {
+	my $this      = shift;
+	my $sessionId = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('DELETE FROM sessions WHERE session_id = ?');
+	$sth->execute($sessionId);
+}
+
+sub getSessionAddress {
+	my $this      = shift;
+	my $sessionId = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('SELECT session_address FROM sessions WHERE session_id = ?');
+
+	$sth->execute($sessionId);
+	my ($address) = $sth->fetchrow_array;
+	$sth->finish;
+	return $address;
+}
+
+sub setSessionAddress {
+	my $this      = shift;
+	my $sessionId = shift;
+	my $address   = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('UPDATE sessions SET session_address = ? WHERE session_id = ? AND session_address IS NULL');
+	$sth->execute( $address, $sessionId );
+
+	return $sth->rows;
+}
+
+# ------------------------------------------------------------------------------
+
+1;

          
A => beaubot/bot/proxy/check-ip.pl +36 -0
@@ 0,0 1,36 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use DateTime;
+use AddressChecker;
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my $sth = $dbh->{dbh}->prepare_cached('SELECT * FROM checks WHERE check_status <> 0');
+$sth->execute;
+while ( my $row = $sth->fetchrow_hashref ) {
+	my $result = AddressChecker::check( $row->{check_address} );
+	if ( $logger->is_debug ) {
+		$logger->debug( "Result:\n" . Dumper( [ $row, $result ] ) );
+	}
+	if ( defined $result->{block} ) {
+		$logger->info("Placing block on $row->{check_address} with expiry: $result->{block}");
+		$dbh->insertOrIgnoreBlock( $row->{check_address}, DateTime->now(), $result->{block} );
+	}
+
+}

          
A => beaubot/bot/proxy/log4perl.conf +10 -0
@@ 0,0 1,10 @@ 
+log4perl.oneMessagePerAppender = 1
+
+log4perl.rootLogger=INFO, console
+
+log4perl.category.main = DEBUG, console
+
+log4perl.appender.console=Log::Log4perl::Appender::Screen
+log4perl.appender.console.layout=Log::Log4perl::Layout::PatternLayout
+log4perl.appender.console.layout.ConversionPattern=[%d{dd-MM-yyyy HH:mm:ss}][%P] %p %m%n
+log4perl.appender.console.utf8=1

          
A => beaubot/bot/proxy/proxy-block.pl +73 -0
@@ 0,0 1,73 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setProject( 'wikipedia', 'pl', 'sysop' );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my $api = $bot->getApi;
+$api->checkAccount;
+
+while ( my @list = $dbh->fetchBlocks ) {
+	foreach my $block (@list) {
+		$logger->info("Processing block request of $block->{address}");
+
+		my $blocked = 0;
+		{
+			my $iterator = $api->getIterator(
+				'action' => 'query',
+				'list'   => 'blocks',
+				'bkip'   => $block->{address},
+			);
+
+			while ( my $entry = $iterator->next ) {
+				$blocked++;
+				$logger->info( "Active blocks of $block->{address}:\n" . Dumper($entry) )
+				  if $logger->is_info;
+			}
+		}
+		{
+			my $iterator = $api->getIterator(
+				'action' => 'query',
+				'list'   => 'globalblocks',
+				'bgip'   => $block->{address},
+			);
+
+			while ( my $entry = $iterator->next ) {
+				$blocked++;
+				$logger->info( "Active global blocks of $block->{address}:\n" . Dumper($entry) )
+				  if $logger->is_info;
+			}
+		}
+		if ($blocked) {
+			$logger->info("$block->{address} is already blocked");
+		}
+		else {
+			$logger->info("Blocking $block->{address} with expiry $block->{expiry}");
+			$api->block(
+				'anononly' => 1,
+				'nocreate' => 1,
+				'user'     => $block->{address},
+				'reason'   => '[[WP:OP|open proxy]]',
+				'expiry'   => $block->{expiry},
+			);
+
+		}
+		$dbh->removeBlock( $block->{id} );
+	}
+}

          
A => beaubot/bot/proxy/proxy-check-verifier.pl +29 -0
@@ 0,0 1,29 @@ 
+#!/usr/bin/perl
+
+use strict;
+use utf8;
+use CGI;
+use ProxyDatabase;
+use Env;
+use FindBin qw($RealBin);
+
+binmode STDOUT, ":utf8";
+
+my $db         = "$RealBin/../var/proxy.sqlite";
+my $sessionId  = CGI::param('id');
+my $remoteAddr = $ENV{REMOTE_ADDR};
+
+sub error {
+	print "Content-type: text/html; charset=utf-8\n\nERROR";
+	exit(0);
+}
+
+error()
+  unless defined $sessionId and defined $remoteAddr;
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+error()
+  unless $dbh->setSessionAddress( $sessionId, $remoteAddr );
+
+print "Content-type: text/html; charset=utf-8\n\nSUCCESS";

          
A => beaubot/bot/proxy/proxy-check-worker.pl +298 -0
@@ 0,0 1,298 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use ProxyDatabase;
+use AddressChecker;
+use LWP::UserAgent;
+use WWW::Mechanize;
+use IO::Handle;
+use Storable qw(freeze thaw);
+use File::Spec;
+use Log::Any;
+use Log::Any::Adapter;
+use LWP::Protocol::socks;
+
+binmode STDIN;
+binmode STDOUT;
+STDOUT->autoflush(1);
+
+my $db      = "$RealBin/../var/proxy.sqlite";
+my $testUrl = 'http://94.23.242.48/~beau/cgi-bin/verify.pl?id=$sessionId';
+my $agent   = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7';
+
+my %ignoredAddresses = map { $_ => 1 } qw(94.23.242.48 178.33.53.23 178.32.200.207 127.0.0.1);
+
+# Send all logs to Log::Log4perl
+use Log::Log4perl;
+Log::Log4perl->init( File::Spec->join( $RealBin, 'log4perl.conf' ) );
+Log::Any::Adapter->set('Log4perl');
+
+my $logger = Log::Any::get_logger;
+$logger->info("Worker is being started");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+sub checkProxyLwp {
+	my $info  = shift;
+	my $proxy = shift;
+
+	$logger->info("Checking proxy '$proxy' using LWP::UserAgent");
+	my $result = {
+		'address' => undef,
+		'status'  => 0,
+	};
+	my $sessionId = $dbh->createSession();
+	eval {
+		my $ua = LWP::UserAgent->new(
+			'agent'   => $agent,
+			'timeout' => 30,
+		);
+		$ua->proxy( [ 'http', 'https' ] => $proxy );
+
+		my $url = $testUrl;
+		$url =~ s/\$sessionId/$sessionId/g;
+
+		$ua->get($url);
+		my $address = $dbh->getSessionAddress($sessionId);
+
+		$result->{address} = $address;
+		$result->{status} = defined $address ? 1 : 0;
+	};
+	my $error = $@;
+	$dbh->destroySession($sessionId);
+	die $error
+	  if $error;
+
+	return $result;
+}
+
+sub checkHttpProxy {
+	my $info = shift;
+
+	# Do not check proxies without a specified port
+	return { status => 0 }
+	  unless defined $info->{port};
+
+	my $result = checkProxyLwp( $info, "http://$info->{address}:$info->{port}/" );
+	$result->{type}     = 'http';
+	$result->{accepted} = '1';
+	return $result;
+}
+
+sub checkSocks4Proxy {
+	my $info = shift;
+
+	# Do not check proxies without a specified port
+	return { status => 0 }
+	  unless defined $info->{port};
+
+	my $result = checkProxyLwp( $info, "socks4://$info->{address}:$info->{port}/" );
+	$result->{type}     = 'socks4';
+	$result->{accepted} = '1';
+	return $result;
+}
+
+sub checkSocks5Proxy {
+	my $info = shift;
+
+	# Do not check proxies without a specified port
+	return { status => 0 }
+	  unless defined $info->{port};
+
+	my $result = checkProxyLwp( $info, "socks://$info->{address}:$info->{port}/" );
+	$result->{type}     = 'socks5';
+	$result->{accepted} = '1';
+	return $result;
+}
+
+sub checkWebProxy {
+	my $info = shift;
+
+	# Do not check proxies with a specified port
+	return { status => 0 }
+	  if defined $info->{port};
+
+	# Do not check non-web proxies
+	return { status => 0 }
+	  unless $info->{address} =~ m{^https?://}i;
+
+	$logger->info("Checking proxy '$info->{address}' using WWW::Mechanize");
+	my $result = {
+		'address' => undef,
+		'status'  => 0,
+	};
+	my $sessionId = $dbh->createSession();
+	eval {
+		my $mech = new WWW::Mechanize(
+			'agent'     => $agent,
+			'timeout'   => 30,
+			'autocheck' => 0,
+			'quiet'     => 1,
+		);
+
+		my $response = $mech->get( $info->{address} );
+		return unless $response->is_success;
+
+		my $form;
+		my $field;
+
+		foreach my $f ( 'u', 'q', 'url', 'rxproxyuri' ) {
+			$form  = $mech->form_with_fields($f);
+			$field = $f;
+			next if $form and $form->method eq 'SEND';
+			last if $form;
+		}
+
+		unless ( defined $form ) {
+			$logger->debug("Unable to find form");
+			return;
+		}
+		my $url = $testUrl;
+		$url =~ s/\$sessionId/$sessionId/g;
+		$mech->field( $field, $url );
+		$mech->submit;
+
+		my $address = $dbh->getSessionAddress($sessionId);
+
+		$result->{address} = $address;
+		$result->{status}  = defined $address ? 1 : 0;
+		$result->{type}    = 'web proxy';
+	};
+	my $error = $@;
+	$dbh->destroySession($sessionId);
+	die $error
+	  if $error;
+
+	$result->{accepted} = '1';
+	return $result;
+}
+
+my @methods = (    #
+	\&checkHttpProxy,
+	\&checkSocks4Proxy,
+	\&checkSocks5Proxy,
+	\&checkWebProxy,
+);
+
+sub checkProxy {
+	my $info = shift;
+
+	foreach my $method (@methods) {
+
+		# TODO: Check till no new addresses are discovered...
+		my $result;
+		for ( my $i = 0 ; $i < 3 ; $i++ ) {
+			$result = $method->($info);
+			last if $result->{status};
+			sleep(1)
+			  if $result->{accepted};
+		}
+		return $result
+		  if $result->{status};
+	}
+	return { status => 0 };
+}
+
+# ------------------------------------------------------------------------------
+
+sub sendMessage {
+	my $message = shift;
+
+	if ( $logger->is_debug ) {
+		$logger->debug( "Sending a message:\n" . Dumper($message) );
+	}
+
+	my $data = freeze($message);
+	STDOUT->print( bytes::length($data) . "\n" . $data )
+	  or die "Unable to send a message: $!\n";
+}
+
+sub readMessage {
+	my $size = readline(STDIN);
+
+	die "Unable to read a message header: $!\n"
+	  unless defined $size;
+
+	chomp($size);
+
+	my $buffer;
+	my $len = read( STDIN, $buffer, $size );
+
+	if ( $len < $size ) {
+		die "Received a corrupted message: expected $len bytes instead of $size bytes\n";
+	}
+	my $message = thaw $buffer;
+	if ( $logger->is_debug ) {
+		$logger->debug( "Received a message:\n" . Dumper($message) );
+	}
+	return $message;
+}
+
+# ------------------------------------------------------------------------------
+
+eval {
+	$logger->info("Worker is entering main loop");
+	while (1) {
+		$0 = 'proxy-check-worker: idle';
+		last
+		  if eof(STDIN);
+		my $message = readMessage();
+
+		die "Invalid message\n"
+		  unless ref($message) eq 'HASH' and defined $message->{command};
+
+		if ( $message->{command} eq 'CHECK' ) {
+			$0 = "proxy-check-worker: checking $message->{address}";
+
+			if ( $message->{address} =~ /^(.+?):(\d+)$/ ) {
+				$message->{address} = $1;
+				$message->{port}    = $2;
+			}
+
+			my $result = checkProxy($message);
+
+			if ( $result->{status} ) {
+
+				my $reject = undef;
+
+				if ( $result->{address} !~ /^\d+\.\d+\.\d+\.\d+$/ ) {
+					$reject = "not a valid IPv4 address '$result->{address}'";
+				}
+				elsif ( $ignoredAddresses{ $result->{address} } ) {
+					$reject = "ignored address '$result->{address}'";
+				}
+
+				if ( defined $reject ) {
+					$logger->info("Ignoring proxy $message->{proxy}: $reject");
+					$result = { status => 0 };
+				}
+				else {
+
+					# Check type of address (isp, country, service)
+					my $addressCheck = AddressChecker::check( $result->{address} );
+					$result->{block}       = $addressCheck->{block};
+					$result->{hostname}    = $addressCheck->{hostname};
+					$result->{addressType} = $addressCheck->{type};
+				}
+			}
+
+			$result->{proxy}   = $message->{proxy};
+			$result->{command} = 'CHECKRESULT';
+			sendMessage($result);
+		}
+		elsif ( $message->{command} eq 'QUIT' ) {
+			last;
+		}
+		else {
+			die "Unsupported command '$message->{command}'\n";
+		}
+	}
+	$logger->info("Worker is being terminated");
+};
+if ($@) {
+	$@ =~ s/\s+$//;
+	$logger->fatal("Worker is being terminated: $@");
+	exit(1);
+}

          
A => beaubot/bot/proxy/proxy-check.pl +252 -0
@@ 0,0 1,252 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use LWP::UserAgent;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Storable qw(freeze thaw);
+use Bot4;
+use AnyEvent;
+use IPC::Open2;
+use ProxyDatabase;
+use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
+
+my $maxWorkers = 10;
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my %workers;      # pid => $worker
+my %jobsTaken;    # proxy_id => $worker
+my @jobsQueue;    # list of { proxy_id, proxy_address }
+my $lastJobsFetched = clock_gettime(CLOCK_MONOTONIC) - 60;    # timestamp, when last jobs were fetched
+
+sub sendMessage {
+	my $child   = shift;
+	my $message = shift;
+
+	if ( $logger->is_debug ) {
+		$logger->debug( "Sending a message to a worker $child->{pid}:\n" . Dumper($message) );
+	}
+
+	my $data = freeze($message);
+	$child->{in}->print( bytes::length($data) . "\n" . $data )
+	  or die "Unable to send a message to a worker $child->{pid}: $!\n";
+}
+
+sub readMessage {
+	my $child = shift;
+
+	my $size = readline( $child->{out} );
+
+	die "Unable to read from a worker $child->{pid}: $!\n"
+	  unless defined $size;
+
+	chomp($size);
+
+	my $buffer;
+	my $len = read( $child->{out}, $buffer, $size );
+
+	if ( $len < $size ) {
+		die "Received a corrupted message from a worker $child->{pid}: expected $len bytes instead of $size bytes\n";
+	}
+	my $message = thaw $buffer;
+	if ( $logger->is_debug ) {
+		$logger->debug( "Received a message from a worker $child->{pid}:\n" . Dumper($message) );
+	}
+	return $message;
+}
+
+sub killWorker {
+	my $child = shift;
+	$logger->info("Killing worker $child->{pid}");
+	unassignJob($child);
+	undef $child->{childWatcher};
+	undef $child->{ioWatcher};
+	kill 'KILL', $child->{pid}
+	  unless defined $child->{dead};
+	close( $child->{in} );
+	close( $child->{out} );
+	delete $workers{ $child->{pid} };
+}
+
+sub spawnWorker {
+	my $child = {
+		'pid' => undef,
+		'in'  => undef,
+		'out' => undef,
+	};
+	$child->{pid} = open2( $child->{out}, $child->{in}, 'perl', 'proxy-check-worker.pl' );
+	binmode $child->{out}
+	  if $child->{out};
+	binmode $child->{in}
+	  if $child->{in};
+
+	$child->{childWatcher} = AnyEvent->child(
+		pid => $child->{pid},
+		cb  => sub {
+			my ( $pid, $status ) = @_;
+			$logger->info("Worker $child->{pid} has exited with status: $status");
+			$child->{dead} = $status;
+			killWorker($child);
+		},
+	);
+
+	$child->{ioWatcher} = AnyEvent->io(
+		'fh'   => $child->{out},
+		'poll' => 'r',
+		'cb'   => sub {
+			eval {
+				my $message = readMessage($child);
+				processMessage( $child, $message );
+			};
+			if ($@) {
+				$@ =~ s/\s+$//;
+				$logger->error($@);
+				killWorker($child);
+				return;
+			}
+		}
+	);
+
+	$workers{ $child->{pid} } = $child;
+	return $child;
+}
+
+sub fetchJobs {
+	my $time = clock_gettime(CLOCK_MONOTONIC);
+	return if $time - $lastJobsFetched < 59;
+	$logger->debug("Fetching proxies to check");
+	my $dt = DateTime->now;
+	$dt -= DateTime::Duration->new( 'months' => 1 );
+	@jobsQueue = grep { !exists $jobsTaken{ $_->{proxy_id} } } $dbh->fetchProxiesToCheck( $dt, 200 );
+	$lastJobsFetched = $time;
+}
+
+sub sendJob {
+	my $worker = shift;
+
+	fetchJobs unless @jobsQueue;
+	my $job = shift @jobsQueue;
+	return unless $job;
+
+	eval {
+		sendMessage(
+			$worker,
+			{
+				'command' => 'CHECK',
+				'proxy'   => $job->{proxy_id},
+				'address' => $job->{proxy_address},
+			}
+		);
+
+		$worker->{job}      = $job;
+		$worker->{jobStart} = clock_gettime(CLOCK_MONOTONIC);
+
+		$jobsTaken{ $job->{proxy_id} } = $worker;
+	};
+	my $error = $@;
+	if ($error) {
+		unshift @jobsQueue, $job;
+		$logger->error("Unable to send a job to a worker $worker->{pid}: $error");
+		killWorker($worker);
+	}
+}
+
+sub unassignJob {
+	my $worker = shift;
+
+	my $job = $worker->{job};
+	return unless defined $job;
+
+	$worker->{job}     = undef;
+	$worker->{jobTime} = undef;
+	delete $jobsTaken{ $job->{proxy_id} };
+}
+
+sub processMessage {
+	my $worker  = shift;
+	my $message = shift;
+
+	die "Invalid message\n"
+	  unless ref($message) eq 'HASH' and defined $message->{command};
+
+	if ( $message->{command} eq 'CHECKRESULT' ) {
+		if ( $message->{proxy} != $worker->{job}->{proxy_id} ) {
+			die "Invalid job id, got: $message->{proxy}, expected: $worker->{job}->{proxy_id}\n";
+		}
+		unassignJob($worker);
+
+		$dbh->insertCheckResult( @{$message}{ 'proxy', 'status', 'type', 'address' } );
+
+		# If the address qualifies for blocking append block request to the queue
+		if ( defined $message->{block} ) {
+			$dbh->insertOrIgnoreBlock( $message->{address}, DateTime->now(), $message->{block} );
+		}
+		sendJob($worker);
+	}
+	else {
+		die "Unsupported command '$message->{command}'\n";
+	}
+}
+
+sub min ($$) {
+	return $_[ $_[0] > $_[1] ];
+}
+
+my $quit = AnyEvent->condvar;
+
+my $jobs = AnyEvent->timer(
+	after    => 0,
+	interval => 60,
+	cb       => sub {
+		unless (@jobsQueue) {
+
+			# Empty job queue, fetch more
+			fetchJobs;
+		}
+
+		# Spawn workers if needed
+		my $currentWorkers = scalar keys %workers;
+		my $workersToSpawn = min( $maxWorkers - $currentWorkers, scalar @jobsQueue );
+
+		if ( $workersToSpawn > 0 ) {
+			$logger->info("Spawning workers: $workersToSpawn");
+			for ( my $i = 0 ; $i < $workersToSpawn ; $i++ ) {
+				spawnWorker;
+			}
+		}
+		my $time = clock_gettime(CLOCK_MONOTONIC);
+		foreach my $worker ( values %workers ) {
+			if ( $worker->{job} ) {
+
+				# Check job time
+				if ( $time - $worker->{jobStart} > 3600 ) {
+					killWorker($worker);
+				}
+			}
+			else {
+
+				# Assign job to worker
+				sendJob($worker);
+			}
+		}
+	},
+);
+
+eval {    #
+	$quit->recv;
+};
+if ($@) {
+	$@ =~ s/\s+$//;
+	$logger->fatal($@);
+}

          
A => beaubot/bot/proxy/proxy-import-centrump2p.pl +57 -0
@@ 0,0 1,57 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+$mech->get('http://prx.centrump2p.com/');
+
+sub extractLinks {
+	my @links = $mech->content =~ /<td class="a1"><a href="([^"]+)/g;
+	die "Unable to find links\n"
+	  unless @links;
+
+	$dbh->begin;
+	foreach my $link (@links) {
+		$link = decode_entities($link);
+		$logger->info("Enqueueing $link for scan");
+		$dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+
+	}
+	$dbh->commit;
+}
+
+die "Unable to find pager\n"
+  unless $mech->content =~ m{<div id="pager">.+<a href="/(\d+)">\d+</a>\s*</div>}s;
+
+my $count = $1;
+
+die "Number of pages is too high ($count)\n"
+  if $count > 100;
+
+extractLinks;
+
+for ( my $i = 2 ; $i < $count ; $i++ ) {
+	sleep( int( rand(10) ) + 5 );
+	$logger->info("Checking page $i of $count");
+	$mech->get("http://prx.centrump2p.com/$i");
+	extractLinks;
+}

          
A => beaubot/bot/proxy/proxy-import-hinky.pl +57 -0
@@ 0,0 1,57 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+$mech->get('http://mrhinkydink.com/proxies.htm');
+
+sub extractLinks {
+	my @links = $mech->content =~ m{<td>(\d+\.\d+\.\d+\.\d+)(?:<sup>\*</sup>)?</td>\s*<td>(\d+)</td>}g;
+	die "Unable to find links\n"
+	  unless @links;
+
+	$dbh->begin;
+	while (@links) {
+		my ( $address, $port ) = splice @links, 0, 2;
+		my $link = "$address:$port";
+		$logger->info("Enqueueing $link for scan");
+		$dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+	}
+	$dbh->commit;
+}
+
+die "Unable to find pager\n"
+  unless $mech->content =~ m{<a href="proxies(\d+)\.htm">\[\d+\]</a>&nbsp;\s*</td>}s;
+
+my $count = $1;
+
+die "Number of pages is too high ($count)\n"
+  if $count > 100;
+
+extractLinks;
+
+for ( my $i = 2 ; $i < $count ; $i++ ) {
+	sleep( int( rand(10) ) + 5 );
+	$logger->info("Checking page $i of $count");
+	$mech->get("http://mrhinkydink.com/proxies$i.htm");
+	extractLinks;
+}

          
A => beaubot/bot/proxy/proxy-import-procseebot.pl +88 -0
@@ 0,0 1,88 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use DateTime;
+use DateTime::Format::Strptime;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+
+my $db   = "$RealBin/../var/proxy.sqlite";
+my $user = 'ProcseeBot';
+my $bot  = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setProject( 'wikipedia', 'en' );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $dateFormat = new DateTime::Format::Strptime(
+	pattern   => '%Y-%m-%dT%TZ',
+	time_zone => 'UTC',
+	on_error  => 'croak',
+);
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my $lastTimestamp = $dbh->getSetting('procseebot.last.timestamp');
+
+my $api  = $bot->getApi;
+my %args = (               #
+	'action'   => 'query',
+	'list'     => 'logevents',
+	'leaction' => 'block/block',
+	'leuser'   => 'ProcseeBot',
+	'ledir'    => 'newer',
+	'lelimit'  => 'max',
+);
+$args{lestart} = $lastTimestamp
+  if defined $lastTimestamp;
+
+my $iterator = $api->getIterator(%args);
+my $count    = 0;
+
+$dbh->begin;
+while ( my $entry = $iterator->next ) {
+	die "Invalid user '$entry->{user}'\n"
+	  unless $entry->{user} eq $user;
+
+	die "Invalid title '$entry->{title}'\n"
+	  unless $entry->{title} =~ m/^User:(\d+\.\d+\.\d+\.\d+)$/;
+
+	my $exitAddress = $1;
+
+	die "Invalid comment '$entry->{comment}'\n"
+	  unless $entry->{comment} =~ m/^\{\{blocked proxy\}\} <!-- (?:(\S+):)?(\d+) -->$/;
+
+	my $entryAddress = defined $1 ? $1 : $exitAddress;
+	my $port = $2;
+
+	$lastTimestamp = $entry->{timestamp};
+
+	my $now    = DateTime->now;
+	my $expiry = $dateFormat->parse_datetime( $entry->{params}->{expiry} );
+
+	if ( $expiry < $now ) {
+		$logger->info("Ignoring expired block $exitAddress ($entryAddress:$port)");
+	}
+	else {
+		$logger->info("Enqueueing $entryAddress:$port for scan");
+		$dbh->insertOrIgnoreProxy( "$entryAddress:$port", $user );
+	}
+
+	$lastTimestamp = $entry->{timestamp};
+
+	$count++;
+	if ( $count % 500 == 0 ) {
+		$dbh->setSetting( 'procseebot.last.timestamp', $lastTimestamp );
+		$dbh->commit;
+		$dbh->begin;
+	}
+}
+$dbh->setSetting( 'procseebot.last.timestamp', $lastTimestamp );
+$dbh->commit;

          
A => beaubot/bot/proxy/proxy-import-proxyorg.pl +46 -0
@@ 0,0 1,46 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+$mech->get('http://proxy.org/proxy.js');
+
+my $content = $mech->content;
+
+my @links = $content =~ /value="([^"' ]+)"/g;
+@links = grep { $_ ne 'random' } @links;
+die "Unable to find links\n"
+  unless @links;
+
+$dbh->begin;
+foreach my $link (@links) {
+	$link = decode_entities($link);
+	$link = "http://$link"
+	  unless $link =~ '://';
+
+	next if $link eq 'http://';
+
+	$logger->info("Enqueueing $link for scan");
+	$dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+
+}
+$dbh->commit;

          
A => beaubot/bot/proxy/proxy-import-xeronet.pl +74 -0
@@ 0,0 1,74 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+#$mech->proxy( [ 'http', 'https' ] => '' );
+
+my %links;
+my @pages = (    #
+	'http://www.xeronet-proxy-list.net/type/other_proxy',
+	'http://www.xeronet-proxy-list.net/type/glype_proxy',
+	'http://www.xeronet-proxy-list.net',
+	#'http://www.proxywebsitelist.in',
+	#'http://freeproxylistings.com',
+);
+
+foreach my $page (@pages) {
+	eval {
+		$logger->info("Checking $page");
+		$mech->get($page);
+
+		my $content = $mech->content;
+		my $count   = 0;
+
+		foreach my $link ( $content =~ m{<A href="[^">]+out\.php?[^">]+"[^>]*>\s*(http://[^>]+?)\s*</A>}isg ) {
+			$links{$link}++;
+			$count++;
+		}
+		if ($count) {
+			$logger->info("Fetched $count links");
+		}
+		else {
+			$logger->error("No links found on $page");
+		}
+	};
+	if ($@) {
+		$@ =~ s/\s+$//;
+		$logger->error("Unable to fetch page $page: $@");
+	}
+
+}
+
+die "Unable to find links\n"
+  unless keys %links;
+
+$dbh->begin;
+foreach my $link ( keys %links ) {
+	$link = decode_entities($link);
+	$link = "http://$link"
+	  unless $link =~ '://';
+
+	$logger->info("Enqueueing $link for scan");
+	$dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+}
+$dbh->commit;

          
A => beaubot/bot/proxy/schema.sql +45 -0
@@ 0,0 1,45 @@ 
+CREATE TABLE IF NOT EXISTS proxies (
+	proxy_id INTEGER PRIMARY KEY,
+	/* An address */
+	proxy_address TEXT UNIQUE,
+	/* A timestamp of a insert */
+	proxy_added TIMESTAMP,
+	/* A timestamp of the last check */
+	proxy_checked TIMESTAMP,
+	/* A reference to a source */
+	proxy_source TEXT
+);
+
+CREATE INDEX IF NOT EXISTS proxies_checked_idx ON proxies (proxy_checked);
+
+CREATE TABLE IF NOT EXISTS checks (
+	check_proxy INTEGER REFERENCES proxies (proxy_id),
+	/* A timestamp of the attempt */
+	check_timestamp TIMESTAMP,
+	/* A check result: 0 - not working, 1 - working */
+	check_status INTEGER,
+	/* -1 - Unknown, 0 - HTTP, 1 - SOCKS4, 2 - SOCKS5, 3 - Web Proxy */
+	check_type INTEGER,
+	/* An address of the proxy */
+	check_address TEXT,
+	PRIMARY KEY(check_proxy, check_timestamp)
+);
+
+CREATE TABLE IF NOT EXISTS sessions (
+	/* A session identifier of a check */
+	session_id TEXT PRIMARY KEY,
+	/* An address of the checked proxy */
+	session_address TEXT
+);
+
+CREATE TABLE IF NOT EXISTS blocks (
+	block_id INTEGER PRIMARY KEY,
+	block_address TEXT UNIQUE,
+	block_start TIMESTAMP,
+	block_expiry TIMESTAMP
+);
+
+CREATE TABLE IF NOT EXISTS settings (
+	name TEXT PRIMARY KEY,
+	value TEXT
+);