b2010cf66d2f — Marcin Cieślak 3 years ago
Merge local changes
A => beaubot/LICENSE +339 -0
@@ 0,0 1,339 @@ 
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.

          
A => beaubot/bot/proxy/AddressChecker.pm +82 -0
@@ 0,0 1,82 @@ 
+package AddressChecker;
+
+use strict;
+use warnings;
+use Socket;
+use Log::Any;
+
+my $logger = Log::Any::get_logger;
+
+my @hostnames = (
+	{
+		'pattern' => qr/(?:ppp|adsl)\.tpnet\.pl$/,
+		'type'    => 'dynamic',
+		'block'   => '12 hours',
+	},
+	{
+		'pattern' => qr/^(?:dial|dynamic)-.+\.dialog\.net\.pl$/,
+		'type'    => 'dynamic',
+	},
+	{
+		'pattern' => qr/\.dynamic\.t-mobile\.pl$/,
+		'type'    => 'dynamic',
+	},
+	{
+		'pattern' => qr/^user-\d+-\d+-\d+-\d+\.play-internet\.pl$/,
+		'type'    => 'dynamic',
+	},
+	{
+		'pattern' => qr/\.adsl\.inetia\.pl$/,
+		'type'    => 'dynamic',
+		'block'   => '12 hours',
+	},
+	{
+		'pattern' => qr/\.gprs.*\.plus(?:gsm)?\.pl$/,
+		'type'    => 'dynamic',
+	},
+	{
+		'pattern' => qr/-gprs.+\.centertel\.pl$/,
+		'type'    => 'dynamic',
+	},
+	{
+		'pattern' => qr/dynamic\.chello\.pl$/,
+		'type'    => 'dynamic',
+		'block'   => '4 months',
+	},
+	{
+		'pattern' => qr/dynamic/,
+		'type'    => 'dynamic',
+		'block'   => '2 months',
+	},
+);
+
+sub check {
+	my $address = shift;
+	$logger->debug("Checking $address");
+	my $result = {};
+	my $info;
+	my $hostname = gethostbyaddr( inet_aton($address), AF_INET );
+
+	if ( defined $hostname ) {
+		$result->{hostname} = $hostname;
+		$logger->debug("The address $address resolves to $hostname");
+		foreach my $entry (@hostnames) {
+			if ( $hostname =~ $entry->{pattern} ) {
+				$info = $entry;
+				last;
+			}
+		}
+	}
+
+	if ( defined $info ) {
+		$result->{type}  = $info->{type};
+		$result->{block} = $info->{block};
+	}
+	else {
+		$result->{type}  = 'static';
+		$result->{block} = '6 months';
+	}
+	return $result;
+}
+
+1;

          
A => beaubot/bot/proxy/ProxyDatabase.pm +225 -0
@@ 0,0 1,225 @@ 
+package ProxyDatabase;
+
+use strict;
+use warnings;
+use Log::Any;
+use DBI;
+use DateTime;
+use DateTime::Format::Strptime;
+
+my $logger = Log::Any->get_logger;
+
+my $dateFormat = new DateTime::Format::Strptime(
+	pattern   => '%Y-%m-%dT%TZ',
+	time_zone => 'UTC',
+	on_error  => 'croak',
+);
+
+sub new {
+	my $caller = shift;
+	my $class = ref($caller) || $caller;
+
+	my $this = {
+		'dbh'  => undef,
+		'file' => undef,
+		@_,
+	};
+	bless $this, $class;
+
+	die "A path to a database is missing\n"
+	  unless defined $this->{file};
+
+	$this->{dbh} = DBI->connect( "dbi:SQLite:dbname=$this->{file}", "", "", { RaiseError => 1, PrintError => 0, sqlite_use_immediate_transaction => 1 } );
+
+	return $this;
+}
+
+# ------------------------------------------------------------------------------
+
+sub begin {
+	my $this = shift;
+	$this->{dbh}->begin_work;
+}
+
+sub commit {
+	my $this = shift;
+	$this->{dbh}->commit;
+}
+
+# ------------------------------------------------------------------------------
+
+sub getSetting {
+	my $this = shift;
+	my $name = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('SELECT value FROM settings WHERE name = ?');
+	$sth->execute($name);
+	my ($value) = $sth->fetchrow_array();
+	$sth->finish;
+	return $value;
+}
+
+sub setSetting {
+	my $this  = shift;
+	my $name  = shift;
+	my $value = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('INSERT OR REPLACE INTO settings(name, value) VALUES(?, ?)');
+	$sth->execute( $name, $value );
+}
+
+# ------------------------------------------------------------------------------
+
+sub insertOrIgnoreProxy {
+	my $this    = shift;
+	my $address = shift;
+	my $source  = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('INSERT OR IGNORE INTO proxies (proxy_address, proxy_added, proxy_source) VALUES (?, ?, ?)');
+	$sth->execute( $address, $dateFormat->format_datetime( DateTime->now ), $source );
+}
+
+sub fetchProxiesToCheck {
+	my $this  = shift;
+	my $since = shift;
+	my $limit = shift || 50;
+
+	my $sth = $this->{dbh}->prepare_cached( "
+	SELECT proxy_id, proxy_address
+	FROM proxies
+	WHERE proxy_checked IS NULL OR proxy_checked < ?
+	ORDER BY proxy_checked, proxy_id DESC
+	LIMIT $limit
+" );
+	$sth->execute( $dateFormat->format_datetime($since) );
+
+	my @list;
+
+	while ( my $row = $sth->fetchrow_hashref ) {
+		push @list, $row;
+	}
+	return @list;
+}
+
+# ------------------------------------------------------------------------------
+
+sub insertCheckResult {
+	my $this    = shift;
+	my $proxy   = shift;
+	my $status  = shift;
+	my $type    = shift;
+	my $address = shift;
+
+	my $timestamp = $dateFormat->format_datetime( DateTime->now );
+	my $insertSth = $this->{dbh}->prepare_cached('INSERT INTO checks (check_proxy, check_timestamp, check_status, check_type, check_address) VALUES (?, ?, ?, ?, ?)');
+	$insertSth->execute( $proxy, $timestamp, $status, $type, $address );
+	my $updateSth = $this->{dbh}->prepare_cached('UPDATE proxies SET proxy_checked = ? WHERE proxy_id = ?');
+	$updateSth->execute( $timestamp, $proxy );
+}
+
+# ------------------------------------------------------------------------------
+
+sub insertOrIgnoreBlock {
+	my $this   = shift;
+	my $target = shift;
+	my $start  = shift;
+	my $expiry = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('INSERT OR IGNORE INTO blocks (block_address, block_start, block_expiry) VALUES (?, ?, ?)');
+	$sth->execute( $target, $dateFormat->format_datetime($start), $expiry );
+}
+
+sub removeBlock {
+	my $this    = shift;
+	my $blockId = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('DELETE FROM blocks WHERE block_id = ?');
+	$sth->execute($blockId);
+}
+
+sub fetchBlocks {
+	my $this    = shift;
+	my $blockId = shift;
+	my $limit   = shift || 50;
+
+	my $sth = $this->{dbh}->prepare_cached("SELECT block_id AS id, block_address AS address, block_start AS start, block_expiry AS expiry FROM blocks ORDER BY block_id ASC LIMIT $limit");
+	$sth->execute;
+
+	my @list;
+	while ( my $row = $sth->fetchrow_hashref ) {
+		$row->{start} = $dateFormat->parse_datetime( $row->{start} );
+		push @list, $row;
+	}
+	return @list;
+}
+
+# ------------------------------------------------------------------------------
+
+my @sessionChars = ( 'A' .. 'Z', 'a' .. 'z', '0' .. '9' );
+
+sub generateSessionId {
+	my $result = '';
+
+	for ( my $i = 0 ; $i < 32 ; $i++ ) {
+		$result .= $sessionChars[ int( rand( scalar @sessionChars ) ) ];
+	}
+	return $result;
+}
+
+sub createSession {
+	my $this = shift;
+
+	my $sth   = $this->{dbh}->prepare_cached('INSERT INTO sessions (session_id) VALUES (?)');
+	my $count = 0;
+	while ( $count < 100 ) {
+		my $sessionId = generateSessionId();
+		eval {    #
+			$sth->execute($sessionId);
+		};
+		my $error = $@;
+		if ($error) {
+			die $error
+			  unless $error =~ /column session_id is not unique/;
+		}
+		else {
+			return $sessionId;
+		}
+		$count++;
+	}
+	die "Unable to generate sessionId\n";
+}
+
+sub destroySession {
+	my $this      = shift;
+	my $sessionId = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('DELETE FROM sessions WHERE session_id = ?');
+	$sth->execute($sessionId);
+}
+
+sub getSessionAddress {
+	my $this      = shift;
+	my $sessionId = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('SELECT session_address FROM sessions WHERE session_id = ?');
+
+	$sth->execute($sessionId);
+	my ($address) = $sth->fetchrow_array;
+	$sth->finish;
+	return $address;
+}
+
+sub setSessionAddress {
+	my $this      = shift;
+	my $sessionId = shift;
+	my $address   = shift;
+
+	my $sth = $this->{dbh}->prepare_cached('UPDATE sessions SET session_address = ? WHERE session_id = ? AND session_address IS NULL');
+	$sth->execute( $address, $sessionId );
+
+	return $sth->rows;
+}
+
+# ------------------------------------------------------------------------------
+
+1;

          
A => beaubot/bot/proxy/check-ip.pl +36 -0
@@ 0,0 1,36 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use DateTime;
+use AddressChecker;
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my $sth = $dbh->{dbh}->prepare_cached('SELECT * FROM checks WHERE check_status <> 0');
+$sth->execute;
+while ( my $row = $sth->fetchrow_hashref ) {
+	my $result = AddressChecker::check( $row->{check_address} );
+	if ( $logger->is_debug ) {
+		$logger->debug( "Result:\n" . Dumper( [ $row, $result ] ) );
+	}
+	if ( defined $result->{block} ) {
+		$logger->info("Placing block on $row->{check_address} with expiry: $result->{block}");
+		$dbh->insertOrIgnoreBlock( $row->{check_address}, DateTime->now(), $result->{block} );
+	}
+
+}

          
A => beaubot/bot/proxy/log4perl.conf +10 -0
@@ 0,0 1,10 @@ 
+log4perl.oneMessagePerAppender = 1
+
+log4perl.rootLogger=INFO, console
+
+log4perl.category.main = DEBUG, console
+
+log4perl.appender.console=Log::Log4perl::Appender::Screen
+log4perl.appender.console.layout=Log::Log4perl::Layout::PatternLayout
+log4perl.appender.console.layout.ConversionPattern=[%d{dd-MM-yyyy HH:mm:ss}][%P] %p %m%n
+log4perl.appender.console.utf8=1

          
A => beaubot/bot/proxy/proxy-block.pl +73 -0
@@ 0,0 1,73 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setProject( 'wikipedia', 'pl', 'sysop' );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my $api = $bot->getApi;
+$api->checkAccount;
+
+while ( my @list = $dbh->fetchBlocks ) {
+	foreach my $block (@list) {
+		$logger->info("Processing block request of $block->{address}");
+
+		my $blocked = 0;
+		{
+			my $iterator = $api->getIterator(
+				'action' => 'query',
+				'list'   => 'blocks',
+				'bkip'   => $block->{address},
+			);
+
+			while ( my $entry = $iterator->next ) {
+				$blocked++;
+				$logger->info( "Active blocks of $block->{address}:\n" . Dumper($entry) )
+				  if $logger->is_info;
+			}
+		}
+		{
+			my $iterator = $api->getIterator(
+				'action' => 'query',
+				'list'   => 'globalblocks',
+				'bgip'   => $block->{address},
+			);
+
+			while ( my $entry = $iterator->next ) {
+				$blocked++;
+				$logger->info( "Active global blocks of $block->{address}:\n" . Dumper($entry) )
+				  if $logger->is_info;
+			}
+		}
+		if ($blocked) {
+			$logger->info("$block->{address} is already blocked");
+		}
+		else {
+			$logger->info("Blocking $block->{address} with expiry $block->{expiry}");
+			$api->block(
+				'anononly' => 1,
+				'nocreate' => 1,
+				'user'     => $block->{address},
+				'reason'   => '[[WP:OP|open proxy]]',
+				'expiry'   => $block->{expiry},
+			);
+
+		}
+		$dbh->removeBlock( $block->{id} );
+	}
+}

          
A => beaubot/bot/proxy/proxy-check-verifier.pl +29 -0
@@ 0,0 1,29 @@ 
+#!/usr/bin/perl
+
+use strict;
+use utf8;
+use CGI;
+use ProxyDatabase;
+use Env;
+use FindBin qw($RealBin);
+
+binmode STDOUT, ":utf8";
+
+my $db         = "$RealBin/../var/proxy.sqlite";
+my $sessionId  = CGI::param('id');
+my $remoteAddr = $ENV{REMOTE_ADDR};
+
+sub error {
+	print "Content-type: text/html; charset=utf-8\n\nERROR";
+	exit(0);
+}
+
+error()
+  unless defined $sessionId and defined $remoteAddr;
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+error()
+  unless $dbh->setSessionAddress( $sessionId, $remoteAddr );
+
+print "Content-type: text/html; charset=utf-8\n\nSUCCESS";

          
A => beaubot/bot/proxy/proxy-check-worker.pl +298 -0
@@ 0,0 1,298 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use ProxyDatabase;
+use AddressChecker;
+use LWP::UserAgent;
+use WWW::Mechanize;
+use IO::Handle;
+use Storable qw(freeze thaw);
+use File::Spec;
+use Log::Any;
+use Log::Any::Adapter;
+use LWP::Protocol::socks;
+
+binmode STDIN;
+binmode STDOUT;
+STDOUT->autoflush(1);
+
+my $db      = "$RealBin/../var/proxy.sqlite";
+my $testUrl = 'http://94.23.242.48/~beau/cgi-bin/verify.pl?id=$sessionId';
+my $agent   = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7';
+
+my %ignoredAddresses = map { $_ => 1 } qw(94.23.242.48 178.33.53.23 178.32.200.207 127.0.0.1);
+
+# Send all logs to Log::Log4perl
+use Log::Log4perl;
+Log::Log4perl->init( File::Spec->join( $RealBin, 'log4perl.conf' ) );
+Log::Any::Adapter->set('Log4perl');
+
+my $logger = Log::Any::get_logger;
+$logger->info("Worker is being started");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+sub checkProxyLwp {
+	my $info  = shift;
+	my $proxy = shift;
+
+	$logger->info("Checking proxy '$proxy' using LWP::UserAgent");
+	my $result = {
+		'address' => undef,
+		'status'  => 0,
+	};
+	my $sessionId = $dbh->createSession();
+	eval {
+		my $ua = LWP::UserAgent->new(
+			'agent'   => $agent,
+			'timeout' => 30,
+		);
+		$ua->proxy( [ 'http', 'https' ] => $proxy );
+
+		my $url = $testUrl;
+		$url =~ s/\$sessionId/$sessionId/g;
+
+		$ua->get($url);
+		my $address = $dbh->getSessionAddress($sessionId);
+
+		$result->{address} = $address;
+		$result->{status} = defined $address ? 1 : 0;
+	};
+	my $error = $@;
+	$dbh->destroySession($sessionId);
+	die $error
+	  if $error;
+
+	return $result;
+}
+
+sub checkHttpProxy {
+	my $info = shift;
+
+	# Do not check proxies without a specified port
+	return { status => 0 }
+	  unless defined $info->{port};
+
+	my $result = checkProxyLwp( $info, "http://$info->{address}:$info->{port}/" );
+	$result->{type}     = 'http';
+	$result->{accepted} = '1';
+	return $result;
+}
+
+sub checkSocks4Proxy {
+	my $info = shift;
+
+	# Do not check proxies without a specified port
+	return { status => 0 }
+	  unless defined $info->{port};
+
+	my $result = checkProxyLwp( $info, "socks4://$info->{address}:$info->{port}/" );
+	$result->{type}     = 'socks4';
+	$result->{accepted} = '1';
+	return $result;
+}
+
+sub checkSocks5Proxy {
+	my $info = shift;
+
+	# Do not check proxies without a specified port
+	return { status => 0 }
+	  unless defined $info->{port};
+
+	my $result = checkProxyLwp( $info, "socks://$info->{address}:$info->{port}/" );
+	$result->{type}     = 'socks5';
+	$result->{accepted} = '1';
+	return $result;
+}
+
+sub checkWebProxy {
+	my $info = shift;
+
+	# Do not check proxies with a specified port
+	return { status => 0 }
+	  if defined $info->{port};
+
+	# Do not check non-web proxies
+	return { status => 0 }
+	  unless $info->{address} =~ m{^https?://}i;
+
+	$logger->info("Checking proxy '$info->{address}' using WWW::Mechanize");
+	my $result = {
+		'address' => undef,
+		'status'  => 0,
+	};
+	my $sessionId = $dbh->createSession();
+	eval {
+		my $mech = new WWW::Mechanize(
+			'agent'     => $agent,
+			'timeout'   => 30,
+			'autocheck' => 0,
+			'quiet'     => 1,
+		);
+
+		my $response = $mech->get( $info->{address} );
+		return unless $response->is_success;
+
+		my $form;
+		my $field;
+
+		foreach my $f ( 'u', 'q', 'url', 'rxproxyuri' ) {
+			$form  = $mech->form_with_fields($f);
+			$field = $f;
+			next if $form and $form->method eq 'SEND';
+			last if $form;
+		}
+
+		unless ( defined $form ) {
+			$logger->debug("Unable to find form");
+			return;
+		}
+		my $url = $testUrl;
+		$url =~ s/\$sessionId/$sessionId/g;
+		$mech->field( $field, $url );
+		$mech->submit;
+
+		my $address = $dbh->getSessionAddress($sessionId);
+
+		$result->{address} = $address;
+		$result->{status}  = defined $address ? 1 : 0;
+		$result->{type}    = 'web proxy';
+	};
+	my $error = $@;
+	$dbh->destroySession($sessionId);
+	die $error
+	  if $error;
+
+	$result->{accepted} = '1';
+	return $result;
+}
+
+my @methods = (    #
+	\&checkHttpProxy,
+	\&checkSocks4Proxy,
+	\&checkSocks5Proxy,
+	\&checkWebProxy,
+);
+
+sub checkProxy {
+	my $info = shift;
+
+	foreach my $method (@methods) {
+
+		# TODO: Check till no new addresses are discovered...
+		my $result;
+		for ( my $i = 0 ; $i < 3 ; $i++ ) {
+			$result = $method->($info);
+			last if $result->{status};
+			sleep(1)
+			  if $result->{accepted};
+		}
+		return $result
+		  if $result->{status};
+	}
+	return { status => 0 };
+}
+
+# ------------------------------------------------------------------------------
+
+sub sendMessage {
+	my $message = shift;
+
+	if ( $logger->is_debug ) {
+		$logger->debug( "Sending a message:\n" . Dumper($message) );
+	}
+
+	my $data = freeze($message);
+	STDOUT->print( bytes::length($data) . "\n" . $data )
+	  or die "Unable to send a message: $!\n";
+}
+
+sub readMessage {
+	my $size = readline(STDIN);
+
+	die "Unable to read a message header: $!\n"
+	  unless defined $size;
+
+	chomp($size);
+
+	my $buffer;
+	my $len = read( STDIN, $buffer, $size );
+
+	if ( $len < $size ) {
+		die "Received a corrupted message: expected $len bytes instead of $size bytes\n";
+	}
+	my $message = thaw $buffer;
+	if ( $logger->is_debug ) {
+		$logger->debug( "Received a message:\n" . Dumper($message) );
+	}
+	return $message;
+}
+
+# ------------------------------------------------------------------------------
+
+eval {
+	$logger->info("Worker is entering main loop");
+	while (1) {
+		$0 = 'proxy-check-worker: idle';
+		last
+		  if eof(STDIN);
+		my $message = readMessage();
+
+		die "Invalid message\n"
+		  unless ref($message) eq 'HASH' and defined $message->{command};
+
+		if ( $message->{command} eq 'CHECK' ) {
+			$0 = "proxy-check-worker: checking $message->{address}";
+
+			if ( $message->{address} =~ /^(.+?):(\d+)$/ ) {
+				$message->{address} = $1;
+				$message->{port}    = $2;
+			}
+
+			my $result = checkProxy($message);
+
+			if ( $result->{status} ) {
+
+				my $reject = undef;
+
+				if ( $result->{address} !~ /^\d+\.\d+\.\d+\.\d+$/ ) {
+					$reject = "not a valid IPv4 address '$result->{address}'";
+				}
+				elsif ( $ignoredAddresses{ $result->{address} } ) {
+					$reject = "ignored address '$result->{address}'";
+				}
+
+				if ( defined $reject ) {
+					$logger->info("Ignoring proxy $message->{proxy}: $reject");
+					$result = { status => 0 };
+				}
+				else {
+
+					# Check type of address (isp, country, service)
+					my $addressCheck = AddressChecker::check( $result->{address} );
+					$result->{block}       = $addressCheck->{block};
+					$result->{hostname}    = $addressCheck->{hostname};
+					$result->{addressType} = $addressCheck->{type};
+				}
+			}
+
+			$result->{proxy}   = $message->{proxy};
+			$result->{command} = 'CHECKRESULT';
+			sendMessage($result);
+		}
+		elsif ( $message->{command} eq 'QUIT' ) {
+			last;
+		}
+		else {
+			die "Unsupported command '$message->{command}'\n";
+		}
+	}
+	$logger->info("Worker is being terminated");
+};
+if ($@) {
+	$@ =~ s/\s+$//;
+	$logger->fatal("Worker is being terminated: $@");
+	exit(1);
+}

          
A => beaubot/bot/proxy/proxy-check.pl +252 -0
@@ 0,0 1,252 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use LWP::UserAgent;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Storable qw(freeze thaw);
+use Bot4;
+use AnyEvent;
+use IPC::Open2;
+use ProxyDatabase;
+use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
+
+my $maxWorkers = 10;
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my %workers;      # pid => $worker
+my %jobsTaken;    # proxy_id => $worker
+my @jobsQueue;    # list of { proxy_id, proxy_address }
+my $lastJobsFetched = clock_gettime(CLOCK_MONOTONIC) - 60;    # timestamp, when last jobs were fetched
+
+sub sendMessage {
+	my $child   = shift;
+	my $message = shift;
+
+	if ( $logger->is_debug ) {
+		$logger->debug( "Sending a message to a worker $child->{pid}:\n" . Dumper($message) );
+	}
+
+	my $data = freeze($message);
+	$child->{in}->print( bytes::length($data) . "\n" . $data )
+	  or die "Unable to send a message to a worker $child->{pid}: $!\n";
+}
+
+sub readMessage {
+	my $child = shift;
+
+	my $size = readline( $child->{out} );
+
+	die "Unable to read from a worker $child->{pid}: $!\n"
+	  unless defined $size;
+
+	chomp($size);
+
+	my $buffer;
+	my $len = read( $child->{out}, $buffer, $size );
+
+	if ( $len < $size ) {
+		die "Received a corrupted message from a worker $child->{pid}: expected $len bytes instead of $size bytes\n";
+	}
+	my $message = thaw $buffer;
+	if ( $logger->is_debug ) {
+		$logger->debug( "Received a message from a worker $child->{pid}:\n" . Dumper($message) );
+	}
+	return $message;
+}
+
+sub killWorker {
+	my $child = shift;
+	$logger->info("Killing worker $child->{pid}");
+	unassignJob($child);
+	undef $child->{childWatcher};
+	undef $child->{ioWatcher};
+	kill 'KILL', $child->{pid}
+	  unless defined $child->{dead};
+	close( $child->{in} );
+	close( $child->{out} );
+	delete $workers{ $child->{pid} };
+}
+
+sub spawnWorker {
+	my $child = {
+		'pid' => undef,
+		'in'  => undef,
+		'out' => undef,
+	};
+	$child->{pid} = open2( $child->{out}, $child->{in}, 'perl', 'proxy-check-worker.pl' );
+	binmode $child->{out}
+	  if $child->{out};
+	binmode $child->{in}
+	  if $child->{in};
+
+	$child->{childWatcher} = AnyEvent->child(
+		pid => $child->{pid},
+		cb  => sub {
+			my ( $pid, $status ) = @_;
+			$logger->info("Worker $child->{pid} has exited with status: $status");
+			$child->{dead} = $status;
+			killWorker($child);
+		},
+	);
+
+	$child->{ioWatcher} = AnyEvent->io(
+		'fh'   => $child->{out},
+		'poll' => 'r',
+		'cb'   => sub {
+			eval {
+				my $message = readMessage($child);
+				processMessage( $child, $message );
+			};
+			if ($@) {
+				$@ =~ s/\s+$//;
+				$logger->error($@);
+				killWorker($child);
+				return;
+			}
+		}
+	);
+
+	$workers{ $child->{pid} } = $child;
+	return $child;
+}
+
+sub fetchJobs {
+	my $time = clock_gettime(CLOCK_MONOTONIC);
+	return if $time - $lastJobsFetched < 59;
+	$logger->debug("Fetching proxies to check");
+	my $dt = DateTime->now;
+	$dt -= DateTime::Duration->new( 'months' => 1 );
+	@jobsQueue = grep { !exists $jobsTaken{ $_->{proxy_id} } } $dbh->fetchProxiesToCheck( $dt, 200 );
+	$lastJobsFetched = $time;
+}
+
+sub sendJob {
+	my $worker = shift;
+
+	fetchJobs unless @jobsQueue;
+	my $job = shift @jobsQueue;
+	return unless $job;
+
+	eval {
+		sendMessage(
+			$worker,
+			{
+				'command' => 'CHECK',
+				'proxy'   => $job->{proxy_id},
+				'address' => $job->{proxy_address},
+			}
+		);
+
+		$worker->{job}      = $job;
+		$worker->{jobStart} = clock_gettime(CLOCK_MONOTONIC);
+
+		$jobsTaken{ $job->{proxy_id} } = $worker;
+	};
+	my $error = $@;
+	if ($error) {
+		unshift @jobsQueue, $job;
+		$logger->error("Unable to send a job to a worker $worker->{pid}: $error");
+		killWorker($worker);
+	}
+}
+
+sub unassignJob {
+	my $worker = shift;
+
+	my $job = $worker->{job};
+	return unless defined $job;
+
+	$worker->{job}     = undef;
+	$worker->{jobTime} = undef;
+	delete $jobsTaken{ $job->{proxy_id} };
+}
+
+sub processMessage {
+	my $worker  = shift;
+	my $message = shift;
+
+	die "Invalid message\n"
+	  unless ref($message) eq 'HASH' and defined $message->{command};
+
+	if ( $message->{command} eq 'CHECKRESULT' ) {
+		if ( $message->{proxy} != $worker->{job}->{proxy_id} ) {
+			die "Invalid job id, got: $message->{proxy}, expected: $worker->{job}->{proxy_id}\n";
+		}
+		unassignJob($worker);
+
+		$dbh->insertCheckResult( @{$message}{ 'proxy', 'status', 'type', 'address' } );
+
+		# If the address qualifies for blocking append block request to the queue
+		if ( defined $message->{block} ) {
+			$dbh->insertOrIgnoreBlock( $message->{address}, DateTime->now(), $message->{block} );
+		}
+		sendJob($worker);
+	}
+	else {
+		die "Unsupported command '$message->{command}'\n";
+	}
+}
+
+sub min ($$) {
+	return $_[ $_[0] > $_[1] ];
+}
+
+my $quit = AnyEvent->condvar;
+
+my $jobs = AnyEvent->timer(
+	after    => 0,
+	interval => 60,
+	cb       => sub {
+		unless (@jobsQueue) {
+
+			# Empty job queue, fetch more
+			fetchJobs;
+		}
+
+		# Spawn workers if needed
+		my $currentWorkers = scalar keys %workers;
+		my $workersToSpawn = min( $maxWorkers - $currentWorkers, scalar @jobsQueue );
+
+		if ( $workersToSpawn > 0 ) {
+			$logger->info("Spawning workers: $workersToSpawn");
+			for ( my $i = 0 ; $i < $workersToSpawn ; $i++ ) {
+				spawnWorker;
+			}
+		}
+		my $time = clock_gettime(CLOCK_MONOTONIC);
+		foreach my $worker ( values %workers ) {
+			if ( $worker->{job} ) {
+
+				# Check job time
+				if ( $time - $worker->{jobStart} > 3600 ) {
+					killWorker($worker);
+				}
+			}
+			else {
+
+				# Assign job to worker
+				sendJob($worker);
+			}
+		}
+	},
+);
+
+eval {    #
+	$quit->recv;
+};
+if ($@) {
+	$@ =~ s/\s+$//;
+	$logger->fatal($@);
+}

          
A => beaubot/bot/proxy/proxy-import-centrump2p.pl +57 -0
@@ 0,0 1,57 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+$mech->get('http://prx.centrump2p.com/');
+
+sub extractLinks {
+	my @links = $mech->content =~ /<td class="a1"><a href="([^"]+)/g;
+	die "Unable to find links\n"
+	  unless @links;
+
+	$dbh->begin;
+	foreach my $link (@links) {
+		$link = decode_entities($link);
+		$logger->info("Enqueueing $link for scan");
+		$dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+
+	}
+	$dbh->commit;
+}
+
+die "Unable to find pager\n"
+  unless $mech->content =~ m{<div id="pager">.+<a href="/(\d+)">\d+</a>\s*</div>}s;
+
+my $count = $1;
+
+die "Number of pages is too high ($count)\n"
+  if $count > 100;
+
+extractLinks;
+
+for ( my $i = 2 ; $i < $count ; $i++ ) {
+	sleep( int( rand(10) ) + 5 );
+	$logger->info("Checking page $i of $count");
+	$mech->get("http://prx.centrump2p.com/$i");
+	extractLinks;
+}

          
A => beaubot/bot/proxy/proxy-import-hinky.pl +57 -0
@@ 0,0 1,57 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+$mech->get('http://mrhinkydink.com/proxies.htm');
+
+sub extractLinks {
+	my @links = $mech->content =~ m{<td>(\d+\.\d+\.\d+\.\d+)(?:<sup>\*</sup>)?</td>\s*<td>(\d+)</td>}g;
+	die "Unable to find links\n"
+	  unless @links;
+
+	$dbh->begin;
+	while (@links) {
+		my ( $address, $port ) = splice @links, 0, 2;
+		my $link = "$address:$port";
+		$logger->info("Enqueueing $link for scan");
+		$dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+	}
+	$dbh->commit;
+}
+
+die "Unable to find pager\n"
+  unless $mech->content =~ m{<a href="proxies(\d+)\.htm">\[\d+\]</a>&nbsp;\s*</td>}s;
+
+my $count = $1;
+
+die "Number of pages is too high ($count)\n"
+  if $count > 100;
+
+extractLinks;
+
+for ( my $i = 2 ; $i < $count ; $i++ ) {
+	sleep( int( rand(10) ) + 5 );
+	$logger->info("Checking page $i of $count");
+	$mech->get("http://mrhinkydink.com/proxies$i.htm");
+	extractLinks;
+}

          
A => beaubot/bot/proxy/proxy-import-procseebot.pl +88 -0
@@ 0,0 1,88 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use DateTime;
+use DateTime::Format::Strptime;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+
+my $db   = "$RealBin/../var/proxy.sqlite";
+my $user = 'ProcseeBot';
+my $bot  = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setProject( 'wikipedia', 'en' );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $dateFormat = new DateTime::Format::Strptime(
+	pattern   => '%Y-%m-%dT%TZ',
+	time_zone => 'UTC',
+	on_error  => 'croak',
+);
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my $lastTimestamp = $dbh->getSetting('procseebot.last.timestamp');
+
+my $api  = $bot->getApi;
+my %args = (               #
+	'action'   => 'query',
+	'list'     => 'logevents',
+	'leaction' => 'block/block',
+	'leuser'   => 'ProcseeBot',
+	'ledir'    => 'newer',
+	'lelimit'  => 'max',
+);
+$args{lestart} = $lastTimestamp
+  if defined $lastTimestamp;
+
+my $iterator = $api->getIterator(%args);
+my $count    = 0;
+
+$dbh->begin;
+while ( my $entry = $iterator->next ) {
+	die "Invalid user '$entry->{user}'\n"
+	  unless $entry->{user} eq $user;
+
+	die "Invalid title '$entry->{title}'\n"
+	  unless $entry->{title} =~ m/^User:(\d+\.\d+\.\d+\.\d+)$/;
+
+	my $exitAddress = $1;
+
+	die "Invalid comment '$entry->{comment}'\n"
+	  unless $entry->{comment} =~ m/^\{\{blocked proxy\}\} <!-- (?:(\S+):)?(\d+) -->$/;
+
+	my $entryAddress = defined $1 ? $1 : $exitAddress;
+	my $port = $2;
+
+	$lastTimestamp = $entry->{timestamp};
+
+	my $now    = DateTime->now;
+	my $expiry = $dateFormat->parse_datetime( $entry->{params}->{expiry} );
+
+	if ( $expiry < $now ) {
+		$logger->info("Ignoring expired block $exitAddress ($entryAddress:$port)");
+	}
+	else {
+		$logger->info("Enqueueing $entryAddress:$port for scan");
+		$dbh->insertOrIgnoreProxy( "$entryAddress:$port", $user );
+	}
+
+	$lastTimestamp = $entry->{timestamp};
+
+	$count++;
+	if ( $count % 500 == 0 ) {
+		$dbh->setSetting( 'procseebot.last.timestamp', $lastTimestamp );
+		$dbh->commit;
+		$dbh->begin;
+	}
+}
+$dbh->setSetting( 'procseebot.last.timestamp', $lastTimestamp );
+$dbh->commit;

          
A => beaubot/bot/proxy/proxy-import-proxyorg.pl +46 -0
@@ 0,0 1,46 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+$mech->get('http://proxy.org/proxy.js');
+
+my $content = $mech->content;
+
+my @links = $content =~ /value="([^"' ]+)"/g;
+@links = grep { $_ ne 'random' } @links;
+die "Unable to find links\n"
+  unless @links;
+
+$dbh->begin;
+foreach my $link (@links) {
+	$link = decode_entities($link);
+	$link = "http://$link"
+	  unless $link =~ '://';
+
+	next if $link eq 'http://';
+
+	$logger->info("Enqueueing $link for scan");
+	$dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+
+}
+$dbh->commit;

          
A => beaubot/bot/proxy/proxy-import-xeronet.pl +74 -0
@@ 0,0 1,74 @@ 
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db  = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+#$mech->proxy( [ 'http', 'https' ] => '' );
+
+my %links;
+my @pages = (    #
+	'http://www.xeronet-proxy-list.net/type/other_proxy',
+	'http://www.xeronet-proxy-list.net/type/glype_proxy',
+	'http://www.xeronet-proxy-list.net',
+	#'http://www.proxywebsitelist.in',
+	#'http://freeproxylistings.com',
+);
+
+foreach my $page (@pages) {
+	eval {
+		$logger->info("Checking $page");
+		$mech->get($page);
+
+		my $content = $mech->content;
+		my $count   = 0;
+
+		foreach my $link ( $content =~ m{<A href="[^">]+out\.php?[^">]+"[^>]*>\s*(http://[^>]+?)\s*</A>}isg ) {
+			$links{$link}++;
+			$count++;
+		}
+		if ($count) {
+			$logger->info("Fetched $count links");
+		}
+		else {
+			$logger->error("No links found on $page");
+		}
+	};
+	if ($@) {
+		$@ =~ s/\s+$//;
+		$logger->error("Unable to fetch page $page: $@");
+	}
+
+}
+
+die "Unable to find links\n"
+  unless keys %links;
+
+$dbh->begin;
+foreach my $link ( keys %links ) {
+	$link = decode_entities($link);
+	$link = "http://$link"
+	  unless $link =~ '://';
+
+	$logger->info("Enqueueing $link for scan");
+	$dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+}
+$dbh->commit;

          
A => beaubot/bot/proxy/schema.sql +45 -0
@@ 0,0 1,45 @@ 
+CREATE TABLE IF NOT EXISTS proxies (
+	proxy_id INTEGER PRIMARY KEY,
+	/* An address */
+	proxy_address TEXT UNIQUE,
+	/* A timestamp of a insert */
+	proxy_added TIMESTAMP,
+	/* A timestamp of the last check */
+	proxy_checked TIMESTAMP,
+	/* A reference to a source */
+	proxy_source TEXT
+);
+
+CREATE INDEX IF NOT EXISTS proxies_checked_idx ON proxies (proxy_checked);
+
+CREATE TABLE IF NOT EXISTS checks (
+	check_proxy INTEGER REFERENCES proxies (proxy_id),
+	/* A timestamp of the attempt */
+	check_timestamp TIMESTAMP,
+	/* A check result: 0 - not working, 1 - working */
+	check_status INTEGER,
+	/* -1 - Unknown, 0 - HTTP, 1 - SOCKS4, 2 - SOCKS5, 3 - Web Proxy */
+	check_type INTEGER,
+	/* An address of the proxy */
+	check_address TEXT,
+	PRIMARY KEY(check_proxy, check_timestamp)
+);
+
+CREATE TABLE IF NOT EXISTS sessions (
+	/* A session identifier of a check */
+	session_id TEXT PRIMARY KEY,
+	/* An address of the checked proxy */
+	session_address TEXT
+);
+
+CREATE TABLE IF NOT EXISTS blocks (
+	block_id INTEGER PRIMARY KEY,
+	block_address TEXT UNIQUE,
+	block_start TIMESTAMP,
+	block_expiry TIMESTAMP
+);
+
+CREATE TABLE IF NOT EXISTS settings (
+	name TEXT PRIMARY KEY,
+	value TEXT
+);

          
M beaubot/bot/wikt-pronun.pl +8 -3
@@ 25,13 25,13 @@ my $api = $bot->getApi;
 my $commonsApi = $bot->getApi( "wikimedia", "commons" );
 $commonsApi->checkAccount;
 
-my @audioTemplates = ( 'audio', 'audioUS', 'audioUK', 'audioCA', 'audioAT' );
-my $reAudioTemplate = qr/audio(?:CA|US|UK|AT)?/i;
+my @audioTemplates = ( 'audio', 'audioUS', 'audioUK', 'audioCA', 'audioAT', 'audioAU' );
+my $reAudioTemplate = qr/audio(?:CA|US|UK|AT|AU)?/i;
 
 my $maxRecordingLength = 10;
 my @groups             = (     #
 	{
-		regex    => qr/^en[ _-](?:us|boston[ _-]us|nz|aus?|br|sa)[ _-]/i,
+		regex    => qr/^en[ _-](?:us|boston[ _-]us)[ _-]/i,
 		language => 'angielski',
 		template => 'audioUS',
 	},

          
@@ 41,6 41,11 @@ my @groups             = (     #
 		template => 'audioCA',
 	},
 	{
+		regex    => qr/^en[ _-]aus?[ _-]/i,
+		language => 'angielski',
+		template => 'audioAU',
+	},
+	{
 		regex    => qr/^en[ _-](?:uk|gb)[ _-]/i,
 		language => 'angielski',
 		template => 'audioUK',