A => beaubot/LICENSE +339 -0
@@ 0,0 1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
A => beaubot/bot/proxy/AddressChecker.pm +82 -0
@@ 0,0 1,82 @@
+package AddressChecker;
+
+use strict;
+use warnings;
+use Socket;
+use Log::Any;
+
+my $logger = Log::Any::get_logger;
+
+my @hostnames = (
+ {
+ 'pattern' => qr/(?:ppp|adsl)\.tpnet\.pl$/,
+ 'type' => 'dynamic',
+ 'block' => '12 hours',
+ },
+ {
+ 'pattern' => qr/^(?:dial|dynamic)-.+\.dialog\.net\.pl$/,
+ 'type' => 'dynamic',
+ },
+ {
+ 'pattern' => qr/\.dynamic\.t-mobile\.pl$/,
+ 'type' => 'dynamic',
+ },
+ {
+ 'pattern' => qr/^user-\d+-\d+-\d+-\d+\.play-internet\.pl$/,
+ 'type' => 'dynamic',
+ },
+ {
+ 'pattern' => qr/\.adsl\.inetia\.pl$/,
+ 'type' => 'dynamic',
+ 'block' => '12 hours',
+ },
+ {
+ 'pattern' => qr/\.gprs.*\.plus(?:gsm)?\.pl$/,
+ 'type' => 'dynamic',
+ },
+ {
+ 'pattern' => qr/-gprs.+\.centertel\.pl$/,
+ 'type' => 'dynamic',
+ },
+ {
+ 'pattern' => qr/dynamic\.chello\.pl$/,
+ 'type' => 'dynamic',
+ 'block' => '4 months',
+ },
+ {
+ 'pattern' => qr/dynamic/,
+ 'type' => 'dynamic',
+ 'block' => '2 months',
+ },
+);
+
+sub check {
+ my $address = shift;
+ $logger->debug("Checking $address");
+ my $result = {};
+ my $info;
+ my $hostname = gethostbyaddr( inet_aton($address), AF_INET );
+
+ if ( defined $hostname ) {
+ $result->{hostname} = $hostname;
+ $logger->debug("The address $address resolves to $hostname");
+ foreach my $entry (@hostnames) {
+ if ( $hostname =~ $entry->{pattern} ) {
+ $info = $entry;
+ last;
+ }
+ }
+ }
+
+ if ( defined $info ) {
+ $result->{type} = $info->{type};
+ $result->{block} = $info->{block};
+ }
+ else {
+ $result->{type} = 'static';
+ $result->{block} = '6 months';
+ }
+ return $result;
+}
+
+1;
A => beaubot/bot/proxy/ProxyDatabase.pm +225 -0
@@ 0,0 1,225 @@
+package ProxyDatabase;
+
+use strict;
+use warnings;
+use Log::Any;
+use DBI;
+use DateTime;
+use DateTime::Format::Strptime;
+
+my $logger = Log::Any->get_logger;
+
+my $dateFormat = new DateTime::Format::Strptime(
+ pattern => '%Y-%m-%dT%TZ',
+ time_zone => 'UTC',
+ on_error => 'croak',
+);
+
+sub new {
+ my $caller = shift;
+ my $class = ref($caller) || $caller;
+
+ my $this = {
+ 'dbh' => undef,
+ 'file' => undef,
+ @_,
+ };
+ bless $this, $class;
+
+ die "A path to a database is missing\n"
+ unless defined $this->{file};
+
+ $this->{dbh} = DBI->connect( "dbi:SQLite:dbname=$this->{file}", "", "", { RaiseError => 1, PrintError => 0, sqlite_use_immediate_transaction => 1 } );
+
+ return $this;
+}
+
+# ------------------------------------------------------------------------------
+
+sub begin {
+ my $this = shift;
+ $this->{dbh}->begin_work;
+}
+
+sub commit {
+ my $this = shift;
+ $this->{dbh}->commit;
+}
+
+# ------------------------------------------------------------------------------
+
+sub getSetting {
+ my $this = shift;
+ my $name = shift;
+
+ my $sth = $this->{dbh}->prepare_cached('SELECT value FROM settings WHERE name = ?');
+ $sth->execute($name);
+ my ($value) = $sth->fetchrow_array();
+ $sth->finish;
+ return $value;
+}
+
+sub setSetting {
+ my $this = shift;
+ my $name = shift;
+ my $value = shift;
+
+ my $sth = $this->{dbh}->prepare_cached('INSERT OR REPLACE INTO settings(name, value) VALUES(?, ?)');
+ $sth->execute( $name, $value );
+}
+
+# ------------------------------------------------------------------------------
+
+sub insertOrIgnoreProxy {
+ my $this = shift;
+ my $address = shift;
+ my $source = shift;
+
+ my $sth = $this->{dbh}->prepare_cached('INSERT OR IGNORE INTO proxies (proxy_address, proxy_added, proxy_source) VALUES (?, ?, ?)');
+ $sth->execute( $address, $dateFormat->format_datetime( DateTime->now ), $source );
+}
+
+sub fetchProxiesToCheck {
+ my $this = shift;
+ my $since = shift;
+ my $limit = shift || 50;
+
+ my $sth = $this->{dbh}->prepare_cached( "
+ SELECT proxy_id, proxy_address
+ FROM proxies
+ WHERE proxy_checked IS NULL OR proxy_checked < ?
+ ORDER BY proxy_checked, proxy_id DESC
+ LIMIT $limit
+" );
+ $sth->execute( $dateFormat->format_datetime($since) );
+
+ my @list;
+
+ while ( my $row = $sth->fetchrow_hashref ) {
+ push @list, $row;
+ }
+ return @list;
+}
+
+# ------------------------------------------------------------------------------
+
+sub insertCheckResult {
+ my $this = shift;
+ my $proxy = shift;
+ my $status = shift;
+ my $type = shift;
+ my $address = shift;
+
+ my $timestamp = $dateFormat->format_datetime( DateTime->now );
+ my $insertSth = $this->{dbh}->prepare_cached('INSERT INTO checks (check_proxy, check_timestamp, check_status, check_type, check_address) VALUES (?, ?, ?, ?, ?)');
+ $insertSth->execute( $proxy, $timestamp, $status, $type, $address );
+ my $updateSth = $this->{dbh}->prepare_cached('UPDATE proxies SET proxy_checked = ? WHERE proxy_id = ?');
+ $updateSth->execute( $timestamp, $proxy );
+}
+
+# ------------------------------------------------------------------------------
+
+sub insertOrIgnoreBlock {
+ my $this = shift;
+ my $target = shift;
+ my $start = shift;
+ my $expiry = shift;
+
+ my $sth = $this->{dbh}->prepare_cached('INSERT OR IGNORE INTO blocks (block_address, block_start, block_expiry) VALUES (?, ?, ?)');
+ $sth->execute( $target, $dateFormat->format_datetime($start), $expiry );
+}
+
+sub removeBlock {
+ my $this = shift;
+ my $blockId = shift;
+
+ my $sth = $this->{dbh}->prepare_cached('DELETE FROM blocks WHERE block_id = ?');
+ $sth->execute($blockId);
+}
+
+sub fetchBlocks {
+ my $this = shift;
+ my $blockId = shift;
+ my $limit = shift || 50;
+
+ my $sth = $this->{dbh}->prepare_cached("SELECT block_id AS id, block_address AS address, block_start AS start, block_expiry AS expiry FROM blocks ORDER BY block_id ASC LIMIT $limit");
+ $sth->execute;
+
+ my @list;
+ while ( my $row = $sth->fetchrow_hashref ) {
+ $row->{start} = $dateFormat->parse_datetime( $row->{start} );
+ push @list, $row;
+ }
+ return @list;
+}
+
+# ------------------------------------------------------------------------------
+
+my @sessionChars = ( 'A' .. 'Z', 'a' .. 'z', '0' .. '9' );
+
+sub generateSessionId {
+ my $result = '';
+
+ for ( my $i = 0 ; $i < 32 ; $i++ ) {
+ $result .= $sessionChars[ int( rand( scalar @sessionChars ) ) ];
+ }
+ return $result;
+}
+
+sub createSession {
+ my $this = shift;
+
+ my $sth = $this->{dbh}->prepare_cached('INSERT INTO sessions (session_id) VALUES (?)');
+ my $count = 0;
+ while ( $count < 100 ) {
+ my $sessionId = generateSessionId();
+ eval { #
+ $sth->execute($sessionId);
+ };
+ my $error = $@;
+ if ($error) {
+ die $error
+ unless $error =~ /column session_id is not unique/;
+ }
+ else {
+ return $sessionId;
+ }
+ $count++;
+ }
+ die "Unable to generate sessionId\n";
+}
+
+sub destroySession {
+ my $this = shift;
+ my $sessionId = shift;
+
+ my $sth = $this->{dbh}->prepare_cached('DELETE FROM sessions WHERE session_id = ?');
+ $sth->execute($sessionId);
+}
+
+sub getSessionAddress {
+ my $this = shift;
+ my $sessionId = shift;
+
+ my $sth = $this->{dbh}->prepare_cached('SELECT session_address FROM sessions WHERE session_id = ?');
+
+ $sth->execute($sessionId);
+ my ($address) = $sth->fetchrow_array;
+ $sth->finish;
+ return $address;
+}
+
+sub setSessionAddress {
+ my $this = shift;
+ my $sessionId = shift;
+ my $address = shift;
+
+ my $sth = $this->{dbh}->prepare_cached('UPDATE sessions SET session_address = ? WHERE session_id = ? AND session_address IS NULL');
+ $sth->execute( $address, $sessionId );
+
+ return $sth->rows;
+}
+
+# ------------------------------------------------------------------------------
+
+1;
A => beaubot/bot/proxy/check-ip.pl +36 -0
@@ 0,0 1,36 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use DateTime;
+use AddressChecker;
+
+my $db = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my $sth = $dbh->{dbh}->prepare_cached('SELECT * FROM checks WHERE check_status <> 0');
+$sth->execute;
+while ( my $row = $sth->fetchrow_hashref ) {
+ my $result = AddressChecker::check( $row->{check_address} );
+ if ( $logger->is_debug ) {
+ $logger->debug( "Result:\n" . Dumper( [ $row, $result ] ) );
+ }
+ if ( defined $result->{block} ) {
+ $logger->info("Placing block on $row->{check_address} with expiry: $result->{block}");
+ $dbh->insertOrIgnoreBlock( $row->{check_address}, DateTime->now(), $result->{block} );
+ }
+
+}
A => beaubot/bot/proxy/log4perl.conf +10 -0
@@ 0,0 1,10 @@
+log4perl.oneMessagePerAppender = 1
+
+log4perl.rootLogger=INFO, console
+
+log4perl.category.main = DEBUG, console
+
+log4perl.appender.console=Log::Log4perl::Appender::Screen
+log4perl.appender.console.layout=Log::Log4perl::Layout::PatternLayout
+log4perl.appender.console.layout.ConversionPattern=[%d{dd-MM-yyyy HH:mm:ss}][%P] %p %m%n
+log4perl.appender.console.utf8=1
A => beaubot/bot/proxy/proxy-block.pl +73 -0
@@ 0,0 1,73 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+
+my $db = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setProject( 'wikipedia', 'pl', 'sysop' );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my $api = $bot->getApi;
+$api->checkAccount;
+
+while ( my @list = $dbh->fetchBlocks ) {
+ foreach my $block (@list) {
+ $logger->info("Processing block request of $block->{address}");
+
+ my $blocked = 0;
+ {
+ my $iterator = $api->getIterator(
+ 'action' => 'query',
+ 'list' => 'blocks',
+ 'bkip' => $block->{address},
+ );
+
+ while ( my $entry = $iterator->next ) {
+ $blocked++;
+ $logger->info( "Active blocks of $block->{address}:\n" . Dumper($entry) )
+ if $logger->is_info;
+ }
+ }
+ {
+ my $iterator = $api->getIterator(
+ 'action' => 'query',
+ 'list' => 'globalblocks',
+ 'bgip' => $block->{address},
+ );
+
+ while ( my $entry = $iterator->next ) {
+ $blocked++;
+ $logger->info( "Active global blocks of $block->{address}:\n" . Dumper($entry) )
+ if $logger->is_info;
+ }
+ }
+ if ($blocked) {
+ $logger->info("$block->{address} is already blocked");
+ }
+ else {
+ $logger->info("Blocking $block->{address} with expiry $block->{expiry}");
+ $api->block(
+ 'anononly' => 1,
+ 'nocreate' => 1,
+ 'user' => $block->{address},
+ 'reason' => '[[WP:OP|open proxy]]',
+ 'expiry' => $block->{expiry},
+ );
+
+ }
+ $dbh->removeBlock( $block->{id} );
+ }
+}
A => beaubot/bot/proxy/proxy-check-verifier.pl +29 -0
@@ 0,0 1,29 @@
+#!/usr/bin/perl
+
+use strict;
+use utf8;
+use CGI;
+use ProxyDatabase;
+use Env;
+use FindBin qw($RealBin);
+
+binmode STDOUT, ":utf8";
+
+my $db = "$RealBin/../var/proxy.sqlite";
+my $sessionId = CGI::param('id');
+my $remoteAddr = $ENV{REMOTE_ADDR};
+
+sub error {
+ print "Content-type: text/html; charset=utf-8\n\nERROR";
+ exit(0);
+}
+
+error()
+ unless defined $sessionId and defined $remoteAddr;
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+error()
+ unless $dbh->setSessionAddress( $sessionId, $remoteAddr );
+
+print "Content-type: text/html; charset=utf-8\n\nSUCCESS";
A => beaubot/bot/proxy/proxy-check-worker.pl +298 -0
@@ 0,0 1,298 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use ProxyDatabase;
+use AddressChecker;
+use LWP::UserAgent;
+use WWW::Mechanize;
+use IO::Handle;
+use Storable qw(freeze thaw);
+use File::Spec;
+use Log::Any;
+use Log::Any::Adapter;
+use LWP::Protocol::socks;
+
+binmode STDIN;
+binmode STDOUT;
+STDOUT->autoflush(1);
+
+my $db = "$RealBin/../var/proxy.sqlite";
+my $testUrl = 'http://94.23.242.48/~beau/cgi-bin/verify.pl?id=$sessionId';
+my $agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7';
+
+my %ignoredAddresses = map { $_ => 1 } qw(94.23.242.48 178.33.53.23 178.32.200.207 127.0.0.1);
+
+# Send all logs to Log::Log4perl
+use Log::Log4perl;
+Log::Log4perl->init( File::Spec->join( $RealBin, 'log4perl.conf' ) );
+Log::Any::Adapter->set('Log4perl');
+
+my $logger = Log::Any::get_logger;
+$logger->info("Worker is being started");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+sub checkProxyLwp {
+ my $info = shift;
+ my $proxy = shift;
+
+ $logger->info("Checking proxy '$proxy' using LWP::UserAgent");
+ my $result = {
+ 'address' => undef,
+ 'status' => 0,
+ };
+ my $sessionId = $dbh->createSession();
+ eval {
+ my $ua = LWP::UserAgent->new(
+ 'agent' => $agent,
+ 'timeout' => 30,
+ );
+ $ua->proxy( [ 'http', 'https' ] => $proxy );
+
+ my $url = $testUrl;
+ $url =~ s/\$sessionId/$sessionId/g;
+
+ $ua->get($url);
+ my $address = $dbh->getSessionAddress($sessionId);
+
+ $result->{address} = $address;
+ $result->{status} = defined $address ? 1 : 0;
+ };
+ my $error = $@;
+ $dbh->destroySession($sessionId);
+ die $error
+ if $error;
+
+ return $result;
+}
+
+sub checkHttpProxy {
+ my $info = shift;
+
+ # Do not check proxies without a specified port
+ return { status => 0 }
+ unless defined $info->{port};
+
+ my $result = checkProxyLwp( $info, "http://$info->{address}:$info->{port}/" );
+ $result->{type} = 'http';
+ $result->{accepted} = '1';
+ return $result;
+}
+
+sub checkSocks4Proxy {
+ my $info = shift;
+
+ # Do not check proxies without a specified port
+ return { status => 0 }
+ unless defined $info->{port};
+
+ my $result = checkProxyLwp( $info, "socks4://$info->{address}:$info->{port}/" );
+ $result->{type} = 'socks4';
+ $result->{accepted} = '1';
+ return $result;
+}
+
+sub checkSocks5Proxy {
+ my $info = shift;
+
+ # Do not check proxies without a specified port
+ return { status => 0 }
+ unless defined $info->{port};
+
+ my $result = checkProxyLwp( $info, "socks://$info->{address}:$info->{port}/" );
+ $result->{type} = 'socks5';
+ $result->{accepted} = '1';
+ return $result;
+}
+
+sub checkWebProxy {
+ my $info = shift;
+
+ # Do not check proxies with a specified port
+ return { status => 0 }
+ if defined $info->{port};
+
+ # Do not check non-web proxies
+ return { status => 0 }
+ unless $info->{address} =~ m{^https?://}i;
+
+ $logger->info("Checking proxy '$info->{address}' using WWW::Mechanize");
+ my $result = {
+ 'address' => undef,
+ 'status' => 0,
+ };
+ my $sessionId = $dbh->createSession();
+ eval {
+ my $mech = new WWW::Mechanize(
+ 'agent' => $agent,
+ 'timeout' => 30,
+ 'autocheck' => 0,
+ 'quiet' => 1,
+ );
+
+ my $response = $mech->get( $info->{address} );
+ return unless $response->is_success;
+
+ my $form;
+ my $field;
+
+ foreach my $f ( 'u', 'q', 'url', 'rxproxyuri' ) {
+ $form = $mech->form_with_fields($f);
+ $field = $f;
+ next if $form and $form->method eq 'SEND';
+ last if $form;
+ }
+
+ unless ( defined $form ) {
+ $logger->debug("Unable to find form");
+ return;
+ }
+ my $url = $testUrl;
+ $url =~ s/\$sessionId/$sessionId/g;
+ $mech->field( $field, $url );
+ $mech->submit;
+
+ my $address = $dbh->getSessionAddress($sessionId);
+
+ $result->{address} = $address;
+ $result->{status} = defined $address ? 1 : 0;
+ $result->{type} = 'web proxy';
+ };
+ my $error = $@;
+ $dbh->destroySession($sessionId);
+ die $error
+ if $error;
+
+ $result->{accepted} = '1';
+ return $result;
+}
+
+my @methods = ( #
+ \&checkHttpProxy,
+ \&checkSocks4Proxy,
+ \&checkSocks5Proxy,
+ \&checkWebProxy,
+);
+
+sub checkProxy {
+ my $info = shift;
+
+ foreach my $method (@methods) {
+
+ # TODO: Check till no new addresses are discovered...
+ my $result;
+ for ( my $i = 0 ; $i < 3 ; $i++ ) {
+ $result = $method->($info);
+ last if $result->{status};
+ sleep(1)
+ if $result->{accepted};
+ }
+ return $result
+ if $result->{status};
+ }
+ return { status => 0 };
+}
+
+# ------------------------------------------------------------------------------
+
+sub sendMessage {
+ my $message = shift;
+
+ if ( $logger->is_debug ) {
+ $logger->debug( "Sending a message:\n" . Dumper($message) );
+ }
+
+ my $data = freeze($message);
+ STDOUT->print( bytes::length($data) . "\n" . $data )
+ or die "Unable to send a message: $!\n";
+}
+
+sub readMessage {
+ my $size = readline(STDIN);
+
+ die "Unable to read a message header: $!\n"
+ unless defined $size;
+
+ chomp($size);
+
+ my $buffer;
+ my $len = read( STDIN, $buffer, $size );
+
+ if ( $len < $size ) {
+ die "Received a corrupted message: expected $len bytes instead of $size bytes\n";
+ }
+ my $message = thaw $buffer;
+ if ( $logger->is_debug ) {
+ $logger->debug( "Received a message:\n" . Dumper($message) );
+ }
+ return $message;
+}
+
+# ------------------------------------------------------------------------------
+
+eval {
+ $logger->info("Worker is entering main loop");
+ while (1) {
+ $0 = 'proxy-check-worker: idle';
+ last
+ if eof(STDIN);
+ my $message = readMessage();
+
+ die "Invalid message\n"
+ unless ref($message) eq 'HASH' and defined $message->{command};
+
+ if ( $message->{command} eq 'CHECK' ) {
+ $0 = "proxy-check-worker: checking $message->{address}";
+
+ if ( $message->{address} =~ /^(.+?):(\d+)$/ ) {
+ $message->{address} = $1;
+ $message->{port} = $2;
+ }
+
+ my $result = checkProxy($message);
+
+ if ( $result->{status} ) {
+
+ my $reject = undef;
+
+ if ( $result->{address} !~ /^\d+\.\d+\.\d+\.\d+$/ ) {
+ $reject = "not a valid IPv4 address '$result->{address}'";
+ }
+ elsif ( $ignoredAddresses{ $result->{address} } ) {
+ $reject = "ignored address '$result->{address}'";
+ }
+
+ if ( defined $reject ) {
+ $logger->info("Ignoring proxy $message->{proxy}: $reject");
+ $result = { status => 0 };
+ }
+ else {
+
+ # Check type of address (isp, country, service)
+ my $addressCheck = AddressChecker::check( $result->{address} );
+ $result->{block} = $addressCheck->{block};
+ $result->{hostname} = $addressCheck->{hostname};
+ $result->{addressType} = $addressCheck->{type};
+ }
+ }
+
+ $result->{proxy} = $message->{proxy};
+ $result->{command} = 'CHECKRESULT';
+ sendMessage($result);
+ }
+ elsif ( $message->{command} eq 'QUIT' ) {
+ last;
+ }
+ else {
+ die "Unsupported command '$message->{command}'\n";
+ }
+ }
+ $logger->info("Worker is being terminated");
+};
+if ($@) {
+ $@ =~ s/\s+$//;
+ $logger->fatal("Worker is being terminated: $@");
+ exit(1);
+}
A => beaubot/bot/proxy/proxy-check.pl +252 -0
@@ 0,0 1,252 @@
+#!/usr/bin/perl -w
+
+use strict;
+use LWP::UserAgent;
+use Data::Dumper;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Storable qw(freeze thaw);
+use Bot4;
+use AnyEvent;
+use IPC::Open2;
+use ProxyDatabase;
+use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
+
+my $maxWorkers = 10;
+
+my $db = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my %workers; # pid => $worker
+my %jobsTaken; # proxy_id => $worker
+my @jobsQueue; # list of { proxy_id, proxy_address }
+my $lastJobsFetched = clock_gettime(CLOCK_MONOTONIC) - 60; # timestamp, when last jobs were fetched
+
+sub sendMessage {
+ my $child = shift;
+ my $message = shift;
+
+ if ( $logger->is_debug ) {
+ $logger->debug( "Sending a message to a worker $child->{pid}:\n" . Dumper($message) );
+ }
+
+ my $data = freeze($message);
+ $child->{in}->print( bytes::length($data) . "\n" . $data )
+ or die "Unable to send a message to a worker $child->{pid}: $!\n";
+}
+
+sub readMessage {
+ my $child = shift;
+
+ my $size = readline( $child->{out} );
+
+ die "Unable to read from a worker $child->{pid}: $!\n"
+ unless defined $size;
+
+ chomp($size);
+
+ my $buffer;
+ my $len = read( $child->{out}, $buffer, $size );
+
+ if ( $len < $size ) {
+ die "Received a corrupted message from a worker $child->{pid}: expected $len bytes instead of $size bytes\n";
+ }
+ my $message = thaw $buffer;
+ if ( $logger->is_debug ) {
+ $logger->debug( "Received a message from a worker $child->{pid}:\n" . Dumper($message) );
+ }
+ return $message;
+}
+
+sub killWorker {
+ my $child = shift;
+ $logger->info("Killing worker $child->{pid}");
+ unassignJob($child);
+ undef $child->{childWatcher};
+ undef $child->{ioWatcher};
+ kill 'KILL', $child->{pid}
+ unless defined $child->{dead};
+ close( $child->{in} );
+ close( $child->{out} );
+ delete $workers{ $child->{pid} };
+}
+
+sub spawnWorker {
+ my $child = {
+ 'pid' => undef,
+ 'in' => undef,
+ 'out' => undef,
+ };
+ $child->{pid} = open2( $child->{out}, $child->{in}, 'perl', 'proxy-check-worker.pl' );
+ binmode $child->{out}
+ if $child->{out};
+ binmode $child->{in}
+ if $child->{in};
+
+ $child->{childWatcher} = AnyEvent->child(
+ pid => $child->{pid},
+ cb => sub {
+ my ( $pid, $status ) = @_;
+ $logger->info("Worker $child->{pid} has exited with status: $status");
+ $child->{dead} = $status;
+ killWorker($child);
+ },
+ );
+
+ $child->{ioWatcher} = AnyEvent->io(
+ 'fh' => $child->{out},
+ 'poll' => 'r',
+ 'cb' => sub {
+ eval {
+ my $message = readMessage($child);
+ processMessage( $child, $message );
+ };
+ if ($@) {
+ $@ =~ s/\s+$//;
+ $logger->error($@);
+ killWorker($child);
+ return;
+ }
+ }
+ );
+
+ $workers{ $child->{pid} } = $child;
+ return $child;
+}
+
+sub fetchJobs {
+ my $time = clock_gettime(CLOCK_MONOTONIC);
+ return if $time - $lastJobsFetched < 59;
+ $logger->debug("Fetching proxies to check");
+ my $dt = DateTime->now;
+ $dt -= DateTime::Duration->new( 'months' => 1 );
+ @jobsQueue = grep { !exists $jobsTaken{ $_->{proxy_id} } } $dbh->fetchProxiesToCheck( $dt, 200 );
+ $lastJobsFetched = $time;
+}
+
+sub sendJob {
+ my $worker = shift;
+
+ fetchJobs unless @jobsQueue;
+ my $job = shift @jobsQueue;
+ return unless $job;
+
+ eval {
+ sendMessage(
+ $worker,
+ {
+ 'command' => 'CHECK',
+ 'proxy' => $job->{proxy_id},
+ 'address' => $job->{proxy_address},
+ }
+ );
+
+ $worker->{job} = $job;
+ $worker->{jobStart} = clock_gettime(CLOCK_MONOTONIC);
+
+ $jobsTaken{ $job->{proxy_id} } = $worker;
+ };
+ my $error = $@;
+ if ($error) {
+ unshift @jobsQueue, $job;
+ $logger->error("Unable to send a job to a worker $worker->{pid}: $error");
+ killWorker($worker);
+ }
+}
+
+sub unassignJob {
+ my $worker = shift;
+
+ my $job = $worker->{job};
+ return unless defined $job;
+
+ $worker->{job} = undef;
+ $worker->{jobTime} = undef;
+ delete $jobsTaken{ $job->{proxy_id} };
+}
+
+sub processMessage {
+ my $worker = shift;
+ my $message = shift;
+
+ die "Invalid message\n"
+ unless ref($message) eq 'HASH' and defined $message->{command};
+
+ if ( $message->{command} eq 'CHECKRESULT' ) {
+ if ( $message->{proxy} != $worker->{job}->{proxy_id} ) {
+ die "Invalid job id, got: $message->{proxy}, expected: $worker->{job}->{proxy_id}\n";
+ }
+ unassignJob($worker);
+
+ $dbh->insertCheckResult( @{$message}{ 'proxy', 'status', 'type', 'address' } );
+
+ # If the address qualifies for blocking append block request to the queue
+ if ( defined $message->{block} ) {
+ $dbh->insertOrIgnoreBlock( $message->{address}, DateTime->now(), $message->{block} );
+ }
+ sendJob($worker);
+ }
+ else {
+ die "Unsupported command '$message->{command}'\n";
+ }
+}
+
+sub min ($$) {
+ return $_[ $_[0] > $_[1] ];
+}
+
+my $quit = AnyEvent->condvar;
+
+my $jobs = AnyEvent->timer(
+ after => 0,
+ interval => 60,
+ cb => sub {
+ unless (@jobsQueue) {
+
+ # Empty job queue, fetch more
+ fetchJobs;
+ }
+
+ # Spawn workers if needed
+ my $currentWorkers = scalar keys %workers;
+ my $workersToSpawn = min( $maxWorkers - $currentWorkers, scalar @jobsQueue );
+
+ if ( $workersToSpawn > 0 ) {
+ $logger->info("Spawning workers: $workersToSpawn");
+ for ( my $i = 0 ; $i < $workersToSpawn ; $i++ ) {
+ spawnWorker;
+ }
+ }
+ my $time = clock_gettime(CLOCK_MONOTONIC);
+ foreach my $worker ( values %workers ) {
+ if ( $worker->{job} ) {
+
+ # Check job time
+ if ( $time - $worker->{jobStart} > 3600 ) {
+ killWorker($worker);
+ }
+ }
+ else {
+
+ # Assign job to worker
+ sendJob($worker);
+ }
+ }
+ },
+);
+
+eval { #
+ $quit->recv;
+};
+if ($@) {
+ $@ =~ s/\s+$//;
+ $logger->fatal($@);
+}
A => beaubot/bot/proxy/proxy-import-centrump2p.pl +57 -0
@@ 0,0 1,57 @@
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+$mech->get('http://prx.centrump2p.com/');
+
+sub extractLinks {
+ my @links = $mech->content =~ /<td class="a1"><a href="([^"]+)/g;
+ die "Unable to find links\n"
+ unless @links;
+
+ $dbh->begin;
+ foreach my $link (@links) {
+ $link = decode_entities($link);
+ $logger->info("Enqueueing $link for scan");
+ $dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+
+ }
+ $dbh->commit;
+}
+
+die "Unable to find pager\n"
+ unless $mech->content =~ m{<div id="pager">.+<a href="/(\d+)">\d+</a>\s*</div>}s;
+
+my $count = $1;
+
+die "Number of pages is too high ($count)\n"
+ if $count > 100;
+
+extractLinks;
+
+for ( my $i = 2 ; $i < $count ; $i++ ) {
+ sleep( int( rand(10) ) + 5 );
+ $logger->info("Checking page $i of $count");
+ $mech->get("http://prx.centrump2p.com/$i");
+ extractLinks;
+}
A => beaubot/bot/proxy/proxy-import-hinky.pl +57 -0
@@ 0,0 1,57 @@
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+$mech->get('http://mrhinkydink.com/proxies.htm');
+
+sub extractLinks {
+ my @links = $mech->content =~ m{<td>(\d+\.\d+\.\d+\.\d+)(?:<sup>\*</sup>)?</td>\s*<td>(\d+)</td>}g;
+ die "Unable to find links\n"
+ unless @links;
+
+ $dbh->begin;
+ while (@links) {
+ my ( $address, $port ) = splice @links, 0, 2;
+ my $link = "$address:$port";
+ $logger->info("Enqueueing $link for scan");
+ $dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+ }
+ $dbh->commit;
+}
+
+die "Unable to find pager\n"
+ unless $mech->content =~ m{<a href="proxies(\d+)\.htm">\[\d+\]</a> \s*</td>}s;
+
+my $count = $1;
+
+die "Number of pages is too high ($count)\n"
+ if $count > 100;
+
+extractLinks;
+
+for ( my $i = 2 ; $i < $count ; $i++ ) {
+ sleep( int( rand(10) ) + 5 );
+ $logger->info("Checking page $i of $count");
+ $mech->get("http://mrhinkydink.com/proxies$i.htm");
+ extractLinks;
+}
A => beaubot/bot/proxy/proxy-import-procseebot.pl +88 -0
@@ 0,0 1,88 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Data::Dumper;
+use DateTime;
+use DateTime::Format::Strptime;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+
+my $db = "$RealBin/../var/proxy.sqlite";
+my $user = 'ProcseeBot';
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setProject( 'wikipedia', 'en' );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $dateFormat = new DateTime::Format::Strptime(
+ pattern => '%Y-%m-%dT%TZ',
+ time_zone => 'UTC',
+ on_error => 'croak',
+);
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+
+my $lastTimestamp = $dbh->getSetting('procseebot.last.timestamp');
+
+my $api = $bot->getApi;
+my %args = ( #
+ 'action' => 'query',
+ 'list' => 'logevents',
+ 'leaction' => 'block/block',
+ 'leuser' => 'ProcseeBot',
+ 'ledir' => 'newer',
+ 'lelimit' => 'max',
+);
+$args{lestart} = $lastTimestamp
+ if defined $lastTimestamp;
+
+my $iterator = $api->getIterator(%args);
+my $count = 0;
+
+$dbh->begin;
+while ( my $entry = $iterator->next ) {
+ die "Invalid user '$entry->{user}'\n"
+ unless $entry->{user} eq $user;
+
+ die "Invalid title '$entry->{title}'\n"
+ unless $entry->{title} =~ m/^User:(\d+\.\d+\.\d+\.\d+)$/;
+
+ my $exitAddress = $1;
+
+ die "Invalid comment '$entry->{comment}'\n"
+ unless $entry->{comment} =~ m/^\{\{blocked proxy\}\} <!-- (?:(\S+):)?(\d+) -->$/;
+
+ my $entryAddress = defined $1 ? $1 : $exitAddress;
+ my $port = $2;
+
+ $lastTimestamp = $entry->{timestamp};
+
+ my $now = DateTime->now;
+ my $expiry = $dateFormat->parse_datetime( $entry->{params}->{expiry} );
+
+ if ( $expiry < $now ) {
+ $logger->info("Ignoring expired block $exitAddress ($entryAddress:$port)");
+ }
+ else {
+ $logger->info("Enqueueing $entryAddress:$port for scan");
+ $dbh->insertOrIgnoreProxy( "$entryAddress:$port", $user );
+ }
+
+ $lastTimestamp = $entry->{timestamp};
+
+ $count++;
+ if ( $count % 500 == 0 ) {
+ $dbh->setSetting( 'procseebot.last.timestamp', $lastTimestamp );
+ $dbh->commit;
+ $dbh->begin;
+ }
+}
+$dbh->setSetting( 'procseebot.last.timestamp', $lastTimestamp );
+$dbh->commit;
A => beaubot/bot/proxy/proxy-import-proxyorg.pl +46 -0
@@ 0,0 1,46 @@
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+$mech->get('http://proxy.org/proxy.js');
+
+my $content = $mech->content;
+
+my @links = $content =~ /value="([^"' ]+)"/g;
+@links = grep { $_ ne 'random' } @links;
+die "Unable to find links\n"
+ unless @links;
+
+$dbh->begin;
+foreach my $link (@links) {
+ $link = decode_entities($link);
+ $link = "http://$link"
+ unless $link =~ '://';
+
+ next if $link eq 'http://';
+
+ $logger->info("Enqueueing $link for scan");
+ $dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+
+}
+$dbh->commit;
A => beaubot/bot/proxy/proxy-import-xeronet.pl +74 -0
@@ 0,0 1,74 @@
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use lib "$RealBin/..";
+use Bot4;
+use ProxyDatabase;
+use WWW::Mechanize;
+use HTML::Entities qw(decode_entities);
+
+my $db = "$RealBin/../var/proxy.sqlite";
+my $bot = new Bot4;
+
+$bot->single(1);
+$bot->addOption( "database=s", \$db, "Changes path to a database" );
+$bot->setup( 'root' => "$RealBin/.." );
+
+my $logger = Log::Any::get_logger;
+$logger->info("Start");
+
+my $dbh = ProxyDatabase->new( 'file' => $db );
+my $mech = new WWW::Mechanize;
+$mech->agent_alias('Windows Mozilla');
+
+#$mech->proxy( [ 'http', 'https' ] => '' );
+
+my %links;
+my @pages = ( #
+ 'http://www.xeronet-proxy-list.net/type/other_proxy',
+ 'http://www.xeronet-proxy-list.net/type/glype_proxy',
+ 'http://www.xeronet-proxy-list.net',
+ #'http://www.proxywebsitelist.in',
+ #'http://freeproxylistings.com',
+);
+
+foreach my $page (@pages) {
+ eval {
+ $logger->info("Checking $page");
+ $mech->get($page);
+
+ my $content = $mech->content;
+ my $count = 0;
+
+ foreach my $link ( $content =~ m{<A href="[^">]+out\.php?[^">]+"[^>]*>\s*(http://[^>]+?)\s*</A>}isg ) {
+ $links{$link}++;
+ $count++;
+ }
+ if ($count) {
+ $logger->info("Fetched $count links");
+ }
+ else {
+ $logger->error("No links found on $page");
+ }
+ };
+ if ($@) {
+ $@ =~ s/\s+$//;
+ $logger->error("Unable to fetch page $page: $@");
+ }
+
+}
+
+die "Unable to find links\n"
+ unless keys %links;
+
+$dbh->begin;
+foreach my $link ( keys %links ) {
+ $link = decode_entities($link);
+ $link = "http://$link"
+ unless $link =~ '://';
+
+ $logger->info("Enqueueing $link for scan");
+ $dbh->insertOrIgnoreProxy( $link, $mech->uri->as_string );
+}
+$dbh->commit;
A => beaubot/bot/proxy/schema.sql +45 -0
@@ 0,0 1,45 @@
+CREATE TABLE IF NOT EXISTS proxies (
+ proxy_id INTEGER PRIMARY KEY,
+ /* An address */
+ proxy_address TEXT UNIQUE,
+ /* A timestamp of a insert */
+ proxy_added TIMESTAMP,
+ /* A timestamp of the last check */
+ proxy_checked TIMESTAMP,
+ /* A reference to a source */
+ proxy_source TEXT
+);
+
+CREATE INDEX IF NOT EXISTS proxies_checked_idx ON proxies (proxy_checked);
+
+CREATE TABLE IF NOT EXISTS checks (
+ check_proxy INTEGER REFERENCES proxies (proxy_id),
+ /* A timestamp of the attempt */
+ check_timestamp TIMESTAMP,
+ /* A check result: 0 - not working, 1 - working */
+ check_status INTEGER,
+ /* -1 - Unknown, 0 - HTTP, 1 - SOCKS4, 2 - SOCKS5, 3 - Web Proxy */
+ check_type INTEGER,
+ /* An address of the proxy */
+ check_address TEXT,
+ PRIMARY KEY(check_proxy, check_timestamp)
+);
+
+CREATE TABLE IF NOT EXISTS sessions (
+ /* A session identifier of a check */
+ session_id TEXT PRIMARY KEY,
+ /* An address of the checked proxy */
+ session_address TEXT
+);
+
+CREATE TABLE IF NOT EXISTS blocks (
+ block_id INTEGER PRIMARY KEY,
+ block_address TEXT UNIQUE,
+ block_start TIMESTAMP,
+ block_expiry TIMESTAMP
+);
+
+CREATE TABLE IF NOT EXISTS settings (
+ name TEXT PRIMARY KEY,
+ value TEXT
+);
M beaubot/bot/wikt-pronun.pl +8 -3
@@ 25,13 25,13 @@ my $api = $bot->getApi;
my $commonsApi = $bot->getApi( "wikimedia", "commons" );
$commonsApi->checkAccount;
-my @audioTemplates = ( 'audio', 'audioUS', 'audioUK', 'audioCA', 'audioAT' );
-my $reAudioTemplate = qr/audio(?:CA|US|UK|AT)?/i;
+my @audioTemplates = ( 'audio', 'audioUS', 'audioUK', 'audioCA', 'audioAT', 'audioAU' );
+my $reAudioTemplate = qr/audio(?:CA|US|UK|AT|AU)?/i;
my $maxRecordingLength = 10;
my @groups = ( #
{
- regex => qr/^en[ _-](?:us|boston[ _-]us|nz|aus?|br|sa)[ _-]/i,
+ regex => qr/^en[ _-](?:us|boston[ _-]us)[ _-]/i,
language => 'angielski',
template => 'audioUS',
},
@@ 41,6 41,11 @@ my @groups = ( #
template => 'audioCA',
},
{
+ regex => qr/^en[ _-]aus?[ _-]/i,
+ language => 'angielski',
+ template => 'audioAU',
+ },
+ {
regex => qr/^en[ _-](?:uk|gb)[ _-]/i,
language => 'angielski',
template => 'audioUK',