Artykuły na każdy temat
[PHP] Spam killer czyli efektywna klasa i metoda do walki ze spamem
<?php
// Entry point of the spam killer, meant to be run from the command line (CRON).
// Usage: php spam_killer.php [remove_spam]
//   remove_spam - connect to the mailbox and move detected spam away
//   (no argument) - refresh the locally stored list of public suffixes
error_reporting(0);
// spam_killer.class.php and config.php terminate with "Restricted access"
// unless this marker is defined, so it has to exist before they are loaded
define('BLOCK_INCLUDE', true);
// Load the necessary things
require './spam_killer.class.php';
// The argument is optional, do not read $argv[1] when it was not passed
$command = isset($argv[1]) === true ? $argv[1] : null;
$remove_spam = $command === 'remove_spam';
// Hire hitman (the IMAP connection is only established for "remove_spam")
try
{
    $spam_killer = new spam_killer($remove_spam);
}
catch(Exception $e)
{
    exit($e -> getMessage());
}
// Perform necessary commands
if($remove_spam === true)
{
    // The time zone is set before remove_spam() formats dates with date()
    date_default_timezone_set('Europe/Warsaw');
    $spam_killer -> remove_spam();
}
else
{
    $spam_killer -> update_tlds_list();
}
?>
config.php wygląda tak:
<?php
// Refuse to run when this file is requested directly instead of being included
if(!defined('BLOCK_INCLUDE'))
{
    exit('Restricted access');
}
// Host name of the IMAP server, used as "{server:port}INBOX" in imap_open()
$config['server'] = '';
// Port of the IMAP server
$config['port'] = 143;
// Credentials passed to imap_open()
$config['user'] = '';
$config['password'] = '';
?>
natomiast spam_killer.class.php wygląda tak:
<?php
/**
* @package Spam_killer
* @subpackage Core
* @version: 1.00.00a
* @author CapaciousCore
* @copyright Copyright (C) 2015 CapaciousCore
* @link http://www.capaciouscore.pl/
* @license http://www.capaciouscore.pl/spam-killer-license/
*/
// Stop immediately when the including script did not define the BLOCK_INCLUDE marker
defined('BLOCK_INCLUDE') or exit('Restricted access');
/**
 * Scans the INBOX of an IMAP account and moves messages recognised as spam
 * to the "INBOX.spam" mailbox.
 *
 * A message is treated as spam when its sender address or sender host is on a
 * ban list, when it links to a banned domain, or when its body matches one of
 * the banned expressions. The ban lists are JSON files in ./filters/, the
 * public suffix list used to extract domains is cached in ./tlds_list.json and
 * the progress between runs is kept in ./meta_data.json. All paths are
 * relative to the current working directory.
 */
class spam_killer
{
    /** @var mixed IMAP connection returned by imap_open(); null when no connection was requested */
    private $handle;
    /** @var array|null Ban lists: 0 => e-mail addresses, 1 => hosts, 2 => link domains, 3 => regex alternation string or null */
    private $banned_addresses;
    /** @var array|null Public suffixes grouped by the number of dots they contain */
    private $tlds_list;
    /** @var object|null Decoded meta_data.json (last_execution_time, last_known_uid) */
    private $meta_data;

    /**
     * @param bool $establish_connection When exactly true, config.php is loaded and the INBOX is opened
     * @throws Exception With the message of imap_last_error() when the connection cannot be opened
     */
    public function __construct($establish_connection)
    {
        if($establish_connection === true)
        {
            // Load the necessary things
            require './config.php';
            // Create connection. The flags are an explicit 0: the previous expression
            // "OP_READONLY & OP_HALFOPEN" is a bitwise AND of two different flags and
            // evaluates to 0 as well. Setting either flag for real would break this class,
            // a read-only stream cannot move messages and a half-open one has no mailbox
            $this -> handle = imap_open('{'.$config['server'].':'.$config['port'].'}INBOX', $config['user'], $config['password'], 0);
            // Remove unnecessary information
            unset($config);
            if($this -> is_connected() === false)
            {
                throw new Exception((string)imap_last_error());
            }
        }
    }

    /**
     * Closes the connection if one was opened.
     */
    public function __destruct()
    {
        if($this -> is_connected() === true)
        {
            imap_close($this -> handle);
        }
    }

    /**
     * Checks messages that arrived since the previous run and moves spam to "INBOX.spam".
     *
     * Only UIDs greater than the stored last_known_uid are inspected. The meta data is
     * written back even when no message was found.
     *
     * @return bool True when meta_data.json was updated, false when it could not be read, decoded or written
     */
    public function remove_spam()
    {
        $raw = file_get_contents('./meta_data.json');
        $this -> meta_data = $raw === false ? null : json_decode($raw);
        if(is_object($this -> meta_data) === false)
        {
            return false;
        }
        // Missing fields behave like a first run
        $last_execution_time = isset($this -> meta_data -> last_execution_time) === true ? (int)$this -> meta_data -> last_execution_time : 0;
        $last_known_uid = isset($this -> meta_data -> last_known_uid) === true ? (int)$this -> meta_data -> last_known_uid : 0;
        // SINCE works with whole days, so search from the day of the previous run
        $since = date('d-M-Y', $last_execution_time);
        $this -> meta_data -> last_execution_time = time();
        $this -> meta_data -> last_known_uid = $last_known_uid;
        $uids = imap_search($this -> handle, 'SINCE '.$since, SE_UID); // PHP Team did not implement "YOUNGER" of course
        if(is_array($uids) === true)
        {
            $this -> load_tlds_list();
            $this -> load_banned_addresses();
            // Only "unwatched"
            $uids = array_filter($uids, function($uid) use ($last_known_uid) { return $uid > $last_known_uid; });
            if(empty($uids) === false)
            {
                $this -> meta_data -> last_known_uid = max($uids);
                $overview = imap_fetch_overview($this -> handle, implode(',', $uids), FT_UID);
                if(is_array($overview) === true)
                {
                    foreach(array_reverse($overview) as $result)
                    {
                        if($this -> is_spam($result -> uid) === true)
                        {
                            // report() is an empty hook and is intentionally not called here.
                            // Per the PHP manual imap_mail_move() only flags the original message
                            // as deleted; it stays in INBOX until imap_expunge() is called or the
                            // connection is closed with CL_EXPUNGE, which this class does not do
                            imap_mail_move($this -> handle, (string)$result -> uid, 'INBOX.spam', CP_UID);
                        }
                    }
                }
            }
        }
        return file_put_contents('./meta_data.json', json_encode($this -> meta_data), LOCK_EX) !== false;
    }

    /**
     * Downloads the Public Suffix List and stores it in ./tlds_list.json grouped by dot count.
     *
     * Wildcard rules ("*.ck") are stored without the "*." prefix and exception rules
     * ("!www.ck") are stored as the part after the first dot.
     *
     * @return bool True when the list was written, false when the download failed, no rule was found or the write failed
     */
    public function update_tlds_list()
    {
        $data = file('https://publicsuffix.org/list/public_suffix_list.dat', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if(is_array($data) === false)
        {
            return false;
        }
        // Used as a set, keys are the unique suffixes
        $suffixes = array();
        foreach($data as $line)
        {
            $line = trim($line);
            // Comments don'ts (every line containing a slash is skipped)
            if($line === '' || strpos($line, '/') !== false)
            {
                continue;
            }
            if(strpos($line, '*') !== false)
            {
                $line = ltrim($line, '*.');
            }
            else if(strpos($line, '!') !== false)
            {
                // end() needs a real variable, it takes its argument by reference
                $parts = explode('.', $line, 2);
                $line = end($parts);
            }
            $suffixes[$line] = true;
        }
        if(empty($suffixes) === true)
        {
            // Do not overwrite a working list with an empty one
            return false;
        }
        // Formatted list: number of dots => suffixes
        $grouped = array();
        foreach(array_keys($suffixes) as $suffix)
        {
            $grouped[substr_count($suffix, '.')][] = (string)$suffix;
        }
        ksort($grouped);
        return file_put_contents('./tlds_list.json', json_encode($grouped), LOCK_EX) !== false;
    }

    /**
     * Tells whether an IMAP connection is available.
     * imap_open() returns a resource before PHP 8.1 and an object since, false on failure.
     *
     * @return bool
     */
    private function is_connected()
    {
        return is_resource($this -> handle) === true || is_object($this -> handle) === true;
    }

    /**
     * Decides whether a single message is spam.
     *
     * The sender is checked first; the body is only downloaded when the sender is not banned.
     *
     * @param int $uid UID of the message
     * @return bool
     */
    private function is_spam($uid)
    {
        $structure = imap_fetchstructure($this -> handle, $uid, FT_UID);
        $raw_headers = imap_fetchheader($this -> handle, $uid, FT_UID);
        $headers = is_string($raw_headers) === true ? imap_rfc822_parse_headers($raw_headers) : null;
        // Messages without a usable "From" header fall through to the content checks
        if(isset($headers -> from[0] -> host) === true)
        {
            $sender = $headers -> from[0];
            $mailbox = isset($sender -> mailbox) === true ? $sender -> mailbox : '';
            if($this -> is_banned($mailbox.'@'.$sender -> host, 0) === true || $this -> is_banned($sender -> host, 1) === true)
            {
                return true;
            }
        }
        $message = $this -> get_message($uid, $structure);
        if($message === false)
        {
            // No text body, nothing more to check
            return false;
        }
        return $this -> is_banned($message, 2) === true || $this -> is_banned($message, 3) === true;
    }

    /**
     * Loads the cached public suffix list from ./tlds_list.json.
     *
     * The JSON is decoded to arrays because get_domain() indexes the list by dot count;
     * depending on key order json_encode() may have stored it as a JSON object.
     *
     * @return bool True when a list was loaded
     */
    private function load_tlds_list()
    {
        $raw = file_get_contents('./tlds_list.json');
        $decoded = $raw === false ? null : json_decode($raw, true);
        $this -> tlds_list = is_array($decoded) === true ? $decoded : null;
        return $this -> tlds_list !== null;
    }

    /**
     * Loads the four ban lists from ./filters/banned_*.json.
     *
     * A missing or invalid file results in an empty list. The expressions are joined into
     * one regex alternation; with no expressions the entry is null, because an empty
     * pattern would match every message.
     */
    private function load_banned_addresses()
    {
        // @todo In the future add banned headers
        $lists = array('email_addresses', 'email_hosts', 'links', 'expressions');
        foreach($lists as $index => $name)
        {
            $raw = file_get_contents('./filters/banned_'.$name.'.json');
            $decoded = $raw === false ? null : json_decode($raw, true);
            $this -> banned_addresses[$index] = is_array($decoded) === true ? $decoded : array();
        }
        $expressions = array_filter($this -> banned_addresses[3], function($expression) { return is_string($expression) === true && $expression !== ''; });
        $this -> banned_addresses[3] = empty($expressions) === true ? null : implode('|', $expressions);
    }

    /**
     * Checks a value against one of the ban lists.
     *
     * @param string|object $data E-mail address (mode 0), host (mode 1) or a message object from get_message() (modes 2 and 3)
     * @param int $mode 0 = address list, 1 = host list, 2 = domains of links in the body, 3 = banned expressions
     * @return bool True when the value is banned
     */
    private function is_banned($data, $mode)
    {
        // Service blocked e-mail addresses and hosts
        if($mode < 2)
        {
            return isset($this -> banned_addresses[$mode]) === true && is_array($this -> banned_addresses[$mode]) === true && in_array($data, $this -> banned_addresses[$mode], true) === true;
        }
        if(is_object($data) === false)
        {
            return false;
        }
        // Support blocked links (domains)
        if($mode === 2)
        {
            if(empty($this -> banned_addresses[2]) === true)
            {
                return false;
            }
            if($data -> is_html === true)
            {
                preg_match_all('/<a.*href=\"([^\"]*)\".*>(?:.*)<\/a>/isU', $data -> content, $links);
                $links = isset($links[1]) === true ? $links[1] : array();
            }
            else
            {
                preg_match_all('/(?:http|https)\:\/\/[^\s]*?/iU', $data -> content, $links);
                $links = isset($links[0]) === true ? $links[0] : array();
            }
            foreach($links as $link)
            {
                $domain = $this -> get_domain($link);
                if($domain !== false && in_array($domain, $this -> banned_addresses[2], true) === true)
                {
                    return true;
                }
            }
            return false;
        }
        // Support blocked content
        if($mode === 3 && isset($this -> banned_addresses[3]) === true)
        {
            return preg_match('#'.$this -> banned_addresses[3].'#i', $data -> content) === 1;
        }
        return false;
    }

    /**
     * Fetches the text of a message, preferring the HTML part over the plain text part.
     *
     * @param int $uid UID of the message
     * @param object|false $structure Result of imap_fetchstructure() for the message
     * @return object|false Object with "is_html" (bool) and "content" (string), false when no text part was found
     */
    private function get_message($uid, $structure)
    {
        if(is_object($structure) === false)
        {
            return false;
        }
        $is_html = true;
        $message = $this -> get_part($uid, 'TEXT/HTML', $structure);
        // A missing or blank HTML part falls back to plain text
        if($message === false || trim($message) === '')
        {
            $is_html = false;
            $message = $this -> get_part($uid, 'TEXT/PLAIN', $structure);
        }
        if($message === false)
        {
            return false;
        }
        return (object)array('is_html' => $is_html, 'content' => $message);
    }

    /**
     * Searches the structure depth-first for the first part of the given MIME type and returns it decoded.
     *
     * @param int $uid UID of the message
     * @param string $mime_type Upper case MIME type, e.g. "TEXT/HTML"
     * @param object $structure Structure of the message or of one of its parts
     * @param string|null $part_counter Section number of $structure ("2", "1.2", ...), null for the message itself
     * @return string|false Decoded content, false when there is no such part
     */
    private function get_part($uid, $mime_type, $structure, $part_counter = null)
    {
        if($mime_type === $this -> get_mime_type($structure))
        {
            // A message that is not multipart keeps its body in section 1
            $section = $part_counter === null ? '1' : (string)$part_counter;
            $encoding = isset($structure -> encoding) === true ? $structure -> encoding : 0;
            return $this -> encode_part(imap_fetchbody($this -> handle, $uid, $section, FT_UID | FT_PEEK), $encoding);
        }
        // Multipart
        if($structure -> type === 1 && empty($structure -> parts) === false)
        {
            $prefix = $part_counter === null ? '' : $part_counter.'.';
            foreach($structure -> parts as $index => $sub_struct)
            {
                // Section numbers are 1-based
                $data = $this -> get_part($uid, $mime_type, $sub_struct, $prefix.($index + 1));
                if($data !== false)
                {
                    return $data;
                }
            }
        }
        return false;
    }

    /**
     * Builds the upper case "TYPE/SUBTYPE" string of a structure object.
     *
     * @param object $structure Structure as returned by imap_fetchstructure()
     * @return string "TEXT/PLAIN" when the structure has no subtype
     */
    private function get_mime_type($structure)
    {
        // Indexes follow the primary body types listed in the PHP manual (0 - 8)
        $mime_types = array('TEXT', 'MULTIPART', 'MESSAGE', 'APPLICATION', 'AUDIO', 'IMAGE', 'VIDEO', 'MODEL', 'OTHER');
        if(empty($structure -> subtype) === false)
        {
            $type = isset($structure -> type) === true && isset($mime_types[$structure -> type]) === true ? $mime_types[$structure -> type] : 'OTHER';
            return $type.'/'.strtoupper($structure -> subtype);
        }
        return 'TEXT/PLAIN';
    }

    /**
     * Decodes the body of a part according to its transfer encoding.
     *
     * @param string|false $content Raw body as returned by imap_fetchbody()
     * @param int $encoding Transfer encoding number from the structure (3 = base64, 4 = quoted-printable)
     * @return string|false Decoded content, false when the body is missing or cannot be decoded
     */
    private function encode_part($content, $encoding)
    {
        if(is_string($content) === false)
        {
            return false;
        }
        switch($encoding)
        {
            // Base64
            case 3:
                return imap_base64($content);
            // Quoted printable
            case 4:
                return quoted_printable_decode($content);
            // 7 bits, 8 bits, binary, other and unknown are not transformed and are returned
            // untouched. imap_8bit() and imap_binary() are encoders (to quoted-printable and
            // base64), so they must not be applied to content that is about to be scanned
            default:
                return $content;
        }
    }

    /**
     * Extracts the registrable domain (public suffix plus one label) from a URL or host name.
     *
     * IP addresses are returned unchanged and the reserved names of RFC 2606 are returned
     * as a single label.
     *
     * @param string $url URL with or without scheme
     * @return string|false Lower case domain, false when it cannot be determined
     */
    private function get_domain($url)
    {
        // @todo In the future implement support for punycode format
        // @todo In the future implement support for IPv6 format
        // parse_url() only recognises the host when the URL starts with "scheme://" or "//"
        if(preg_match('/^((http|https)\:{1})?\/{2}[^\/]/i', $url) !== 1)
        {
            $url = '//'.$url;
        }
        $host = parse_url(mb_strtolower($url), PHP_URL_HOST);
        if(is_string($host) === false)
        {
            // Something went wrong
            return false;
        }
        // A fully qualified name may end with a dot
        $host = rtrim($host, '.');
        if($host === '')
        {
            return false;
        }
        if(filter_var($host, FILTER_VALIDATE_IP) !== false)
        {
            return $host;
        }
        $tlds = is_array($this -> tlds_list) === true ? $this -> tlds_list : array();
        $max_dots = empty($tlds) === true ? 0 : max(array_keys($tlds));
        // The longest known suffix has $max_dots + 1 labels, one more label makes the domain
        $segments = array_slice(explode('.', $host), -$max_dots - 2);
        // Try the longest possible suffix first, $dots is the dot count of the current candidate
        for($dots = count($segments) - 2; $dots >= 0; --$dots)
        {
            $missing_segment = array_shift($segments);
            if(isset($tlds[$dots]) === true && in_array(implode('.', $segments), $tlds[$dots], true) === true)
            {
                array_unshift($segments, $missing_segment);
                return implode('.', $segments);
            }
        }
        // Referring to RFC 2606 (only the last label is left at this point)
        if(in_array($segments[0], array('test', 'example', 'invalid', 'localhost'), true) === true)
        {
            return $segments[0];
        }
        // Something went wrong
        return false;
    }

    /**
     * Hook for reporting a spam message (e.g. to block lists). Intentionally empty.
     *
     * @param object $structure Structure of the message
     * @param array $headers Headers of the message
     * @param object|false $message Message object from get_message()
     * @param int|null $uid UID of the message
     */
    private function report($structure, $headers, $message, $uid = null)
    {
        // Add your code here
    }
}
?>
Całość można ściągnąć za jednym zamachem tutaj. Niestety to rozwiązanie wymaga od nas wprowadzenia danych do autoryzacji przy połączeniu się z serwerem pocztowym, stąd zalecam wrzucenie skryptu poza zasięg publiczny, czyli poza katalog tzw. public_html. Odnośnie zadań CRON to proponuję coś takiego:
0 1 * * * cd /path/to/spam_killer/ && php spam_killer.php >/dev/null 2>&1
4,9,14,19,24,29,34,39,44,49,54,59 * * * * cd /path/to/spam_killer/ && php spam_killer.php remove_spam >/dev/null 2>&1
Może kiedyś, jak znajdę odrobinę czasu, to zbuduję listę podobną do tej.
Komentarze
Dodaj komentarz