bb6140e51043 — Malcolm Scott 3 months ago
Major refactor by Daniel Carter to talk to the new lists.cam.ac.uk Sympa service (Mailman has gone).  Thanks, Daniel!
1 files changed, 139 insertions(+), 313 deletions(-)

M listsdotcam.py
M listsdotcam.py +139 -313
@@ 1,6 1,7 @@ 
 #!/usr/bin/python3
 #
 # Copyright (C) 2009-2015 Malcolm Scott <mas90@srcf.net>
+# Copyright (C) 2021 Daniel Carter <dcc52@srcf.net>
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License

          
@@ 14,13 15,13 @@ 
 # The full terms of the GNU General Public License version 2 can be
 # read at http://www.gnu.org/licenses/gpl-2.0.txt .
 #
-"""Update lists.cam.ac.uk Mailman mailing lists
+"""Update lists.cam.ac.uk Sympa mailing lists
 
 For command-line usage instructions, run with --help
 
 For usage as a Python (2 or 3) module, note that you must first call
-  raven_auth(USERNAME, PASSWORD)
-before calling the mailman_foo functions.
+  ListsDotCam(USERNAME, PASSWORD)
+before calling the sympa_foo methods on the object it returns.
 
 Note that Python 3 is recommended, as better facilities for handling
 Unicode characters are available."""

          
@@ 33,325 34,155 @@ import sys
 import termios
 import argparse
 from datetime import datetime
+from getpass import getpass
+from email.utils import parseaddr
 
-try:
-	# Python 3
-	import urllib.request as request
-	from urllib.parse import urlencode
-except ImportError:
-	# Python 2 -- NB, won't handle unusual characters at all well
-	import urllib2 as request
-	from urllib import urlencode
+from zeep.client import Client, Settings
+from zeep.exceptions import Fault
+
+class AuthenticationException(Exception):
+	"Raised by ListsDotCam() when the Sympa login call is rejected"
+
+class SympaActionFailedException(Exception):
+	"Raised when Sympa refuses or fails to carry out an add/remove request"
+
+# Location of the Sympa WSDL; ListsDotCam hands this to zeep's Client
+SOAP_URL = "https://lists.cam.ac.uk/sympa/wsdl"
+
+class ListsDotCam:
+	def __init__(self, user, password):
+		"""Authenticate to Sympa, keeping the cookie in the class's Zeep session state
+
+		'user' and 'password' are passed to the SOAP login call.  Raises
+		AuthenticationException if that call fails with a SOAP Fault."""

-try:
-	# Python 3
-	from html.parser import HTMLParser
-except ImportError:
-	# Python 2
-	from HTMLParser import HTMLParser
+		# Store the admin username, so we don't erroneously flag it as an invalid admin
+		self.username = user
+
+		# Create the SOAP client from the Sympa WSDL; the other methods make their calls through it
+		self.zeep = Client(SOAP_URL, settings=Settings(strict=False))

-from bs4 import BeautifulSoup
+		try:
+			result  = self.zeep.service.login(user, password)
+			# The text of the first raw response element is used as the session cookie value
+			# NOTE(review): _raw_elements is presumably populated because strict=False -- confirm against zeep
+			element = result._raw_elements[0]
+			cookie  = element.text
+		except Fault as err:
+			raise AuthenticationException(err)   # Typically "Authentication failed"
+
+		# Add HTTP Cookie for session authentication
+		self.zeep.settings.extra_http_headers = [("Cookie", f"sympa_session={cookie}")]
 
 
-RAVENSERVER = 'https://raven.cam.ac.uk'
-RAVENURI = '/auth/authenticate2.html'
-MAILMANSERVER = 'https://lists.cam.ac.uk'
-MAILMANURI = '/mailman/'
-DEFAULTPARAMS = {
-		'send_welcome_msg_to_this_batch': '0',
-		'send_notifications_to_list_owner': '0',
-		'send_unsub_ack_to_this_batch': '0',
-		'send_unsub_notifications_to_list_owner': '0'
-	}
 
-RAVENURL = RAVENSERVER + RAVENURI
-MAILMANURL = MAILMANSERVER + MAILMANURI
-MAILMANLOGINURL = MAILMANURL + "raven/"
-LISTADDURL = MAILMANURL + 'admin/%s/members/add'
-LISTIMPORTURL = MAILMANURL + 'admin/%s/members/import'
-LISTSETTINGSURL = MAILMANURL + 'admin/%s/common'
+	def sympa_check_admins(self, ml, mladmins):
+		"""Check administrators for list 'ml' against 'mladmins'
+
+		Fetches the full review of 'ml' from Sympa, collects the email address
+		of every record flagged as an owner, and prints one line for each
+		current owner missing from 'mladmins' (other than the account we
+		logged in as) and one line for each entry of 'mladmins' that is not
+		currently an owner.  Nothing is changed on the server."""
+		email_tag = '{urn:sympasoap}email'
+		owner_tag = '{urn:sympasoap}isOwner'
+
+		# Materialise once so any iterable is accepted and can be re-read below
+		mladmins = list(mladmins)
+
+		current_admins = []
+		for record in self.zeep.service.fullReview(ml):
+			email = ''
+			admin = False
+			# Iterate the element directly: getchildren() is deprecated in lxml
+			# and no longer exists in xml.etree (removed in Python 3.9)
+			for attr in record:
+				if attr.tag == email_tag:
+					email = attr.text
+				elif attr.tag == owner_tag and attr.text == 'true':
+					admin = True
+			# An empty <email/> element has text None; skip it as well as ''
+			if admin and email:
+				current_admins.append(email)
+
+		wanted = set(mladmins)
+		current = set(current_admins)
+
+		for email in sorted(current_admins):
+			if email not in wanted and email != self.username:
+				print(email + " is currently a list admin but should be removed")
+
+		for email in sorted(mladmins):
+			if email not in current:
+				print(email + " is not currently a list admin, and should be added")
 
 
 
-class RavenException(Exception):
-	pass
-
-class NoRavenException(Exception):
-	pass
-
-class NoMailmanResultException(Exception):
-	pass
-
-class MailmanScrapingException(Exception):
-	pass
-
+	def sympa_subscribe_one(self, ml, address, name):
+		"""Subscribe a single address to list 'ml'
+
+		Prints a progress line, then asks Sympa to add 'address' with display
+		name 'name'.  Raises SympaActionFailedException if the call fails with
+		a SOAP Fault or the reply is anything other than "true"."""
+		print("Adding: %s (%s)" % (address, name))
+		try:
+			# The trailing 'true' asks Sympa not to send a welcome email to the new subscriber
+			response = self.zeep.service.add(ml, address, name, 'true')
+		except Fault as fault:
+			raise SympaActionFailedException(fault)   # Typically "User is already a list member"

-def getpass(prompt="Password: "):
-	"Read a password from stdin, without echo (from http://docs.python.org/library/termios.html)"
-	try:
-		# input() in Python 3 == raw_input() in Python 2
-		_input = raw_input
-	except NameError:
-		_input = input
-	stdin_fd = sys.stdin.fileno()
-	old = termios.tcgetattr(stdin_fd)
-	new = termios.tcgetattr(stdin_fd)
-	new[3] = new[3] & ~termios.ECHO    # lflags
-	try:
-		termios.tcsetattr(stdin_fd, termios.TCSADRAIN, new)
-		passwd = _input(prompt)
-		print()
-	finally:
-		termios.tcsetattr(stdin_fd, termios.TCSADRAIN, old)
-	return passwd
+		accepted = response._raw_elements[0].text == "true"
+		if not accepted:
+			raise SympaActionFailedException("Failed to add: " + address)
 
 
-# Set up a global URL opener with cookie jar, used by most other functions
-OPENER = request.build_opener(request.HTTPCookieProcessor())
-request.install_opener(OPENER)
-
-
-def raven_auth(ravenuser, ravenpass):
-	"Authenticate to Raven, keeping the cookie in the global OPENER"
-
-	# This is just to pick up the session cookie.
-	# This request will get redirected to Raven's login form, but
-	# we don't try to parse that form.
-	try:
-		f = OPENER.open(MAILMANLOGINURL)
-		newurl = f.geturl()
-		if newurl[0:len(RAVENSERVER)] != RAVENSERVER:
-			raise RavenException("Expected a redirection to Raven; actually ended up at %s" % newurl)
-	finally:
-		f.close()
 
-	# Now actually log into Raven, and pick up the second cookie.
-	# Again, we don't care about the page we get sent back to --
-	# it's just a no-op.
-	params = {
-			'date': datetime.utcnow().strftime("%Y%m%dT%H%M%SZ"),
-			'ver': '1',
-			'url': MAILMANLOGINURL,
-			'userid': ravenuser,
-			'pwd': ravenpass
-		}
-	try:
-		f = OPENER.open(RAVENURL, urlencode(params).encode("ascii"))
-		newurl = f.geturl()
-		if newurl[0:len(RAVENSERVER)] == RAVENSERVER:
-			# If we're still in Raven we probably supplied the wrong username/password
-			raise RavenException("Failed to authenticate to Raven")
-		elif newurl[0:len(MAILMANSERVER)] != MAILMANSERVER:
-			# We're not in Mailman or Raven?  WTF.
-			raise RavenException("Redirected unexpectedly!  (%s)" % newurl)
-		OPENER.raven_auth = True
-	finally:
-		f.close()
+	def sympa_subscribe(self, ml, addresses):
+		"""Subscribe the email addresses in dict 'addresses' to list 'ml' (not touching existing subscribers)
+
+		'addresses' maps each email address to its display name.  Every address
+		is attempted even if an earlier one is refused (typically because it is
+		already a list member); the refusals are collected and raised together
+		as a single SympaActionFailedException once all have been tried."""
+		failures = []
+		for (address, name) in addresses.items():
+			try:
+				self.sympa_subscribe_one(ml, address, name)
+			except SympaActionFailedException as err:
+				# Keep going: one refused address must not block the rest
+				failures.append("%s: %s" % (address, err))
+		if failures:
+			raise SympaActionFailedException("; ".join(failures))
+
 
 
-class MailmanHTMLParser(HTMLParser):
-	"""Helper to pretty-print Mailman's HTML pages, which (currently) look like:
-
-	...
-	<BODY bgcolor="white">
-	<h5>Error subscribing:</h5>
-	<ul>
-	<li>mas90@cam.ac.uk -- Already a member
-	</ul>
-	<p><center><h2>Soc-SRCF-social mailing list administration<br>...
-
-	or
+	def sympa_import(self, ml, addresses):
+		"""REPLACE the current subscribers for list 'ml' with those in dict 'addresses'
+
+		'addresses' maps each wanted email address to its display name.
+		Current subscribers absent from 'addresses' are removed, then wanted
+		addresses that are not yet subscribed are added.  Raises
+		SympaActionFailedException as soon as a removal or addition fails."""
+		existing = set()
+		for email in self.zeep.service.review(ml):
+			# Unhelpful quirk of REVIEW: an empty list yields this placeholder entry
+			if email != "no_subscribers":
+				existing.add(email)

-	...
-	<table width="100%" border="0" cellspacing="4" cellpadding="5">
-	  <tr>
-	    <td colspan="2" width="100%" bgcolor="#99ccff" align="center">
-	      <b><font color="#000000" size="+1">Soc-SRCF Administrator
-	Authentication</FONT></b>
-	      </td>
-	  </tr>
-	    <tr>
-	      <td align=middle>
-	        You are not authorised for Soc-SRCF Administrator functions
-	      </td>
-	    </tr>
-	  </table>
-
-	NB: the whitespace handling in this class (er, in fact most of this class,
-	but that in particular) is very much not generic but happens to work with
-	the way Mailman produces pages at the time of writing."""
-
-	running = 1
-	inheading = 0
-	initem = 0
-	hadheading = 0
-	intable = 0
-	inentity = False
-
-	def __init__(self, output_file=sys.stdout):
-		HTMLParser.__init__(self)
-		self.outfile = output_file
+		# First pass: remove everyone who is subscribed but no longer wanted
+		for email in sorted(existing):
+			if email in addresses:
+				continue

-	def handle_starttag(self, tag, attrs):
-		if self.running:
-			if tag == 'h5':
-				if self.inheading == 1:
-					self.outfile.write("\n")
-				self.inheading = 1
-				self.hadheading = 1
-				self.initem = 0
-			elif tag == 'li':
-				self.initem = 1
-			elif tag == 'h2':
-				self.inheading = 0
-				self.initem = 0
-				self.running = 0
-				if not self.hadheading:
-					raise NoMailmanResultException()
-			elif tag == 'td':  # for not-authorised message...
-				if not self.hadheading and self.intable == 0:
-					self.intable = 1
-
-	def handle_endtag(self, tag):
-		if self.running:
-			tag = tag.lower()
-			if tag == 'h5':
-				self.inheading = 0
-			elif tag == 'li':
-				self.initem = 0
-			elif tag == 'td':
-				self.intable = 0
-			elif tag == 'table':
-				self.outfile.write("\n")
-				self.intable = 2 # skip subsequent tables
-			elif tag == 'ul':
-				self.outfile.write("\n")
+			print("Removing: " + email)
+			try:
+				# "del" reserved word in Python, hence shenanigans with getattr()
+				# NOTE(review): trailing 'true' presumably suppresses the notification, as for add -- confirm
+				result = getattr(self.zeep.service, "del")(ml, email, 'true')
+			except Fault as err:
+				# Report SOAP faults the same way sympa_subscribe_one does
+				raise SympaActionFailedException(err)
+			if result._raw_elements[0].text != "true":
+				raise SympaActionFailedException("Failed to remove: " + email)

-	def _output(self, data):
-		if self.inheading:
-			self.outfile.write("   %s" % data)
-			self.inheading = 0
-			self.initem = 2
-		elif self.initem == 1:
-			self.outfile.write("\n      %s" % data)
-			self.initem = 2
-		elif self.initem == 2:
-			self.outfile.write(data)
-		elif self.intable == 1:  # error messages...
-			if data.strip() != "":
-				self.outfile.write("\n   |  %s" % data.replace('\n', ' ').strip())
+		# Second pass: add everyone who is wanted but not yet subscribed
+		for (email, name) in sorted(addresses.items()):
+			if email in existing:
+				continue

-	def handle_data(self, data):
-		if self.running:
-			data = data.rstrip("\n")
-			if self.inentity:
-				# Mailman output double-encodes...
-				data = self.unescape("&%s" % data)
-				self.inentity = False
-			if data != "":
-				self._output(data)
-
-	def handle_entityref(self, name):
-		if name == "nbsp":
-			pass
-		elif name == "amp":
-			# Mailman output double-encodes...
-			self.inentity = True
-		else:
-			self._output(self.unescape('&%s;' % name))
-
-	def handle_charref(self, name):
-		self._output(self.unescape('&#%s;' % name))
+			self.sympa_subscribe_one(ml, email, name)
 
 
-def mailman_action(url, params):
-	if not hasattr(OPENER, 'raven_auth') or not OPENER.raven_auth:
-		raise NoRavenException("Must have successfully authenticated to Raven (raven_auth(...)) before calling this function")
-
-	parser = MailmanHTMLParser()
-
-	try:
-		# Mailman 2.1.15+: get CSRF token
-		f = OPENER.open(url)
-		soup = BeautifulSoup(f.read(), "html.parser")
-		token_tag = soup.find("input", attrs={"name": "csrf_token"})
-		if token_tag is not None:
-			token = token_tag['value']
-			params['csrf_token'] = token
-		f.close()
-
-		f = OPENER.open(url, urlencode(params).encode("ascii"))
-		if f.code != 200:
-			raise MailmanScrapingException("Error %d!" % f.code)
-		else:
-			parser.feed(f.read().decode())
-			if not parser.hadheading:
-				raise NoMailmanResultException()
-	finally:
-		f.close()
+	def split_addresses(self, addresses):
+		"""Split a list of strings in 'Name <email>' format into separate name and email strings
+
+		Returns a dict mapping each email address to its display name (an
+		empty string when the line carries no name).  A later line with the
+		same address replaces the earlier name.  Lines from which no address
+		can be parsed are reported on stderr and left out of the result."""
+		addresses_dict = dict()
+		for line in addresses:
+			(name, address) = parseaddr(line)
+			if not address:
+				# parseaddr() yields an empty address when parsing fails; storing it
+				# would later hand '' to Sympa as an address to subscribe
+				print("Ignoring unparseable address: %r" % (line,), file=sys.stderr)
+				continue
+			addresses_dict[address] = name
+		return addresses_dict
 
 
-def mailman_set_admins(ml, mladmins):
-	"Set administrators for list 'ml' to 'mladmins'"
-	url = LISTSETTINGSURL % ml
-	params = {'owner': "\n".join(mladmins)}
-	try:
-		mailman_action(url, params)
-	except NoMailmanResultException:
-		pass
-
-
-def mailman_subscribe(ml, addresses):
-	"Subscribe the email addresses in iterable 'addresses' to list 'ml' (not touching existing subscribers)"
-	url = LISTADDURL % ml
-	params = DEFAULTPARAMS.copy()
-	params.update({
-			'subscribees': "\n".join(addresses)
-		})
-	mailman_action(url, params)
-
-
-def mailman_import(ml, addresses):
-	"REPLACE the current subscribers for list 'ml' with those interable 'addresses'"
-	url = LISTADDURL % ml
-	params = DEFAULTPARAMS.copy()
-	params.update({
-			'import_upload': "\n".join(addresses)
-		})
-	mailman_action(url, params)
-
-
-def _read_addresses(file):
-	"Read a list of addresses from an open file"
-	def _read_addresses_iter(file):
-		for line in file:
-			stripped_line = line.decode("utf-8").strip()
-			if stripped_line != "":
-				yield stripped_line
-	return list(_read_addresses_iter(file))
+	def _read_addresses(self, file):
+		"""Read a list of addresses from an open file
+
+		Each line of 'file' is decoded as UTF-8 and stripped of surrounding
+		whitespace; lines that are empty after stripping are dropped."""
+		decoded = (raw.decode("utf-8").strip() for raw in file)
+		return [entry for entry in decoded if entry != ""]
 
 
 # Command-line interface
 def main(argv=None):
-	parser = argparse.ArgumentParser(description="Modify subscribers or administrators of a lists.cam.ac.uk Mailman mailing list")
+	parser = argparse.ArgumentParser(description="Modify subscribers or administrators of a lists.cam.ac.uk Sympa mailing list")
 
 	parser.add_argument('ml', metavar='LIST', nargs='+', help='The name of the mailing list on which to act (not including "@lists.cam.ac.uk")')
 
-	default_raven_user = os.getenv('SUDO_USER') or os.getenv('USER')
-	if default_raven_user is not None:
-		if default_raven_user[-4:] == '-adm':
+	default_lists_user = os.getenv('SUDO_USER') or os.getenv('USER')
+	if default_lists_user is not None:
+		if default_lists_user[-4:] == '-adm':
 			# SRCF-specific hack; we have accounts named e.g. "spqr2-adm" corresponding with Raven account "spqr2"
-			default_raven_user = default_raven_user[:-4]
-		parser.add_argument('-u', '--raven-user', metavar='USERID', default=default_raven_user,
-				help='Raven username (default autodetected from your environment: %s)' % default_raven_user)
+			default_lists_user = default_lists_user[:-4]
+		# For Sympa, we require a full email address
+		default_lists_user += '@cam.ac.uk'
+		parser.add_argument('-u', '--user', metavar='USERID', default=default_lists_user,
+				help='Sympa username (default autodetected from your environment: %s)' % default_lists_user)
 	else:
-		parser.add_argument('-u', '--raven-user', metavar='USERID', required=True,
-				help='Raven username (default cannot be autodetected in this environment)' % default_raven_user)
+		parser.add_argument('-u', '--user', metavar='USERID', required=True,
+				help='Sympa username (default cannot be autodetected in this environment)' % default_lists_user)
 
-	parser.add_argument('-p', '--raven-password-env', metavar='VARIABLE',
-			help='Name of the environment variable from which to obtain your Raven password (if not specified, you will be prompted on stdin)')
+	parser.add_argument('-p', '--password-env', metavar='VARIABLE',
+			help='Name of the environment variable from which to obtain your Sympa password (if not specified, you will be prompted on stdin)')
 	parser.add_argument('-a', '--admins', metavar='FILE', nargs='?', type=argparse.FileType('rb'), const=sys.stdin,
-			help='Set the administrators of LIST(s) to the addresses given on stdin or in FILE (one address per line)')
+			help='Check the administrators of LIST(s) against the addresses given on stdin or in FILE (one address per line)')
 	parser.add_argument('-s', '--subscribe', metavar='FILE', nargs='?', type=argparse.FileType('rb'), const=sys.stdin,
 			help='Subscribe the addresses given on stdin or in FILE (one address per line) to LIST(s)')
 	parser.add_argument('-i', '--import-subscribers', metavar='FILE', nargs='?', type=argparse.FileType('rb'), const=sys.stdin,

          
@@ 368,60 199,55 @@ def main(argv=None):
 		print("No action given.  Exiting.", file=sys.stderr)
 		sys.exit(1)
 
-	if args.raven_password_env:
-		raven_pass = os.getenv(args.raven_password_env)
-		if raven_pass is None:
-			print("Error: no Raven password found in environment variable '%s'.  Aborting." % args.raven_password_env, file=sys.stderr)
+	if args.password_env:
+		password = os.getenv(args.password_env)
+		if password is None:
+			print("Error: no Sympa password found in environment variable '%s'.  Aborting." % args.password_env, file=sys.stderr)
 			sys.exit(1)
 	else:
 		if args_stdin_count > 0:
-			print("Error: can't read both list data and Raven password from standard input.  Aborting.", file=sys.stderr)
-			print("Either pass list data as a filename or file descriptor, or use --raven-password-env VARIABLE.", file=sys.stderr)
+			print("Error: can't read both list data and Sympa password from standard input.  Aborting.", file=sys.stderr)
+			print("Either pass list data as a filename or file descriptor, or use --password-env VARIABLE.", file=sys.stderr)
 			sys.exit(1)
-		raven_pass = getpass(prompt="Raven password for %s: " % args.raven_user)
+		password = getpass(prompt="Sympa password for %s: " % args.user)
 		print()
 
 	try:
-		raven_auth(args.raven_user, raven_pass)
-	except RavenException as e:
+		lists = ListsDotCam(args.user, password)
+	except AuthenticationException as e:
 		print(e, file=sys.stderr)
 		sys.exit(2)
 
 	if args.admins:
-		admins = _read_addresses(args.admins)
+		admins = lists._read_addresses(args.admins)
 		args.admins.close()
 		for ml in args.ml:
-			print("Setting administrators for %s to %s" % (ml, ", ".join(admins)))
-			try:
-				mailman_set_admins(ml, admins)
-			except MailmanScrapingException as e:
-				print("   *** %s" % e)
+			print("Checking administrators for %s against %s" % (ml, ", ".join(admins)))
+			lists.sympa_check_admins(ml, admins)
 			print()
 
 	if args.import_subscribers:
-		addresses = _read_addresses(args.import_subscribers)
+		addresses = lists._read_addresses(args.import_subscribers)
 		args.import_subscribers.close()
 		for ml in args.ml:
 			print("Importing %d address%s to %s" % (len(addresses), '' if len(addresses) == 1 else 'es', ml))
 			try:
-				mailman_import(ml, addresses)
-			except MailmanScrapingException as e:
+				addresses_dict = split_addresses(addresses)
+				lists.sympa_import(ml, addresses_dict)
+			except SympaActionFailedException as e:
 				print("   *** %s" % e, file=sys.stderr)
-			except NoMailmanResultException:
-				print("   No changes.")
 			print()
 
 	if args.subscribe:
-		addresses = _read_addresses(args.subscribe)
+		addresses = lists._read_addresses(args.subscribe)
 		args.subscribe.close()
 		for ml in args.ml:
 			print("Subscribing %d address%s to %s" % (len(addresses), '' if len(addresses) == 1 else 'es', ml))
 			try:
-				mailman_subscribe(ml, addresses)
-			except MailmanScrapingException as e:
+				addresses_dict = split_addresses(addresses)
+				lists.sympa_subscribe(ml, addresses_dict)
+			except SympaActionFailedException as e:
 				print("   *** %s" % e, file=sys.stderr)
-			except NoMailmanResultException:
-				print("   *** Error: no result from Mailman!", file=sys.stderr)
 			print()