cs.urlutils: URL: new .cleanpath and .cleanrpath properties
1 files changed, 22 insertions(+), 0 deletions(-)

M lib/python/cs/urlutils.py
M lib/python/cs/urlutils.py +22 -0
@@ 10,6 10,7 @@ from functools import cached_property
 from heapq import heappush, heappop
 import os
 import os.path
+import re
 import sys
 from typing import Iterable
 

          
@@ 314,6 315,27 @@ class URL(HasThreadState, Promotable):
     '''
     return self.url_parsed.path
 
+  @cached_property
+  @unattributable
+  def cleanpath(self):
+    ''' The URL path as returned by urlparse.urlparse,
+        with every run of multiple slashes (`/`) reduced to a single slash.
+        Technically this can change the meaning of the URL path, but such runs
+        are usually an artifact of sloppy path construction.
+    '''
+    path = self.path
+    if '///' in path:
+      path = re.sub('//+', '/', path)  # thorough: one `replace` pass would leave `//` behind
+    elif '//' in path:
+      path = path.replace('//', '/')  # fast: only doubled slashes, a plain replace suffices
+    return path
+
+  @property
+  def cleanrpath(self):
+    ''' The `cleanpath` with any leading slash (`/`) stripped.
+    '''
+    return self.cleanpath.lstrip('/')  # `cleanpath` has already collapsed slash runs
+
   @property
   @unattributable
   def path_elements(self):