# Source code for daklib.textutils
# vim:set et ts=4 sw=4:
"""Text utility functions
@contact: Debian FTP Master <ftpmaster@debian.org>
@copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006 James Troup <james@nocrew.org>
@license: GNU General Public License version 2 or later
"""
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import email.header
from .dak_exceptions import *
from .regexes import re_parse_maintainer
################################################################################
def rfc2047_encode(s) -> str:
    """
    Encode a (header) string per RFC 2047 if necessary.

    The candidate charsets 'ascii', 'utf-8' and 'iso-8859-1' are tried in
    that order and the result for the first one that works is returned.
    A byte string that is neither ASCII nor UTF-8 is therefore treated as
    ISO-8859-1.

    @type s: str or bytes
    @param s: the header value to encode

    @rtype: str
    @return: the value as produced by C{email.header.Header.encode()}

    @raise RuntimeError: if none of the candidate charsets can handle C{s}
    """
    for enc in ('ascii', 'utf-8', 'iso-8859-1'):
        try:
            # 998 is handed to Header as its maximum line length.
            return email.header.Header(s, enc, 998).encode()
        except UnicodeError:
            # UnicodeError covers both UnicodeEncodeError (str input the
            # charset cannot represent) and UnicodeDecodeError (bytes input
            # that is not valid in the charset); either way, try the next
            # candidate instead of aborting the fallback chain.
            continue

    # If we get here, we're boned beyond belief
    raise RuntimeError("Failed to encode string")
################################################################################
# <Culus> 'The standard sucks, but my tool is supposed to interoperate
# with it. I know - I'll fix the suckage and make things
# incompatible!'
def fix_maintainer(maintainer: str) -> tuple[str, str, str, str]:
    """
    Parse a Maintainer or Changed-By field.

    Three input shapes are accepted: a bare address without any '<', an
    address wrapped in angle brackets ('<address>'), or anything else, which
    must match C{re_parse_maintainer} (group 1 is taken as the name, group 2
    as the address).

    @type maintainer: str
    @param maintainer: the raw field value; surrounding whitespace is ignored

    @rtype: tuple
    @return: a 4-tuple of
      1. an RFC822 compatible version,
      2. an RFC2047 compatible version (name passed through
         C{rfc2047_encode}),
      3. the name,
      4. the email address.
      If the name contains '.' or ',' (as allowed by Debian policy), 1. and
      2. use the 'email (name)' format, otherwise 'name <email>'.  When no
      name is present the 'name <email>' form therefore starts with a
      space.  Empty (or whitespace-only) input yields four empty strings.

    @raise ParseMaintError: if the field does not match
      C{re_parse_maintainer}, or if the address part has no '@' and does not
      start with 'buildd_'
    """
    maintainer = maintainer.strip()
    if not maintainer:
        return ('', '', '', '')

    # The address is kept in 'address' rather than 'email' so the local
    # does not shadow the imported 'email' package.
    if "<" not in maintainer:
        address = maintainer
        name = ""
    elif maintainer.startswith("<") and maintainer.endswith(">"):
        address = maintainer[1:-1]
        name = ""
    else:
        m = re_parse_maintainer.match(maintainer)
        if not m:
            raise ParseMaintError("Doesn't parse as a valid Maintainer field.")
        name = m.group(1)
        address = m.group(2)

    # Get an RFC2047 compliant version of the name
    rfc2047_name = rfc2047_encode(name)

    if ',' in name or '.' in name:
        rfc822_maint = "%s (%s)" % (address, name)
        rfc2047_maint = "%s (%s)" % (address, rfc2047_name)
    else:
        rfc822_maint = "%s <%s>" % (name, address)
        rfc2047_maint = "%s <%s>" % (rfc2047_name, address)

    # Addresses beginning with 'buildd_' are exempt from the '@' requirement.
    if "@" not in address and not address.startswith("buildd_"):
        raise ParseMaintError("No @ found in email address part.")

    return (rfc822_maint, rfc2047_maint, name, address)
################################################################################
def split_uploaders(field: str):
    """
    Split an Uploaders-style field into its individual entries.

    The field is split at every comma that follows a closing '>' (with
    optional spaces in between), so commas inside a name do not split an
    entry.  Each entry is stripped of surrounding whitespace and empty
    entries are skipped.

    @type field: str
    @param field: the raw field value

    @rtype: generator of str
    @return: yields the non-empty entries in order of appearance
    """
    import re

    # Split directly on the separator; the lookbehind keeps the '>' with the
    # entry it terminates.  Unlike substituting a tab sentinel and splitting
    # on it, this cannot mis-split at tab characters already in the field.
    for u in re.split(r"(?<=>)[ ]*,", field):
        u = u.strip()
        # Trailing commas will give an empty final uploader
        if u:
            yield u