# Source code for daklib.textutils

# vim:set et ts=4 sw=4:

"""Text utility functions

@contact: Debian FTP Master <ftpmaster@debian.org>
@copyright: 2000, 2001, 2002, 2003, 2004, 2005, 2006  James Troup <james@nocrew.org>
@license: GNU General Public License version 2 or later
"""

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import email.header

from .dak_exceptions import *
from .regexes import re_parse_maintainer

################################################################################


def rfc2047_encode(s) -> str:
    """
    Encodes a (header) string per RFC2047 if necessary.

    The candidate charsets ASCII, UTF-8 and ISO-8859-1 are tried in that
    order and the result of the first one that works is returned.  For
    C{bytes} input this means: if the data is neither ASCII nor UTF-8,
    it's assumed to be ISO-8859-1.

    @param s: the header value to encode, as C{str} or C{bytes}
    @return: the header value, RFC2047-encoded where needed
    @raise RuntimeError: if none of the candidate charsets can represent C{s}
    """
    for enc in ('ascii', 'utf-8', 'iso-8859-1'):
        try:
            # 998 is handed to Header as its maximum line length.
            return email.header.Header(s, enc, 998).encode()
        except UnicodeError:
            # UnicodeError is the common base of UnicodeEncodeError (raised
            # for str input) and UnicodeDecodeError (raised for bytes input
            # that is not valid in this charset); either way, fall through
            # to the next candidate charset.
            continue

    # If we get here, we're boned beyond belief
    raise RuntimeError("Failed to encode string")
################################################################################

# <Culus> 'The standard sucks, but my tool is supposed to interoperate
#          with it.  I know - I'll fix the suckage and make things
#          incompatible!'

def fix_maintainer(maintainer: str) -> tuple[str, str, str, str]:
    """
    Split a Maintainer or Changed-By value into its presentation forms.

    The returned tuple holds, in order:

      1. an RFC822 style rendering,
      2. an RFC2047 style rendering (name passed through rfc2047_encode),
      3. the bare name,
      4. the bare email address.

    Input that is empty after stripping whitespace yields four empty
    strings.  Input without any '<', or consisting only of '<...>', is
    taken to be an address with an empty name.  Anything else is parsed
    with re_parse_maintainer (group 1 is the name, group 2 the address).

    When the name contains ',' or '.', renderings 1. and 2. use the
    'email (name)' layout; otherwise they use 'name <email>'.

    @param maintainer: raw field value
    @return: tuple of (rfc822 form, rfc2047 form, name, email)
    @raise ParseMaintError: if the value does not match
        re_parse_maintainer, or if the address part has no '@' and does
        not start with 'buildd_'
    """
    field = maintainer.strip()
    if not field:
        return ('', '', '', '')

    if "<" not in field:
        # No angle bracket at all: the whole value is the address.
        name, address = "", field
    elif field.startswith("<") and field.endswith(">"):
        # Bare '<address>' with nothing in front of it.
        name, address = "", field[1:-1]
    else:
        parsed = re_parse_maintainer.match(field)
        if parsed is None:
            raise ParseMaintError("Doesn't parse as a valid Maintainer field.")
        name, address = parsed.group(1), parsed.group(2)

    # Get an RFC2047 compliant version of the name
    encoded_name = rfc2047_encode(name)

    name_has_punctuation = "," in name or "." in name
    if name_has_punctuation:
        rfc822_maint = "%s (%s)" % (address, name)
        rfc2047_maint = "%s (%s)" % (address, encoded_name)
    else:
        rfc822_maint = "%s <%s>" % (name, address)
        rfc2047_maint = "%s <%s>" % (encoded_name, address)

    # Addresses starting with 'buildd_' are exempt from the '@' requirement.
    if "@" not in address and not address.startswith("buildd_"):
        raise ParseMaintError("No @ found in email address part.")

    return (rfc822_maint, rfc2047_maint, name, address)
################################################################################
def split_uploaders(field):
    """
    Lazily yield the individual entries of an Uploaders style field.

    A comma only counts as a separator when it follows a closing '>'
    (optionally with spaces in between), so commas inside a name are
    left alone.  Each entry is stripped of surrounding whitespace and
    empty entries are dropped.

    @param field: the raw field value
    @return: generator over the non-empty, stripped entries
    """
    import re

    # Turn every '>,' separator into '>' + TAB, then cut on the TABs.
    tab_separated = re.sub(">[ ]*,", ">\t", field)
    stripped_entries = (entry.strip() for entry in tab_separated.split("\t"))

    # Trailing commas will give an empty final uploader; filter it out.
    yield from filter(None, stripped_entries)