Source code for wpull.namevalue

# encoding=utf-8
'''Key-value pairs.'''
import collections
import collections.abc
import gettext
import io
import textwrap

from wpull.collections import OrderedDefaultDict

_ = gettext.gettext

class NameValueRecord(collections.abc.MutableMapping):
    '''An ordered mapping of name-value pairs.

    Duplicated names are accepted. Through the mapping interface
    (``record[name]``) only the first value stored for a name is returned,
    and assigning replaces every value stored for that name. Use
    :meth:`add`, :meth:`get_list` and :meth:`get_all` to work with
    repeated names.

    Names are passed through :func:`normalize_name` before being used as
    keys.

    Args:
        normalize_overrides (set, sequence): Forwarded to
            :func:`normalize_name` as its ``overrides`` argument.
        encoding (str): Codec used by :meth:`parse` to decode bytes and by
            :meth:`to_bytes` to encode the record.
        wrap_width (int): If set, :meth:`to_str` wraps each non-empty value
            to this width using continuation lines that start with a space.

    Attributes:
        raw (str): Everything given to :meth:`parse` so far, or ``None``.
        encoding (str): The codec name given to the constructor.
    '''
    # The ABC aliases were removed from the ``collections`` namespace in
    # Python 3.10; ``collections.abc`` is the supported location.

    def __init__(self, normalize_overrides=None, encoding='utf-8',
                 wrap_width=None):
        self._map = OrderedDefaultDict(list)
        self.raw = None
        self.encoding = encoding
        self._normalize_overrides = normalize_overrides
        self._wrap_width = wrap_width

    def parse(self, string, strict=True):
        '''Parse the string or bytes.

        The input is appended to :attr:`raw` and each ``name: value`` line
        is added to the record, so repeated calls accumulate fields.

        Args:
            string (str, bytes): The text to parse. Bytes are decoded with
                :attr:`encoding`.
            strict (bool): If True, errors will not be ignored. If False,
                undecodable bytes are replaced and lines without a colon
                are skipped.

        Raises:
            :class:`ValueError` if the record is malformed.
        '''
        if isinstance(string, bytes):
            errors = 'strict' if strict else 'replace'
            string = string.decode(self.encoding, errors=errors)

        if not self.raw:
            self.raw = string
        else:
            self.raw += string

        for line in unfold_lines(string).splitlines():
            if not line:
                continue

            if ':' not in line:
                if strict:
                    raise ValueError('Field missing colon.')
                continue

            # Split on the first colon only; values may contain colons.
            name, value = line.split(':', 1)
            self.add(name.strip(), value.strip())

    def __getitem__(self, name):
        normalized_name = normalize_name(name, self._normalize_overrides)

        # Test membership first so that a failed lookup never goes through
        # the map's default factory.
        if normalized_name in self._map:
            values = self._map[normalized_name]

            if values:
                return values[0]

        raise KeyError(name)

    def __setitem__(self, name, value):
        normalized_name = normalize_name(name, self._normalize_overrides)
        # Slice assignment replaces all existing values in place.
        self._map[normalized_name][:] = (value,)

    def __delitem__(self, name):
        del self._map[normalize_name(name, self._normalize_overrides)]

    def __iter__(self):
        return iter(self._map)

    def __len__(self):
        return len(self._map)

    def add(self, name, value):
        '''Append the name-value pair to the record.'''
        normalized_name = normalize_name(name, self._normalize_overrides)
        self._map[normalized_name].append(value)

    def get_list(self, name):
        '''Return all the values for given name.

        The list returned is the one held by the record, not a copy.
        '''
        # NOTE(review): ``_map`` is built as ``OrderedDefaultDict(list)``, so
        # asking for an unknown name presumably stores an empty list for
        # it -- confirm against ``wpull.collections``.
        normalized_name = normalize_name(name, self._normalize_overrides)
        return self._map[normalized_name]

    def get_all(self):
        '''Return an iterator of name-value pairs.

        A name with several values is yielded once per value.
        '''
        for name, values in self._map.items():
            for value in values:
                yield (name, value)

    def __str__(self):
        return self.to_str()

    def to_str(self):
        '''Convert to string.

        Each pair is written as ``Name: value`` and terminated by CRLF.
        Empty values are written as ``Name:``.
        '''
        pairs = []

        for name, value in self.get_all():
            if value and self._wrap_width:
                # The one-space initial indent supplies the space after the
                # colon; continuation lines also start with a space.
                pairs.append('{0}:{1}'.format(
                    name,
                    '\r\n'.join(textwrap.wrap(
                        value,
                        width=self._wrap_width,
                        drop_whitespace=False,
                        initial_indent=' ',
                        subsequent_indent=' '
                    ))
                ))
            elif value:
                pairs.append('{0}: {1}'.format(name, value))
            else:
                pairs.append('{0}:'.format(name))

        # The trailing empty item makes the joined text end with CRLF.
        pairs.append('')
        return '\r\n'.join(pairs)

    def __bytes__(self):
        return self.to_bytes()

    def to_bytes(self, errors='strict'):
        '''Convert to bytes.

        Args:
            errors (str): Error handler passed to :meth:`str.encode`.
        '''
        return str(self).encode(self.encoding, errors=errors)
def normalize_name(name, overrides=None):
    '''Normalize the key name to title case.

    For example, ``normalize_name('content-id')`` will become
    ``Content-Id``

    Args:
        name (str): The name to normalize.
        overrides (set, sequence): A set or sequence containing keys that
            should be cased to themselves. For example, passing
            ``{'WARC-Type'}`` will normalize any key named "warc-type" to
            ``WARC-Type`` instead of the default ``Warc-Type``.

    Returns:
        str
    '''
    normalized_name = name.title()

    if not overrides:
        return normalized_name

    # Key each override by its title-cased form so it can be matched
    # against the title-cased input.
    override_map = {override.title(): override for override in overrides}
    return override_map.get(normalized_name, normalized_name)
def guess_line_ending(string):
    '''Return the most likely line delimiter from the string.

    CRLF sequences are compared against line feeds that are not part of a
    CRLF. Ties, including a string with no line endings at all, resolve to
    CRLF.

    Args:
        string (str): The text to inspect.

    Returns:
        str: Either ``'\\r\\n'`` or ``'\\n'``.
    '''
    assert isinstance(string, str), 'Expect str. Got {}'.format(type(string))
    crlf_count = string.count('\r\n')
    # Every CRLF also contains a line feed, so subtract those to get the
    # number of bare line feeds.
    bare_lf_count = string.count('\n') - crlf_count

    if crlf_count >= bare_lf_count:
        return '\r\n'
    else:
        return '\n'
def unfold_lines(string):
    '''Join lines that are wrapped.

    Any line that starts with a space or tab is joined to the previous
    line with a single space. Every line is stripped of surrounding
    whitespace, and the result uses CRLF line endings and always ends
    with CRLF.

    Args:
        string (str): The text to unfold.

    Returns:
        str
    '''
    assert isinstance(string, str), 'Expect str. Got {}'.format(type(string))
    pieces = []

    for line_number, line in enumerate(string.splitlines()):
        if line.startswith((' ', '\t')):
            # Continuation line: join to the previous line with one space.
            pieces.append(' ')
        elif line_number != 0:
            pieces.append('\r\n')

        pieces.append(line.strip())

    pieces.append('\r\n')
    return ''.join(pieces)