'''Misc functions.'''
import logging
from wpull.backport.logging import BraceMessage as __
import wpull.protocol.http.util
import wpull.util
import wpull.string
_logger = logging.getLogger(__name__)
def get_heading_encoding(response):
    '''Return the document encoding from a HTTP header.

    The charset is parsed out of the response's ``content-type`` field and
    then passed through :func:`wpull.string.normalize_codec_name`.

    Args:
        response (Response): An instance of :class:`.http.Response`.

    Returns:
        ``str``, ``None``: The codec name, or ``None`` when no charset was
        parsed from the header.
    '''
    # A missing content-type field is handled as an empty header value.
    content_type = response.fields.get('content-type', '')
    charset = wpull.protocol.http.util.parse_charset(content_type)

    if not charset:
        return None

    return wpull.string.normalize_codec_name(charset)
def detect_response_encoding(response, is_html=False, peek=131072):
    '''Return the likely encoding of the response document.

    The encoding declared in the HTTP header (if any) is passed to
    :func:`wpull.string.detect_encoding` together with the leading bytes
    of the response body.

    Args:
        response (Response): An instance of :class:`.http.Response`.
        is_html (bool): See :func:`.util.detect_encoding`.
        peek (int): The maximum number of bytes of the document to be
            analyzed.

    Returns:
        ``str``, ``None``: The codec name.
    '''
    header_encoding = get_heading_encoding(response)

    # Only the first ``peek`` bytes of the body are handed to the detector.
    sample = wpull.util.peek_file(response.body, peek)

    encoding = wpull.string.detect_encoding(
        sample, encoding=header_encoding, is_html=is_html
    )

    _logger.debug(__('Got encoding: {0}', encoding))

    return encoding
def is_gzip(data):
    '''Return whether the data is likely to be gzip.

    Only the two-byte magic number at the start of the data is checked; the
    rest of the stream is not validated.

    Args:
        data (bytes): The leading bytes of the content to test.

    Returns:
        bool: ``True`` if the data begins with the gzip magic number.
    '''
    # RFC 1952: every gzip member starts with ID1=0x1f, ID2=0x8b.
    return data.startswith(b'\x1f\x8b')