From 5cab892dcd26fb51938634e13e300c6611ab66e0 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:46:51 +0200 Subject: [PATCH] [Streaming] retry on requests errors (#6963) * [Streaming] retry on requests errors * lucain's comment --- src/datasets/utils/file_utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/datasets/utils/file_utils.py b/src/datasets/utils/file_utils.py index a4f28e2ddd0..2fb6ca20438 100644 --- a/src/datasets/utils/file_utils.py +++ b/src/datasets/utils/file_utils.py @@ -4,6 +4,7 @@ Copyright by the AllenNLP authors. """ +import asyncio import copy import glob import io @@ -20,7 +21,6 @@ import warnings import xml.dom.minidom import zipfile -from asyncio import TimeoutError from contextlib import closing, contextmanager from functools import partial from io import BytesIO @@ -31,10 +31,10 @@ from urllib.parse import urljoin, urlparse from xml.etree import ElementTree as ET +import aiohttp.client_exceptions import fsspec import huggingface_hub import requests -from aiohttp.client_exceptions import ClientError from fsspec.core import strip_protocol, url_to_fs from fsspec.utils import can_be_local from huggingface_hub.utils import EntryNotFoundError, insecure_hashlib @@ -1093,7 +1093,12 @@ def read_with_retries(*args, **kwargs): try: out = read(*args, **kwargs) break - except (ClientError, TimeoutError) as err: + except ( + aiohttp.client_exceptions.ClientError, + asyncio.TimeoutError, + requests.exceptions.ConnectTimeout, + requests.exceptions.ConnectionError, + ) as err: disconnect_err = err logger.warning( f"Got disconnected from remote data host. Retrying in {config.STREAMING_READ_RETRY_INTERVAL}sec [{retry}/{max_retries}]"