Skip to content

Commit

Permalink
Support configuring a proxy for fetching YouTube transcripts
Browse files Browse the repository at this point in the history
Add a `YOUTUBE_PROXY` environment variable which specifies a proxy
service to use when fetching caption tracks/transcripts from YouTube.

The value of this variable should be a proxy URL that can be used as a value
in the `proxies` mapping accepted by `requests.post`, e.g.
"http://proxy_user:proxy_pass@proxy_host:port".

Part of hypothesis/support#143
Slack thread: https://hypothes-is.slack.com/archives/C4K6M7P5E/p1723188976861449
  • Loading branch information
robertknight committed Aug 12, 2024
1 parent ee4b292 commit c141d88
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 3 deletions.
35 changes: 33 additions & 2 deletions tests/unit/via/services/youtube_transcript_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,12 +289,43 @@ def svc(self, http_service):


class TestFactory:
    """Tests for ``factory``, which builds the YouTube transcript service."""

    def test_factory_without_proxy(
        self, YouTubeTranscriptService, pyramid_request, HTTPService
    ):
        # This test does not set `youtube_proxy`, so the session handed to
        # HTTPService is expected to carry no proxy configuration.
        svc = factory(sentinel.context, pyramid_request)

        HTTPService.assert_called_once()
        session = HTTPService.call_args[1]["session"]
        assert session.proxies == {}

        YouTubeTranscriptService.assert_called_once_with(
            http_service=HTTPService.return_value
        )

        assert svc == YouTubeTranscriptService.return_value

    def test_factory_with_proxy(
        self, YouTubeTranscriptService, pyramid_request, HTTPService
    ):
        # Well-formed proxy URL: credentials are `user:password`, followed by
        # `@host:port`.
        proxy_server = "http://proxy_user:proxy_pass@proxy_host.com:123"
        pyramid_request.registry.settings["youtube_proxy"] = proxy_server

        svc = factory(sentinel.context, pyramid_request)

        # The configured proxy must be registered for the "https" scheme only.
        HTTPService.assert_called_once()
        session = HTTPService.call_args[1]["session"]
        assert session.proxies == {"https": proxy_server}

        YouTubeTranscriptService.assert_called_once_with(
            http_service=HTTPService.return_value
        )

        assert svc == YouTubeTranscriptService.return_value

    @pytest.fixture
    def HTTPService(self, patch):
        """Patch the HTTPService class as referenced by the module under test."""
        return patch("via.services.youtube_transcript.HTTPService")

    @pytest.fixture
    def YouTubeTranscriptService(self, patch):
        """Patch the YouTubeTranscriptService class built by the factory."""
        return patch("via.services.youtube_transcript.YouTubeTranscriptService")
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ passenv =
dev: CLIENT_EMBED_URL
dev: SIGNED_URLS_REQUIRED
dev: YOUTUBE_API_KEY
dev: YOUTUBE_PROXY
deps =
pip-tools
pip-sync-faster
Expand Down
1 change: 1 addition & 0 deletions via/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"youtube_transcripts": {"formatter": asbool},
"api_jwt_secret": {"required": True},
"youtube_api_key": {},
"youtube_proxy": {},
}


Expand Down
8 changes: 7 additions & 1 deletion via/services/youtube_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from typing import Dict, List
from xml.etree import ElementTree

import requests

from via.services.http import HTTPService


Expand Down Expand Up @@ -145,4 +147,8 @@ def strip_html(xml_string):


def factory(_context, request):
    """Build a YouTubeTranscriptService with an optionally proxied HTTP client.

    A dedicated ``requests.Session`` is created for this service. When the
    ``youtube_proxy`` setting is present and non-empty, it is registered as
    the proxy for the "https" scheme on that session.

    :param _context: unused; part of the service factory signature
    :param request: the current request; ``request.registry.settings`` is
        read for the ``youtube_proxy`` key
    :return: a YouTubeTranscriptService wrapping an HTTPService that uses
        the session configured here
    """
    session = requests.Session()

    # Only the "https" scheme is proxied; a missing or empty setting leaves
    # the session's proxy mapping untouched.
    if youtube_proxy := request.registry.settings.get("youtube_proxy"):
        session.proxies["https"] = youtube_proxy

    # NOTE(review): the service gets its own HTTPService instead of the one
    # from `request.find_service`, presumably so the proxy applies only to
    # requests made by this service -- confirm.
    http_svc = HTTPService(session=session)
    return YouTubeTranscriptService(http_service=http_svc)

0 comments on commit c141d88

Please sign in to comment.