Hover, code analysis for v2

- update environments to add tree-sitter dependency - update robocorp-truststore version - add hover - code analysis using new spec format for v2 - remove any trace of requests usage - make downloads with resume - use uv instead of pip in conda environments
Sema4AI · Aug 28, 2024 · 311fee5 · 311fee5
1 parent c36ef60
commit 311fee5
Show file tree

Hide file tree

Showing 42 changed files with 2,580 additions and 886 deletions.
diff --git a/sema4ai-python-ls-core/poetry.lock b/sema4ai-python-ls-core/poetry.lock
diff --git a/sema4ai-python-ls-core/pyproject.toml b/sema4ai-python-ls-core/pyproject.toml
@@ -43,16 +43,14 @@ isort = { version = "^5.12.0", python = "^3.8" }
 invoke = "^2.0"
 tomlkit = "^0.11.8"
 semver = "^3.0.0"
-requests = "*"
-truststore = "0.8.0"
+robocorp-truststore = "0.9.1"
 
 mock = "*"
 pytest = "*"
 pytest-regressions = "1.0.6"
 pytest-xdist = "*"
 pytest-timeout = "*"
 
-types-requests = "*"
 types-invoke = "^2.0"
 types-psutil = "^5.9"
 types-mock = "*"

diff --git a/...-ls-core/src/sema4ai_ls_core/debug_adapter_core/dap/__main__gen_debug_adapter_protocol.py b/...-ls-core/src/sema4ai_ls_core/debug_adapter_core/dap/__main__gen_debug_adapter_protocol.py
@@ -102,17 +102,17 @@ def __str__(self):
 
 
 def load_schema_data():
-    import os.path
     import json
+    import os.path
 
     json_file = os.path.join(os.path.dirname(__file__), "debugProtocol.json")
     if not os.path.exists(json_file):
-        import requests
+        from sema4ai_ls_core import http
 
-        req = requests.get(
+        req = http.get(
             "https://raw.githubusercontent.com/microsoft/debug-adapter-protocol/gh-pages/debugAdapterProtocol.json"
         )
-        assert req.status_code == 200
+        req.raise_for_status()
         with open(json_file, "wb") as stream:
             stream.write(req.content)
 
@@ -122,8 +122,8 @@ def load_schema_data():
 
 
 def load_custom_schema_data():
-    import os.path
     import json
+    import os.path
 
     json_file = os.path.join(os.path.dirname(__file__), "debugProtocolCustom.json")
 

diff --git a/sema4ai-python-ls-core/src/sema4ai_ls_core/http.py b/sema4ai-python-ls-core/src/sema4ai_ls_core/http.py
@@ -0,0 +1,178 @@
+import os
+import urllib.parse
+import urllib.request
+from pathlib import Path
+
+from sema4ai_ls_core.core_log import get_logger
+
+log = get_logger(__name__)
+
+
+def download_with_resume(url: str, target: Path, make_executable: bool) -> Path:
+    """
+    Downloads a file from a URL to a target path with resume support.
+
+    The contents are first streamed to `target`. When the server reported a
+    `Content-Length` and fewer bytes than that ended up on disk, the download
+    is resumed through `_resume_download` (up to 10 attempts).
+
+    Args:
+        url: The URL to download from.
+        target: The path where the downloaded contents are written.
+        make_executable: Whether the execute bits (user, group and others)
+            should be added to `target` once the download finishes.
+
+    Returns:
+        The `target` path.
+
+    Raises:
+        RuntimeError: If the file is still smaller than the reported
+            `Content-Length` after all the resume attempts.
+        Exception: Errors opening the URL or the target file are propagated
+            as is, as are streaming errors when no `Content-Length` is
+            available (the download cannot be resumed in that case) and the
+            error raised by the last resume attempt.
+    """
+    import stat
+
+    log.info(f"Downloading '{url}' to '{target}'")
+
+    try:
+        os.makedirs(os.path.dirname(target), exist_ok=True)
+    except OSError:
+        # Best effort: if the directory really isn't usable, opening the
+        # target below fails right away with a clearer error.
+        pass
+
+    chunk_size = 1024 * 5
+    with _open_urllib(url) as response:
+        content_size = int(response.getheader("Content-Length") or -1)
+        # The target is opened outside of the `try` below so that a failure
+        # to create it is reported immediately instead of being retried.
+        with open(target, "wb") as stream:
+            try:
+                # Note: in a bad connection an empty chunk can be returned
+                # even before finishing (so, the download is resumed
+                # afterward if that was the case).
+                while chunk := response.read(chunk_size):
+                    stream.write(chunk)
+            except Exception:
+                # Non-resumable case, just raise.
+                if content_size <= 0:
+                    raise
+                # Otherwise, keep on going to resume the download if it
+                # still hasn't finished.
+
+    if content_size > 0:
+        # The expected size is known: an incomplete file can be detected
+        # and resumed (without it there's nothing to compare against).
+        MAX_TRIES = 10
+        for attempt in range(MAX_TRIES):
+            curr_file_size = _get_file_size(target)
+            if curr_file_size >= content_size:
+                break
+
+            log.info(
+                f"Resuming download of '{url}' to '{target}' (downloaded {curr_file_size} of {content_size} (bytes))"
+            )
+            try:
+                _resume_download(url, target, chunk_size)
+            except Exception:
+                if attempt == MAX_TRIES - 1:
+                    raise
+
+        # Never hand back a partial file as if it had been fully downloaded.
+        if _get_file_size(target) < content_size:
+            raise RuntimeError(
+                f"Unable to download {url} to {target}. Please retry later."
+            )
+
+    if make_executable:
+        st = os.stat(target)
+        os.chmod(target, st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+
+    return target
+
+
+def _open_urllib(url: str, headers=None):
+    """
+    Opens `url` with urllib (using a 20 seconds timeout) and returns the
+    opened response.
+
+    A "Mozilla" User-Agent is always set; the entries given in `headers`
+    (if any) are applied on top of it.
+    """
+    # The default agent ("User-Agent: Python-urllib/3.9") seems to be blocked
+    # by Cloudflare, so a different one is sent to work around that.
+    request_headers = {"User-Agent": "Mozilla"}
+    if headers:
+        request_headers.update(headers)
+
+    request = urllib.request.Request(url, headers=request_headers)
+    return urllib.request.urlopen(request, timeout=20)
+
+
+def _get_file_size(filename: str | Path) -> int:
+    """
+    Returns the size (in bytes) of `filename` or 0 if it does not exist.
+
+    Used to find out how much of a file was already downloaded.
+    """
+    if not os.path.exists(filename):
+        return 0
+
+    with open(filename, "rb") as handle:
+        return os.fstat(handle.fileno()).st_size
+
+
+def _resume_download(url: str, filename: str | Path, chunk_size: int = 1024):
+    """Downloads a file in chunks with resume support.
+
+    When `filename` already has contents, only the missing bytes are
+    requested (through a `Range` header) and appended to it. If the server
+    does not answer with `206 Partial Content` (i.e.: it ignored the range
+    and is sending the whole file again), the file is rewritten from scratch
+    instead, as appending a full body would corrupt it.
+
+    Args:
+        url: The URL of the file to download.
+        filename: The filename to save the downloaded file.
+        chunk_size: The size of each chunk to download (in bytes).
+
+    Raises:
+        RuntimeError: If the response has no `Content-Length` header.
+    """
+    downloaded_size = _get_file_size(filename)
+    # Set headers for resume download
+    headers = {}
+    if downloaded_size > 0:
+        headers["Range"] = f"bytes={downloaded_size}-"
+
+    with _open_urllib(url, headers) as response:
+        # Checked before opening the file so that it's left untouched if the
+        # download can't proceed.
+        if not response.getheader("Content-Length"):
+            raise RuntimeError("Resuming downloads is not supported.")
+
+        # Only a 206 response carries just the requested remainder; anything
+        # else is the complete body, which must replace what's on disk.
+        mode = "ab" if response.status == 206 else "wb"
+        with open(filename, mode) as stream:
+            while chunk := response.read(chunk_size):
+                stream.write(chunk)
+
+
+class HTTPError(Exception):
+    """Custom HTTPError exception to mimic requests' HTTPError."""
+
+
+class Response:
+    """
+    Snapshot of an urllib response exposing a small, requests-like API.
+
+    The body is read eagerly when the instance is created, so it remains
+    available after the wrapped response is closed.
+    """
+
+    def __init__(self, response):
+        """
+        Args:
+            response: The opened urllib response. It must provide `status`,
+                `headers`, `url`, `reason` and `read()`.
+        """
+        self._response = response
+        self.status_code = response.status
+        self.headers = response.headers
+        self.url = response.url
+        self.reason = response.reason
+        # Read everything right away (the wrapped response is closed by the
+        # caller afterwards).
+        self.content = response.read()
+
+    @property
+    def text(self):
+        """The body decoded as UTF-8."""
+        return self.content.decode("utf-8")
+
+    def json(self):
+        """Returns the body parsed as JSON."""
+        import json
+
+        return json.loads(self.text)
+
+    def raise_for_status(self):
+        """
+        Raises `HTTPError` if the status code is a client or server error.
+
+        As in requests, only 4xx and 5xx codes are errors: other codes
+        (such as 201 or 204, common for POST/PUT/DELETE) are not.
+        """
+        if 400 <= self.status_code < 600:
+            raise HTTPError(f"{self.status_code} {self.reason}, accessing: {self.url}")
+
+
+def _request(method, url, **kwargs):
+    """
+    Performs an HTTP request with urllib and returns it wrapped in a
+    `Response` (the body is fully read before the connection is closed).
+
+    Args:
+        method: The HTTP method (upper-cased before being used).
+        url: The URL to be requested.
+        **kwargs: Only the following are accepted:
+            data: The request body. A dict is url-encoded, a str is encoded
+                as UTF-8 and anything else is passed to urllib as is.
+            headers: Mapping with the request headers (None means no
+                headers).
+            timeout: Timeout (in seconds) passed to `urlopen`.
+
+    Raises:
+        TypeError: If any other keyword argument is given.
+    """
+    data = kwargs.pop("data", None)
+    # `headers=None` is accepted as "no headers" (urllib requires a mapping).
+    headers = kwargs.pop("headers", None) or {}
+    timeout = kwargs.pop("timeout", None)
+    if kwargs:
+        # Explicit check (an `assert` would be stripped when running with -O).
+        raise TypeError(f"Unexpected kwargs: {kwargs}")
+
+    if isinstance(data, dict):
+        data = urllib.parse.urlencode(data).encode()
+    elif isinstance(data, str):
+        # urllib only accepts bytes-like bodies.
+        data = data.encode("utf-8")
+
+    req = urllib.request.Request(url, data=data, headers=headers, method=method.upper())
+
+    with urllib.request.urlopen(req, timeout=timeout) as response:
+        return Response(response)
+
+
+def get(url, **kwargs):
+    """Sends a GET request to `url` (keyword arguments are forwarded to `_request`)."""
+    http_method = "GET"
+    return _request(http_method, url, **kwargs)
+
+
+def post(url, **kwargs):
+    """Sends a POST request to `url` (keyword arguments are forwarded to `_request`)."""
+    http_method = "POST"
+    return _request(http_method, url, **kwargs)
+
+
+def put(url, **kwargs):
+    """Sends a PUT request to `url` (keyword arguments are forwarded to `_request`)."""
+    http_method = "PUT"
+    return _request(http_method, url, **kwargs)
+
+
+def delete(url, **kwargs):
+    """Sends a DELETE request to `url` (keyword arguments are forwarded to `_request`)."""
+    http_method = "DELETE"
+    return _request(http_method, url, **kwargs)
diff --git a/sema4ai-python-ls-core/src/sema4ai_ls_core/protocols.py b/sema4ai-python-ls-core/src/sema4ai_ls_core/protocols.py
@@ -1,41 +1,41 @@
 import sys
 import threading
+import typing
+from enum import Enum
 from typing import (
-    Dict,
-    Union,
     Any,
-    Generic,
     Callable,
+    Dict,
+    Generic,
+    Iterable,
+    List,
     Mapping,
     Optional,
-    List,
-    Type,
-    Iterable,
     Tuple,
+    Type,
+    TypeVar,
+    Union,
 )
-from typing import TypeVar
-import typing
-
-from enum import Enum
-
 
 if typing.TYPE_CHECKING:
     # This would lead to a circular import, so, do it only when type-checking.
     from sema4ai_ls_core.callbacks import Callback
-    from sema4ai_ls_core.lsp import TextDocumentContentChangeEvent
-    from sema4ai_ls_core.lsp import HoverResponseTypedDict
-    from sema4ai_ls_core.lsp import ReferencesResponseTypedDict
-    from sema4ai_ls_core.lsp import TextDocumentTypedDict
-    from sema4ai_ls_core.lsp import ResponseTypedDict
-    from sema4ai_ls_core.lsp import CodeLensTypedDict
-    from sema4ai_ls_core.lsp import RangeTypedDict
-    from sema4ai_ls_core.lsp import DocumentHighlightResponseTypedDict
-    from sema4ai_ls_core.lsp import PositionTypedDict
-    from sema4ai_ls_core.lsp import CompletionItemTypedDict
-    from sema4ai_ls_core.lsp import CompletionsResponseTypedDict
-    from sema4ai_ls_core.lsp import CompletionResolveResponseTypedDict
-    from sema4ai_ls_core.lsp import TextDocumentItem
-    from sema4ai_ls_core.lsp import TextEditTypedDict
+    from sema4ai_ls_core.lsp import (
+        CodeLensTypedDict,
+        CompletionItemTypedDict,
+        CompletionResolveResponseTypedDict,
+        CompletionsResponseTypedDict,
+        DocumentHighlightResponseTypedDict,
+        HoverResponseTypedDict,
+        PositionTypedDict,
+        RangeTypedDict,
+        ReferencesResponseTypedDict,
+        ResponseTypedDict,
+        TextDocumentContentChangeEvent,
+        TextDocumentItem,
+        TextDocumentTypedDict,
+        TextEditTypedDict,
+    )
 
 # Hack so that we don't break the runtime on versions prior to Python 3.8.
 if sys.version_info[:2] < (3, 8):
@@ -48,8 +48,7 @@ def __init_subclass__(self, *args, **kwargs):
             pass
 
 else:
-    from typing import Protocol
-    from typing import TypedDict
+    from typing import Protocol, TypedDict
 
 
 T = TypeVar("T")
@@ -838,6 +837,12 @@ def is_source_in_sync(self) -> bool:
     def find_line_with_contents(self, contents: str) -> int:
         pass
 
+    def set_custom_data(self, key: str, value: Any) -> None:
+        """
+        Protocol stub: no behavior is defined here.
+
+        NOTE(review): presumably implementers associate `value` with `key`
+        so that it can be read back later with `get_custom_data` -- confirm
+        against the concrete implementation.
+        """
+        pass
+
+    def get_custom_data(self, key: str) -> Any:
+        """
+        Protocol stub: no behavior is defined here.
+
+        NOTE(review): presumably implementers return the value previously
+        stored for `key` with `set_custom_data`; what happens for an unknown
+        key is not visible from here -- confirm against the concrete
+        implementation.
+        """
+        pass
+
 
 class IWorkspaceFolder(Protocol):
     uri: str