
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf8 in position 0: invalid start byte #901

kyosukegg opened this issue Jun 14, 2024 · 7 comments

@kyosukegg

When I run model-analyzer, I get "UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf8 in position 0: invalid start byte".
The same problem occurs with the latest tag, 24.05-py3-sdk.
Why do I get this error, and how can I get rid of it?

[compose.yml]

services:
  profile:
    image: nvcr.io/nvidia/tritonserver:22.12-py3-sdk
    container_name: profile
    build:
      context: .
      # dockerfile: profile/Dockerfile
    command: /bin/bash
    tty: true
    volumes:
      # model repository path
      - /home/kyosukegg/detection-app/detection/models:/home/kyosukegg/detection-app/detection/models
      - /var/run/docker.sock:/var/run/docker.sock
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [ gpu ]
    network_mode: host

[command]

model-analyzer -v profile --model-repository /home/kyosukegg/detection-app/detection/models  --profile-models car_model --triton-launch-mode=docker --output-model-repository-path /home/kyosukegg/detection-app/profile/car_model --export-path /workspace/profile_repo --triton-output-path ./triton.txt --override-output-model-repository

[Error]

09:31:58 [Model Analyzer] DEBUG: 
{'batch_sizes': [1],
 'checkpoint_directory': '/workspace/checkpoints',
 'client_max_retries': 50,
 'client_protocol': 'grpc',
 'collect_cpu_metrics': False,
 'concurrency': [],
 'config_file': None,
 'constraints': {},
 'duration_seconds': 3,
 'early_exit_enable': False,
 'export_path': '/workspace/profile_repo',
 'filename_model_gpu': 'metrics-model-gpu.csv',
 'filename_model_inference': 'metrics-model-inference.csv',
 'filename_server_only': 'metrics-server-only.csv',
 'gpu_output_fields': ['model_name',
                       'gpu_uuid',
                       'batch_size',
                       'concurrency',
                       'model_config_path',
                       'instance_group',
                       'satisfies_constraints',
                       'gpu_used_memory',
                       'gpu_utilization',
                       'gpu_power_usage'],
 'gpus': ['all'],
 'inference_output_fields': ['model_name',
                             'batch_size',
                             'concurrency',
                             'model_config_path',
                             'instance_group',
                             'max_batch_size',
                             'satisfies_constraints',
                             'perf_throughput',
                             'perf_latency_p99'],
 'latency_budget': None,
 'min_throughput': None,
 'model_repository': '/home/kyosukegg/detection-app/detection/models',
 'monitoring_interval': 1.0,
 'num_configs_per_model': 3,
 'num_top_model_configs': 0,
 'objectives': {'perf_throughput': 10},
 'output_model_repository_path': '/home/kyosukegg/detection-app/profile/car_model',
 'override_output_model_repository': True,
 'perf_analyzer_cpu_util': 2240.0,
 'perf_analyzer_flags': {},
 'perf_analyzer_max_auto_adjusts': 10,
 'perf_analyzer_path': 'perf_analyzer',
 'perf_analyzer_timeout': 600,
 'perf_output': False,
 'perf_output_path': None,
 'plots': [{'name': 'throughput_v_latency', 'title': 'Throughput vs. Latency', 'x_axis': 'perf_latency_p99', 'y_axis': 'perf_throughput', 'monotonic': True},
           {'name': 'gpu_mem_v_latency', 'title': 'GPU Memory vs. Latency', 'x_axis': 'perf_latency_p99', 'y_axis': 'gpu_used_memory', 'monotonic': False}],
 'profile_models': [{'model_name': 'car_model', 'cpu_only': False, 'objectives': {'perf_throughput': 10}, 'parameters': {'batch_sizes': [1], 'concurrency': []}, 'weighting': 1}],
 'reload_model_disable': False,
 'run_config_profile_models_concurrently_enable': False,
 'run_config_search_disable': False,
 'run_config_search_max_concurrency': 1024,
 'run_config_search_max_instance_count': 5,
 'run_config_search_max_model_batch_size': 128,
 'run_config_search_min_concurrency': 1,
 'run_config_search_min_instance_count': 1,
 'run_config_search_min_model_batch_size': 1,
 'run_config_search_mode': 'brute',
 'server_output_fields': ['model_name',
                          'gpu_uuid',
                          'gpu_used_memory',
                          'gpu_utilization',
                          'gpu_power_usage'],
 'skip_summary_reports': False,
 'triton_docker_image': 'nvcr.io/nvidia/tritonserver:22.12-py3',
 'triton_docker_labels': {},
 'triton_docker_mounts': [],
 'triton_docker_shm_size': None,
 'triton_grpc_endpoint': 'localhost:8001',
 'triton_http_endpoint': 'localhost:8000',
 'triton_install_path': '/opt/tritonserver',
 'triton_launch_mode': 'docker',
 'triton_metrics_url': 'http://localhost:8002/metrics',
 'triton_output_path': './triton.txt',
 'triton_server_environment': {},
 'triton_server_flags': {},
 'triton_server_path': 'tritonserver',
 'weighting': None}
09:31:58 [Model Analyzer] Initializing GPUDevice handles
Traceback (most recent call last):
  File "/usr/local/bin/model-analyzer", line 8, in <module>
    sys.exit(main())
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/entrypoint.py", line 251, in main
    gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus)
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/device/gpu_device_factory.py", line 36, in __init__
    self.init_all_devices()
  File "/usr/local/lib/python3.8/dist-packages/model_analyzer/device/gpu_device_factory.py", line 66, in init_all_devices
    device_name = str(device_atrributes.deviceName,
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf8 in position 0: invalid start byte

[value of device_atrributes.deviceName]

b'\xf8\x95\xa0\x81\x8e\xf8\x91\x80\x81\x89\xf8\x90\x90\x81\x89\xf8\x91\xb0\x80\xa0\xf8\x91\xa0\x81\xa5\xf8\x9c\xa0\x81\xaf\xf8\x99\x90\x81\xa3\xf8\x94\xa0\x80\xa0\xf8\x96\x80\x81\x94\xf8\x8d\x80\x80\xa0\xf8\x8d\xb0\x80\xb00'
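
For reference, the leading byte alone reproduces the error in a plain Python session, since 0xf8 can never start a valid UTF-8 sequence:

>>> b'\xf8\x95\xa0\x81\x8e'.decode('utf-8')
Traceback (most recent call last):
  ...
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf8 in position 0: invalid start byte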

[environment]

  • WSL2, Ubuntu-22.04 (the same error also occurs on Ubuntu-20.04)
  • docker version 26.1.4
  • NVIDIA GeForce RTX 4070

Best regards,

@nv-braf (Contributor) commented Jun 14, 2024

The version of DCGM you are using is incompatible with the version of MA you are running. You can fix this by using a more recent (within the last 6 months) release of MA.
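
If it helps, you can check which release you have from inside the SDK container (package name as published on PyPI):

$ pip show triton-model-analyzer | grep Version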

@kyosukegg (Author)

I tried MA version 1.40.0, but I got the same error.

Does DCGM mean Data Center GPU Manager?
I don't have DCGM installed; is it something I should install?

When I printed dcgmPath on line 54 of model_analyzer/device/gpu_device_factory.py, it was None.
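
For anyone debugging the same thing, here is a quick way to check whether Python can load the DCGM shared library at all (the library name is an assumption based on a DCGM 3.x install; older releases ship libdcgm.so.2):

import ctypes

try:
    # Attempt to load the DCGM shared library by its SONAME
    ctypes.CDLL("libdcgm.so.3")
    print("libdcgm is loadable")
except OSError as exc:
    print("libdcgm is NOT loadable:", exc)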

@nv-braf (Contributor) commented Jun 14, 2024

Yes, you need to have DCGM installed to run MA.

@kyosukegg (Author)

I understand. I will try again after installing DCGM.

@kyosukegg (Author) commented Jun 15, 2024

@nv-braf
Unfortunately, this did not resolve the issue.
DCGM was already installed in the container and the GPU is recognized correctly, so I also tried installing it on the host side, but that did not help either.
Inside the Docker container:

$ dcgmi discovery -l
1 GPU found.
+--------+----------------------------------------------------------------------+
| GPU ID | Device Information                                                   |
+--------+----------------------------------------------------------------------+
| 0      | Name: NVIDIA GeForce RTX 4070                                        |
|        | PCI Bus ID: 00000000:01:00.0                                         |
|        | Device UUID: GPU-eb2680cc-69ce-69df-3073-7268cefc8776                |
+--------+----------------------------------------------------------------------+
0 NvSwitches found.
+-----------+
| Switch ID |
+-----------+
+-----------+

Do you have any ideas?

@Anas0x45

If you still have the problem, check #931.

@kyosukegg (Author)

Thanks for the information. I now have a similar workaround.
In gpu_device_factory.py, starting at line 41:

def init_all_devices(self, dcgmPath=None):
    """
    Create GPUDevice objects for all DCGM visible
    devices.

    Parameters
    ----------
    dcgmPath : str
        Absolute path to dcgm shared library
    """

    if numba.cuda.is_available():
        logger.info("Initializing GPUDevice handles")
        structs._dcgmInit(dcgmPath)
        dcgm_agent.dcgmInit()

        # Start DCGM in the embedded mode to use the shared library
        dcgm_handle = dcgm_agent.dcgmStartEmbedded(
            structs.DCGM_OPERATION_MODE_MANUAL
        )

        # Create a GPU device for every supported DCGM device
        dcgm_device_ids = dcgm_agent.dcgmGetAllSupportedDevices(dcgm_handle)

        for device_id in dcgm_device_ids:
            device_atrributes = dcgm_agent.dcgmGetDeviceAttributes(
                dcgm_handle, device_id
            ).identifiers
            # <My custom change> ---------------------------------------
            try:
                pci_bus_id = device_atrributes.pciBusId
                device_uuid = device_atrributes.uuid
                device_name = device_atrributes.deviceName
            except UnicodeDecodeError:
                # The attribute strings came back as undecodable bytes,
                # so recover the same fields from `dcgmi discovery -l`.
                import os

                keys = ['Name', 'PCI Bus ID', 'Device UUID']
                device_atrributes = {}

                output = os.popen('dcgmi discovery -l').read().splitlines()
                for i, line in enumerate(output):
                    # Match the "GPU ID" column of the table row for this
                    # device (assumes single-digit device IDs).
                    if len(line) > 2 and line[2] == str(device_id):
                        attributes = output[i:i + len(keys)]
                        for key in keys:
                            for row in attributes:
                                if row.find(key) != -1:
                                    # Keep the text between the ':' and the
                                    # trailing '|' of the table cell.
                                    sindex = row.find(':') + 1
                                    eindex = row.rfind('|')
                                    device_atrributes[key] = row[sindex:eindex].strip()
                pci_bus_id = device_atrributes[keys[1]]
                device_uuid = device_atrributes[keys[2]]
                device_name = device_atrributes[keys[0]]
            # -----------------------------------------------------------

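A more minimal variant of the same idea (an untested sketch, not the patch I applied) would be to keep the original code path and just make the failing decode non-fatal, at the cost of a garbled display name:

# Hypothetical one-line alternative to the fallback above:
# errors='replace' substitutes U+FFFD for undecodable bytes instead of raising
device_name = str(device_atrributes.deviceName, encoding='utf-8', errors='replace')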
