From 2d0324946a8b7495cce77956375a824fa5c3c35b Mon Sep 17 00:00:00 2001 From: greats3an Date: Sat, 24 Aug 2024 14:45:00 +0800 Subject: [PATCH] Version 0.2.7 Implement `rla2json` command to convert RLA files to JSON format * Please refer to the updated Wiki page for more information Removed `mitm` script --- .vscode/launch.json | 13 +-- sssekai/__init__.py | 2 +- sssekai/__main__.py | 10 +- sssekai/entrypoint/mitm.py | 7 -- sssekai/entrypoint/rla2json.py | 49 +++++++++ sssekai/entrypoint/usmdemux.py | 12 +-- sssekai/fmt/rla.py | 191 +++++++++++++++++++++++++++++++++ 7 files changed, 258 insertions(+), 26 deletions(-) delete mode 100644 sssekai/entrypoint/mitm.py create mode 100644 sssekai/entrypoint/rla2json.py create mode 100644 sssekai/fmt/rla.py diff --git a/.vscode/launch.json b/.vscode/launch.json index d53d5e9..79cdf0c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -139,19 +139,16 @@ "justMyCode": false }, { - "name": "MITM: Launch server", + "name": "Python: rla2json", "type": "python", "request": "launch", "module": "sssekai", - "cwd": "${workspaceFolder}", "args": [ - "-p", - "8888", - "--mode", - "upstream:http://127.0.0.1:8080/", - "-q" + "rla2json", + "H:\\Sekai\\streaming_live\\archive\\1st_live_vbs-1", + "H:\\Sekai\\streaming_live\\archive\\1st_live_vbs-1_src" ], - "justMyCode": false + "justMyCode": true }, ] } \ No newline at end of file diff --git a/sssekai/__init__.py b/sssekai/__init__.py index 6bc5ad5..2ade290 100644 --- a/sssekai/__init__.py +++ b/sssekai/__init__.py @@ -1,5 +1,5 @@ __VERSION_MAJOR__ = 0 __VERSION_MINOR__ = 2 -__VERSION_PATCH__ = 6 +__VERSION_PATCH__ = 7 __version__ = '%s.%s.%s' % (__VERSION_MAJOR__,__VERSION_MINOR__,__VERSION_PATCH__) diff --git a/sssekai/__main__.py b/sssekai/__main__.py index bc47aee..b7b407f 100644 --- a/sssekai/__main__.py +++ b/sssekai/__main__.py @@ -3,7 +3,7 @@ import argparse from sssekai.entrypoint.apidecrypt import main_apidecrypt from sssekai.entrypoint.abdecrypt import main_abdecrypt 
-from sssekai.entrypoint.mitm import main_mitm
+from sssekai.entrypoint.rla2json import main_rla2json
 from sssekai.entrypoint.usmdemux import main_usmdemux
 from sssekai.entrypoint.abcache import main_abcache, DEFAULT_CACHE_DB_FILE
 from sssekai.entrypoint.live2dextract import main_live2dextract
@@ -75,9 +75,11 @@ def write(__s):
     spineextract_parser.add_argument('infile', type=str, help='input file')
     spineextract_parser.add_argument('outdir', type=str, help='output directory')
     spineextract_parser.set_defaults(func=main_spineextract)
-    # mitm
-    mitm_parser = subparsers.add_parser('mitm', help='Run Sekai API MITM proxy (WIP)')
-    mitm_parser.set_defaults(func=main_mitm)
+    # rla2json
+    rla2json_parser = subparsers.add_parser('rla2json', help='''Read streaming_live/archive files and dump their information to JSON''')
+    rla2json_parser.add_argument('infile', type=str, help='input file')
+    rla2json_parser.add_argument('outdir', type=str, help='output directory. multiple json files may be produced')
+    rla2json_parser.set_defaults(func=main_rla2json)
     # parse args
     args = parser.parse_args()
     # set logging level
diff --git a/sssekai/entrypoint/mitm.py b/sssekai/entrypoint/mitm.py
deleted file mode 100644
index ceaf77f..0000000
--- a/sssekai/entrypoint/mitm.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import os,sys
-def main_mitm(args):
-    from mitmproxy.tools.main import mitmdump
-    current_dir = os.path.dirname(os.path.realpath(__file__))
-    args = [*sys.argv[1:], '-s', '"%s"' % os.path.join(os.path.dirname(os.path.dirname(current_dir)),'mitmproxy_sekai_api.py')]
-    print('running mitmdump with args:', *args)
-    mitmdump(args=args)
diff --git a/sssekai/entrypoint/rla2json.py b/sssekai/entrypoint/rla2json.py
new file mode 100644
index 0000000..de5a1ab
--- /dev/null
+++ b/sssekai/entrypoint/rla2json.py
@@ -0,0 +1,74 @@
+from os import path,makedirs
+from json import loads,dump
+from io import BytesIO
+from logging import getLogger
+from concurrent.futures import ProcessPoolExecutor,as_completed
+from time import sleep
+from sssekai.unity.AssetBundle import load_assetbundle
+from UnityPy.enums import ClassIDType
+from sssekai.fmt.rla import read_rla
+from tqdm import tqdm
+logger = getLogger(__name__)
+
+
+def dump_json(obj, fname):
+    '''Writes `obj` to `fname` as indented JSON.
+
+    The file is always written as UTF-8 since `ensure_ascii=False` emits non-ASCII
+    characters verbatim, which the platform default encoding may be unable to encode.
+    '''
+    with open(fname, 'w', encoding='utf-8') as f:
+        dump(obj, f, indent=4, ensure_ascii=False)
+
+
+def worker_job(sname, version, script):
+    '''Parses a single RLA split and dumps it to `sname + '.json'`.
+
+    Submitted to the process pool by `main_rla2json`.
+
+    Args:
+        sname (str): Output path without the '.json' suffix
+        version (tuple): RLA version, as read from the RLH header
+        script (bytes): Raw content of the RLA split
+    '''
+    rla = read_rla(BytesIO(script), version)
+    dump_json(rla, sname + '.json')
+
+
+def main_rla2json(args):
+    '''Entrypoint of the `rla2json` command.
+
+    Dumps the RLH header found in the AssetBundle `args.infile` to 'sekai.rlh.json',
+    and every RLA split it lists to its own JSON file, all inside `args.outdir`.
+    '''
+    with open(args.infile,'rb') as f:
+        env = load_assetbundle(f)
+        datas = dict()
+        for obj in env.objects:
+            if obj.type in {ClassIDType.TextAsset}:
+                data = obj.read()
+                datas[data.name] = data
+        header = datas.get('sekai.rlh', None)
+        assert header, "RLH Header file not found!"
+        makedirs(args.outdir, exist_ok=True)
+        header = loads(header.text)
+        dump_json(header, path.join(args.outdir, 'sekai.rlh.json'))
+        version = tuple(map(int, header['version'].split('.')))
+        splitSeconds = header['splitSeconds']
+        logger.info('Version: %d.%d' % version)
+        logger.info('Count: %d' % len(header['splitFileIds']))
+        with ProcessPoolExecutor() as executor:
+            logger.info('Dumping RLA data with %d processors' % executor._max_workers)
+            futures = dict()
+            for sid in header['splitFileIds']:
+                sname = 'sekai_%2d_%08d' % (splitSeconds, sid)
+                script = datas[sname + '.rla'].script.tobytes()
+                future = executor.submit(worker_job, path.join(args.outdir,sname), version, script)
+                futures[future] = sname
+            with tqdm(total=len(futures)) as pbar:
+                for future in as_completed(futures):
+                    # Fetch the outcome so that a failed worker is reported instead of being silently dropped
+                    error = future.exception()
+                    if error is not None:
+                        logger.error('Failed to dump %s: %s' % (futures[future], error))
+                    pbar.update(1)
\ No newline at end of file
diff --git a/sssekai/entrypoint/usmdemux.py b/sssekai/entrypoint/usmdemux.py
index 65440ba..0ab88ba 100644
--- a/sssekai/entrypoint/usmdemux.py
+++ b/sssekai/entrypoint/usmdemux.py
@@ -1,10 +1,10 @@
 from sssekai.unity.AssetBundle import load_assetbundle
-
+from UnityPy.enums import ClassIDType
+from wannacri.usm import
Usm from os import path,remove,makedirs +from logging import getLogger +logger = getLogger(__name__) def main_usmdemux(args): - from UnityPy.enums import ClassIDType - from wannacri.usm import Usm - with open(args.infile,'rb') as f: env = load_assetbundle(f) datas = dict() @@ -16,7 +16,7 @@ def main_usmdemux(args): assert movieInfo, "Invalid AssetBundle. No MovieBundleBuildData found!" movieInfo = movieInfo.read_typetree() usm_name = movieInfo['movieBundleDatas'][0]['usmFileName'][:-len('.bytes')] - print('USM: %s' % usm_name) + logger.info('USM: %s' % usm_name) usm_folder = path.join(args.outdir,usm_name) makedirs(usm_folder,exist_ok=True) usm_temp = path.join(usm_folder,usm_name + '.tmp') @@ -28,4 +28,4 @@ def main_usmdemux(args): usm = Usm.open(usm_temp,encoding='shift-jis') usm.demux(path.join(args.outdir,usm_name),usm_name) remove(usm_temp) - print('Saved to %s/' % usm_folder) \ No newline at end of file + logger.info('Saved to %s/' % usm_folder) \ No newline at end of file diff --git a/sssekai/fmt/rla.py b/sssekai/fmt/rla.py new file mode 100644 index 0000000..630d913 --- /dev/null +++ b/sssekai/fmt/rla.py @@ -0,0 +1,191 @@ +from base64 import b64decode, b64encode +from struct import unpack as s_unpack +from io import BytesIO +from collections import defaultdict +import math, gzip +import msgpack + +def read_rla(src : BytesIO, version=(1,0)) -> dict: + '''Parses the Sekai RLA file format used in 'streaming_live/archive' assets. + + Args: + src (BytesIO): Source RLA file stream + version (tuple, optional): RLA version, found in respective RLH (JSON) header files. Defaults to (1,0). + + Returns: + dict: Parsed RLA data. The dictionary is sorted by the frame ticks. 
+ ''' + read_int = lambda stream, nbytes, signed=False: int.from_bytes(stream.read(nbytes), 'little',signed=signed) + read_float = lambda stream: s_unpack(' + # Sekai_Streaming_StreamingData__Deserialize + assert get_next_byte() == decoder_type + compress_type = get_next_int() + sequence_no = get_next_int() + target_time = get_next_long() + # StreamingData$$ReadValue implementations + deg_to_rad = lambda value: tuple(deg * (math.pi / 180) for deg in value) + get_next_deg_as_rad = lambda: deg_to_rad(get_next_ushort_vector3()) # Eulers [0, 2pi] + get_next_pose_data = lambda: { + 'bodyPosition': get_next_vector3(), + 'bodyRotation': get_next_deg_as_rad(), # Eulers + **({ + 'musicItemPropPosition': get_next_vector3(), + 'musicItemPropRotation': get_next_deg_as_rad() + } if version >= (1,1) else {}), + 'boneDatas': get_next_array(get_next_deg_as_rad), # Eulers [0, 2pi] + 'shapeDatas': get_next_array(get_next_float), # [0,100] + **({ + 'propBoneDatas': get_next_array(get_next_deg_as_rad) # Eulers [0, 2pi] + } if version >= (1,1) else {}), + 'heightOffset': get_next_short() * 0.01, + 'isActive': get_next_mask(), + 'useActiveFx': get_next_mask(), + **({ + 'isEyeLookAt': get_next_mask() + } if version >= (1,4) else {}) + } + match decoder_type: + case 0: + # Sekai_Streaming_MotionData + timeStamps = get_next_array(get_next_long) if get_next_pred() == 2 else None + poses = get_next_array(get_next_pose_data) + return {'type': 'MotionData', 'timeStamps': timeStamps, 'poses': poses} + case 1: + # Sekai_Streaming_MotionCaptureData + read_character_capture_data = lambda: { + 'id': get_next_int(), + 'timestamp': get_next_long(), + 'pose': get_next_pose_data() + } + data = get_next_array(read_character_capture_data) + return {'type': 'MotionCaptureData', 'data': data} + case 2: + # Sekai_Streaming_SoundData + channels = get_next_int() + sample_rate = get_next_int() + data_length = get_next_int() + if compress_type != 1: + data = stream.read(data_length) # Raw sampling data + return 
{'type': 'SoundData', 'channels': channels, 'sampleRate': sample_rate, 'encoding': 'raw', 'data': b64encode(data).decode()} + else: + data = stream.read(data_length) # HCA w/o metadata + return {'type': 'SoundData', 'channels': channels, 'sampleRate': sample_rate, 'encoding': 'hca', 'data': b64encode(data).decode()} + case 3: + # Sekai_Streaming_StatusData + read_stage_status = lambda: { + 'liveState': get_next_byte(), + 'lightIntensity': get_next_float(), + 'gayaVolume': get_next_float(), + 'cheerVolume': get_next_float(), + 'characterSpotlightIndex': get_next_int(), + 'characterSpotlightIntensity': get_next_float(), + 'stageSetlistIndex': get_next_int(), + 'musicSetlistIndex': get_next_int(), + 'musicTargetTime': get_next_long(), + 'musicStartTime': get_next_float(), + 'seId': get_next_int(), + 'seStartTime': get_next_long(), + 'timeStamp': get_next_long(), + 'unk0': get_next_byte(), + 'unk1': get_next_byte(), + 'playTimelineId': get_next_int(), + ** ({ + 'screenFadeColorR': get_next_byte(), + 'screenFadeColorG': get_next_byte(), + 'screenFadeColorB': get_next_byte(), + 'screenFade': get_next_float(), + 'characterFormationRotate': get_next_int(), + 'stageCenterPosition': get_next_vector3(), + 'playerAvatarStartPosition': get_next_vector3() + } if version >= (1,4) else {}) + } + read_character_status = lambda: { + 'costumeIndex': get_next_int(), + ** ({'useFx': get_next_mask()} if version >= (1,2) else {}), + 'timeStamp': get_next_long(), + } + stage_status_list = get_next_array(read_stage_status) + stage_status_length = get_next_int() + charcter_status_list = [get_next_array(read_character_status) for _ in range(stage_status_length)] + return {'type': 'StatusData', 'stageStatus': stage_status_list, 'characterStatus': charcter_status_list} + case 4: + # Sekai_Streaming_VirtualLiveMessageData + message_id = get_next_int() + user_id = get_next_string() + data_length = get_next_int() + data = stream.read(data_length) + # 
CP_BinarySerializer__Deserialize_OtherRoomMessageData + # CP_BinarySerializer__Deserialize_OtherRoomActionData + # NOTE: The msgpack payload does not contain the message type + data = msgpack.unpackb(data) + return {'type': 'VirtualLiveMessageData','messageId': message_id, 'userId': user_id, 'data': data} + case 5: + # Sekai_Streaming_ComplementInfoData + info_type = get_next_int() + info_data = get_next_string() + return {'type': 'ComplementInfoData', 'infoType': info_type, 'infoData': info_data} + return {'type': 'Unknown', 'data': buffer} + result = defaultdict(dict) + def read_frames(): + ticks = read_int(src, 8) + if ticks: + buffer_length = read_int(src, 4) + buffer = src.read(buffer_length) + data = decode_buffer_base64(buffer) + decoder_type, data = decode_buffer_payload(data) + data = decode_streaming_data(decoder_type, data) + result[ticks].setdefault(data['type'], list()).append(data) + return True + return False + while read_frames(): pass + result = dict(sorted(result.items(), key=lambda x: x[0])) + return result + +if __name__ == '__main__': + result = read_rla(open(r"D:\project\TextAsset\sekai_30_00000000.rla.bytes",'rb'), (1,0)) + pass \ No newline at end of file