From 2d0324946a8b7495cce77956375a824fa5c3c35b Mon Sep 17 00:00:00 2001
From: greats3an <greats3an@gmail.com>
Date: Sat, 24 Aug 2024 14:45:00 +0800
Subject: [PATCH] Version 0.2.7: implement `rla2json` command to convert RLA
 files to JSON format (please refer to the updated Wiki page for more
 information); removed `mitm` script

---
 .vscode/launch.json            |  13 +--
 sssekai/__init__.py            |   2 +-
 sssekai/__main__.py            |  10 +-
 sssekai/entrypoint/mitm.py     |   7 --
 sssekai/entrypoint/rla2json.py |  49 +++++++++
 sssekai/entrypoint/usmdemux.py |  12 +--
 sssekai/fmt/rla.py             | 191 +++++++++++++++++++++++++++++++++
 7 files changed, 258 insertions(+), 26 deletions(-)
 delete mode 100644 sssekai/entrypoint/mitm.py
 create mode 100644 sssekai/entrypoint/rla2json.py
 create mode 100644 sssekai/fmt/rla.py

diff --git a/.vscode/launch.json b/.vscode/launch.json
index d53d5e9..79cdf0c 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -139,19 +139,16 @@
             "justMyCode": false
         },
         {
-            "name": "MITM: Launch server",
+            "name": "Python: rla2json",
             "type": "python",
             "request": "launch",
             "module": "sssekai",
-            "cwd": "${workspaceFolder}",
             "args": [
-                "-p",
-                "8888",
-                "--mode",
-                "upstream:http://127.0.0.1:8080/",
-                "-q"
+                "rla2json",
+                "H:\\Sekai\\streaming_live\\archive\\1st_live_vbs-1",
+                "H:\\Sekai\\streaming_live\\archive\\1st_live_vbs-1_src"
             ],
-            "justMyCode": false
+            "justMyCode": true
         },
     ]
 }
\ No newline at end of file
diff --git a/sssekai/__init__.py b/sssekai/__init__.py
index 6bc5ad5..2ade290 100644
--- a/sssekai/__init__.py
+++ b/sssekai/__init__.py
@@ -1,5 +1,5 @@
 __VERSION_MAJOR__ = 0
 __VERSION_MINOR__ = 2
-__VERSION_PATCH__ = 6
+__VERSION_PATCH__ = 7
 
 __version__ = '%s.%s.%s' % (__VERSION_MAJOR__,__VERSION_MINOR__,__VERSION_PATCH__)
diff --git a/sssekai/__main__.py b/sssekai/__main__.py
index bc47aee..b7b407f 100644
--- a/sssekai/__main__.py
+++ b/sssekai/__main__.py
@@ -3,7 +3,7 @@
 import argparse
 from sssekai.entrypoint.apidecrypt import main_apidecrypt
 from sssekai.entrypoint.abdecrypt import main_abdecrypt
-from sssekai.entrypoint.mitm import main_mitm
+from sssekai.entrypoint.rla2json import main_rla2json
 from sssekai.entrypoint.usmdemux import main_usmdemux
 from sssekai.entrypoint.abcache import main_abcache, DEFAULT_CACHE_DB_FILE
 from sssekai.entrypoint.live2dextract import main_live2dextract
@@ -75,9 +75,11 @@ def write(__s):
     spineextract_parser.add_argument('infile', type=str, help='input file')
     spineextract_parser.add_argument('outdir', type=str, help='output directory')    
     spineextract_parser.set_defaults(func=main_spineextract)
-    # mitm
-    mitm_parser = subparsers.add_parser('mitm', help='Run Sekai API MITM proxy (WIP)')
-    mitm_parser.set_defaults(func=main_mitm)
+    # rla2json
+    rla2json_parser = subparsers.add_parser('rla2json', help='''Read streaming_live/archive files and dump their information to JSON''')
+    rla2json_parser.add_argument('infile', type=str, help='input file')
+    rla2json_parser.add_argument('outdir', type=str, help='output directory. multiple json files may be produced')
+    rla2json_parser.set_defaults(func=main_rla2json)
     # parse args
     args = parser.parse_args()
     # set logging level
diff --git a/sssekai/entrypoint/mitm.py b/sssekai/entrypoint/mitm.py
deleted file mode 100644
index ceaf77f..0000000
--- a/sssekai/entrypoint/mitm.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import os,sys
-def main_mitm(args):
-    from mitmproxy.tools.main import mitmdump
-    current_dir = os.path.dirname(os.path.realpath(__file__))    
-    args = [*sys.argv[1:], '-s', '"%s"' % os.path.join(os.path.dirname(os.path.dirname(current_dir)),'mitmproxy_sekai_api.py')]
-    print('running mitmdump with args:', *args)
-    mitmdump(args=args)
diff --git a/sssekai/entrypoint/rla2json.py b/sssekai/entrypoint/rla2json.py
new file mode 100644
index 0000000..de5a1ab
--- /dev/null
+++ b/sssekai/entrypoint/rla2json.py
@@ -0,0 +1,49 @@
+from os import path,makedirs
+from json import loads,dump
+from io import BytesIO
+from logging import getLogger
+from concurrent.futures import ProcessPoolExecutor
+from time import sleep
+from sssekai.unity.AssetBundle import load_assetbundle
+from UnityPy.enums import ClassIDType
+from sssekai.fmt.rla import read_rla
+from tqdm import tqdm
+logger = getLogger(__name__)
+
+
+def worker_job(sname, version, script): # pool worker: parses the raw RLA bytes in `script` and dumps the result to `<sname>.json`
+    rla = read_rla(BytesIO(script), version) # parse before opening the output so a bad segment leaves no empty file behind
+    with open(sname + '.json', 'w', encoding='utf-8') as f: dump(rla, f, indent=4, ensure_ascii=False) # explicit utf-8: ensure_ascii=False writes raw non-ASCII text
+
+def main_rla2json(args):
+    '''Dumps the RLH header and every RLA segment of a streaming_live/archive bundle to JSON.
+
+    Reads `args.infile`; writes `sekai.rlh.json` plus one JSON file per `splitFileIds` entry into `args.outdir`.
+    '''
+    with open(args.infile,'rb') as f:
+        env = load_assetbundle(f)
+        datas = dict()
+        for obj in env.objects:
+            if obj.type in {ClassIDType.TextAsset}:
+                data = obj.read()
+                datas[data.name] = data
+        header = datas.get('sekai.rlh', None)
+        assert header, "RLH Header file not found!"
+        makedirs(args.outdir, exist_ok=True)
+        header = loads(header.text)
+        # Explicit utf-8: ensure_ascii=False writes raw non-ASCII text, which a locale-default codec may reject
+        with open(path.join(args.outdir, 'sekai.rlh.json'), 'w', encoding='utf-8') as fp:
+            dump(header, fp, indent=4, ensure_ascii=False)
+        version = tuple(map(int, header['version'].split('.')))
+        splitSeconds = header['splitSeconds']
+        logger.info('Version: %d.%d' % version)
+        logger.info('Count: %d' % len(header['splitFileIds']))
+        with ProcessPoolExecutor() as executor:
+            logger.info('Dumping RLA data with %d processors' % executor._max_workers)
+            futures = []
+            for sid in header['splitFileIds']:
+                sname = 'sekai_%2d_%08d' % (splitSeconds, sid) # NOTE(review): %2d pads with a space when splitSeconds < 10 - confirm asset naming
+                script = datas[sname + '.rla'].script.tobytes()
+                futures.append(executor.submit(worker_job, path.join(args.outdir,sname), version, script))
+            for future in tqdm(futures):
+                future.result() # blocks until the job is done and re-raises any worker exception instead of dropping it
\ No newline at end of file
diff --git a/sssekai/entrypoint/usmdemux.py b/sssekai/entrypoint/usmdemux.py
index 65440ba..0ab88ba 100644
--- a/sssekai/entrypoint/usmdemux.py
+++ b/sssekai/entrypoint/usmdemux.py
@@ -1,10 +1,10 @@
 from sssekai.unity.AssetBundle import load_assetbundle
-
+from UnityPy.enums import ClassIDType
+from wannacri.usm import Usm
 from os import path,remove,makedirs
+from logging import getLogger
+logger = getLogger(__name__)
 def main_usmdemux(args):
-    from UnityPy.enums import ClassIDType
-    from wannacri.usm import Usm
-
     with open(args.infile,'rb') as f:
         env = load_assetbundle(f)    
         datas = dict()
@@ -16,7 +16,7 @@ def main_usmdemux(args):
         assert movieInfo, "Invalid AssetBundle. No MovieBundleBuildData found!"
         movieInfo = movieInfo.read_typetree()
         usm_name = movieInfo['movieBundleDatas'][0]['usmFileName'][:-len('.bytes')]
-        print('USM: %s' % usm_name)
+        logger.info('USM: %s' % usm_name)
         usm_folder = path.join(args.outdir,usm_name)
         makedirs(usm_folder,exist_ok=True)
         usm_temp = path.join(usm_folder,usm_name + '.tmp')
@@ -28,4 +28,4 @@ def main_usmdemux(args):
         usm = Usm.open(usm_temp,encoding='shift-jis')
         usm.demux(path.join(args.outdir,usm_name),usm_name)
         remove(usm_temp)
-        print('Saved to %s/' % usm_folder)
\ No newline at end of file
+        logger.info('Saved to %s/' % usm_folder)
\ No newline at end of file
diff --git a/sssekai/fmt/rla.py b/sssekai/fmt/rla.py
new file mode 100644
index 0000000..630d913
--- /dev/null
+++ b/sssekai/fmt/rla.py
@@ -0,0 +1,191 @@
+from base64 import b64decode, b64encode
+from struct import unpack as s_unpack
+from io import BytesIO
+from collections import defaultdict
+import math, gzip
+import msgpack
+
+def read_rla(src : BytesIO, version=(1,0)) -> dict:
+    '''Parses the Sekai RLA file format used in 'streaming_live/archive' assets.
+
+    Args:
+        src (BytesIO): Source RLA file stream. Frames are read until a zero tick value (or EOF) is met.
+        version (tuple, optional): RLA version, found in respective RLH (JSON) header files. Defaults to (1,0).
+
+    Returns:
+        dict: Maps frame ticks to {frame type name: [decoded frames]}, sorted by ticks. Sound and unknown payloads are Base64 text.
+    '''
+    read_int = lambda stream, nbytes, signed=False: int.from_bytes(stream.read(nbytes), 'little',signed=signed)
+    read_float = lambda stream: s_unpack('<f', stream.read(4))[0]
+    # Sekai_Streaming_StreamingCommon__CheckHeader
+    def decode_buffer_base64(buffer):
+        is_base64_encoded = buffer[6 + 4 + 4] # flag byte at offset 14
+        is_split = buffer[6 + 4 + 1 + 4] # unused. NOTE(review): offset 15 is also where `data` starts below - confirm the header layout
+        data = buffer[6 + 4 + 1 + 4:]
+        if is_base64_encoded:
+            data = b64decode(data)
+        return data
+    # Sekai_Streaming_SubscribeDecoder__Deserialize
+    def decode_buffer_payload(buffer):
+        stream = BytesIO(buffer)
+        decoder_type = read_int(stream, 1)
+        unk1 = read_int(stream, 4)
+        if not unk1:
+            unk2 = read_int(stream, 4)
+            if unk2: # non-zero: the rest of the buffer is gzip-compressed
+                payload = stream.read()
+                payload = gzip.decompress(payload)
+                return decoder_type, payload
+        return decoder_type, stream.read() # otherwise the remainder is returned as-is
+    # Sekai_Streaming_StreamingData__Deserialize
+    def decode_streaming_data(decoder_type, buffer):
+        stream = BytesIO(buffer)
+        n_mask_offset = read_int(stream, 4) # absolute offset of the bitmask block inside `buffer`
+        n_init_pos = stream.tell()
+        stream.seek(n_mask_offset)
+        n_mask_length = read_int(stream, 2)
+        bitmask = stream.read(n_mask_length)
+        assert not stream.read() # EOF
+        stream.seek(n_init_pos) # values are read from right after the offset field
+        # CP_Serialize_SerializableValueSet
+        bitmask = [bitmask[i // 8] & (1 << (i % 8)) != 0 for i in range(len(bitmask) * 8)] # one bool per bit, lowest bit of each byte first
+        get_next_mask = lambda: bitmask.pop(0) # GetNextMask, ReadBool
+        get_next_pred = lambda: get_next_mask() | (get_next_mask() << 1) # two mask bits -> selector 0..3 (first bit is the low bit)
+        get_next_byte = lambda: [lambda: 0, lambda: 1, lambda: -1, lambda: read_int(stream, 1)][get_next_pred()]() # ReadByte
+        get_next_ushort = lambda: [lambda: 0.0, lambda: 0.0, lambda: read_int(stream, 1), lambda: read_int(stream, 2)][get_next_pred()]() # ReadUShort
+        get_next_short = lambda: [lambda: 0.0, lambda: 0.0, lambda: read_int(stream, 1, True), lambda: read_int(stream, 2)][get_next_pred()]() # ReadShort. NOTE(review): the 2-byte read is unsigned - confirm
+        get_next_int = lambda: [lambda: 0, lambda: read_int(stream,1), lambda: read_int(stream,1,True), lambda: read_int(stream, 4)][get_next_pred()]() # ReadInt
+        get_next_long = lambda: [lambda: 0, lambda: read_int(stream,1), lambda: read_int(stream,2), lambda: read_int(stream, 8)][get_next_pred()]() # ReadLong
+        get_next_float = lambda: [lambda: 0.0, lambda: 0.0, lambda: 3.4028e38, lambda: read_float(stream)][get_next_pred()]() # ReadSingle
+        get_next_vector3 = lambda: (get_next_float(), get_next_float(), get_next_float()) # ReadVector3
+        get_next_ushort_vector3 = lambda: (get_next_ushort() * 0.01, get_next_ushort() * 0.01, get_next_ushort() * 0.01) # ReadUShortVector3
+        get_next_tiny_int = lambda type: {'+': lambda: read_int(stream, 2), '*': lambda: read_int(stream, 1, True), ')': lambda: read_int(stream, 1)}[type]() # ReadTinyInt
+        get_next_string = lambda: stream.read(get_next_tiny_int(chr(read_int(stream, 1)))).decode() if get_next_pred() else None # ReadString
+        get_next_array = lambda reader: [reader() for _ in range(get_next_int())]  # ReadArray<T>
+        # Sekai_Streaming_StreamingData__Deserialize
+        assert get_next_byte() == decoder_type
+        compress_type = get_next_int()
+        sequence_no = get_next_int() # unused afterwards, but the read must happen to advance the stream
+        target_time = get_next_long() # unused afterwards, but the read must happen to advance the stream
+        # StreamingData$$ReadValue implementations
+        deg_to_rad = lambda value: tuple(deg * (math.pi / 180) for deg in value)
+        get_next_deg_as_rad = lambda: deg_to_rad(get_next_ushort_vector3()) # Eulers [0, 2pi]
+        get_next_pose_data = lambda: { # entries are evaluated top to bottom, which is the order the fields are read from the stream
+            'bodyPosition': get_next_vector3(),
+            'bodyRotation': get_next_deg_as_rad(), # Eulers
+            **({
+                'musicItemPropPosition': get_next_vector3(),
+                'musicItemPropRotation': get_next_deg_as_rad()
+            } if version >= (1,1) else {}),
+            'boneDatas': get_next_array(get_next_deg_as_rad), # Eulers [0, 2pi]
+            'shapeDatas': get_next_array(get_next_float), # [0,100]
+            **({
+                'propBoneDatas': get_next_array(get_next_deg_as_rad) # Eulers [0, 2pi]
+            } if version >= (1,1) else {}),
+            'heightOffset': get_next_short() * 0.01,
+            'isActive': get_next_mask(),
+            'useActiveFx': get_next_mask(),
+            **({
+                'isEyeLookAt': get_next_mask()
+            } if version >= (1,4) else {})
+        }
+        match decoder_type:
+            case 0:
+                # Sekai_Streaming_MotionData
+                timeStamps = get_next_array(get_next_long) if get_next_pred() == 2 else None
+                poses = get_next_array(get_next_pose_data)
+                return {'type': 'MotionData', 'timeStamps': timeStamps, 'poses': poses}
+            case 1:
+                # Sekai_Streaming_MotionCaptureData
+                read_character_capture_data = lambda: {
+                    'id': get_next_int(),
+                    'timestamp': get_next_long(),
+                    'pose': get_next_pose_data()
+                }
+                data = get_next_array(read_character_capture_data)
+                return {'type': 'MotionCaptureData', 'data': data}
+            case 2:
+                # Sekai_Streaming_SoundData
+                channels = get_next_int()
+                sample_rate = get_next_int()
+                data_length = get_next_int()
+                if compress_type != 1:
+                    data = stream.read(data_length) # Raw sampling data
+                    return {'type': 'SoundData', 'channels': channels, 'sampleRate': sample_rate, 'encoding': 'raw', 'data': b64encode(data).decode()}
+                else:
+                    data = stream.read(data_length) # HCA w/o metadata
+                    return {'type': 'SoundData', 'channels': channels, 'sampleRate': sample_rate, 'encoding': 'hca', 'data': b64encode(data).decode()}
+            case 3:
+                # Sekai_Streaming_StatusData
+                read_stage_status = lambda: {
+                    'liveState': get_next_byte(),
+                    'lightIntensity': get_next_float(),
+                    'gayaVolume': get_next_float(),
+                    'cheerVolume': get_next_float(),
+                    'characterSpotlightIndex': get_next_int(),
+                    'characterSpotlightIntensity': get_next_float(),
+                    'stageSetlistIndex': get_next_int(),
+                    'musicSetlistIndex': get_next_int(),
+                    'musicTargetTime': get_next_long(),
+                    'musicStartTime': get_next_float(),
+                    'seId': get_next_int(),
+                    'seStartTime': get_next_long(),
+                    'timeStamp': get_next_long(),
+                    'unk0': get_next_byte(),
+                    'unk1': get_next_byte(),
+                    'playTimelineId': get_next_int(),
+                    ** ({
+                        'screenFadeColorR': get_next_byte(),
+                        'screenFadeColorG': get_next_byte(),
+                        'screenFadeColorB': get_next_byte(),
+                        'screenFade': get_next_float(),
+                        'characterFormationRotate': get_next_int(),
+                        'stageCenterPosition': get_next_vector3(),
+                        'playerAvatarStartPosition': get_next_vector3()
+                    } if version >= (1,4) else {})
+                }
+                read_character_status = lambda: {
+                    'costumeIndex': get_next_int(),
+                    ** ({'useFx': get_next_mask()} if version >= (1,2) else {}),
+                    'timeStamp': get_next_long(),
+                }
+                stage_status_list = get_next_array(read_stage_status)
+                stage_status_length = get_next_int() # number of character status arrays that follow
+                character_status_list = [get_next_array(read_character_status) for _ in range(stage_status_length)]
+                return {'type': 'StatusData', 'stageStatus': stage_status_list, 'characterStatus': character_status_list}
+            case 4:
+                # Sekai_Streaming_VirtualLiveMessageData
+                message_id = get_next_int()
+                user_id = get_next_string()
+                data_length = get_next_int()
+                data = stream.read(data_length)
+                # CP_BinarySerializer__Deserialize_OtherRoomMessageData
+                # CP_BinarySerializer__Deserialize_OtherRoomActionData
+                # NOTE: The msgpack payload does not contain the message type
+                data = msgpack.unpackb(data)
+                return {'type': 'VirtualLiveMessageData','messageId': message_id, 'userId': user_id, 'data': data}
+            case 5:
+                # Sekai_Streaming_ComplementInfoData
+                info_type = get_next_int()
+                info_data = get_next_string()
+                return {'type': 'ComplementInfoData', 'infoType': info_type, 'infoData': info_data}
+        return {'type': 'Unknown', 'data': b64encode(buffer).decode()} # Base64 like SoundData, so the result stays JSON-serializable
+    result = defaultdict(dict) # ticks -> {frame type name: [frames]}
+    def read_frames():
+        ticks = read_int(src, 8) # an exhausted stream reads as b'' which decodes to 0, so EOF also ends the loop
+        if ticks:
+            buffer_length = read_int(src, 4)
+            buffer = src.read(buffer_length)
+            data = decode_buffer_base64(buffer)
+            decoder_type, data = decode_buffer_payload(data)
+            data = decode_streaming_data(decoder_type, data)
+            result[ticks].setdefault(data['type'], list()).append(data)
+            return True
+        return False
+    while read_frames(): pass
+    result = dict(sorted(result.items(), key=lambda x: x[0]))
+    return result
+
+if __name__ == '__main__':
+    with open(r"D:\project\TextAsset\sekai_30_00000000.rla.bytes",'rb') as f: # developer smoke test; the path is machine-specific
+        result = read_rla(f, (1,0))
\ No newline at end of file