I'm trying to make a proper serializer for binary scores.db
and collections.db
for a game called osu!,
I first used a parser found here https://github.com/KirkSuD/osudb
Which allowed me to parse my db files into list that I can edit but I haven't seen any up to date serializer that I can use to serialize the data back into a new binary, so I tried making one myself.
I know that I can just replace struct.unpack(format,v1)
by just struct.pack(format,v1)
, but here
is the first issue:
I can't read a defined amount of bytes anymore since they just aren't bytes and I also can't do that for obvious reasons, which means I also can't use the opposite technique the dev of the parser used (to parse the data easily).
Right now, I have converted the binary to str
for vizualition
https://controlc.com/6cdcffa3 I need to obtain that from https://controlc.com/9ddc4af1
I need to get the exact same format, the issue being that, I started making this:
def serialize_types_exp(fobj, data_type):
if data_type == "Int": ## 4 bytes unsigned int
return struct.pack("<I", fobj)
def Serialize_score_exp(file_path):
with open(file_path, "r") as fobj:
with open("./osu!MapSync/output_func.txt", "r+") as otp:
otp.truncate(0)
for i in fobj:
fobj = ast.literal_eval(i)
print(type(fobj))
f=0
while f <= 1:
res = str(serialize_types_exp(fobj[f], "Int"))
if f == 0:
print(fobj[f],"->",res[0:-1])
otp.write(str(res)[0:-1])
else:
print(fobj[f],"->",res[2:-1])
otp.write(str(res)[2:-1])
f += 1
Serialize_score_exp('./osu!MapSync/output.txt')
Which I realized, would basically be the same thing as just returning the string I want aKa:
def Serialize_score_exp(filepath):
return "litteraly a static result copy pasted from the binary file"
Which isn't what I want, so I wonder what can I use to serialize it back to the format I want?
EDIT: here are the files if any of you want to try something
https://drive.google.com/file/d/1Qmam_u9mVfqxBZ_U5QvFnq1t_01yOQzT/view?usp=sharing
EDIT 02/01/2020: It seem like directly writing the binary to a file using wb and then reading it back solve most of the visualization issue, but before i continue, there are x0b
bytes (space included) in the original serialized data, and i don't know how to obtain these from my list, any idea?
here is my code right now
def serialize_type_exp(fobj, data_type):
if data_type == "Boolean": ## False if 0x00 else True
return struct.pack("<?", fobj)
elif data_type == "Byte": ## 1 byte int
return struct.pack("<s", fobj)
elif data_type == "Double": ## 8 bytes floating point
return struct.pack("<d", fobj)
elif data_type == "Int": ## 4 bytes unsigned int
return struct.pack("<I", fobj)
elif data_type == "Long": ## 8 bytes unsigned int
return struct.pack("<Q", fobj)
elif data_type == "Short": ## 2 bytes unsigned int
return struct.pack("<H", fobj)
elif data_type == "Single": ## 4 bytes floating point
return struct.pack("<f", fobj)
elif data_type == "String": ## 0x00 or 0x0b - ULE128(n) - UTF-8(length=n)
bb = fobj
if bb == None:
return None
return fobj.encode("utf-8")
else:
raise NotImplementedError('parse_type(fobj, data_type): Unknown data type: "%s".' % data_type)
def serialize_types_exp(fobj, types):
return [serialize_type_exp(fobj, i) for i in types]
score_data_types = ['Byte', 'Int', 'String', 'String', 'String', 'Short', 'Short', 'Short', 'Short', 'Short', 'Short', 'Int', 'Short', 'Boolean', 'Int', 'String', 'Long', 'Int', 'Long']
def serialize_scoredb_data(file_path):
with open(file_path, "r") as fobj:
with open("./osu!MapSync/output_func.db", "wb") as otp:
otp.truncate(0)
for i in fobj:
fobj = ast.literal_eval(i)
for i in range(2):
otp.write((serialize_type_exp(fobj[i],'Int')))
for maps in fobj[2]:
for i in range(2):
if type(maps[i]) == str:
otp.write(serialize_type_exp(maps[i],'String'))
if type(maps[i]) == int:
otp.write(serialize_type_exp(maps[i],'Int'))
for scores in maps[2]:
for idx, stats in enumerate(scores):
if score_data_types[idx] == 'Byte':
print(stats,"->",stats.to_bytes,":",type(bytes(stats)))
print(type(serialize_type_exp(bytes(stats), score_data_types[idx])))
otp.write(serialize_type_exp(bytes(stats), score_data_types[idx]))
else:
otp.write(serialize_type_exp(stats, score_data_types[idx]))
serialize_scoredb_data('./osu!MapSync/output.txt')
EDIT 3: fixed that x0b
and x0b
bytes, now i have to face one more problem before it work ingame:
some bytes seems to have changed from x00
to x01
in the process and i don't know why
import osudb
import ast
import struct
with open("./osu!MapSync/scores.db", "rb") as f:
with open("Exp.txt", "w") as i:
for stuff in f:
i.write(str(stuff))
parse = osudb.parse_score(r"./osu!MapSync/scores.db")
with open('./osu!MapSync/output.txt', 'w') as f:
f.write(str(parse)) #[1:-1].split(',')
def serialize_type_exp(fobj, data_type):
if data_type == "Boolean": ## False if 0x00 else True
return struct.pack("<?", fobj)
elif data_type == "Byte": ## 1 byte int
return struct.pack("<s", fobj)
elif data_type == "Double": ## 8 bytes floating point
return struct.pack("<d", fobj)
elif data_type == "Int": ## 4 bytes unsigned int
return struct.pack("<I", fobj)
elif data_type == "Long": ## 8 bytes unsigned int
return struct.pack("<Q", fobj)
elif data_type == "Short": ## 2 bytes unsigned int
return struct.pack("<H", fobj)
elif data_type == "Single": ## 4 bytes floating point
return struct.pack("<f", fobj)
elif data_type == "String": ## 0x00 or 0x0b - ULE128(n) - UTF-8(length=n)
bb = fobj
if bb == None:
return None
return fobj.encode("utf-8")
else:
raise NotImplementedError('parse_type(fobj, data_type): Unknown data type: "%s".' % data_type)
def serialize_types_exp(fobj, types):
return [serialize_type_exp(fobj, i) for i in types]
score_data_types = ['Byte', 'Int', 'String', 'String', 'String', 'Short', 'Short', 'Short', 'Short', 'Short', 'Short', 'Int', 'Short', 'Boolean', 'Int', 'String', 'Long', 'Int', 'Long']
def serialize_scoredb_data(file_path):
with open(file_path, "r") as fobj:
with open("./new osu!.db/scores.db", "wb") as otp:
otp.truncate(0)
for i in fobj:
fobj = ast.literal_eval(i)
for i in range(2):
otp.write((serialize_type_exp(fobj[i],'Int')))
for maps in fobj[2]:
for i in range(2):
if type(maps[i]) == str:
otp.write(b'x0b '+serialize_type_exp(maps[i],'String'))
if type(maps[i]) == int:
otp.write(serialize_type_exp(maps[i],'Int'))
for scores in maps[2]:
for idx, stats in enumerate(scores):
if score_data_types[idx] == 'Byte':
otp.write(serialize_type_exp(bytes(stats), score_data_types[idx]))
elif stats == None:
otp.write(b'x00')
elif score_data_types[idx] == 'String' and len(stats) == 32:
otp.write(b'x0b '+serialize_type_exp(stats, score_data_types[idx]))
elif idx == 3:
otp.write(b'x0bx06'+serialize_type_exp(stats, score_data_types[idx]))
else:
otp.write(serialize_type_exp(stats, score_data_types[idx]))
serialize_scoredb_data('./osu!MapSync/output.txt')
with open("./osu!MapSync/output_func.db", "rb") as f:
with open("output_func.txt", "w") as i:
for stuff in f:
i.write(str(stuff))
this is what i want https://controlc.com/6cdcffa3 and this is what i obtained https://controlc.com/4e547b64
already, fixed the parser i used, now i discovered that the formatting on the username is incorrect and is variable from a player to another, i apparently need to add this to my serializer https://en.wikipedia.org/wiki/LEB128#Encode_unsigned_integer
except i i have no idea about what they mean about set high order bit of byte;
and how to do it in python