import re

def remove_comments(code):
    """Return *code* with C-style ``//`` and ``/* ... */`` comments removed.

    The text is scanned once from left to right, so whichever construct
    starts first wins:

    * string and character literals are copied through untouched, so
      comment markers inside them (``"http://host"``) are preserved;
    * a ``//`` comment runs to the end of its line, and a ``/*`` inside it
      does not open a block comment;
    * a ``/* ... */`` comment ends at the first ``*/`` and may span lines.

    Comments are replaced by the empty string; the newline that ends a
    ``//`` comment is kept. An unterminated ``/*`` is left in place.

    This is a lexical heuristic, not a C preprocessor: a stray quote or
    apostrophe outside a literal (for example in ``#error`` text) can be
    paired with a later one on the same line and shield a comment that
    lies between them.

    Args:
        code: Source text to clean.

    Returns:
        The text with comments removed.
    """
    token = re.compile(
        r'"(?:\\.|[^"\\\n])*"'     # string literal, honouring \" escapes
        r"|'(?:\\.|[^'\\\n])*'"    # character literal
        r'|//[^\n]*'               # line comment, up to the newline
        r'|/\*.*?\*/',             # block comment, non-greedy
        re.DOTALL,                 # let block comments span several lines
    )

    def _strip_comment(match):
        text = match.group(0)
        # Only the two comment alternatives can start with '/'; literals
        # start with a quote and are returned unchanged.
        return '' if text.startswith('/') else text

    return token.sub(_strip_comment, code)

def load_and_remove_comments(file_path):
    """Read the UTF-8 text file at *file_path* and return it without comments.

    Args:
        file_path: Path of the source file to read.

    Returns:
        The file's text after it has been passed through ``remove_comments``.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        return remove_comments(source.read())
 
# Maps each sfr name seen so far to its address string: the literal from the
# declaration with "0x" removed, upper-cased and prefixed with "0".
# convert_sfr_to_data_format fills it in and reads it back to resolve sbit
# declarations. Being module-level, it persists across calls.
dict_sfr = {}
# sfr names for which convert_sfr_to_data_format has already emitted a
# ";<name>" header line ahead of the first sbit that refers to them.
# Being module-level, it persists across calls.
list_sfr = []
def convert_sfr_to_data_format(file_path):
    """Rewrite the ``sfr``/``sbit`` declarations of a C header as DATA/BIT lines.

    The file is read, comments are stripped with ``remove_comments``, and
    each remaining line is stripped of surrounding whitespace and then
    handled as follows:

    * ``sfr NAME = 0xNN;`` becomes ``NAME DATA 0NN``. The address is the
      literal with ``0x`` removed, upper-cased and prefixed with ``0``. It
      is also recorded in the module-level ``dict_sfr``.
    * ``sbit NAME = SFR ^ N;`` becomes ``NAME BIT 0HH``, where ``HH`` is the
      upper-case hex of the recorded address of ``SFR`` plus ``N``. The
      first sbit that refers to a given sfr is preceded by a ``;SFR`` line,
      tracked in the module-level ``list_sfr``.
    * Every other line, including blank lines and other ``X Y = ...``
      declarations such as ``sfr16``, is kept unchanged.

    Whitespace around ``=`` and ``^`` is optional. Because ``dict_sfr`` and
    ``list_sfr`` are module-level, they carry over between calls: an sfr
    that already received its ``;SFR`` header in an earlier call does not
    get another one.

    Args:
        file_path: Path of the header file to convert.

    Returns:
        A list of output lines, without trailing newlines.

    Raises:
        KeyError: If an sbit refers to an sfr that has not been declared
            earlier in this or a previous call.
        ValueError: If an sfr address or sbit bit number is not a valid
            hexadecimal number.
    """
    # Decode as UTF-8 on every platform instead of using the locale default.
    # Undecodable bytes (for example comments in a legacy code page) are
    # replaced rather than aborting the conversion.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        code = file.read()

    # Compiled once, outside the loop. "\s+" after the keyword stops "sfr"
    # from matching "sfr16"; anything after the matched prefix (normally
    # just ";") is ignored.
    sfr_decl = re.compile(r'sfr\s+(\w+)\s*=\s*(\w+)')
    sbit_decl = re.compile(r'sbit\s+(\w+)\s*=\s*(\w+)\s*\^\s*(\w+)')

    converted_lines = []
    for raw_line in remove_comments(code).splitlines():
        line = raw_line.strip()

        sfr_match = sfr_decl.match(line)
        if sfr_match:
            sfr_name, literal = sfr_match.groups()
            address = '0' + literal.lower().replace('0x', '').upper()
            # Remember the address so later sbit lines can be resolved.
            dict_sfr[sfr_name] = address
            converted_lines.append(f"{sfr_name} DATA {address}")
            continue

        sbit_match = sbit_decl.match(line)
        if sbit_match:
            sbit_name, sfr_name, offset = sbit_match.groups()
            try:
                base_address = dict_sfr[sfr_name]
            except KeyError:
                raise KeyError(
                    f"sbit {sbit_name!r} refers to undeclared sfr {sfr_name!r}"
                ) from None

            # Emit a ";<sfr>" header before the first bit of each sfr.
            if sfr_name not in list_sfr:
                list_sfr.append(sfr_name)
                converted_lines.append(";" + sfr_name)

            bit_address = int(base_address, 16) + int(offset, 16)
            converted_lines.append(f"{sbit_name} BIT 0{bit_address:X}")
            continue

        # Not an sfr/sbit declaration: keep the stripped line as it is.
        converted_lines.append(line)

    return converted_lines

if __name__ == '__main__':
    # The input header is looked up relative to the current working directory.
    source_header = 'c8051F020.h'
    output_lines = convert_sfr_to_data_format(source_header)

    # Echo the converted lines, then save them one per line as an include file.
    print(output_lines)
    with open('c8051F020.inc', 'w', encoding='utf-8') as inc_file:
        inc_file.write('\n'.join(output_lines))