from customer_dialog_clearn import get_dialog_to_load, load_dialog_to_merge
from customer_day_analysis import customer_day_analysis
from sqlalchemy import create_engine, text
import pandas as pd
from datetime import datetime, timedelta
import json, multiprocessing, math, os, subprocess
from online_streaming import online_llm_streaming


def get_db_customer_id_df(start_dt, end_dt):
    """Fetch manually maintained CRM tags per customer for an update window.

    The query builds one row per customer from three UNION ALL branches and
    then merges them with GROUP_CONCAT:

    1. tag names from ``micros_customer_tag`` whose tag parent id (``pid``)
       is 3, 4, 5, 6 or 23, each mapped to its own output column;
    2. distinct ``product_name(price)`` strings from
       ``micros_customer_attention_product``;
    3. level-2 tags with path ``'2,24'``, each suffixed with ``-`` and the
       customer's level-3 child tags when such children exist.

    Only customers whose merged "客户需求判断" value is non-empty and not
    exactly ``'其他'`` are returned, ordered by latest tag update descending.

    Args:
        start_dt: Lower bound for ``updated_at`` (used with SQL ``BETWEEN``),
            e.g. ``'2025-06-19'``.
        end_dt: Upper bound for ``updated_at`` (used with SQL ``BETWEEN``).

    Returns:
        pandas.DataFrame whose columns are the aliases of the final SELECT.
    """
    # NOTE(review): credentials are hard-coded in source control; they should
    # be rotated and loaded from configuration or a secret store instead.
    host = 'rdsonlyread.rwlb.rds.aliyuncs.com'
    user = 'tmp_presale'
    pwd = 'czWYsd2fjx8HaKSCrXh'
    db = 'presalemicros'
    port = '3306'
    engine = create_engine(f'mysql+pymysql://{user}:{pwd}@{host}:{port}/{db}')
    # The dates are passed as bind parameters (:start_dt / :end_dt) rather
    # than being formatted into the statement, so the driver handles quoting.
    sql_string = """
    with temp as (
        select 客户id,
        TRIM(BOTH ',' from GROUP_CONCAT(工作状态)) 工作状态,
        TRIM(BOTH ',' from GROUP_CONCAT(是否有居住地)) 是否有居住地,
        TRIM(BOTH ',' from GROUP_CONCAT(流水怎么做)) 流水怎么做,
        TRIM(BOTH ',' from GROUP_CONCAT(工位)) 工位,
        TRIM(BOTH ',' from GROUP_CONCAT(销售卡点)) 销售卡点,
        TRIM(BOTH ',' from GROUP_CONCAT(客户需求判断)) 客户需求判断,
        TRIM(BOTH ',' from GROUP_CONCAT(意向产品)) 意向产品
        from (

        (select 
                ct.customer_id 客户id,
                ifnull(GROUP_CONCAT(tgzzt.name),'') 工作状态,
                ifnull(GROUP_CONCAT(tsfyjzd.name),'') 是否有居住地,
                ifnull(GROUP_CONCAT(tlszmz.name),'') 流水怎么做,
                ifnull(GROUP_CONCAT(tgw.name),'') 工位,
                ifnull(GROUP_CONCAT(txskd.name),'') 销售卡点,
                        '' 客户需求判断,
            '' 意向产品

        from micros_customer_tag ct 
        left join micros_tag tgzzt on ct.tag_id=tgzzt.id and ct.deleted_at is null and tgzzt.pid=3 and tgzzt.deleted_at is null
        left join micros_tag tsfyjzd on ct.tag_id=tsfyjzd.id and ct.deleted_at is null and tsfyjzd.pid=4 and tsfyjzd.deleted_at is null
        left join micros_tag tlszmz on ct.tag_id=tlszmz.id and ct.deleted_at is null and tlszmz.pid=5 and tlszmz.deleted_at is null
        left join micros_tag tgw on ct.tag_id=tgw.id and ct.deleted_at is null and tgw.pid=6 and tgw.deleted_at is null
        left join micros_tag txskd on ct.tag_id=txskd.id and ct.deleted_at is null and txskd.pid=23 and  txskd.deleted_at is null
        where ct.updated_at between :start_dt and :end_dt  and ct.deleted_at is null and (tgzzt.id is not null or tsfyjzd.id is not null or tlszmz.id is not null or tgw.id is not null or txskd.id is not null)
        group by ct.customer_id 
        order by ct.customer_id)

        union all

        (
        select 
        customer_id 客户id,
        '' 工作状态,
                '' 是否有居住地,
                '' 流水怎么做,
                '' 工位,
                '' 销售卡点,
                '' 客户需求判断,
        GROUP_CONCAT(pp)  意向产品
        from (
        select distinct customer_id,CONCAT(product_name,'(',price,')') pp from micros_customer_attention_product where updated_at between :start_dt and :end_dt and  deleted_at is null order by customer_id) as tmp group by customer_id)

        union all

        (
        select 
            customer_id 客户id, 
            '' 工作状态,
            '' 是否有居住地,
            '' 流水怎么做,
            '' 工位,
            '' 销售卡点,
            ifnull(GROUP_CONCAT(aa), '') 客户需求判断,
            '' 意向产品

         from (
            SELECT 
                tttt.customer_id, 
                -- CONCAT(tttt.name, if(tttt.son is not null,CONCAT("(",tttt.son,")"),"")) aa
                CONCAT(tttt.name, if(tttt.son is not null,CONCAT("-",tttt.son),"")) aa 
            from (
                SELECT
                    ct.tag_id,
                    ct.customer_id,
                    tt3.name, 
                    (
                        SELECT 
                            GROUP_CONCAT(t2.name)  
                        from micros_customer_tag ct2  
                        left join micros_tag t2 
                        on t2.id = ct2.tag_id 
                        where 
                            ct2.tag_id in (SELECT id from micros_tag t where t.pid = ct.tag_id and t.`level` = 3 and path like '2,24,%') and 
                            ct2.customer_id = ct.customer_id  and 
                            ct2.deleted_at is null
                    ) as son
                FROM micros_customer_tag ct
                left join micros_tag tt3 
                on tt3.id = ct.tag_id
                where 
                    EXISTS(SELECT 1 from micros_tag tt where tt.id = ct.tag_id and tt.level = 2 and tt.path='2,24') and 
                    ct.deleted_at is null and 
                    ct.updated_at between :start_dt and :end_dt

            ) as tttt
        ) ttttt 
        group by customer_id)
        ) tmp3 group by 客户id
    )
    select t.客户id,
           t.工作状态,
           t.是否有居住地,
           t.流水怎么做,
           t.工位,
           t.意向产品,
           t.销售卡点 as 人工_销售卡点,
           t.客户需求判断 as 人工_客户需求判断,
           t1.标签更新时间
     from temp t
     left join (select customer_id as 客户id,max(updated_at) as 标签更新时间 from micros_customer_tag where updated_at between :start_dt and :end_dt and deleted_at is null group by customer_id) t1
                  on t.客户id = t1.客户id
     where t1.客户id is not null
       and (length(trim(t.客户需求判断)) > 0)
       and trim(t.客户需求判断) not in ('其他')
       order by t1.标签更新时间 desc
    """
    bind_params = {'start_dt': start_dt, 'end_dt': end_dt}
    try:
        with engine.connect() as connection:
            sql_result = connection.execute(text(sql_string), bind_params)
            customer_id_df = pd.DataFrame(sql_result.fetchall(), columns=sql_result.keys())
    finally:
        # A fresh engine is created on every call; release its pooled
        # connections so repeated calls do not accumulate open sockets.
        engine.dispose()
    return customer_id_df


def get_db_data_df(start_dt, end_dt):
    """Fetch the per-customer CRM tag table for an update window.

    This function previously carried its own copy of the connection setup and
    SQL statement used by ``get_db_customer_id_df``; the two copies differed
    only in whitespace. It now delegates to that function so the query is
    maintained in a single place.

    Args:
        start_dt: Lower bound for ``updated_at`` (used with SQL ``BETWEEN``).
        end_dt: Upper bound for ``updated_at`` (used with SQL ``BETWEEN``).

    Returns:
        pandas.DataFrame with the same columns and row order as
        ``get_db_customer_id_df(start_dt, end_dt)``.
    """
    return get_db_customer_id_df(start_dt, end_dt)


def ai_man_compare_forward(row):
    """Ask the online LLM to score how consistent two pieces of text are.

    The first two positional values of ``row`` are embedded in a scoring
    prompt that asks the model to answer with a ``{"score": "reason"}`` style
    JSON object. The raw model output is returned without being parsed.

    Args:
        row: Object supporting ``row.iloc[0]`` and ``row.iloc[1]`` (e.g. a
            pandas Series) holding the two contents to compare.

    Returns:
        Whatever ``online_llm_streaming(prompt).run()`` returns, or an empty
        string when that call raises.
    """
    prompt = f"""
    ```
    内容1：{row.iloc[0]}
    内容2：{row.iloc[1]}
    ```
    根据上面```中的内容1与内容2，打分规则如下：
    如果内容1或内容2的宽松的意思一致性较高：得分80分到100分范围
    如果内容1或内容2的宽松的意思一致性偏高：得分70分到80分范围
    如果内容1或内容2的宽松的意思一致性偏低：得分60分到70分范围
    如果内容1或内容2的宽松的意思一致性很低：得分50分到60分范围
    如果内容1或内容2的宽松的意思几乎不相关：得分50分以下
    如果内容1或内容2为空：得分50分
    请严格按照{{"得分":"得分原因"}}的json格式给出最终得分和原因，拒绝```json```的形式。
    例如：
    {{"85":"意向客户可更进与长期运营，因为意向客户可更进，可以做长期运营的基础条件，因此意思一致性较高"}}
    {{"50":"内容1为空，根据打分规则，如果内容1或内容2为空则得分为50分"}}
    {{"30":"内容1为身份续签相关，内容2为教育择校相关，根据打分规则，如果内容1或内容2的意思一致性几乎不相关则得分为30分"}}
    {{"0":"内容1为身份续签，内容2为放弃身份，根据打分规则，内容1或内容2意思完全相反则得分为0分"}}
    """
    # Pre-bind the result: the previous handler returned a name that was
    # never assigned when the call failed, turning every failure into an
    # UnboundLocalError instead of a best-effort fallback.
    tmp_ai_result = ''
    try:
        tmp_ai_result = online_llm_streaming(prompt).run()
    except Exception as exc:
        # Best effort: log when and why the call failed, keep the row going.
        print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), exc)
    return tmp_ai_result


def target_func(sub_df, result_pd_list):
    """Score every row of ``sub_df`` and append the resulting Series.

    Missing values are replaced with empty strings before each row is passed
    to ``ai_man_compare_forward``; the per-row results are appended to
    ``result_pd_list`` as one object.
    """
    cleaned = sub_df.fillna('')
    row_scores = cleaned.apply(ai_man_compare_forward, axis=1)
    result_pd_list.append(row_scores)


def multi_func(df, group_index=0, group_size=5, num_processes=10):
    """Run ``target_func`` over ``df`` in row groups using worker processes.

    ``df`` is cut into consecutive positional slices of ``group_size`` rows.
    Up to ``num_processes`` slices are processed concurrently, one process
    per slice; the next batch starts only after the current batch has been
    joined. Each worker appends its result to a manager-backed shared list.

    Args:
        df: DataFrame to process.
        group_index: Index of the first group to process (earlier groups are
            skipped).
        group_size: Number of rows per group.
        num_processes: Maximum number of worker processes per batch.

    Returns:
        The concatenation of all worker results, sorted by index. An empty
        Series when there is no group to process.

    Raises:
        ValueError: If ``num_processes`` is smaller than 1 (the batching loop
            could otherwise never advance), or from ``pd.concat`` if no
            worker delivered a result.
    """
    if num_processes < 1:
        raise ValueError("num_processes must be >= 1")
    num_groups = math.ceil(len(df) / group_size)
    print(f"总共划分的组数为: {num_groups}")
    if group_index >= num_groups:
        # Nothing to schedule; avoid spawning a manager and avoid the
        # "No objects to concatenate" error from pd.concat on an empty list.
        return pd.Series(dtype=object)
    # The context manager shuts the manager's server process down on exit.
    with multiprocessing.Manager() as manager:
        result_pd_list = manager.list()
        while group_index < num_groups:
            # Use the smaller of the remaining group count and the process cap.
            available_processes = min(num_processes, num_groups - group_index)
            print("当前第%s分组" % group_index, f"总共划分的组数为: {num_groups}")
            batch = []
            for _ in range(available_processes):
                start_index = group_index * group_size
                end_index = min((group_index + 1) * group_size, len(df))
                print(f"当前分组的索引范围: {start_index} - {end_index}")
                sub_df = df[start_index:end_index]
                if len(sub_df) > 0:
                    p = multiprocessing.Process(target=target_func,
                                                args=(sub_df, result_pd_list,))
                    batch.append(p)
                    p.start()
                group_index += 1
            # Wait for the whole batch before launching the next one.
            for p in batch:
                p.join()
        # Copy the results out of the proxy before the manager is shut down.
        results = result_pd_list[:]
    return pd.concat(results).sort_index()


def split_rows_by_comma(df, split_column):
    """Split a comma-separated column into one row per item.

    The input DataFrame is left untouched; all work happens on a copy.

    Args:
        df: Source DataFrame.
        split_column: Name of the column whose string values are split on
            ``','``.

    Returns:
        A new DataFrame with a fresh 0..n-1 index in which every item of
        ``split_column`` has its own row (other columns repeated) and the
        items are stripped of surrounding whitespace.
    """
    # Work on a copy so the caller's column is not replaced by lists.
    working_df = df.copy()

    # Step 1: turn each comma-separated string into a list of items.
    working_df[split_column] = working_df[split_column].str.split(',')

    # Step 2: expand every list item into its own row.
    exploded_df = working_df.explode(split_column)

    # Step 3: renumber the rows and trim whitespace around each item.
    exploded_df = exploded_df.reset_index(drop=True)
    exploded_df[split_column] = exploded_df[split_column].str.strip()

    return exploded_df


if __name__ == '__main__':
    # 从数据库中获取客户id并生成客户id的json格式文件到目标路径
    customer_id_df = get_db_customer_id_df(start_dt='2025-06-19', end_dt=datetime.now().strftime("%Y-%m-%d"))
    tmp_customer_id_list = []
    for index, customer_id in customer_id_df.iterrows():
        tmp_dict = {"客户id": f"{customer_id['客户id']}"}
        tmp_customer_id_list.append(tmp_dict)
    tmp_customer_id_dict = {"RECORDS": tmp_customer_id_list}
    diglog_original_date_dir = f'./diglog_original_data/{datetime.now().strftime("%Y-%m-%d")}'
    if not os.path.exists(diglog_original_date_dir):
        os.makedirs(diglog_original_date_dir)
        print(f"文件夹 {diglog_original_date_dir} 已创建。")
    diglog_original_date_file = os.path.join(diglog_original_date_dir, f'{datetime.now().strftime("%Y%m%d")}.json')
    with open(diglog_original_date_file, "w", encoding="utf-8") as f:
        json.dump(tmp_customer_id_dict, f, ensure_ascii=False, indent=4)
        print('customer_id文件生成:', diglog_original_date_file)
    # 根据customerid的json文件，调用语料解析文本程序，生成对话文本语料文件到本地
    diglog_original_date_file = './custom_sales/' + diglog_original_date_file.replace('./', '')
    command = f"cd /opt/ai_sale && /root/miniconda3/envs/ai_sale/bin/python get_data.py {diglog_original_date_file} https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=cc1909e1-a8bf-4807-8165-c2776a02b23c"
    print(command, '...')
    subprocess.run(command, shell=True)
    max_diglog_dict = \
        sorted([f for f in os.listdir('/opt/ai_sale/data') if os.path.isdir(os.path.join('/opt/ai_sale/data', f))],
               reverse=True)[0]
    print('生成语料文件夹：', max_diglog_dict)
    # 将解析出的对话文本语料进行规范整理
    # data_parent_dir = r'D:\Yanjq_Project\custom_sales\diglog_original_data\2025-07-28\2025-07-28-114941'
    data_parent_dir = os.path.join('/opt/ai_sale/data', max_diglog_dict)
    data_dt = datetime.now().strftime("%Y%m%d")
    # data_dt = '20250708'
    get_dialog_to_load(data_parent_dir)
    load_dialog_to_merge(data_parent_dir, data_file_dt=data_dt,
                         start_dt=f'{(datetime.strptime(data_dt, "%Y%m%d") - timedelta(days=60)).strftime("%Y-%m-%d")}',
                         end_dt=f'{(datetime.strptime(data_dt, "%Y%m%d") - timedelta(days=1)).strftime("%Y-%m-%d")}')
    customer_day_analysis(data_file_dt=data_dt)
    # 从crm数据库中获取真实标签关联到分析结果中
    sql_result_df = get_db_data_df(start_dt='2025-06-19', end_dt=datetime.now().strftime("%Y-%m-%d"))
    sql_result_df['客户id'] = sql_result_df['客户id'].astype(str)
    file_path = f'./user_day_research_result/user_day_research_result_{data_dt}.csv'
    data_df = pd.read_csv(file_path)
    data_df['客户id'] = data_df['客户名称'].str.replace('客户', '')
    data_df = pd.merge(data_df, sql_result_df, on='客户id', how='left')
    # data_df['客户名称'] = data_df['客户id']
    data_df = data_df.drop(['客户id', 'round'], axis=1)
    data_df_needs_ai = data_df['客户购买潜力'].str.replace('，', ',').str.split(',', n=2, expand=True)
    # 判断不足3列时补齐
    while data_df_needs_ai.shape[1] < 3:
        data_df_needs_ai[data_df_needs_ai.shape[1]] = None
    data_df_needs_ai.columns = ['AI_客户需求1', 'AI_客户需求2', 'AI_客户需求3']
    data_df['AI_客户需求二级标签'] = data_df_needs_ai.fillna('').apply(lambda x: ','.join(list(filter(None,
                                                                                                      set(['' if x.iloc[
                                                                                                                     0] is None else
                                                                                                           x.iloc[
                                                                                                               0].replace(
                                                                                                               ' ',
                                                                                                               '').split(
                                                                                                               '-')[
                                                                                                               0] if '-' in
                                                                                                                     x.iloc[
                                                                                                                         0] else
                                                                                                           x.iloc[0],
                                                                                                           '' if x.iloc[
                                                                                                                     1] is None else
                                                                                                           x.iloc[
                                                                                                               1].replace(
                                                                                                               ' ',
                                                                                                               '').split(
                                                                                                               '-')[
                                                                                                               0] if '-' in
                                                                                                                     x.iloc[
                                                                                                                         1] else
                                                                                                           x.iloc[1],
                                                                                                           '' if x.iloc[
                                                                                                                     2] is None else
                                                                                                           x.iloc[
                                                                                                               2].replace(
                                                                                                               ' ',
                                                                                                               '').split(
                                                                                                               '-')[
                                                                                                               0] if '-' in
                                                                                                                     x.iloc[
                                                                                                                         2] else
                                                                                                           x.iloc[2]
                                                                                                           ]
                                                                                                          )
                                                                                                      )
                                                                                               )
                                                                                          ), axis=1)
    data_df['AI_客户需求三级标签'] = data_df_needs_ai.fillna('').apply(lambda x: ','.join(list(filter(None,
                                                                                                      set(['' if x.iloc[
                                                                                                                     0] is None else
                                                                                                           x.iloc[
                                                                                                               0].replace(
                                                                                                               ' ',
                                                                                                               '').split(
                                                                                                               '-')[
                                                                                                               1] if '-' in
                                                                                                                     x.iloc[
                                                                                                                         0] else '',
                                                                                                           '' if x.iloc[
                                                                                                                     1] is None else
                                                                                                           x.iloc[
                                                                                                               1].replace(
                                                                                                               ' ',
                                                                                                               '').split(
                                                                                                               '-')[
                                                                                                               1] if '-' in
                                                                                                                     x.iloc[
                                                                                                                         1] else '',
                                                                                                           '' if x.iloc[
                                                                                                                     2] is None else
                                                                                                           x.iloc[
                                                                                                               2].replace(
                                                                                                               ' ',
                                                                                                               '').split(
                                                                                                               '-')[
                                                                                                               1] if '-' in
                                                                                                                     x.iloc[
                                                                                                                         2] else ''
                                                                                                           ]
                                                                                                          )
                                                                                                      )
                                                                                               )
                                                                                          ), axis=1)

    data_df_needs_man = data_df['人工_客户需求判断'].str.replace('，', ',').str.split(',', n=2, expand=True)
    # 判断不足3列时补齐
    while data_df_needs_man.shape[1] < 3:
        data_df_needs_man[data_df_needs_man.shape[1]] = None
    data_df_needs_man.columns = ['人工_客户需求1', '人工_客户需求2', '人工_客户需求3']
    data_df['人工_客户需求二级标签'] = data_df_needs_man.fillna('').apply(lambda x: ','.join(list(filter(None,
                                                                                                         set(['' if
                                                                                                              x.iloc[
                                                                                                                  0] is None else
                                                                                                              x.iloc[
                                                                                                                  0].replace(
                                                                                                                  ' ',
                                                                                                                  '').split(
                                                                                                                  '-')[
                                                                                                                  0] if '-' in
                                                                                                                        x.iloc[
                                                                                                                            0] else
                                                                                                              x.iloc[0],
                                                                                                              '' if
                                                                                                              x.iloc[
                                                                                                                  1] is None else
                                                                                                              x.iloc[
                                                                                                                  1].replace(
                                                                                                                  ' ',
                                                                                                                  '').split(
                                                                                                                  '-')[
                                                                                                                  0] if '-' in
                                                                                                                        x.iloc[
                                                                                                                            1] else
                                                                                                              x.iloc[1],
                                                                                                              '' if
                                                                                                              x.iloc[
                                                                                                                  2] is None else
                                                                                                              x.iloc[
                                                                                                                  2].replace(
                                                                                                                  ' ',
                                                                                                                  '').split(
                                                                                                                  '-')[
                                                                                                                  0] if '-' in
                                                                                                                        x.iloc[
                                                                                                                            2] else
                                                                                                              x.iloc[2]
                                                                                                              ]
                                                                                                             )
                                                                                                         )
                                                                                                  )
                                                                                             ), axis=1)
    data_df['人工_客户需求三级标签'] = data_df_needs_man.fillna('').apply(lambda x: ','.join(list(filter(None,
                                                                                                         set(['' if
                                                                                                              x.iloc[
                                                                                                                  0] is None else
                                                                                                              x.iloc[
                                                                                                                  0].replace(
                                                                                                                  ' ',
                                                                                                                  '').split(
                                                                                                                  '-')[
                                                                                                                  1] if '-' in
                                                                                                                        x.iloc[
                                                                                                                            0] else '',
                                                                                                              '' if
                                                                                                              x.iloc[
                                                                                                                  1] is None else
                                                                                                              x.iloc[
                                                                                                                  1].replace(
                                                                                                                  ' ',
                                                                                                                  '').split(
                                                                                                                  '-')[
                                                                                                                  1] if '-' in
                                                                                                                        x.iloc[
                                                                                                                            1] else '',
                                                                                                              '' if
                                                                                                              x.iloc[
                                                                                                                  2] is None else
                                                                                                              x.iloc[
                                                                                                                  2].replace(
                                                                                                                  ' ',
                                                                                                                  '').split(
                                                                                                                  '-')[
                                                                                                                  1] if '-' in
                                                                                                                        x.iloc[
                                                                                                                            2] else ''
                                                                                                              ]
                                                                                                             )
                                                                                                         )
                                                                                                  )
                                                                                             ), axis=1)
    data_df = data_df.rename(columns={'客户购买潜力依据': 'AI_客户购买潜力依据'})
    data_df = data_df.rename(columns={'顾问卡点依据': 'AI_顾问卡点依据'})
    data_df = data_df.rename(columns={'顾问卡点': 'AI_销售卡点'})
    data_df = data_df[
        ['会话组编号', '客户名称', '客户现状', '客户需求', '客户潜在需求', '客户意向等级', '跟进日期', '标签更新时间',
         'AI_客户购买潜力依据', 'AI_顾问卡点依据', 'AI_客户需求二级标签', 'AI_客户需求三级标签', 'AI_销售卡点',
         '人工_客户需求二级标签', '人工_客户需求三级标签', '人工_销售卡点']]
    data_df['核验_客户需求二级标签'] = ''
    data_df['核验_客户需求三级标签'] = ''
    data_df['核验_销售卡点'] = ''
    # data_df['核验_客户需求二级标签打分'] =  data_df[['AI_客户需求二级标签', '人工_客户需求二级标签']].fillna('').swifter.apply(ai_man_compare_forward, axis=1)
    # data_df['核验_客户需求三级标签打分'] = data_df[['AI_客户需求三级标签', '人工_客户需求三级标签']].fillna('').swifter.apply(ai_man_compare_forward, axis=1)
    data_df = data_df.sort_index()
    data_df = split_rows_by_comma(data_df, '人工_客户需求二级标签')
    data_df = split_rows_by_comma(data_df, '人工_客户需求三级标签')
    data_df = split_rows_by_comma(data_df, 'AI_客户需求二级标签')
    data_df = split_rows_by_comma(data_df, 'AI_客户需求三级标签')
    data_df = data_df[data_df['AI_客户需求三级标签'].notna()]
    data_df = data_df[data_df['人工_客户需求三级标签'].notna()]
    data_df['核验_客户需求二级标签打分'] = multi_func(
        data_df[['AI_客户需求二级标签', '人工_客户需求二级标签']].fillna(''))
    data_df['核验_客户需求三级标签打分'] = multi_func(
        data_df[['AI_客户需求三级标签', '人工_客户需求三级标签']].fillna(''))
    # data_df['核验_销售卡点打分'] = multi_func(data_df[['AI_销售卡点', '人工_销售卡点']].fillna(''))
    data_df = data_df.sort_values(by=['标签更新时间'], ascending=[False])
    data_df.to_excel(file_path.replace('csv', 'xlsx'), index=False)
    # 创建数据库连接
    engine = create_engine(
        'mysql+pymysql://test_pldb_rw:fbtYqZ0I4YszvFEjaKZuoTXCM77uP2@test-scrm-cluster.rwlb.rds.aliyuncs.com/ai')
    engine2 = create_engine(
        'mysql+pymysql://ai:LWJRe05rX5oe7rFH@prodwritepolardb.rwlb.rds.aliyuncs.com/ai')
    yesterday = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
    data_df['data_dt'] = yesterday
    # 将data_df写入MySQL
    data_df.rename(columns={'会话组编号': 'group_id',
                            '客户名称': 'customer_name',
                            '客户现状': 'current_status',
                            '客户需求': 'stated_needs',
                            '客户潜在需求': 'potential_needs',
                            '客户意向等级': 'intention_level',
                            '跟进日期': 'followup_date',
                            '标签更新时间': 'tag_update_time',
                            'AI_客户购买潜力依据': 'ai_purchase_potential',
                            'AI_顾问卡点依据': 'ai_consultant_blockers',
                            'AI_客户需求二级标签': 'ai_secondary_tags',
                            'AI_客户需求三级标签': 'ai_tertiary_tags',
                            'AI_销售卡点': 'ai_sales_blockers',
                            '人工_客户需求二级标签': 'manual_secondary_tags',
                            '人工_客户需求三级标签': 'manual_tertiary_tags',
                            '人工_销售卡点': 'manual_sales_blockers',
                            '核验_客户需求二级标签': 'verified_secondary_tags',
                            '核验_客户需求三级标签': 'verified_tertiary_tags',
                            '核验_销售卡点': 'verified_sales_blockers',
                            '核验_客户需求二级标签打分': 'verified_secondary_scores',
                            '核验_客户需求三级标签打分': 'verified_tertiary_scores',
                            '核验_销售卡点打分': 'verified_blocker_scores'
                            }, inplace=True)
    data_df.to_sql(name='ai_sale_original_data', con=engine, if_exists='append', index=False)
    data_df.to_sql(name='ai_sale_original_data', con=engine2, if_exists='append', index=False)
    to_mysql = pd.read_excel(file_path.replace('csv', 'xlsx'))

    # print('输出文件：', file_path.replace('csv', 'xlsx'))
# Core splitting function
