初始化代码

18a362c4 · Bart刘笑东 · 068062da · 18a362c4 · 18a362c4 · 18a362c4
Commit 18a362c4 authored Aug 12, 2025 by Bart刘笑东
6 changed files
--- a/customer_day_analysis.py
+++ b/customer_day_analysis.py
--- a/customer_dialog_clearn.py
+++ b/customer_dialog_clearn.py
--- a/customer_dialog_clearn_analysis_cp.py
+++ b/customer_dialog_clearn_analysis_cp.py
--- a/online_streaming.py
+++ b/online_streaming.py
+# online_streaming.py
+import json
+import logging
+import traceback
+from typing import Dict
+import requests
+from requests import RequestException
+from tenacity import retry, retry_if_exception_type, wait_fixed, stop_after_attempt, before_log
+import time
+logger = logging.getLogger()
+class AiGenerationException(Exception):
+    """Raised when an AI generation request fails.
+
+    Attributes:
+        message: Human-readable description of the failure; also forwarded to
+            ``Exception`` so it is available through ``args``.
+    """
+    def __init__(self, message: str = "AI生成错误"):
+        # Forward the message to the base class so e.args, repr() and pickling carry it.
+        super().__init__(message)
+        self.message = message
+    def __str__(self):
+        return f'AiGenerationException: {self.message}'
+class online_llm_streaming(object):
+    """Client that posts one query to the workflow endpoint and returns its output.
+
+    The request goes to ``base_url + route`` with a bearer token; the answer is read
+    from ``data.outputs.output`` of the response (blocking mode) or of the
+    ``workflow_finished`` event (streaming mode).
+    """
+    base_url = "https://test-copilot.galaxy-immi.com/v1"
+    # NOTE(review): the default api_key below is a credential committed to source control.
+    # The default is kept so existing callers keep working, but the key should be rotated
+    # and supplied by the caller (e.g. from configuration) instead.
+    def __init__(self, input_query, api_key: str = "app-Ipg9sBRE3FRKYX5TMVO6OthV", route: str = "/workflows/run",
+                 response_mode: str = "blocking"):
+        """Store the request parameters.
+
+        Args:
+            input_query: Value sent as ``inputs["input_query"]``.
+            api_key: Bearer token placed in the ``Authorization`` header.
+            route: Path appended to ``base_url``.
+            response_mode: ``"blocking"`` or ``"streaming"``.
+        """
+        self.route = route
+        self.response_mode = response_mode
+        self.api_key = api_key
+        self.inputs = {"input_query": input_query}
+    @staticmethod
+    def _ensure_http_ok(resp) -> None:
+        """Log and raise AiGenerationException unless the HTTP status code is 200."""
+        if resp.status_code != 200:
+            error_msg = f'AI生成失败，http-status_code：{resp.status_code}\nresponse.text：\n=====\n{resp.text}\n=====\n'
+            logger.error(error_msg)
+            raise AiGenerationException(message=error_msg)
+    @staticmethod
+    def _extract_blocking_output(res_json) -> str:
+        """Return ``data.outputs.output`` from a blocking response, or "{}" if it is malformed or failed."""
+        if not isinstance(res_json, dict):
+            logger.error('AI生成返回格式错误，res_json不是字典类型')
+            return "{}"
+        if 'data' not in res_json:
+            logger.error('AI生成返回格式错误，缺少data字段')
+            return "{}"
+        payload = res_json['data']
+        if not isinstance(payload, dict):
+            logger.error('AI生成返回格式错误，data不是字典类型')
+            return "{}"
+        if payload.get('status') == "failed":
+            logger.error('AI生成失败，data.status为failed')
+            return "{}"
+        outputs = payload.get('outputs')
+        if not isinstance(outputs, dict):
+            logger.error('AI生成返回格式错误，缺少outputs字段或outputs不是字典类型')
+            return "{}"
+        if 'output' not in outputs:
+            logger.error('AI生成返回格式错误，缺少output字段')
+            return "{}"
+        return outputs['output']
+    @staticmethod
+    def _extract_stream_output(chunk: bytes):
+        """Return the output carried by one streamed line, or "" if the line carries none.
+
+        Raises:
+            AiGenerationException: If the line is a ``workflow_finished`` event whose status is ``failed``.
+            ValueError: If the line has no ``:`` separator, is not valid UTF-8, or is not valid JSON.
+        """
+        # Each line is "<field>:<payload>"; only the payload part is inspected.
+        _, payload = chunk.decode('utf-8').split(':', maxsplit=1)
+        payload = payload.strip()
+        if payload == "ping":
+            return ""
+        chunk_data = json.loads(payload)
+        # Only the final workflow_finished event contributes to the result.
+        if not isinstance(chunk_data, dict) or chunk_data.get('event') != "workflow_finished":
+            return ""
+        chunk_result = chunk_data.get('data')
+        if not isinstance(chunk_result, dict):
+            return ""
+        if chunk_result.get('status') == 'failed':
+            error_msg = f'AI生成失败，chunk_data：\n=====\n{chunk_data}\n=====\n'
+            logger.error(error_msg)
+            raise AiGenerationException(message=error_msg)
+        outputs = chunk_result.get('outputs')
+        if not isinstance(outputs, dict) or 'output' not in outputs:
+            return ""
+        return outputs['output']
+    def ai_generate(self, url: str, headers: Dict, data: Dict, timeout: int, response_mode: str) -> str:
+        """Send the generation request and return the generated output.
+
+        Args:
+            url: Full endpoint URL.
+            headers: HTTP headers for the request.
+            data: JSON body for the request.
+            timeout: Request timeout in seconds; applied in blocking mode only.
+            response_mode: ``"blocking"`` or ``"streaming"``.
+
+        Returns:
+            The ``output`` value of the workflow. In blocking mode "{}" is returned when
+            the response body is malformed or reports a failed status.
+
+        Raises:
+            AiGenerationException: On a non-200 HTTP status, on a failed workflow reported
+                in the stream, or on an unsupported ``response_mode``.
+        """
+        if response_mode == "blocking":
+            resp = requests.post(
+                url=url,
+                headers=headers,
+                json=data,
+                timeout=timeout
+            )
+            self._ensure_http_ok(resp)
+            res_json = resp.json()
+            logger.info(f"AI生成返回：\n=====\n{json.dumps(res_json, indent=4, ensure_ascii=False)}\n=====\n")
+            return self._extract_blocking_output(res_json)
+        elif response_mode == "streaming":
+            # NOTE(review): the timeout argument is ignored here and 1200s is hard-coded;
+            # left unchanged because callers may rely on the longer streaming timeout.
+            # The context manager closes the streamed connection even when an error is raised.
+            with requests.post(
+                url=url,
+                headers=headers,
+                json=data,
+                timeout=1200,
+                stream=True
+            ) as resp:
+                self._ensure_http_ok(resp)
+                result = ""
+                for chunk in resp.iter_lines():
+                    if not chunk:
+                        continue
+                    try:
+                        result += self._extract_stream_output(chunk)
+                    except AiGenerationException:
+                        # A failed workflow must reach the caller instead of being
+                        # downgraded to a warning by the best-effort handler below.
+                        raise
+                    except Exception as e:
+                        # Malformed lines are skipped on purpose (best effort).
+                        logger.warning(f"处理数据块时发生错误: {str(e)}")
+                        continue
+                return result
+        else:
+            raise AiGenerationException(message=f"不支持的response_mode：{response_mode}")
+    def run(self, timeout: int = 600) -> str:
+        """Build the request from the stored parameters and run the generation.
+
+        Args:
+            timeout: Timeout in seconds forwarded to ``ai_generate``.
+
+        Returns:
+            The generated output, or "{}" if any exception is raised along the way
+            (the exception is logged together with its traceback).
+        """
+        headers = {
+            'Authorization': f'Bearer {self.api_key}',
+            'Content-Type': 'application/json'
+        }
+        data = {
+            "inputs": self.inputs,
+            "response_mode": self.response_mode,
+            "user": "fadsf"
+        }
+        try:
+            return self.ai_generate(
+                url=f"{self.base_url}{self.route}",
+                headers=headers,
+                data=data,
+                timeout=timeout,
+                response_mode=self.response_mode
+            )
+        except Exception as e:
+            # Top-level boundary: callers receive the "{}" sentinel instead of an exception.
+            logger.error(f"AI生成失败: {str(e)}")
+            logger.error(traceback.format_exc())
+            return "{}"
+if __name__ == '__main__':
+    # Manual smoke test: send one sample question in the default (blocking) mode and print the answer.
+    print(online_llm_streaming(input_query="香港四大天王是谁？").run())
--- a/settings.py
+++ b/settings.py
+# settings.py
+"""Configuration file."""
+import os
+from typing import Dict, Any
+# LLM configuration
+class LLMConfig:
+    """LLM service settings, each read from an environment variable with a fallback default."""
+    BASE_URL: str = os.environ.get("LLM_BASE_URL", "http://your-llm-service-url")
+    ROUTE: str = os.environ.get("LLM_ROUTE", "/v1/chat/completions")
+    API_KEY: str = os.environ.get("LLM_API_KEY", "your-api-key")
+    # Numeric settings are parsed once, when the class body is executed.
+    MAX_RETRIES: int = int(os.environ.get("LLM_MAX_RETRIES", "3"))
+    TIMEOUT: int = int(os.environ.get("LLM_TIMEOUT", "30"))
+# File path configuration
+class PathConfig:
+    """File names used by the project, each overridable through an environment variable."""
+    CORPUS_FILE: str = os.environ.get("CORPUS_FILE", "sales_corpus.csv")
+    TAGS_FILE: str = os.environ.get("TAGS_FILE", "sales_tags.csv")
+    CHANNEL_CONFIG: str = os.environ.get("CHANNEL_CONFIG", "channel_activity.yaml")
+    LOG_FILE: str = os.environ.get("LOG_FILE", "sales_analysis.log")
+# Sales violation scenario definitions.
+# Maps each violation category name to a list of trigger phrases for that category.
+# NOTE(review): presumably these phrases are matched against dialogue text — confirm against the caller.
+SALES_VIOLATIONS: Dict[str, list] = {
+    "虚假信息": ["空壳公司", "挂靠", "不用注销户籍", "自动延续", "享受本地待遇"],
+    "过度承诺": ["肯定", "一定", "保证", "100%", "绝对"],
+    "不当建议": ["随便找", "不用着急", "来得及", "自雇"],
+    "违法建议": ["虚假证明", "造假", "修改材料"],
+}
+# Sales quality evaluation criteria.
+# Maps each metric name to a dict with:
+#   "weight":  the metric's numeric weight (the four weights below sum to 1.0)
+#   "factors": a list of factor descriptions belonging to that metric
+SALES_QUALITY_METRICS: Dict[str, Dict[str, Any]] = {
+    "专业性": {
+        "weight": 0.3,
+        "factors": ["产品知识准确", "政策解释清晰", "专业术语使用恰当"]
+    },
+    "合规性": {
+        "weight": 0.3,
+        "factors": ["无违规承诺", "无虚假信息", "无不当建议"]
+    },
+    "服务态度": {
+        "weight": 0.2,
+        "factors": ["礼貌用语", "耐心解答", "积极主动"]
+    },
+    "沟通技巧": {
+        "weight": 0.2,
+        "factors": ["需求挖掘", "异议处理", "总结复述"]
+    }
+}
+# Analysis configuration
+class AnalysisConfig:
+    """Analysis thresholds, each read from an environment variable with a fallback default."""
+    MIN_DIALOGUE_LENGTH: int = int(os.environ.get("MIN_DIALOGUE_LENGTH", "50"))
+    MAX_DIALOGUE_LENGTH: int = int(os.environ.get("MAX_DIALOGUE_LENGTH", "3000"))
+    BATCH_SIZE: int = int(os.environ.get("BATCH_SIZE", "10"))
+    SCORE_THRESHOLD: float = float(os.environ.get("SCORE_THRESHOLD", "0.6"))
+# Logging configuration
+class LogConfig:
+    """Logging settings: level (overridable via LOG_LEVEL), record format and file encoding."""
+    LEVEL: str = os.environ.get("LOG_LEVEL", "INFO")
+    FORMAT: str = "%(asctime)s - %(levelname)s - %(message)s"
+    ENCODING: str = "utf-8"
--- a/to_mysql.py
+++ b/to_mysql.py
+import pandas as pd
+from sqlalchemy import create_engine, text
+if __name__ == '__main__':
+    file_path = f'./user_day_research_result/user_day_research_result_20250711.xlsx'
+    to_mysql = pd.read_excel(file_path)