|
| 1 | +## tool learning 数据集评测教程 |
| 2 | + |
| 3 | +### chatml接入方式 |
| 4 | +如果需要在自己的 huggingface 格式的模型上进行测试的话,总的步骤分为如下几步: |
| 5 | +1. 编写 ~/evals/FuncCallEvalution 的 create_prompts 函数 |
| 6 | +2. 编写 ~/models/base_model 的相关函数 |
| 7 | +3. 注册模型和评估函数 |
| 8 | +4. 执行测试脚本 |
| 9 | +如果模型在加载进来后不需要特殊的处理,而且输入也不需要转换为特定的格式(e.g. chatml 格式或者其他的 human-bot 格式),请直接跳转到第四步直接发起测试。 |
| 10 | + |
| 11 | +#### 1. 编写 create_prompts 函数 |
| 12 | +如果输入数据需要转换为特定的格式(e.g. chatml 格式或者其他的 human-bot 格式),需要在 `~/evals/func_call_evalution.py` 中继承 `ToolEvalution` 类来覆写对应的 `create_prompts` 函数,具体可以参照以下示例: |
| 13 | +```python |
class FuncCallEvalution(ToolEvalution):
    '''Build per-turn evaluation samples from multi-round function-call chats.'''

    def create_prompts(self, func_call_datas):
        '''
        Convert raw function-call conversations into one sample per assistant turn.

        Args:
            func_call_datas: iterable of dicts, each holding
                "functions": the tool descriptions offered to the model;
                "chatrounds": the list of messages. Every message has a
                    "role" ("system" / "user" / "assistant" / "function") and
                    a "content"; assistant messages may carry a
                    "function_call" dict, function messages carry a "name".

        Returns:
            A list of samples shaped like
                {
                    "instruction": <current user-side turn>,
                    "input": "",
                    "output": <dict with the expected assistant answer>,
                    "history": [[user_turn, assistant_turn], ...],
                    "functions": tools
                }
        '''
        system_content = (
            "CodeFuse是一个面向研发领域的智能助手,旨在中立的、无害的帮助用户解决开发相关的问题,所有的回答均使用Markdown格式返回。\n"
            "你能利用许多工具和功能来完成给定的任务,在每一步中,你需要分析当前状态,并通过执行函数调用来确定下一步的行动方向。你可以进行多次尝试。如果你计划连续尝试不同的条件,请每次尝试一种条件。若给定了Finish函数,则以Finish调用结束,若没提供Finish函数,则以不带function_call的对话结束。"
        )
        function_format = "You are ToolGPT, you have access to the following APIs:\n{tools}"

        func_call_train_datas = []

        for data in func_call_datas:
            tools = data["functions"]
            chatrounds = data["chatrounds"]

            # The tool list is only advertised when there is at least one tool.
            function_content = ""
            if tools:
                function_content = function_format.format(
                    tools=json.dumps(tools, ensure_ascii=False, sort_keys=True)
                )

            history = self._flatten_chatrounds(chatrounds)
            if not history:
                # Only system/malformed messages: no sample can be built, and
                # indexing history[0] below would raise IndexError.
                continue

            # The system prompt and tool list are folded into the first turn.
            history[0] = "\n".join([system_content, function_content, history[0]])

            # Walk complete (user-side, assistant) pairs only; a trailing
            # user-side turn without an answer is dropped instead of raising
            # IndexError on history[his_idx + 1].
            for his_idx in range(0, len(history) - 1, 2):
                func_call_train_datas.append(
                    {
                        "instruction": history[his_idx],
                        "input": "",
                        "output": self._parse_output(history[his_idx + 1]),
                        # Every earlier pair, oldest first.
                        "history": [history[i:i + 2] for i in range(0, his_idx, 2)],
                        "functions": tools,
                    }
                )
        return func_call_train_datas

    @staticmethod
    def _flatten_chatrounds(chatrounds):
        '''Return the message texts of one conversation, system messages removed.'''
        history = []
        for message in chatrounds:
            role = message["role"]
            if role == "user":
                history.append(message["content"])
            elif role == "assistant":
                if "function_call" not in message:
                    history.append(message["content"])
                elif isinstance(message["function_call"], dict):
                    # Function calls are serialised behind a "#function" marker
                    # so they can be told apart from plain answers later.
                    payload = {"content": message["content"], **message["function_call"]}
                    history.append("#function" + json.dumps(payload, ensure_ascii=False))
                # NOTE(review): an assistant message whose function_call is not
                # a dict is skipped, which shifts the user/assistant pairing of
                # the rest of the conversation - confirm this is intended.
            elif role == "function":
                # Tool results are fed back to the model as user-side turns.
                payload = {"content": message["content"], "name": message["name"]}
                history.append(json.dumps(payload, ensure_ascii=False))
        return history

    @staticmethod
    def _parse_output(output):
        '''Turn one assistant turn into a dict; plain text becomes {"content": text}.'''
        if isinstance(output, str) and "#function" in output:
            output = output.split("#function")[-1]
        try:
            parsed = json.loads(output)
        except (TypeError, ValueError):
            # Not JSON (json.JSONDecodeError is a ValueError) or not a string.
            return {"content": output}
        # Text such as "123" or "null" is valid JSON but not a structured answer.
        return parsed if isinstance(parsed, dict) else {"content": output}
| 93 | +``` |
| 94 | + |
| 95 | +#### 2. 编写 Model 的相关函数 |
| 96 | +如果模型在加载进来后还需要做一些额外的处理(e.g. tokenizer 调整),或者推理方式需要定制,则需要在 `~/models/base_model.py` 中继承 `ToolModel` 类来覆写 `load_model` 和 `generate` 函数,一个示例如下: |
| 97 | +```python |
class ToolModel:
    '''Base model wrapper: loads a tokenizer/model pair and exposes generate hooks.'''

    def __init__(self, model_path: str, template: str, trust_remote_code=True, tensor_parallel_size=1, gpu_memory_utilization=0.25):
        '''
        Store the loading options and load the model immediately.

        Args:
            model_path: path or identifier handed to ``from_pretrained``.
            template: prompt template name; kept on the instance for subclasses.
            trust_remote_code: forwarded to ``from_pretrained``.
            tensor_parallel_size: stored and passed to ``load_model``.
            gpu_memory_utilization: stored and passed to ``load_model``.
        '''
        self.model_path = model_path
        # Previously accepted but silently dropped.
        self.template = template
        self.trust_remote_code = trust_remote_code
        self.tensor_parallel_size = tensor_parallel_size
        self.gpu_memory_utilization = gpu_memory_utilization
        self.load_model(self.model_path, self.trust_remote_code, self.tensor_parallel_size, self.gpu_memory_utilization)

    def generate(self, prompts: str, template: str = None, generate_configs: "GenerateConfigs" = None) -> list:
        '''Produce the model outputs for ``prompts``; subclasses are expected to override this.'''
        pass

    def generate_params(
        self, generate_configs: "GenerateConfigs",
    ):
        '''Return the generation settings as the mapping given by ``generate_configs.dict()``.'''
        kargs = generate_configs.dict()
        return kargs

    def load_model(self, model_path, trust_remote_code=True, tensor_parallel_size=1, gpu_memory_utilization=0.25):
        '''
        Load the tokenizer and the model from ``model_path``.

        ``tensor_parallel_size`` and ``gpu_memory_utilization`` are not used by
        this loader; they are only referenced by the alternative noted below.
        '''
        # Honour the argument instead of always reading self.model_path.
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=trust_remote_code)
        self.model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=trust_remote_code).eval()

        # NOTE: alternative loader kept from the original for reference:
        # self.model = LLM(model=model_path, trust_remote_code=trust_remote_code, tensor_parallel_size=tensor_parallel_size, gpu_memory_utilization=gpu_memory_utilization)
| 123 | +``` |
| 124 | + |
| 125 | +#### 3. 注册模型和eval函数即可 |
| 126 | +在 ~/models/__init__.py 中注册即可 |
| 127 | +```python |
| 128 | +from .base_model import ToolModel |
| 129 | + |
| 130 | +__all__ = [ |
| 131 | + "ToolModel", |
| 132 | +] |
| 133 | +``` |
| 134 | +在 ~/evals/__init__.py 中注册即可 |
| 135 | +```python |
| 136 | +from .base_evalution import ToolEvalution |
| 137 | +from .toolfill_evalution import ToolFillEvalution |
| 138 | +from .toolparser_evalution import ToolParserEvalution |
| 139 | +from .toolsummary_evalution import ToolSummaryEvalution |
| 140 | +from .func_call_evalution import FuncCallEvalution |
| 141 | + |
| 142 | + |
| 143 | +__all__ = [ |
| 144 | + "ToolEvalution", "ToolFillEvalution", "ToolParserEvalution", "ToolSummaryEvalution", "FuncCallEvalution" |
| 145 | +] |
| 146 | +``` |
| 147 | + |
| 148 | + |
| 149 | +#### 4. 执行测试脚本 |
| 150 | +修改 ~/src/qwen_eval_main.py 中的 datainfos 和 model_infos |
| 151 | +```python |
| 152 | +model_infos = [ |
| 153 | + {"model_name": "", "template": "chatml", "model_path": "", |
| 154 | + "peft_path": "", "model_class": QwenModel}] |
| 155 | + |
| 156 | +datainfos = [ |
| 157 | + {"dataset_path": "~/fcdata_luban_zh_test.jsonl", "dataset_name": "fcdata_luban_zh", "tool_task": "func_call"}, |
| 158 | + {"dataset_path": "~/test_datas/fcdata_zh_test_v1.jsonl", "dataset_name": "fcdata_zh", "tool_task": "func_call"}, |
| 159 | +] |
| 160 | +``` |
| 161 | + |
| 162 | +运行下述命令即可 |
| 163 | +```Bash |
| 164 | +python qwen_eval_main.py |
| 165 | +``` |
| 166 | + |
| 167 | +<br> |
| 168 | + |
| 169 | +### 非chatml接入 |
| 170 | +如果需要在自己的 huggingface 格式的模型上进行测试的话,总的步骤分为如下几步: |
| 171 | +1. 编写 ~/getAssistantAns.py 相关代码 |
| 172 | +2. 执行测试脚本 |
| 173 | + |
| 174 | + |
| 175 | +#### 1、编写 getAssistantAns 示例 |
| 176 | +```python |
class GetAssistantAns():
    '''Sample answer provider; adapt the code to your own inference needs.'''

    def __init__(self, gpu_num=1):
        '''
        Load the model and move it onto a GPU.

        Args:
            gpu_num: number of device entries to create.
        '''
        # NOTE(review): `model_name` is not defined in this snippet; it is
        # presumably a module-level setting supplied by the user - confirm.
        self.model = AutoModelForCausalLM.from_pretrained(model_name)

        # NOTE(review): every entry is "cuda:0", as in the original sample;
        # presumably each worker only sees one GPU through
        # CUDA_VISIBLE_DEVICES - confirm before switching to per-index devices.
        self.device_list = [torch.device("cuda:0") for _ in range(gpu_num)]

        # Move the model to the selected GPU device. The original referenced an
        # undefined name `device` here and did not keep the model on the instance.
        if self.device_list:
            self.model.to(self.device_list[0])

    def gen_answer(self, chat_dict, gpu_index):
        '''
        Return one assistant message for ``chat_dict`` in the standard format.

        Replace the body with your own inference logic; what follows is only a
        sample that picks one of two canned answers at random.
        '''
        import time
        # .get avoids a KeyError when CUDA_VISIBLE_DEVICES is not set.
        print(os.environ.get("CUDA_VISIBLE_DEVICES"))
        time.sleep(1)

        # Sample answer that asks for a function call.
        rtn_dict1 = {
            "role": "assistant",
            "content": None,
            "function_call": {
                "name": "get_fudan_university_scoreline",
                "arguments": "{\n \"year\": \"2020\"\n}",
            },
        }

        # Sample answer that replies in plain text.
        rtn_dict2 = {
            "role": "assistant",
            "content": "2020年复旦大学的分数线如下:\n\n- 文科一批:630分\n- 文科二批:610分\n- 理科一批:650分\n- 理科二批:630分",
        }

        return random.choice([rtn_dict1, rtn_dict2])
| 212 | +``` |
| 213 | +#### 2、执行测试脚本 |
| 214 | +修改 ~/src/opensource_functioncall_evalution.py 中的 test_ans_file_list |
| 215 | +```python |
| 216 | +test_ans_file_list = [ |
| 217 | + "fcdata_zh_test.jsonl" |
| 218 | + ] |
| 219 | +``` |
| 220 | + |
| 221 | +运行下述命令即可 |
| 222 | +```Bash |
| 223 | +python opensource_functioncall_evalution.py |
| 224 | +``` |
0 commit comments