diff --git a/.gitignore b/.gitignore
index 6294371..c08061c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
.idea
__pycache__
-
+.DS_Store
diff --git a/01_basic/README.md b/01_basic/README.md
index ffd211b..54419b6 100644
--- a/01_basic/README.md
+++ b/01_basic/README.md
@@ -80,7 +80,32 @@
[**40. 字典转成对象(骚操作)**](#字典转成对象)
+[**41. lgb[gpu版本]和xgb[gpu版本]安装**](#boost安装)
+
+[**42. tqdm**](#tqdm)
+
+[**43. joblib Parallel并行**](#joblib_parallel)
+
+[**44. 调试神器pysnooper - 丢弃print**](#调试神器pysnooper)
+
+[**45. 调试神器debugpy**](#调试神器debugpy)
+
+[**46. 分组计算均值并填充**](#分组计算均值并填充)
+
+[**47. python日期处理**](#python日期处理)
+
+[**48. dataclass**](#dataclass)
+
+[**49. md5 sha256**](#md5_sha256)
+
+[**50. 查看内存**](#查看内存)
+
+[**51. __slots__用法**](#slots用法)
+
---
+
+点击展开
+
```python
%reload_ext autoreload
%autoreload 2
@@ -321,6 +346,10 @@ data[data.msg_from.apply(len)==10]
```
### re模块
+
+[常用正则表达式速查手册,Python文本处理必备](https://mp.weixin.qq.com/s/ySsgcrSnkguO2c8D-SQNxw)
+[regexlearn](https://github.com/aykutkardas/regexlearn.com)
+
```python
# 1. 将一个问题中的网址、邮箱、手机号、身份证、日期、价格提出来
@@ -370,7 +399,104 @@ s = '22基本日常生活活动:指食物摄取、大小便始末、穿脱衣
re.findall(r'^\d+(?![\d*小时*]|[\d*种*])[\u4e00-\u9fa5]+', s)
# 匹配只留下中文、英文和数字
-re.sub(r'[^\u4E00-\u9FA5\s0-9a-zA-Z]+', '', s)-
+re.sub(r'[^\u4E00-\u9FA5\s0-9a-zA-Z]+', '', s)
+
+# 日期解析202206
+import cn2an #version 0.5.14
+import datetime
+import re
+def getYearMonth(s):
+ '''
+ 【格式说明】
+ 今年上个月/上月/前一个月/前个月 -> 202204
+ 今年当月/该月/这月/这个月/本月 -> 202205
+ 去年5月/去年五月/2021年五月/2021五月/二零二一五月/二零二一 五月 -> 202105
+ 前年5月/前年五月/2020年五月/2020五月/二零二零五月/二零二零 五月 -> 202005
+ 2021年7月/二零二一年7月 -> 202107
+ 5月/五月份 -> 202205
+ 2021.6/2021.06/2021-6/2021-06/2021 - 6月/2021 ---6月/2021 . 6月/2021...6月, -> 202106
+ 2021 4月/2021 04 -> 202104
+ 如果没有提到时间 -> 202205(默认今年当月)
+ 如果输入的时间有误或月份有误比如输入2021 23, -> 202205(默认今年当月)
+ 如果输入时间超过当前时间 -> 202205(默认今年当月)
+ 如果输入时间早于2020年1月 -> 202205(默认今年当月)
+ '''
+ cur_date = datetime.datetime.now().strftime('%Y%m')
+ try:
+ DATE_REG1 = '(?:[一二三四五六七八九零十0-9]{1,4}年[一二三四五六七八九零十0-9]{1,2}月)|(?:去年[一二三四五六七八九零十0-9]+月)|(?:前年[一二三四五六七八九零十0-9]+月)|(?:[一二三四五六七八九零十0-9]+年[一二三四五六七八九零十0-9]+月)|(?:[一二三四五六七八九零十0-9]{1,2}月)|(?:[一二三四五六七八九零十0-9]+年)|(?:[一二三四五六七八九零十0-9]+月)'
+ thism_lst = ['当月', '该月', '这个月', '本月']
+ lastm_lst = ['上月', '上个月', '前一个月', '前个月']
+ date = ''
+ def helper(s, pattern):
+ date = ''
+ s = cn2an.transform(s, "cn2an") # 转换成阿拉伯数字
+ res = re.findall(pattern, s)
+ if res:
+ res = res[0] # 如果有多个就取第一个
+ year = '2022' #需要人工维护当年,还有过去两年的一个判断;每年要手动更新这部分
+ if '去年' in res or '21年' in res:
+ year = '2021'
+ elif '前年' in res or '20年' in res:
+ year = '2020'
+ month = re.findall('(?:([0-9]+)月)', res)
+ if month:
+ month = int(month[0])
+ if month > 0 and month < 13:
+ if month < 10:
+ month = '0' + str(month)
+ else:
+ month = str(month)
+ else:
+ return ''
+ date = year + month
+ else:
+ date = year + str(datetime.datetime.now().month)
+ return date
+ six_d = re.findall(r'2\d{5}', s) #直接识别6位日期比如202110
+ if six_d:
+ date = six_d[0]
+ if not date:
+ # 针对2021 4月/2021.6/2021.06/2021-6/2021-06/2021 - 6月/2021 ---6月/2021 . 6月/2021...6月这些情况
+ DATE_REG3 = r'(?:\d{4}\s*\.+\s*\d{1,2})|(?:\d{4}\s*-+\s*\d{1,2})|(?:\d{4}\s*_+\s*\d{1,2})|(?:\d{4}\s+\d{1,2})'
+ six_d2 = re.findall(DATE_REG3, s)
+ if six_d2:
+ _six_d2 = six_d2[0]
+ try:
+ int(_six_d2[-2])
+ _six_d2_m = _six_d2[-2:]
+ except:
+ _six_d2_m = _six_d2[-1]
+ s = _six_d2[:4]+'年'+_six_d2_m+'月'
+ s = s.replace(' ', '')
+ if not date:
+ for i in thism_lst:
+ if i in s:
+ date = cur_date
+ break
+ if not date:
+ for i in lastm_lst:
+ if i in s:
+ date = (datetime.datetime.now() - datetime.timedelta(days=30, hours=23)).strftime('%Y%m')
+ break
+ if not date:
+ # 判断2021五月这种情况
+ DATE_REG2 = '(?:[一二三四五六七八九零十0-9]{4}[一二三四五六七八九零十]{1,2}月)'
+ res = re.findall(DATE_REG2, s)
+ if res:
+ s = res[0][:4]+'年'+res[0][4:]
+ date = helper(s, DATE_REG1)
+ else:
+ date = ''
+ if not date:
+ date = helper(s, DATE_REG1)
+ if not date:
+ date = cur_date
+ #corner case再判断下,处理下边界问题
+ if date < '202001' or date[-2:] > '12':
+ date = cur_date
+ except:
+ date = cur_date
+ return date
```
### eval
@@ -444,6 +570,81 @@ tasks.append(executor.submit(func2, param1, param2))
wait(tasks, return_when='ALL_COMPLETED')
res1, res2 = (x.result() for x in tasks)
```
+```python
+# 多进程优化版(推荐用这个)
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import functools
+from concurrent.futures import ProcessPoolExecutor
+from tqdm import tqdm
+import time
+
+class Pipe(object):
+    """Infix pipe: `data | Pipe(f)` evaluates f(data), like a Unix shell pipe."""
+
+    def __init__(self, function):
+        self.function = function
+        functools.update_wrapper(self, function)
+
+    def __ror__(self, other):
+        return self.function(other)  # right-hand `|` operand: `other | self` -> function(other)
+
+    def __call__(self, *args, **kwargs):
+        return Pipe(
+            lambda iterable, *args2, **kwargs2: self.function(
+                iterable, *args, *args2, **kwargs, **kwargs2
+            )
+        )
+
+@Pipe
+def xProcessPoolExecutor(iterable, func, max_workers=5, desc="Processing", unit="it"):
+ if max_workers > 1:
+ total = len(iterable) if hasattr(iterable, '__len__') else None
+
+ with ProcessPoolExecutor(max_workers) as pool, tqdm(total=total, desc=desc, unit=unit) as pbar:
+ for i in pool.map(func, iterable):
+ yield i
+ pbar.update()
+
+ else:
+ return map(func, iterable)
+
+xtuple, xlist, xset = Pipe(tuple), Pipe(list), Pipe(set)
+
+def ff(x):
+ for i in range(x):
+ a = 1
+ return x+2
+
+if __name__ == '__main__':
+ dfs = []
+ arr = [100000000,200000000,300000000,400000000]
+ #without multiprocess
+ for i in arr:
+ dfs.append(ff(i))
+ #with multiprocess
+ dfs = arr | xProcessPoolExecutor(ff, 16) | xlist #这里的16是进程数,一般cpu有N核就起N-1个进程
+ print(dfs)
+```
+```python
+# 多进程(yuanjie封装meutils) 以多进程读取data下pdf文件为例
+from meutils.pipe import *
+os.environ['LOG_PATH'] = 'pdf.log'
+from meutils.log_utils import *
+location = 'output' #pdf文件处理后保存的文件夹
+@diskcache(location=location)
+def func(file_path):
+ try:
+ df = pdf_layout(str(file_path)) #解析成字典 详见https://github.com/binzhouchn/deep_learning/blob/master/4_llm/1_%E5%90%91%E9%87%8F%E6%95%B0%E6%8D%AE%E5%BA%93/es/es.py 中的body字典
+ with open(f'{location}/{file_path.stem}.txt', 'w', encoding='utf8') as f:
+ json.dump(df, f, ensure_ascii=False)
+ except Exception as e:
+ logger.debug(f"{file_path}: {e}")
+ logger.debug(f"{file_path}: {traceback.format_exc().strip()}")
+if __name__ == '__main__':
+ ps = Path('./data/').glob('*.pdf') | xlist #将所有pdf文件都列出来
+ dfs = ps | xProcessPoolExecutor(func, 16) | xlist #这里的16是进程数,一般cpu有N核就起N-1个进程
+```
### cv的多进程实现
@@ -593,6 +794,21 @@ import pandas as pd
pd.get_dummies(data.categ_id)
```
+方法三
@@ -716,7 +932,7 @@ df = df.merge(df_aggr, how='left', on='personid').fillna(0)
```
### python画图显示中文
-f
+
```python
## 显示中文解决方法
# 解决方法一
@@ -827,6 +1043,13 @@ sorted(l, key=lambda x:x[1], reverse=True)
# Out[42]: [('c', 6), ('d', 4), ('e', 3), ('b', 2), ('a', 1)]
```
+用法一(衍生):
```python
# 调整数组顺序使奇数位于偶数前面,奇偶相对顺序不变
@@ -1036,12 +1259,303 @@ def dict_to_object(_d):
return inst
```
+### boost安装
+
+```shell
+sudo apt-get install libboost-all-dev
+sudo apt install ocl-icd-opencl-dev
+sudo apt install cmake(可以去https://cmake.org/files下载比如cmake-3.14.0.tar.gz然后执行./bootstrap然后make然后make install)
+```
+
+lgb gpu版安装
+```shell
+pip install --upgrade pip
+pip install lightgbm --install-option=--gpu
+```
+xgb gpu版安装
+```shell
+git clone --recursive https://github.com/dmlc/xgboost
+cd xgboost
+mkdir build
+cd build
+cmake .. -DUSE_CUDA=ON
+make(或者make -j4可能或报错)
+
+cd ..
+cd python-package
+python setup.py install
+```
+
+### tqdm
+
+[当Pytorch遇上tqdm](https://blog.csdn.net/dreaming_coder/article/details/113486645)
+```python
+for epoch in range(epoch):
+ with tqdm(
+ iterable=train_loader,
+ bar_format='{desc} {n_fmt:>4s}/{total_fmt:<4s} {percentage:3.0f}%|{bar}| {postfix}',
+ ) as t:
+ start_time = datetime.now()
+ loss_list = []
+ for batch, data in enumerate(train_loader):
+ t.set_description_str(f"\33[36m【Epoch {epoch + 1:04d}】")
+ # 训练代码
+ time.sleep(1)
+ # 计算当前损失
+ loss = random()
+ loss_list.append(loss)
+ cur_time = datetime.now()
+ delta_time = cur_time - start_time
+ t.set_postfix_str(f"train_loss={sum(loss_list) / len(loss_list):.6f}, 执行时长:{delta_time}\33[0m")
+ t.update()
+```
+
+### joblib_parallel
+
+
+```python
+#Parallel for loop 此方法可用于多个文件数据并行读取
+from joblib import Parallel, delayed
+from math import sqrt
+def ff(num):
+ return [sqrt(n ** 3) for n in range(num)]
+#不使用并行 7.5s
+res = []
+for i in range(10,7000):
+ res.append(ff(i))
+#使用并行 2.75s
+res = Parallel(n_jobs = -1, verbose = 1)(delayed(ff)(i) for i in range(10,7000))
+```
+
+### 调试神器pysnooper
+
+```python
+#pip install pysnooper
+import os
+os.environ['pysnooper'] = '1' # 开关
+
+from pysnooper import snoop
+#如果为0,则重新定义snoop然后这个修饰啥都不干
+if os.environ['pysnooper'] == '0':
+ import wrapt
+ def snoop(*args, **kwargs):
+ @wrapt.decorator
+ def wrapper(wrapped, instance, args, kwargs):
+ return wrapped(*args, **kwargs)
+ return wrapper
+```
+
+### 调试神器debugpy
+
+安装:pip install debugpy -U
+在python代码里面(最前面加上这句话)
+```python
+import debugpy
+try:
+ # 5678 is the default attach port in the VS Code debug configurations. Unless a host and port are specified, host defaults to 127.0.0.1
+ debugpy.listen(("localhost", 9501))
+ print("Waiting for debugger attach")
+ debugpy.wait_for_client()
+except Exception as e:
+ pass
+
+```
+
+在vscode软件中项目下新建一个.vscode目录,然后创建launch.json,看9501端口那个配置
+```python
+{
+ // 使用 IntelliSense 了解相关属性。
+ // 悬停以查看现有属性的描述。
+ // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387
+ "version": "0.2.0",
+ "configurations": [
+ {
+ "name": "torchr_ex2",
+ "type": "python",
+ "request": "launch",
+ "program": "/Users/zb/anaconda3/envs/rag/bin/torchrun",
+ "console": "integratedTerminal",
+ "justMyCode": true,
+ "args": [
+ "--nnodes",
+ "1",
+ "--nproc-per-node",
+ "2",
+ "${file}",
+ "--model_name_or_path",
+ "my_model_bz"
+ ]
+ },
+ {
+ "name": "sh_file_debug",
+ "type": "debugpy",
+ "request": "attach",
+ "connect": {
+ "host": "localhost",
+ "port": 9501
+ }
+ },
+ ]
+}
+```
+
+上面的端口号都写一样比如9501,别搞错了!
+
+### 分组计算均值并填充
+
+```python
+def pad_mean_by_group(df, gp_col='stock_id'):
+ # 只留下需要处理的列
+ cols = [col for col in df.columns if col not in["stock_id", "time_id", "target", "row_id"]]
+ # 查询nan的列
+ df_na = df[cols].isna()
+ # 根据分组计算平均值
+ df_mean = df.groupby(gp_col)[cols].mean()
+
+ # 依次处理每一列
+ for col in cols:
+ na_series = df_na[col]
+ names = list(df.loc[na_series,gp_col])
+
+ t = df_mean.loc[names,col]
+ t.index = df.loc[na_series,col].index
+
+ # 相同的index进行赋值
+ df.loc[na_series,col] = t
+ return df
+train_pca = pad_mean_by_group(train_pca)
+```
+
+### python日期处理
+
+[80个例子,彻底掌握Python日期时间处理](https://mp.weixin.qq.com/s/2bJUZBfWS_8ULGrb9tRpmw)
+
+### dataclass
+
+dataclass 提供一个简便的方式创建数据类, 默认实现__init__(), __repr__(), __eq__()方法
+dataclass支持数据类型的嵌套
+支持将数据设置为不可变:@dataclass(frozen=True)
+
+不用dataclass
+
+```python
+class Person:
+ def __init__(self, name, age):
+ self.name = name
+ self.age = age
+p = Person('test', 18)
+q = Person('test', 18)
+#<__main__.Person at 0x7ff4ade66f40>
+str(p)
+repr(p)
+#'<__main__.Person object at 0x7ff4ade66f40>'
+p == q
+#False
+```
+```python
+from typing import Any
+from dataclasses import dataclass
+@dataclass
+class Person:
+ name: Any
+ age: Any = 18
+p = Person('test', 18)
+q = Person('test', 18)
+#Person(name='test', age=18)
+str(p)
+repr(p)
+#"Person(name='test', age=18)"
+p == q
+#True
+```
+
+### md5_sha256
+
+```python
+import hashlib
+
+def enc(s, ed='md5'):
+ if ed == 'md5':
+ hash_object = hashlib.md5(s.encode())
+ elif ed == 'sha256':
+ hash_object = hashlib.sha256(s.encode())
+ else:
+ raise ValueError('unsupport type!')
+ hash_hex = hash_object.hexdigest()
+ return hash_hex
+
+for i in ['13730973320','13802198853','17619520726']:
+ print(enc(i,'md5'))
+```
+
+### 查看内存
+
+有几种方法可以在Python中获取对象的大小。可以使用sys.getsizeof()来获取对象的确切大小,使用objgraph.show_refs()来可视化对象的结构,或者使用psutil.Process().memory_info().rss来获取当前进程实际占用的全部内存(RSS)。
+
+```python
+>>> import numpy as np
+>>> import sys
+>>> import objgraph
+>>> import psutil
+>>> import pandas as pd
+
+>>> ob = np.ones((1024, 1024, 1024, 3), dtype=np.uint8)
+
+### Check object 'ob' size
+>>> sys.getsizeof(ob) / (1024 * 1024)
+3072.0001373291016
+
+### Check current memory usage of whole process (include ob and installed packages, ...)
+>>> psutil.Process().memory_info().rss / (1024 * 1024)
+3234.19140625
+
+### Check structure of 'ob' (Useful for class object)
+>>> objgraph.show_refs([ob], filename='sample-graph.png')
+
+### Check memory for pandas.DataFrame
+>>> from sklearn.datasets import load_boston
+>>> data = load_boston()
+>>> data = pd.DataFrame(data['data'])
+>>> print(data.info(verbose=False, memory_usage='deep'))
+
+RangeIndex: 506 entries, 0 to 505
+Columns: 13 entries, 0 to 12
+dtypes: float64(13)
+memory usage: 51.5 KB
+
+### Check memory for pandas.Series
+>>> data[0].memory_usage(deep=True) # deep=True to include all the memory used by underlying parts that construct the pd.Series
+4176
+```
+
+### slots用法
+
+```python
+#不使用__slots__时,可以很容易地添加一个额外的job属性
+class Author:
+ def __init__(self, name, age):
+ self.name = name
+ self.age = age
+
+ me = Author('Yang Zhou', 30)
+ me.job = 'Software Engineer'
+ print(me.job)
+ # Software Engineer
+
+# 在大多数情况下,我们不需要在运行时更改实例的变量或方法,并且__dict__不会(也不应该)在类定义后更改。所以Python为此提供了一个属性:__slots__
+class Author:
+ __slots__ = ('name', 'age')
+
+ def __init__(self, name, age):
+ self.name = name
+ self.age = age
+
+ me = Author('Yang Zhou', 30)
+ me.job = 'Software Engineer'
+ print(me.job)
+ # AttributeError: 'Author' object has no attribute 'job'
+```
-###
-working on bert
-working on bert 单句分类
-working on bert NER
-working on bert 两句输入分类或其他
-working on 多分类任务
+
\ No newline at end of file
diff --git a/01_basic/arg_test.py b/01_basic/arg_test.py
index 0e65c6b..85f8a82 100644
--- a/01_basic/arg_test.py
+++ b/01_basic/arg_test.py
@@ -28,3 +28,4 @@
print(opt.dev_path)
print('done.')
+
diff --git a/03_pandas/README.md b/03_pandas/README.md
index c8be243..f1c7f58 100644
--- a/03_pandas/README.md
+++ b/03_pandas/README.md
@@ -1,5 +1,9 @@
## 目录
+[pandas进阶修炼300题](https://www.heywhale.com/mw/project/6146c0318447b8001769ff20)
+
+[可以替代pandas比较好用的数据平行处理包](#数据平行处理)
+
[**1. pandas并行包**](#pandas并行包)
[**2. pandas dataframe手动创建**](#pandas_dataframe手动创建)
@@ -40,8 +44,27 @@
[**20. dataframe表格填充**](#dataframe表格填充)
+[**21. 加快dataframe读取**](#加快dataframe读取)
+
+[**22. df热力图**](#df热力图)
+
+[**23. df热力地图**](#df热力地图)
+
+[**24. 2个pandas EDA插件**](#eda插件)
+
+[**25. python批量插入mysql数据库**](#python批量插入mysql数据库)
+
---
+### 数据平行处理
+
+[polar]
+https://pola-rs.github.io/polars-book/user-guide/quickstart/intro.html
+https://pola-rs.github.io/polars/py-polars/html/reference
+
+[pandarallel](https://nalepae.github.io/pandarallel/)
+
+
### pandas_dataframe手动创建
手动创建dataframe
@@ -321,4 +344,88 @@ df.loc[df.content_id=='x6mbO2rHfU3hTej4','sentiment_tmp'] = 1
df.fillna(method='ffill', axis=1).fillna(method='ffill')
```
+### 加快dataframe读取
+
+方式一:cpu多线程读取(推荐)
+```python
+#安装datatable==0.11.1
+import datatable as dtable
+train = dtable.fread(path+'train.csv').to_pandas()
+```
+方式二:gpu读取
+```python
+#安装cudf(稍微有点麻烦)
+import cudf
+train = cudf.read_csv(path+'train.csv').to_pandas()
+```
+
+### df热力图
+
+```python
+df.corr().style.background_gradient(cmap='coolwarm').set_precision(2)
+```
+
+### df热力地图
+
+结合pyecharts将各省市高校上榜数量进行地图可视化
+```python
+from pyecharts import options as opts
+from pyecharts.charts import Map
+#省份
+list1 = ['北京','江苏','上海','广东','湖北','陕西','浙江','四川','湖南','山东','安徽','辽宁','重庆','福建','天津','吉林','河南','黑龙江','江西','甘肃','云南','河北']
+#省份对应的高效数量
+list2 = [18, 15, 10, 9, 7, 7, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1]
+c = (
+ Map()
+ .add('', [list(z) for z in zip(list1,list2)], "china",is_map_symbol_show=False)
+ .set_global_opts(
+ title_opts=opts.TitleOpts(title="排名前100高校各省市占比"),
+ visualmap_opts=opts.VisualMapOpts(max_=20),
+
+
+ )
+)
+c.render_notebook()
+```
+
+### eda插件
+
+```python
+#插件一
+#!pip install pandas_profiling
+import pandas_profiling
+pandas_profiling.ProfileReport(df)
+#插件二
+import sweetviz as sv
+report = sv.analyze(df)
+report.show_html()
+```
+
+### python批量插入mysql数据库
+
+```python
+df.to_numpy()[:5].tolist()
+'''
+[['25_B', 25, 'B', 0.6, '2024-08-12'],
+ ['23_C', 23, 'C', 2.2, '2024-08-12'],
+ ['24_D', 24, 'D', 3.8, '2024-08-12'],
+ ['29_E', 29, 'E', 1.5, '2024-08-12'],
+ ['22_F', 22, 'F', 4.1, '2024-08-12']]
+'''
+
+import pymysql
+MYSQL_W_CONFIG = {'host':'10.xx.xxx.xx',
+ 'port':3306,
+ 'user':'user',
+ 'password':'passwd',
+ 'database':'mydatabase',
+ 'charset':'utf8'}
+conn = pymysql.connect(autocommit=True, **MYSQL_W_CONFIG)
+cursor = conn.cursor()
+sql = "insert into xx_table(id,cust_id,agcode,score,s_time) values(%s,%s,%s,%s,%s)"
+cursor.executemany(sql, df_final.to_numpy().tolist())
+conn.commit()
+conn.close()
+#1w条数据批量插入大概0.45s左右
+```
\ No newline at end of file
diff --git a/07_database/README.md b/07_database/README.md
index 11f962f..92f8bd9 100644
--- a/07_database/README.md
+++ b/07_database/README.md
@@ -5,7 +5,7 @@
# 先下载镜像
docker pull mysql:5.5
# 运行容器 可以先把-v去掉
-docker run -p 3306:3306 --name mymysql -v $PWD/conf:/etc/mysql/conf.d -v $PWD/logs:/logs -v $PWD/data:/var/lib/mysql -e MYSQL_ROOT_PASSWORD=123456 -d mysql:5.5
+docker run -p 3306:3306 --name mymysql -v $PWD/conf:/etc/mysql/conf.d -v $PWD/logs:/logs -v $PWD/mysql_data:/var/lib/mysql -e MYSQL_ROOT_PASSWORD=123456 -d mysql:5.5
-p 3306:3306:将容器的 3306 端口映射到主机的 3306 端口。
-v -v $PWD/conf:/etc/mysql/conf.d:将主机当前目录下的 conf/my.cnf 挂载到容器的 /etc/mysql/my.cnf。
@@ -32,9 +32,11 @@ db.commit()
```
## 2. Redis(docker version)
+
+
```
# 启动redis命令
-docker run --name docker-redis-test -p 6379:6379 -d redis:latest --requirepass "123456"
+docker run --name docker-redis-test -p 6379:6379 -v $PWD/redis_data:/data -d redis:latest --requirepass "123456"
# redis客户端连接命令
docker exec -it docker-redis-test redis-cli
# 进去以后的操作
diff --git a/07_database/es.md b/07_database/es.md
new file mode 100644
index 0000000..a4a00cd
--- /dev/null
+++ b/07_database/es.md
@@ -0,0 +1,150 @@
+# es存入768维度向量,以及向量查询(ES版本需要7.3之后)
+
+https://github.com/UKPLab/sentence-transformers/blob/master/examples/applications/semantic-search/semantic_search_quora_elasticsearch.py
+
+
+```python
+"""
+This script contains an example how to perform semantic search with ElasticSearch.
+
+As dataset, we use the Quora Duplicate Questions dataset, which contains about 500k questions:
+https://www.quora.com/q/quoradata/First-Quora-Dataset-Release-Question-Pairs
+
+Questions are indexed to ElasticSearch together with their respective sentence
+embeddings.
+
+The script shows results from BM25 as well as from semantic search with
+cosine similarity.
+
+You need ElasticSearch (https://www.elastic.co/de/elasticsearch/) up and running. Further, you need the Python
+ElasticSearch Client installed: https://elasticsearch-py.readthedocs.io/en/master/
+
+As embeddings model, we use the SBERT model 'quora-distilbert-multilingual',
+that it aligned for 100 languages. I.e., you can type in a question in various languages and it will
+return the closest questions in the corpus (questions in the corpus are mainly in English).
+"""
+
+from sentence_transformers import SentenceTransformer, util
+import os
+from elasticsearch import Elasticsearch, helpers
+import csv
+import time
+import tqdm.autonotebook
+
+
+
+es = Elasticsearch()
+
+model = SentenceTransformer('quora-distilbert-multilingual')
+
+url = "http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv"
+dataset_path = "quora_duplicate_questions.tsv"
+max_corpus_size = 100000
+
+#Download dataset if needed
+if not os.path.exists(dataset_path):
+ print("Download dataset")
+ util.http_get(url, dataset_path)
+
+#Get all unique sentences from the file
+all_questions = {}
+with open(dataset_path, encoding='utf8') as fIn:
+ reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_MINIMAL)
+ for row in reader:
+ all_questions[row['qid1']] = row['question1']
+ if len(all_questions) >= max_corpus_size:
+ break
+
+ all_questions[row['qid2']] = row['question2']
+ if len(all_questions) >= max_corpus_size:
+ break
+
+qids = list(all_questions.keys())
+questions = [all_questions[qid] for qid in qids]
+
+#Index data, if the index does not exists
+if not es.indices.exists(index="quora"):
+ try:
+ es_index = {
+ "mappings": {
+ "properties": {
+ "question": {
+ "type": "text"
+ },
+ "question_vector": {
+ "type": "dense_vector",
+ "dims": 768
+ }
+ }
+ }
+ }
+
+ es.indices.create(index='quora', body=es_index, ignore=[400])
+ chunk_size = 500
+ print("Index data (you can stop it by pressing Ctrl+C once):")
+ with tqdm.tqdm(total=len(qids)) as pbar:
+ for start_idx in range(0, len(qids), chunk_size):
+ end_idx = start_idx+chunk_size
+
+ embeddings = model.encode(questions[start_idx:end_idx], show_progress_bar=False)
+ bulk_data = []
+ for qid, question, embedding in zip(qids[start_idx:end_idx], questions[start_idx:end_idx], embeddings):
+ bulk_data.append({
+ "_index": 'quora',
+ "_id": qid,
+ "_source": {
+ "question": question,
+ "question_vector": embedding
+ }
+ })
+
+ helpers.bulk(es, bulk_data)
+ pbar.update(chunk_size)
+
+ except:
+ print("During index an exception occured. Continue\n\n")
+
+
+
+
+#Interactive search queries
+while True:
+ inp_question = input("Please enter a question: ")
+
+ encode_start_time = time.time()
+ question_embedding = model.encode(inp_question)
+ encode_end_time = time.time()
+
+ #Lexical search
+ bm25 = es.search(index="quora", body={"query": {"match": {"question": inp_question }}})
+
+ #Sematic search
+ sem_search = es.search(index="quora", body={
+ "query": {
+ "script_score": {
+ "query": {
+ "match_all": {}
+ },
+ "script": {
+ "source": "cosineSimilarity(params.queryVector, doc['question_vector']) + 1.0",
+ "params": {
+ "queryVector": question_embedding
+ }
+ }
+ }
+ }
+ })
+
+ print("Input question:", inp_question)
+ print("Computing the embedding took {:.3f} seconds, BM25 search took {:.3f} seconds, semantic search with ES took {:.3f} seconds".format(encode_end_time-encode_start_time, bm25['took']/1000, sem_search['took']/1000))
+
+ print("BM25 results:")
+ for hit in bm25['hits']['hits'][0:5]:
+ print("\t{}".format(hit['_source']['question']))
+
+ print("\nSemantic Search results:")
+ for hit in sem_search['hits']['hits'][0:5]:
+ print("\t{}".format(hit['_source']['question']))
+
+ print("\n\n========\n")
+```
\ No newline at end of file
diff --git a/07_database/faiss.md b/07_database/faiss.md
new file mode 100644
index 0000000..8cecb90
--- /dev/null
+++ b/07_database/faiss.md
@@ -0,0 +1,11 @@
+# faiss向量搜索库
+
+与es.md提到的es7.3向量搜索一样,faiss是更加专业的向量搜索工具
+
+[实战入门faiss搜索bert最邻近句子:docker CPU镜像开箱即用,无需额外安装下载](https://mp.weixin.qq.com/s?__biz=MzA4NzkxNzM3Nw==&mid=2457484515&idx=1&sn=c13b27b09b4a7e2a31a1ee421b362540&chksm=87bc8acdb0cb03db46ca7cc0893e46d4078e925a3b35f717806315c0881f6ad75b2165df4a0f&cur_album_id=2002019450945896449&scene=189#wechat_redirect)
+
+[semantic_search_quora_faiss.py](https://github.com/UKPLab/sentence-transformers/blob/master/examples/applications/semantic-search/semantic_search_quora_faiss.py)
+
+
+
+## todo
\ No newline at end of file
diff --git a/07_database/imgs/redis_pic.png b/07_database/imgs/redis_pic.png
new file mode 100644
index 0000000..06eb3ec
Binary files /dev/null and b/07_database/imgs/redis_pic.png differ
diff --git a/08_vscode/README.md b/08_vscode/README.md
new file mode 100644
index 0000000..1118417
--- /dev/null
+++ b/08_vscode/README.md
@@ -0,0 +1,80 @@
+# vscode使用(版本1.86.2)
+
+## 1. 在VScode中添加远程Linux服务器中Docker容器中的Python解释器
+
+**以dgx.6机器为例**
+```shell
+# 第一步 创建容器
+nvidia-docker run -d --name myllm -p 8891:22 -v $PWD/llm:/workspace/llm -w /workspace/llm -it 10.xx.xx.xxx/zhoubin/llm:py311-cuda12.1.0-cudnn8-devel-ubuntu22.04 /bin/bash
+注释:
+[-p 8891:22]:把docker的端口号22映射到服务器的端口号8891。
+[-d]:容器后台运行,避免退出容器后容器自动关闭。
+[-v]:挂载和同步目录,服务器和docker内有一个文件夹保持同步。
+[-it]:确保docker后台交互运行。
+[10.xx.xx.xxx/zhoubin/llm:py311-cuda12.1.0-cudnn8-devel-ubuntu22.04]:镜像名。
+[/bin/bash]:docker内要运行的指令。
+```
+```shell
+#第二步 在容器内安装ssh服务
+docker exec -it [容器ID] /bin/bash
+# 更新apt-get
+命令:apt-get update
+# 安装vim
+命令:apt-get install vim
+# 安装openssh-server
+命令:apt-get install openssh-server
+# 设置root密码(docker里面的用户名和密码,我这边账号密码都是root/root)
+命令:passwd
+```
+```shell
+# 第三步 配置/etc/ssh/sshd_config文件
+# 在文件/etc/ssh/sshd_config中添加下面的代码:
+PubkeyAuthentication yes
+PermitRootLogin yes
+
+# 第四步 重启ssh服务(好像每次停止容器后重启都需要运行下)
+/etc/init.d/ssh restart
+或 service ssh restart
+
+# 第五步 退出docker后,验证端口映射
+docker ps -a
+docker port [容器ID] 22
+若结果输出“0.0.0.0:8891”,则说明端口映射正确。
+```
+```shell
+# 第6步 本地电脑连接docker(见Termius dgx6_docker_llm)
+ssh root@11.xx.xx.xxx -p 8891 ,密码是root
+```
+```shell
+# 使用VSCode连接远程主机上的docker container
+# 打开VScode编辑器,按下快捷键“Ctrl+Shift+X”,查找安装“Remote Development”。安装完成后需要点击“reload”,然后按下快捷键“Ctrl+Shift+P”,输入“remote-ssh”,选择“open SSH Configuration file”,在文件xx/username/.ssh/config中添加如下内容:
+Host llm_docker #Host随便起名字
+ HostName 11.xxx.xx.x
+ User root
+ Port 8891
+
+#保存后,按下快捷键"Ctrl+Shift+P",输入"remote-ssh",选择"Connect to Host...",然后点击"llm_docker",接着选择“Linux”,最后按提示输入第三步中设置的root连接密码,在左下角显示"SSH:llm_docker",说明已经成功连接docker。
+```
+
+```shell
+#内网环境远程如果出现连接不上,大概率是vscode-server无法下载导致,可以手动搞定
+https://update.code.visualstudio.com/commit:903b1e9d8990623e3d7da1df3d33db3e42d80eda/server-linux-x64/stable
+
+具体参考附录中的[VSCode连不上远程服务器]
+```
+
+
+## 2. Debugging(自带,不需要额外安装插件)
+
+在Visual Studio Code(VSCode)中,[Debug Console](https://code.visualstudio.com/Docs/editor/debugging)是一个用于查看程序调试信息的窗口。它通常用于查看程序在调试过程中输出的日志信息、变量的值等。Debug Console提供了一个方便的方式来查看和分析程序的执行过程,帮助开发人员定位和解决代码中的问题。
+
+
+----
+
+[vscode历史版本下载地址](https://code.visualstudio.com/updates/v1_86)
+[vscode扩展应用市场vsix文件手动下载安装](https://marketplace.visualstudio.com/search?target=VSCode&category=All%20categories&sortBy=Installs)
+[vscode插件历史版本下载https://open-vsx.org](https://open-vsx.org/)
+[vscode扩展应用市场vsix文件手动下载历史版本插件包](https://blog.csdn.net/qq_15054345/article/details/133884626)
+[在VScode中添加Linux中的Docker容器中的Python解释器](https://blog.csdn.net/weixin_43268590/article/details/129244984)
+[VSCode连不上远程服务器](https://blog.csdn.net/qq_42610612/article/details/132782965)
+[无网机的vscode中怎么使用jupyter notebook](https://www.bilibili.com/read/cv34411972/?jump_opus=1)
diff --git a/09_remote_ipython/README.md b/09_remote_ipython/README.md
index a797579..19a1543 100644
--- a/09_remote_ipython/README.md
+++ b/09_remote_ipython/README.md
@@ -26,7 +26,7 @@ done!
1. 打开ipython
```python
-from notebook.auth import passwd
+from IPython.lib import passwd #from notebook.auth import passwd
In [2] : passwd() # 输入密码
Enter password:
Verify password:
diff --git a/10_docker/README.md b/10_docker/README.md
index 92a4827..b823beb 100644
--- a/10_docker/README.md
+++ b/10_docker/README.md
@@ -1,4 +1,4 @@
-# simple use for docker
+# [docker入门实践](https://yeasy.gitbook.io/docker_practice/)
## 1. docker安装及配置Docker镜像站
@@ -7,7 +7,7 @@
[docker docs for mac](https://docs.docker.com/docker-for-mac/)
1.2 linux下安装
-TODO
+[Install Docker Engine on Ubuntu](https://docs.docker.com/engine/install/ubuntu/)
1.3 配置docker镜像站
[docker镜像站网址](https://www.daocloud.io/mirror#accelerator-doc)
@@ -15,7 +15,8 @@ TODO
1.4 配置docker代理
- windows中右击图标,选settings->Proxies
- - [mac/linux](https://www.cnblogs.com/EasonJim/p/9988154.html)
+ - [mac](https://www.cnblogs.com/EasonJim/p/9988154.html)
+ - [linux](https://blog.csdn.net/qq_30034989/article/details/132021346)
```shell
# 如果使用HTTP代理服务器时,将为docker服务创建systemd插件目录
@@ -146,11 +147,21 @@ docker stop $(docker ps -a -q)
docker rm $(docker ps -a -q)
```
-2.12 docker修改完镜像生成新的镜像以后貌似没看法删除旧的镜像
+2.12 虚悬镜像
+
+上面的镜像列表中,还可以看到一个特殊的镜像,这个镜像既没有仓库名,也没有标签,均为 `<none>`。
+```shell
+ 00285df0df87 5 days ago 342 MB
+```
+这个镜像原本是有镜像名和标签的,原来为 mongo:3.2,随着官方镜像维护,发布了新版本后,重新 docker pull mongo:3.2 时,mongo:3.2 这个镜像名被转移到了新下载的镜像身上,而旧的镜像上的这个名称则被取消,从而成为了 `<none>`。除了 docker pull 可能导致这种情况,docker build 也同样可以导致这种现象。由于新旧镜像同名,旧镜像名称被取消,从而出现仓库名、标签均为 `<none>` 的镜像。这类无标签镜像也被称为 虚悬镜像(dangling image) ,可以用下面的命令专门显示这类镜像:
+```shell
+$ docker image ls -f dangling=true
+REPOSITORY TAG IMAGE ID CREATED SIZE
+ 00285df0df87 5 days ago 342 MB
+```
+一般来说,虚悬镜像已经失去了存在的价值,是可以随意删除的,可以用下面的命令删除。
```shell
-pip install -i https://pypi.tuna.tsinghua.edu.cn/simple numpy
-pandas sklearn jieba gensim tqdm flask requests PyMySQL redis pyahocorasick
-pymongo pyspark py2neo neo4j-driver==$PYTHON_DRIVER_VERSION
+$ docker image prune
```
2.13 拷贝宿主机本地文件到docker中,和从docker中拷贝到宿主机
@@ -170,6 +181,14 @@ docker stop `docker ps -a| grep python:3.6 | awk '{print $1}'`
docker rm `docker ps -a| grep python:3.6 | awk '{print $1}'`
```
+2.15 docker中python print不生效解决办法
+```shell
+#方法一 显式调用flush
+print("Hello www", flush=True)
+#方法二 使用 "-u" 参数执行 python 命令
+sudo nvidia-docker run -v $PWD/masr_bz:/workspace/masr_bz -w /workspace/masr_bz binzhouchn/pytorch:1.7-cuda10.1-cudnn7-masr python -u train.py
+```
+
## 3. docker镜像使用
@@ -411,6 +430,26 @@ docker pull stardog/stardog:latest
docker run -v ~/stardog-6.2.2/:/var/opt/stardog -e STARDOG_SERVER_JAVA_ARGS="-Xmx8g -Xms8g -XX:MaxDirectMemorySize=2g" stardog/stardog:latest
```
+
+3.8 容器云k8s
+
+Kubernetes是什么?Kubernetes是一个全新的基于容器技术的分布式架构解决方案,是Google开源的一个容器集群管理系统,Kubernetes简称K8S。Kubernetes 提供了完善的管理工具,这些工具涵盖了开发、部署测试、运维监控在内的各个环节。
+
+Kubernetes特性
+ - 自我修复:在节点故障时,重新启动失败的容器,替换和重新部署,保证预期的副本数量;杀死健康检查失败的容器,并且在未准备好之前不会处理用户的请求,确保线上服务不中断。
+ - 弹性伸缩:使用命令、UI或者基于CPU使用情况自动快速扩容和缩容应用程序实例,保证应用业务高峰并发时的高可用性;业务低峰时回收资源,以最小成本运行服务。
+ - 自动部署和回滚:K8S采用滚动更新策略更新应用,一次更新一个Pod,而不是同时删除所有Pod,如果更新过程中出现问题,将回滚更改,确保升级不影响业务。
+ - 服务发现和负载均衡:K8S为多个容器提供一个统一访问入口(内部IP地址和一个DNS名称),并且负载均衡关联的所有容器,使得用户无需考虑容器IP问题。
+ - 机密和配置管理:管理机密数据和应用程序配置,而不需要把敏感数据暴露在镜像里,提高敏感数据安全性。并可以将一些常用的配置存储在K8S中,方便应用程序使用。
+ - 存储编排:挂载外部存储系统,无论是来自本地存储,公有云,还是网络存储,都作为集群资源的一部分使用,极大提高存储使用灵活性。
+ - 批处理:提供一次性任务,定时任务;满足批量数据处理和分析的场景。
+
+[Kubernetes 深入学习(一) —— 入门和集群安装部署](https://www.cnblogs.com/chiangchou/p/k8s-1.html#_label0_0)
+[Kubernetes(一) 跟着官方文档从零搭建K8S](https://juejin.cn/post/6844903943051411469)
+[kubeadm部署k8s集群最全最详细](https://blog.csdn.net/Doudou_Mylove/article/details/103901732)
+
+
+
[RDF入门](https://blog.csdn.net/txlCandy/article/details/50959358)
[OWL语言](https://blog.csdn.net/zycxnanwang/article/details/86557350)
diff --git a/10_docker/newapi_docker_demo/README.md b/10_docker/newapi_docker_demo/README.md
new file mode 100644
index 0000000..39dcc95
--- /dev/null
+++ b/10_docker/newapi_docker_demo/README.md
@@ -0,0 +1,94 @@
+## Dockerfile
+
+```Dockerfile
+FROM node:16 as builder
+
+WORKDIR /build
+COPY web/package.json .
+RUN npm install
+COPY ./web .
+COPY ./VERSION .
+RUN DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(cat VERSION) npm run build
+
+FROM golang AS builder2
+
+ENV GO111MODULE=on \
+ CGO_ENABLED=1 \
+ GOOS=linux
+
+WORKDIR /build
+ADD go.mod go.sum ./
+RUN go mod download
+COPY . .
+COPY --from=builder /build/dist ./web/dist
+RUN go build -ldflags "-s -w -X 'one-api/common.Version=$(cat VERSION)' -extldflags '-static'" -o one-api
+
+FROM alpine
+
+RUN apk update \
+ && apk upgrade \
+ && apk add --no-cache ca-certificates tzdata \
+ && update-ca-certificates 2>/dev/null || true
+
+COPY --from=builder2 /build/one-api /
+EXPOSE 3000
+WORKDIR /data
+ENTRYPOINT ["/one-api"]
+```
+
+
+## Dockerfile 解析
+
+这个 Dockerfile 通过多个阶段构建一个含前端和后端组件的应用。每个阶段使用不同的基础镜像和步骤来完成特定的任务。
+
+### 第一阶段:前端构建(Node.js)
+
+- **基础镜像**:
+ - `FROM node:16 as builder`:使用 Node.js 16 版本的官方镜像作为基础镜像,并标记此构建阶段为 `builder`。
+- **设置工作目录**:
+ - `WORKDIR /build`:将工作目录设置为 `/build`。
+- **复制文件**:
+ - `COPY web/package.json .`:将前端代码目录下的 `package.json` 文件复制到工作目录中。
+- **安装依赖**:
+ - `RUN npm install`:根据 `package.json` 安装所需依赖。
+- **复制前端代码和版本文件**:
+ - `COPY ./web .`:将web文件夹下所有文件复制到工作目录。
+ - `COPY ./VERSION .`:将项目版本文件复制到工作目录。
+- **构建前端项目**:
+ - `RUN DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(cat VERSION) npm run build`:设置环境变量并执行前端构建脚本,生成生产环境用的前端文件。
+
+### 第二阶段:后端构建(Go)
+
+- **基础镜像**:
+ - `FROM golang AS builder2`:使用 Go 的官方镜像作为基础,并标记此阶段为 `builder2`。
+- **环境变量**:
+ - 设置多个环境变量,以支持 Go 的模块系统和确保生成的是适用于 Linux 的静态链接二进制文件。
+- **设置工作目录**:
+ - `WORKDIR /build`:设置工作目录。
+- **添加 Go 模块文件**:
+ - `ADD go.mod go.sum ./`:添加 Go 模块定义文件。
+- **下载依赖**:
+ - `RUN go mod download`:下载 Go 依赖。
+- **复制代码和前端构建产物**:
+ - `COPY . .`:复制所有后端代码到工作目录。
+ - `COPY --from=builder /build/dist ./web/dist`:从第一阶段中复制构建好的前端文件到后端服务目录中。
+- **编译应用**:
+ - `RUN go build -ldflags "-s -w -X 'one-api/common.Version=$(cat VERSION)' -extldflags '-static'" -o one-api`:使用 Go 编译命令构建应用,设置链接器选项以嵌入版本信息并优化二进制大小。
+
+### 第三阶段:运行环境
+
+- **基础镜像**:
+ - `FROM alpine`:使用轻量级的 Alpine Linux 镜像作为基础。
+- **安装证书和时区数据**:
+ - 运行一系列命令以安装必要的证书和时区数据,确保应用可以处理 HTTPS 连接和正确的时间。
+- **复制编译好的应用**:
+ - `COPY --from=builder2 /build/one-api /`:从第二阶段复制编译好的应用到根目录。
+- **端口和工作目录**:
+ - `EXPOSE 3000`:声明容器在运行时会监听 3000 端口。
+ - `WORKDIR /data`:设置工作目录,应用可能会使用此目录来存储数据。
+- **设置入口点**:
+ - `ENTRYPOINT ["/one-api"]`:设置容器启动时执行的命令。
+
+### 总结
+
+此 Dockerfile 首先构建前端资源,然后构建后端服务,并将前端资源集成到后端服务中,最后在一个轻量级容器中运行编译好的二进制文件,实现前后端的自动化构建和部署。
diff --git a/12_nginx/README.md b/12_nginx/README.md
index 96a4c31..15f65bc 100644
--- a/12_nginx/README.md
+++ b/12_nginx/README.md
@@ -1,11 +1,17 @@
## nginx
-[nginx作为http服务器-静态页面的访问](https://www.cnblogs.com/xuyang94/p/12667844.html)
-[docker nginx反向代理](https://www.cnblogs.com/dotnet261010/p/12596185.html)
-[nginx负载均衡参考1](https://www.jianshu.com/p/4c250c1cd6cd)
-[nginx负载均衡参考2](https://www.cnblogs.com/diantong/p/11208508.html)
+[**1. nginx入门使用**](#nginx入门使用)
+
+[**2. nginx正则使用1(2024.4.2更新)**](#nginx正则使用1)
+
+
-### nginx使用
+---
+
+### nginx入门使用
+
+
+点击展开
**1. 第一步用安装docker nginx**
@@ -72,4 +78,31 @@ docker run --name=nginx -d -p 4030:4030 nginx
def custom():
return str(3 + 2)
```
-[配置文件2](default2.conf)
\ No newline at end of file
+[配置文件2](default2.conf)
+
+
+
+### nginx正则使用1
+
+```shell
+cd /etc/nginx/conf.d
+#修改后重启
+systemctl restart nginx
+nginx -s reload
+```
+[配置文件3](default3.conf)
+
+说明:本次使用正则的目的是当我访问
+http://10.28.xx.xx:8000/aimanager_gpu/recsys/时,
+正则匹配后转到http://localhost:10086,后面不加/aimanager_gpu/recsys路由
+(如果不走正则那么proxy_pass转到http://localhost:10086后会自动拼接/aimanager_gpu/recsys)
+
+
+
+
+ - 参考资料
+
+[nginx作为http服务器-静态页面的访问](https://www.cnblogs.com/xuyang94/p/12667844.html)
+[docker nginx反向代理](https://www.cnblogs.com/dotnet261010/p/12596185.html)
+[nginx负载均衡参考1](https://www.jianshu.com/p/4c250c1cd6cd)
+[nginx负载均衡参考2](https://www.cnblogs.com/diantong/p/11208508.html)
\ No newline at end of file
diff --git a/12_nginx/default3.conf b/12_nginx/default3.conf
new file mode 100644
index 0000000..2253947
--- /dev/null
+++ b/12_nginx/default3.conf
@@ -0,0 +1,15 @@
+upstream recsys {
+ server localhost:10086;
+ }
+
+server {
+ server_name localhost;
+ listen 8000;
+ location ~* /aimanager_gpu/recsys/ {
+ if ($request_uri ~ /aimanager_gpu/recsys/(.+))
+ {
+ set $rightUrl $1;
+ }
+ proxy_pass http://recsys/$rightUrl;
+ }
+}
\ No newline at end of file
diff --git a/14_go/README.md b/14_go/README.md
new file mode 100644
index 0000000..a3e9da9
--- /dev/null
+++ b/14_go/README.md
@@ -0,0 +1,192 @@
+# python调用golang
+
+## 示例一 python端输入int返回int
+
+```Go
+package main
+
+import (
+ "C"
+)
+
+func f1(x int) int {
+ return x*x + 2
+}
+
+//export Fib
+func Fib(n int) int {
+ if n == 1 || n == 2 {
+ return 1
+ } else {
+ return Fib(n-1) + Fib(n-2) + f1(1)
+ }
+}
+
+func main() {}
+```
+
+//go build -buildmode=c-shared -o _fib.so fib.go
+//参考链接https://blog.csdn.net/cainiao_python/article/details/107724309
+//将_fib.so文件拷贝到python文件夹下
+
+```python
+import ctypes
+import time
+from ctypes import *
+so = ctypes.cdll.LoadLibrary('./_fib.so')
+start = time.time()
+result = so.Fib(40)
+end = time.time()
+print(f'斐波那契数列第40项:{result},耗时:{end - start}')
+```
+
+## 示例二 python端输入string返回string(推荐看示例三)
+
+```Go
+package main
+
+import (
+ "C"
+ "database/sql"
+ "log"
+ "strings"
+
+ _ "github.com/go-sql-driver/mysql"
+)
+
+//export Gdbc
+func Gdbc(uri *C.char) string {
+ log.Println(uri)
+ db, err := sql.Open("mysql", C.GoString(uri))
+ if err != nil {
+ log.Fatalln(err)
+ }
+ rows, err := db.Query("SELECT feature_word FROM insurance_qa.feature_words")
+ if err != nil {
+ log.Fatalln(err)
+ }
+ res := []string{}
+ for rows.Next() {
+ var s string
+ err = rows.Scan(&s)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ // log.Printf("found row containing %q", s)
+ res = append(res, s)
+ }
+ rows.Close()
+ return strings.Join(res, ",")
+}
+
+func main() {
+ // res := Gdbc("username:password@tcp(localhost:3306)/database?charset=utf8")
+ // fmt.Println(res)
+}
+```
+//go build -buildmode=c-shared -o _gdbc.so test.go
+//将_gdbc.so文件拷贝到python文件夹下
+
+```python
+import ctypes
+import time
+from ctypes import *
+class StructPointer(Structure):
+ _fields_ = [("p", c_char_p), ("n", c_longlong)]
+
+so = ctypes.cdll.LoadLibrary('./_gdbc.so')
+so.Gdbc.restype = StructPointer
+start = time.time()
+uri = "username:password@tcp(localhost:3306)/database?charset=utf8"
+res = so.Gdbc(uri.encode("utf-8"))
+print(res.n)
+print(res.p[:res.n].decode())#print(res.p.decode())这样貌似也没问题
+end = time.time()
+print(f'耗时:{end - start}')
+```
+
+## 示例三 python端输入string,go查询数据库然后返回json str
+
+```Go
+package main
+
+import (
+ "C"
+ "database/sql"
+ "encoding/json"
+ "log"
+
+ _ "github.com/go-sql-driver/mysql"
+)
+
+type Fw struct {
+ feature_word string
+ word_type string
+ id int64
+}
+
+//export Gdbc
+func Gdbc(uri *C.char) string {
+ db, err := sql.Open("mysql", C.GoString(uri))
+	//设置连接可被复用的最大时长(SetConnMaxLifetime 并非最大连接数)
+ db.SetConnMaxLifetime(100)
+ //设置上数据库最大闲置连接数
+ db.SetMaxIdleConns(10)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ rows, err := db.Query("SELECT feature_word,word_type,id FROM insurance_qa.feature_words")
+ if err != nil {
+ log.Fatalln(err)
+ }
+ res := [][]interface{}{}
+ var fw Fw
+ for rows.Next() {
+ err = rows.Scan(&fw.feature_word, &fw.word_type, &fw.id)
+ if err != nil {
+ log.Fatalln(err)
+ }
+ // log.Printf("found row containing %q", s)
+ tmp := []interface{}{}
+ tmp = append(tmp, fw.feature_word)
+ tmp = append(tmp, fw.word_type)
+ tmp = append(tmp, fw.id)
+ res = append(res, tmp)
+ // res = append(res, []interface{}{fw.feature_word, fw.word_type, fw.id})//上面的一行写法
+ }
+ rows.Close()
+ b, err := json.Marshal(res)
+ if err != nil {
+ panic(err)
+ }
+ result := string(b)
+ return result
+}
+
+func main() {}
+
+```
+
+//go build -buildmode=c-shared -o _gdbc.so test.go
+//将_gdbc.so文件拷贝到python文件夹下
+
+```python
+import ctypes
+import time
+import json
+from ctypes import *
+class StructPointer(Structure):
+ _fields_ = [("p", c_char_p), ("n", c_longlong)]
+
+so = ctypes.cdll.LoadLibrary('./_gdbc.so')
+so.Gdbc.restype = StructPointer
+start = time.time()
+uri = "username:password@tcp(localhost:3306)/database?charset=utf8"
+res = so.Gdbc(uri.encode("utf-8"))
+print(res.n)
+print(res.p.decode())
+print(json.loads(res.p.decode()))
+end = time.time()
+```
+
+##
\ No newline at end of file
diff --git a/15_ansible/README.md b/15_ansible/README.md
new file mode 100644
index 0000000..9b2aff0
--- /dev/null
+++ b/15_ansible/README.md
@@ -0,0 +1,66 @@
+# ansible笔记
+
+```shell
+在/etc/ansible/ansible.cfg下配置[model]
+# ping
+ansible model -m ping
+# ansible-playbook写剧本
+ansible-playbook xxx.yaml
+# 传文件
+ansible model -m copy -a "src=./test.txt dest=/home/zhoubin"
+# 创建文件(ansible-playbook形式)
+- hosts: model
+ remote_user: zhoubin
+ tasks:
+ - name: "create test2.txt in the /etc directory"
+ file:
+ path: "/home/zhoubin/test2.txt"
+ state: "touch"
+# 创建文件夹(ansible-playbook形式)
+- hosts: model
+ remote_user: zhoubin
+ tasks:
+ - name: "create tmp file in the /etc directory"
+ file:
+ path: "/home/zhoubin/tmp"
+ state: "directory"
+# 删除文件(ansible-playbook形式)
+- hosts: model
+ remote_user: zhoubin
+ tasks:
+ - name: "delete test.txt in the /etc directory"
+ file:
+ path: "/home/zhoubin/test.txt"
+ state: "absent"
+# 删除多个文件(ansible-playbook形式)
+- hosts: model
+ remote_user: zhoubin
+ tasks:
+ - name: "delete multi files in the /etc directory"
+ file:
+ path: "{{ item }}"
+ state: "absent"
+ with_items:
+ - /home/zhoubin/test1.txt
+ - /home/zhoubin/test2.txt
+# 将远程服务器文件拷贝到本机
+ansible model -m fetch -a "src=/home/zhoubin/test.txt dest=./ force=yes backup=yes"
+
+# 写一个剧本(传docker镜像并且加载) become:yes可以避免sudo输密码!
+- hosts: model
+ remote_user: zhoubin
+ tasks:
+ - name: copy docker image
+ copy: src=./py37.tar.gz dest=/home/zhoubin
+ - name: load image
+ shell: docker load -i /home/zhoubin/py37.tar.gz
+ become: yes
+
+
+```
+
+
+### 附录
+
+[超简单ansible2.4.2.0与playbook入门教程](https://blog.csdn.net/qq_45206551/article/details/105004233)
+[ansible-命令使用说明](https://www.cnblogs.com/scajy/p/11353825.html)
diff --git a/99_pycharm_archive/.DS_Store b/99_pycharm_archive/.DS_Store
new file mode 100644
index 0000000..bec7f6f
Binary files /dev/null and b/99_pycharm_archive/.DS_Store differ
diff --git a/08_pycharm/README.md b/99_pycharm_archive/README.md
similarity index 100%
rename from 08_pycharm/README.md
rename to 99_pycharm_archive/README.md
diff --git a/08_pycharm/pic/pycharm_activ.png b/99_pycharm_archive/pic/pycharm_activ.png
similarity index 100%
rename from 08_pycharm/pic/pycharm_activ.png
rename to 99_pycharm_archive/pic/pycharm_activ.png
diff --git a/08_pycharm/pic/pycharm_git1.png b/99_pycharm_archive/pic/pycharm_git1.png
similarity index 100%
rename from 08_pycharm/pic/pycharm_git1.png
rename to 99_pycharm_archive/pic/pycharm_git1.png
diff --git a/08_pycharm/pic/pycharm_git2.png b/99_pycharm_archive/pic/pycharm_git2.png
similarity index 100%
rename from 08_pycharm/pic/pycharm_git2.png
rename to 99_pycharm_archive/pic/pycharm_git2.png
diff --git a/08_pycharm/pic/pycharm_remote1.png b/99_pycharm_archive/pic/pycharm_remote1.png
similarity index 100%
rename from 08_pycharm/pic/pycharm_remote1.png
rename to 99_pycharm_archive/pic/pycharm_remote1.png
diff --git a/08_pycharm/pic/pycharm_remote2.png b/99_pycharm_archive/pic/pycharm_remote2.png
similarity index 100%
rename from 08_pycharm/pic/pycharm_remote2.png
rename to 99_pycharm_archive/pic/pycharm_remote2.png
diff --git a/08_pycharm/pic/pycharm_remote3.png b/99_pycharm_archive/pic/pycharm_remote3.png
similarity index 100%
rename from 08_pycharm/pic/pycharm_remote3.png
rename to 99_pycharm_archive/pic/pycharm_remote3.png
diff --git a/08_pycharm/pic/pycharm_remote4.png b/99_pycharm_archive/pic/pycharm_remote4.png
similarity index 100%
rename from 08_pycharm/pic/pycharm_remote4.png
rename to 99_pycharm_archive/pic/pycharm_remote4.png
diff --git a/08_pycharm/pic/pycharm_remote5.png b/99_pycharm_archive/pic/pycharm_remote5.png
similarity index 100%
rename from 08_pycharm/pic/pycharm_remote5.png
rename to 99_pycharm_archive/pic/pycharm_remote5.png
diff --git "a/99_pycharm_archive/\346\277\200\346\264\273\347\240\201/.DS_Store" "b/99_pycharm_archive/\346\277\200\346\264\273\347\240\201/.DS_Store"
new file mode 100644
index 0000000..07fbdca
Binary files /dev/null and "b/99_pycharm_archive/\346\277\200\346\264\273\347\240\201/.DS_Store" differ
diff --git "a/08_pycharm/\346\277\200\346\264\273\347\240\201/jetbrains-agent.jar" "b/99_pycharm_archive/\346\277\200\346\264\273\347\240\201/jetbrains-agent.jar"
similarity index 100%
rename from "08_pycharm/\346\277\200\346\264\273\347\240\201/jetbrains-agent.jar"
rename to "99_pycharm_archive/\346\277\200\346\264\273\347\240\201/jetbrains-agent.jar"
diff --git "a/08_pycharm/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\201.txt" "b/99_pycharm_archive/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\201.txt"
similarity index 100%
rename from "08_pycharm/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\201.txt"
rename to "99_pycharm_archive/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\201.txt"
diff --git "a/08_pycharm/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2011.txt" "b/99_pycharm_archive/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2011.txt"
similarity index 100%
rename from "08_pycharm/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2011.txt"
rename to "99_pycharm_archive/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2011.txt"
diff --git "a/08_pycharm/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2012.txt" "b/99_pycharm_archive/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2012.txt"
similarity index 100%
rename from "08_pycharm/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2012.txt"
rename to "99_pycharm_archive/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2012.txt"
diff --git "a/08_pycharm/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2013.txt" "b/99_pycharm_archive/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2013.txt"
similarity index 100%
rename from "08_pycharm/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2013.txt"
rename to "99_pycharm_archive/\346\277\200\346\264\273\347\240\201/\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/\346\277\200\346\264\273\347\240\2013.txt"
diff --git "a/08_pycharm/\346\277\200\346\264\273\347\240\201/\351\235\236\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/Pycharm\346\226\271\345\274\217\344\270\200\346\277\200\346\264\273\347\240\201\346\261\207\346\200\273.docx" "b/99_pycharm_archive/\346\277\200\346\264\273\347\240\201/\351\235\236\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/Pycharm\346\226\271\345\274\217\344\270\200\346\277\200\346\264\273\347\240\201\346\261\207\346\200\273.docx"
similarity index 100%
rename from "08_pycharm/\346\277\200\346\264\273\347\240\201/\351\235\236\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/Pycharm\346\226\271\345\274\217\344\270\200\346\277\200\346\264\273\347\240\201\346\261\207\346\200\273.docx"
rename to "99_pycharm_archive/\346\277\200\346\264\273\347\240\201/\351\235\236\346\260\270\344\271\205\346\277\200\346\264\273\347\240\201/Pycharm\346\226\271\345\274\217\344\270\200\346\277\200\346\264\273\347\240\201\346\261\207\346\200\273.docx"
diff --git a/README.md b/README.md
index fbfa3cd..561cd7a 100644
--- a/README.md
+++ b/README.md
@@ -2,23 +2,28 @@
[](https://github.com/binzhouchn/feature_engineering)
# python笔记
-> 版本:0.3
+> 版本:0.5
> 作者:binzhou
> 邮件:binzhouchn@gmail.com
`Github`加载`ipynb`的速度较慢,建议在 [Nbviewer](http://nbviewer.ipython.org/github/lijin-THU/notes-python/blob/master/index.ipynb) 中查看该项目。
+[python各版本下载仓库](https://www.python.org/ftp/python/)
+
---
## 简介
-默认安装了 `Python 3.8`,以及相关的第三方包 `gensim`, `tqdm`, `flask`
+默认安装了 `Python 3.10`,以及相关的第三方包 `gensim`, `tqdm`, `flask`
+
+anaconda 虚拟环境创建python版本降级命令:conda create -n tableqa python=3.9
> life is short.use python.
推荐使用[Anaconda](http://www.continuum.io/downloads),这个IDE集成了大部分常用的包。
-### python pip使用国内镜像
+
+pip使用国内镜像
[让python pip使用国内镜像](https://www.cnblogs.com/wqpkita/p/7248525.html)
```shell
@@ -33,7 +38,10 @@ pip install -i http://pypi.douban.com/simple --trusted-host pypi.douban.com flas
pip --proxy=proxyAddress:port install -i http://pypi.douban.com/simple --trusted-host pypi.douban.com flask
```
-### pip镜像配置
+
+
+
+pip镜像配置
pip install镜像配置(Linux)
```
@@ -55,3 +63,28 @@ index-url = http://mirrors.aliyun.com/pypi/simple/
[install]
trusted-host = mirrors.aliyun.com
```
+
+
+## 使用conda升级到python3.12
+
+方法一
+[参考链接](https://qa.1r1g.com/sf/ask/4099772281/)
+```shell
+conda update -n base -c defaults conda
+conda install -c anaconda python=3.12
+#然后再重新安装下依赖包
+```
+方法二(或使用虚拟环境)
+```
+$ conda create -p /your_path/env_name python=3.12
+# 激活环境
+$ source activate /your_path/env_name
+# 关闭环境
+$ source deactivate /your_path/env_name
+# 删除环境
+$ conda env remove -p /your_path/env_name
+```
+
+## 其他python仓库推荐
+
+[All algorithms implemented in Python - for education](https://github.com/TheAlgorithms/Python/)