采用 gemini 模型实现语言学习模型(llm)的 SQL 问答功能,将自然语言转化为 SQL 语句(即 nl2sql)。该过程利用 llm 的 SQL 生成能力,将用户复杂的自然语言请求解析后映射到相应的 SQL 查询,从而更精准的获取数据库信息。这种技术在数据库管理、信息检索等方面具有重要应用价值。
代码来自于 ,原仓库是使用 streamlit 的,简单试用下就不要 streamlit 了。
1. gemini 生成 sql
import os | |
import pymysql | |
import streamlit as st | |
import google.generativeai as genai | |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) | |
## Function To Load Google Gemini Model and provide queries as response | |
def get_gemini_response(question,prompt): | |
model=genai.GenerativeModel('gemini-pro') | |
response=model.generate_content([prompt[0], question]) | |
return response.text |
2. pysql 执行 sql 语句
import pymysql | |
def read_sql_query(sql, db_config): | |
# db_config is a dictionary containing your MySQL connection parameters | |
# Example: {'host': 'your_host', 'user': 'your_user', 'password': 'your_password', 'database': 'your_database'} | |
# Connect to the MySQL database | |
conn = pymysql.connect(**db_config) | |
try: | |
# Create a cursor object | |
with conn.cursor() as cur: | |
# Execute the SQL query | |
cur.execute(sql) | |
# Fetch all the rows | |
rows = cur.fetchall() | |
Print the rows | |
for row in rows: | |
print(row) | |
return rows | |
finally: | |
# Close the connection when done | |
conn.close() | |
#测试 | |
sql_query = "SELECT * FROM STUDENT;" | |
database_config = {'host': 'localhost', 'port': 61106, 'user': 'root', 'password': 'rn', 'database': 'gemini'} | |
result = read_sql_query(sql_query, database_config) |
3. 实际应用
prompt=[ | |
""" | |
You are an expert in converting English questions to SQL query! | |
The SQL database has the name STUDENT and has the following columns - NAME, CLASS, | |
SECTION \n\nFor example,\nExample 1 - How many entries of records are present?, | |
the SQL command will be something like this SELECT COUNT(*) FROM STUDENT ; | |
\nExample 2 - Tell me all the students studying in Data Science class?, | |
the SQL command will be something like this SELECT * FROM STUDENT | |
where CLASS="Data Science"; | |
also the sql code should not have ``` in beginning or end and sql word in output | |
""" | |
] | |
question="Tell who is the highest MARKS in Data Science class?" | |
sql_cmd = get_gemini_response(question,prompt) | |
print("Gemini generate SQL:", sql_cmd) | |
response = read_sql_query(sql_cmd, database_config) | |
print("SQL query result:", response) |
执行效果如下,也有可能 sql 语句不对的,要是用 nl2sql 的数据微调下会比较好。
我使用的是 pymysql, 这是插入 mysql 部分代码。
也可以建表后直接命令行插入:
INSERT INTO STUDENT (NAME, CLASS, SECTION, MARKS) VALUES | |
('Krish', 'Data Science', 'A', 90), | |
('Sudhanshu', 'Data Science', 'B', 100), | |
('Darius', 'Data Science', 'A', 86), | |
('Vikash', 'DEVOPS', 'A', 50), | |
('Dipesh', 'DEVOPS', 'A', 35); |
create_db.py
import pymysql | |
# MySQL 连接 | |
mysql_connection = pymysql.connect( | |
host="145.22.xx.xx", | |
port=6306, | |
user="root", | |
password="xxxx", | |
database="gemini", | |
cursorclass=pymysql.cursors.DictCursor # 设置字典游标以获取字典格式的结果 | |
) | |
mysql_cursor = mysql_connection.cursor() | |
# 创建 MySQL 表结构 | |
mysql_cursor.execute(""" | |
CREATE TABLE STUDENT (NAME VARCHAR(25), | |
CLASS VARCHAR(25), | |
SECTION VARCHAR(25), | |
MARKS INT | |
) | |
""") | |
# 插入数据 | |
data_to_insert = [ | |
('Krish', 'Data Science', 'A', 90), | |
('Sudhanshu', 'Data Science', 'B', 100), | |
('Darius', 'Data Science', 'A', 86), | |
('Vikash', 'DEVOPS', 'A', 50), | |
('Dipesh', 'DEVOPS', 'A', 35) | |
] | |
mysql_cursor.executemany('''INSERT INTO STUDENT VALUES (%s, %s, %s, %s)''', data_to_insert) | |
# 提交更改 | |
mysql_connection.commit() | |
# 显示插入的数据 | |
mysql_cursor.execute('''SELECT * FROM STUDENT''') | |
result = mysql_cursor.fetchall() | |
for row in result: | |
print(row) | |
# 关闭连接 | |
mysql_connection.close() |
正文完