Files
speech-to-text/lib/summarize.py

106 lines
3.2 KiB
Python

import json
import http.client
import urllib.parse
import urllib.error
# System prompt used by the summarization helpers in this module. It asks for
# a Markdown summary with "Summary", "Action Items" and "Key Decisions"
# sections. The text is sent to the model verbatim, so edit it with care.
SUMMARY_PROMPT = """You are a meeting assistant. Given the following transcript, produce a structured summary with these sections:
## Summary
3-5 bullet points capturing the key information discussed.
## Action Items
Numbered list of concrete action items, with owners if mentioned.
## Key Decisions
Any decisions that were made or conclusions reached.
If a section has nothing relevant, omit it entirely. Be concise and factual. Use the same language as the transcript.
---
Transcript:
"""
def summarize(text, llm_url, api_key="", model="gpt-4o-mini"):
    """Return a structured summary of *text* from a chat-completions endpoint.

    Sends one blocking POST to ``<llm_url>/chat/completions`` with
    ``SUMMARY_PROMPT`` as the system message and *text* as the user message.

    Args:
        text: Transcript to summarize.
        llm_url: Base URL of the API. Trailing slashes on the path are
            ignored and a query string, if present, is kept on the request.
        api_key: Bearer token. When empty, no ``Authorization`` header is
            sent.
        model: Model name placed in the request body.

    Returns:
        The ``content`` field of the first choice's message.

    Raises:
        ValueError: If *llm_url* has no host component.
        urllib.error.HTTPError: If the server replies with a non-200 status;
            the message carries up to 500 characters of the response body.
        json.JSONDecodeError: If a 200 response body is not valid JSON.
        KeyError, IndexError: If the JSON lacks ``choices[0].message.content``.
    """
    parts = urllib.parse.urlsplit(llm_url)
    if not parts.hostname:
        raise ValueError(f"llm_url must be an absolute http(s) URL: {llm_url!r}")

    # Append the endpoint to the *path* so a query string on the base URL
    # stays a query string instead of swallowing "/chat/completions".
    path = parts.path.rstrip("/") + "/chat/completions"
    target = f"{path}?{parts.query}" if parts.query else path
    url = urllib.parse.urlunsplit(
        (parts.scheme, parts.netloc, path, parts.query, "")
    )

    messages = [
        {"role": "system", "content": SUMMARY_PROMPT},
        {"role": "user", "content": text},
    ]
    body = json.dumps(
        {
            "model": model,
            "messages": messages,
            "temperature": 0.3,
            "max_tokens": 2048,
        },
        ensure_ascii=False,
    ).encode("utf-8")

    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # Any scheme other than https is treated as plain HTTP.
    if parts.scheme == "https":
        conn = http.client.HTTPSConnection(parts.hostname, parts.port or 443)
    else:
        conn = http.client.HTTPConnection(parts.hostname, parts.port or 80)

    try:
        conn.request("POST", target, body=body, headers=headers)
        resp = conn.getresponse()
        payload = resp.read()
        if resp.status != 200:
            detail = payload.decode("utf-8", "replace")[:500]
            message = f"{resp.reason}: {detail}" if detail else resp.reason
            raise urllib.error.HTTPError(
                url, resp.status, message, dict(resp.getheaders()), None
            )
    finally:
        # Release the socket on success and on every failure path.
        conn.close()

    data = json.loads(payload)
    return data["choices"][0]["message"]["content"]
def summarize_stream(text, llm_url, api_key="", model="gpt-4o-mini"):
    """Yield a structured summary of *text* piece by piece as it is generated.

    Sends a POST with ``"stream": true`` to ``<llm_url>/chat/completions``
    and parses the response as a stream of ``data:`` lines. Each yielded item
    is the non-empty ``choices[0].delta.content`` of one chunk. Because this
    is a generator, nothing is sent until iteration starts.

    Args:
        text: Transcript to summarize.
        llm_url: Base URL of the API. Trailing slashes on the path are
            ignored and a query string, if present, is kept on the request.
        api_key: Bearer token. When empty, no ``Authorization`` header is
            sent.
        model: Model name placed in the request body.

    Yields:
        Text fragments of the summary, in arrival order.

    Raises:
        ValueError: If *llm_url* has no host component.
        urllib.error.HTTPError: If the server replies with a non-200 status;
            the message carries up to 500 characters of the response body.
    """
    parts = urllib.parse.urlsplit(llm_url)
    if not parts.hostname:
        raise ValueError(f"llm_url must be an absolute http(s) URL: {llm_url!r}")

    # Append the endpoint to the *path* so a query string on the base URL
    # stays a query string instead of swallowing "/chat/completions".
    path = parts.path.rstrip("/") + "/chat/completions"
    target = f"{path}?{parts.query}" if parts.query else path
    url = urllib.parse.urlunsplit(
        (parts.scheme, parts.netloc, path, parts.query, "")
    )

    messages = [
        {"role": "system", "content": SUMMARY_PROMPT},
        {"role": "user", "content": text},
    ]
    body = json.dumps(
        {
            "model": model,
            "messages": messages,
            "temperature": 0.3,
            "max_tokens": 2048,
            "stream": True,
        },
        ensure_ascii=False,
    ).encode("utf-8")

    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    # Any scheme other than https is treated as plain HTTP.
    if parts.scheme == "https":
        conn = http.client.HTTPSConnection(parts.hostname, parts.port or 443)
    else:
        conn = http.client.HTTPConnection(parts.hostname, parts.port or 80)

    try:
        conn.request("POST", target, body=body, headers=headers)
        resp = conn.getresponse()
        if resp.status != 200:
            err_body = resp.read().decode("utf-8", "replace")[:500]
            message = f"{resp.reason}: {err_body}" if err_body else resp.reason
            raise urllib.error.HTTPError(
                url, resp.status, message, dict(resp.getheaders()), None
            )

        # readline() returns b"" once the server closes the stream.
        for raw_line in iter(resp.readline, b""):
            line = raw_line.decode("utf-8", "replace").strip()
            # Skip blank separators and any non-data lines.
            if not line.startswith("data:"):
                continue
            # The space after "data:" is optional in the event-stream format.
            data = line[5:].lstrip()
            if data == "[DONE]":
                return
            try:
                obj = json.loads(data)
            except json.JSONDecodeError:
                # Best effort: ignore a malformed chunk, keep streaming.
                continue
            # Some chunks carry an empty "choices" list; skip them.
            choices = obj.get("choices") if isinstance(obj, dict) else None
            if not choices:
                continue
            delta = choices[0].get("delta") or {}
            text_part = delta.get("content")
            if text_part:
                yield text_part
    finally:
        # Runs on normal completion, on errors, and when the consumer stops
        # iterating early (generator close), so the socket never leaks.
        conn.close()