Skip to content

Commit b5f1ca5

Browse files
author
calderbuild
committed
fix: remove heavy unused deps (jieba/tiktoken) to prevent Render OOM
- Remove jieba (~150MB) from seo_content.py; extract_keywords() was never called
- Replace tiktoken (~80MB) with UTF-8 byte estimation in app/llm.py
- Remove 6 dead dependencies: jieba, tiktoken, redis, websockets, beautifulsoup4, markdown2
- Fix keep-alive workflow: add continue-on-error + || true so cold starts don't fail the job
1 parent c0ab42f commit b5f1ca5

File tree

5 files changed

+49
-103
lines changed

5 files changed

+49
-103
lines changed

.github/workflows/keep-alive.yml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,27 +13,30 @@ jobs:
1313
timeout-minutes: 2
1414
steps:
1515
- name: Ping Health Endpoint
16+
continue-on-error: true
1617
run: |
17-
response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 https://meetspot-irq2.onrender.com/health)
18+
response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 https://meetspot-irq2.onrender.com/health || true)
1819
echo "Health check response: $response"
1920
if [ "$response" != "200" ]; then
2021
echo "Warning: Health check returned non-200 status"
2122
fi
2223
2324
- name: Ping Sitemap (SEO critical)
25+
continue-on-error: true
2426
run: |
25-
sitemap_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 https://meetspot-irq2.onrender.com/sitemap.xml)
27+
sitemap_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 https://meetspot-irq2.onrender.com/sitemap.xml || true)
2628
echo "Sitemap response: $sitemap_response"
2729
2830
- name: Ping Sitemap as Googlebot (simulate crawler)
31+
continue-on-error: true
2932
run: |
30-
# Simulate Googlebot to ensure server responds to Google's crawler
3133
googlebot_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 \
3234
-H "User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" \
33-
https://meetspot-irq2.onrender.com/sitemap.xml)
35+
https://meetspot-irq2.onrender.com/sitemap.xml || true)
3436
echo "Sitemap (Googlebot UA) response: $googlebot_response"
3537
3638
- name: Ping Robots.txt
39+
continue-on-error: true
3740
run: |
38-
robots_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 https://meetspot-irq2.onrender.com/robots.txt)
41+
robots_response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 https://meetspot-irq2.onrender.com/robots.txt || true)
3942
echo "Robots.txt response: $robots_response"

api/services/seo_content.py

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -8,48 +8,10 @@
88
from functools import lru_cache
99
from typing import Dict, List
1010

11-
import jieba
12-
import jieba.analyse
13-
1411

1512
class SEOContentGenerator:
1613
"""封装SEO内容生成逻辑."""
1714

18-
def __init__(self) -> None:
19-
self.custom_words = [
20-
"聚会地点",
21-
"会面点",
22-
"中点推荐",
23-
"团队聚会",
24-
"远程团队",
25-
"咖啡馆",
26-
"餐厅",
27-
"图书馆",
28-
"共享空间",
29-
"北京",
30-
"上海",
31-
"广州",
32-
"深圳",
33-
"杭州",
34-
"成都",
35-
"meeting location",
36-
"midpoint",
37-
"group meeting",
38-
]
39-
for word in self.custom_words:
40-
jieba.add_word(word)
41-
42-
def extract_keywords(self, text: str, top_k: int = 10) -> List[str]:
43-
"""基于TF-IDF提取关键词."""
44-
if not text:
45-
return []
46-
return jieba.analyse.extract_tags(
47-
text,
48-
topK=top_k,
49-
withWeight=False,
50-
allowPOS=("n", "nr", "ns", "nt", "nw", "nz", "v", "vn"),
51-
)
52-
5315
def generate_meta_tags(self, page_type: str, data: Dict) -> Dict[str, str]:
5416
"""根据页面类型生成Meta标签."""
5517
if page_type == "homepage":

app/llm.py

Lines changed: 40 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import math
22
from typing import Dict, List, Optional, Union
33

4-
import tiktoken
54
from openai import (APIError, AsyncAzureOpenAI, AsyncOpenAI,
65
AuthenticationError, OpenAIError, RateLimitError)
76
from openai.types.chat import ChatCompletion
@@ -38,12 +37,9 @@ class TokenCounter:
3837
HIGH_DETAIL_TARGET_SHORT_SIDE = 768
3938
TILE_SIZE = 512
4039

41-
def __init__(self, tokenizer):
42-
self.tokenizer = tokenizer
43-
4440
def count_text(self, text: str) -> int:
45-
"""Calculate tokens for a text string"""
46-
return 0 if not text else len(self.tokenizer.encode(text))
41+
"""Estimate tokens for a text string using UTF-8 byte length."""
42+
return 0 if not text else len(text.encode("utf-8")) // 3
4743

4844
def count_image(self, image_item: dict) -> int:
4945
"""
@@ -197,13 +193,6 @@ def __init__(
197193
else None
198194
)
199195

200-
# Initialize tokenizer
201-
try:
202-
self.tokenizer = tiktoken.encoding_for_model(self.model)
203-
except KeyError:
204-
# If the model is not in tiktoken's presets, use cl100k_base as default
205-
self.tokenizer = tiktoken.get_encoding("cl100k_base")
206-
207196
if self.api_type == "azure":
208197
self.client = AsyncAzureOpenAI(
209198
base_url=self.base_url,
@@ -213,13 +202,13 @@ def __init__(
213202
else:
214203
self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)
215204

216-
self.token_counter = TokenCounter(self.tokenizer)
205+
self.token_counter = TokenCounter()
217206

218207
def count_tokens(self, text: str) -> int:
219-
"""Calculate the number of tokens in a text"""
208+
"""Estimate the number of tokens in a text using UTF-8 byte length."""
220209
if not text:
221210
return 0
222-
return len(self.tokenizer.encode(text))
211+
return len(text.encode("utf-8")) // 3
223212

224213
def count_message_tokens(self, messages: List[dict]) -> int:
225214
return self.token_counter.count_message_tokens(messages)
@@ -576,41 +565,41 @@ async def ask_with_images(
576565
)
577566

578567
# Handle non-streaming request
579-
if not stream:
580-
response = await self.client.chat.completions.create(**params)
581-
582-
if not response.choices or not response.choices[0].message.content:
583-
raise ValueError("Empty or invalid response from LLM")
584-
585-
self.update_token_count(
586-
response.usage.prompt_tokens, response.usage.completion_tokens
587-
)
588-
return response.choices[0].message.content
589-
590-
# Handle streaming request
591-
response = await self.client.chat.completions.create(**params)
592-
593-
collected_messages = []
594-
completion_text = ""
595-
async for chunk in response:
596-
chunk_message = chunk.choices[0].delta.content or ""
597-
collected_messages.append(chunk_message)
598-
completion_text += chunk_message
599-
print(chunk_message, end="", flush=True)
600-
601-
print() # Newline after streaming
602-
full_response = "".join(collected_messages).strip()
603-
604-
if not full_response:
605-
raise ValueError("Empty response from streaming LLM")
606-
607-
completion_tokens = self.count_tokens(completion_text)
608-
logger.info(
609-
f"Estimated completion tokens for streaming response with images: {completion_tokens}"
610-
)
611-
self.update_token_count(input_tokens, completion_tokens)
612-
613-
return full_response
568+
if not stream:
569+
response = await self.client.chat.completions.create(**params)
570+
571+
if not response.choices or not response.choices[0].message.content:
572+
raise ValueError("Empty or invalid response from LLM")
573+
574+
self.update_token_count(
575+
response.usage.prompt_tokens, response.usage.completion_tokens
576+
)
577+
return response.choices[0].message.content
578+
579+
# Handle streaming request
580+
response = await self.client.chat.completions.create(**params)
581+
582+
collected_messages = []
583+
completion_text = ""
584+
async for chunk in response:
585+
chunk_message = chunk.choices[0].delta.content or ""
586+
collected_messages.append(chunk_message)
587+
completion_text += chunk_message
588+
print(chunk_message, end="", flush=True)
589+
590+
print() # Newline after streaming
591+
full_response = "".join(collected_messages).strip()
592+
593+
if not full_response:
594+
raise ValueError("Empty response from streaming LLM")
595+
596+
completion_tokens = self.count_tokens(completion_text)
597+
logger.info(
598+
f"Estimated completion tokens for streaming response with images: {completion_tokens}"
599+
)
600+
self.update_token_count(input_tokens, completion_tokens)
601+
602+
return full_response
614603

615604
except TokenLimitExceeded:
616605
raise

environment.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,11 @@ dependencies:
3333
# 日期处理
3434
- python-dateutil=2.9.0
3535

36-
# SEO相关依赖
36+
# pip依赖
3737
- pip
3838
- pip:
39-
- jieba==0.42.1 # 中文分词(conda-forge暂无)
4039
- whitenoise==6.6.0 # 静态文件服务
4140
- slowapi==0.1.9 # API限流
42-
- markdown2==2.4.12 # Markdown解析
4341

4442
# 系统工具(可选,提升开发体验)
4543
- git

requirements.txt

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,19 @@ python-multipart==0.0.20
1111
loguru==0.7.3
1212
tomli==2.4.0
1313
python-dateutil==2.9.0
14-
jieba==0.42.1
1514
whitenoise==6.6.0
1615
slowapi==0.1.9
17-
markdown2==2.4.12
1816
asgiref==3.8.1
1917
requests==2.32.3
20-
beautifulsoup4==4.12.3
2118
sqlalchemy==2.0.23
2219
alembic==1.13.0
2320
aiosqlite==0.19.0
2421
python-jose[cryptography]==3.3.0
2522
passlib[bcrypt]==1.7.4
26-
redis==5.0.1
27-
websockets==12.0
2823

2924
# Environment
3025
python-dotenv==1.2.1
3126

3227
# LLM/AI Dependencies
3328
openai>=1.0.0
34-
tiktoken>=0.5.0
3529
tenacity>=8.2.0

0 commit comments

Comments
 (0)