Skip to content

Commit 0b4518c

Browse files
authored
Merge pull request #106 from Guovin/dev
Release: v1.1.2
2 parents 5278408 + 086e66d commit 0b4518c

File tree

5 files changed

+71
-35
lines changed

5 files changed

+71
-35
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
steps:
1919
- name: Set branch name
2020
id: vars
21-
run: echo ::set-output name=branch::${{ github.ref == 'refs/heads/gd' && 'gd' || 'master' }}
21+
run: echo ::set-output name=branch::${{ github.repository_owner == 'Guovin' && 'gd' || 'master' }}
2222
- uses: actions/checkout@v3
2323
with:
2424
ref: ${{ steps.vars.outputs.branch }}

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# 更新日志(Changelog)
22

3+
## v1.1.2
4+
5+
### 2024/5/7
6+
7+
- 重构接口获取方法,增强通用性,适应结构变更(Refactored the method for obtaining the interface, enhanced its universality, and adapted to structural changes)
8+
- 修复 gd 分支自动更新问题(#105)(Fixed the automatic update issue of the gd branch (#105))
9+
- 优化自定义接口源获取,接口去重(Optimized the acquisition of custom interface sources and removed duplicate interfaces)
10+
311
## v1.1.1
412

513
### 2024/4/29

main.py

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@
88
from selenium.webdriver.support import expected_conditions as EC
99
from selenium_stealth import stealth
1010
import asyncio
11-
from bs4 import BeautifulSoup
11+
from bs4 import BeautifulSoup, NavigableString
1212
from utils import (
1313
getChannelItems,
1414
updateChannelUrlsTxt,
1515
updateFile,
16-
getUrlInfo,
16+
getChannelUrl,
17+
getChannelInfo,
1718
sortUrlsBySpeedAndResolution,
1819
getTotalUrls,
1920
filterUrlsByPatterns,
@@ -25,6 +26,7 @@
2526
from logging.handlers import RotatingFileHandler
2627
import os
2728
from tqdm import tqdm
29+
import re
2830

2931
handler = RotatingFileHandler("result_new.log", encoding="utf-8")
3032
logging.basicConfig(
@@ -114,18 +116,37 @@ async def visitPage(self, channelItems):
114116
self.driver.execute_script(
115117
"arguments[0].click();", page_link
116118
)
117-
soup = BeautifulSoup(self.driver.page_source, "html.parser")
118-
results = (
119-
soup.find_all("div", class_="result") if soup else []
119+
source = re.sub(
120+
r"<!--.*?-->",
121+
"",
122+
self.driver.page_source,
123+
flags=re.DOTALL,
120124
)
121-
for result in results:
122-
try:
123-
url, date, resolution = getUrlInfo(result)
125+
soup = BeautifulSoup(source, "html.parser")
126+
if soup:
127+
results = []
128+
for element in soup.descendants:
129+
if isinstance(element, NavigableString):
130+
url = getChannelUrl(element)
131+
if url and not any(
132+
item[0] == url for item in results
133+
):
134+
url_element = soup.find(
135+
lambda tag: tag.get_text(strip=True)
136+
== url
137+
)
138+
if url_element:
139+
info_element = (
140+
url_element.find_next_sibling()
141+
)
142+
date, resolution = getChannelInfo(
143+
info_element
144+
)
145+
results.append((url, date, resolution))
146+
for result in results:
147+
url, date, resolution = result
124148
if url and checkUrlByPatterns(url):
125149
infoList.append((url, date, resolution))
126-
except Exception as e:
127-
print(f"Error on result {result}: {e}")
128-
continue
129150
except Exception as e:
130151
print(f"Error on page {page}: {e}")
131152
continue

utils.py

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ async def getChannelsByExtendBaseUrls(channel_names):
9191
url = re.match(pattern, line).group(2)
9292
value = (url, None, resolution)
9393
if key in link_dict:
94-
link_dict[key].append(value)
94+
if value not in link_dict[key]:
95+
link_dict[key].append(value)
9596
else:
9697
link_dict[key] = [value]
9798
found_channels = []
@@ -137,31 +138,37 @@ def updateFile(final_file, old_file):
137138
os.replace(old_file, final_file)
138139

139140

140-
def getUrlInfo(result):
141+
def getChannelUrl(element):
141142
"""
142143
Get the channel url
143144
"""
144-
url = date = resolution = None
145-
result_div = [div for div in result.children if div.name == "div"]
146-
if 1 < len(result_div):
147-
channel_text = result_div[1].get_text(strip=True)
148-
url_match = re.search(
149-
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
150-
channel_text,
145+
url = None
146+
urlRegex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
147+
url_search = re.search(
148+
urlRegex,
149+
element.get_text(strip=True),
150+
)
151+
if url_search:
152+
url = url_search.group()
153+
return url
154+
155+
156+
def getChannelInfo(element):
157+
"""
158+
Get the channel info
159+
"""
160+
date, resolution = None, None
161+
info_text = element.get_text(strip=True)
162+
if info_text:
163+
date, resolution = (
164+
(info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
165+
(
166+
info_text.partition(" ")[2].partition("•")[2]
167+
if info_text.partition(" ")[2].partition("•")[2]
168+
else None
169+
),
151170
)
152-
if url_match is not None:
153-
url = url_match.group()
154-
info_text = result_div[-1].get_text(strip=True)
155-
if info_text:
156-
date, resolution = (
157-
(info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
158-
(
159-
info_text.partition(" ")[2].partition("•")[2]
160-
if info_text.partition(" ")[2].partition("•")[2]
161-
else None
162-
),
163-
)
164-
return url, date, resolution
171+
return date, resolution
165172

166173

167174
async def getSpeed(url, urlTimeout=5):

version.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
{
2-
"version": "1.1.1"
2+
"version": "1.1.2"
33
}

0 commit comments

Comments
 (0)