欧美一区二区三区,视频,国产成人久久av免费高清密臂,中文字幕免费在线

核心代碼

requests.get 下載html網頁
bs4.BeautifulSoup 分析html內容

				?

									from requests import get

									from bs4 import BeautifulSoup as bs

									from datetime import datetime as dt

									def Today(style=1):
									    """Format today's local date as text.

									    With style == 1 (the default) the result is 'YYYY-MM-DD' with
									    zero-padded month and day; for any other value it is the unpadded
									    Chinese form 'M月D日'.
									    """
									    now = dt.today()
									    if style == 1:
									        return f'{now.year}-{now.month:02}-{now.day:02}'
									    return f'{now.month}月{now.day}日'

									def SinaNews(style=1):
									    """Collect today's headline links from one news channel page.

									    style selects the page: 1 appends 'world' to the base URL, 2 appends
									    'china', and any other value uses the separate military-news URL.
									    Returns a list of (link text, href) tuples, one for every <a> tag
									    that has an href and whose markup contains the date string produced
									    by Today().
									    """
									    url1 = 'http://news.***.com.cn/'
									    if style == 1:
									        url1 += 'world'
									    elif style == 2:
									        url1 += 'china'
									    else:
									        url1 = 'https://mil.news.sina.com.cn/'
									    # A timeout keeps a stalled connection from blocking the caller forever.
									    resp = get(url1, timeout=10)
									    # Previously misspelled as 'uft-8', which is not a valid codec name.
									    resp.encoding = 'utf-8'
									    soup = bs(resp.text, 'html.parser')
									    today = Today()  # computed once instead of once per tag
									    # href=True skips anchors without an href, which used to raise KeyError.
									    return [(a.text, a['href'])
									            for a in soup.find_all('a', href=True)
									            if today in str(a)]

爬取標題

				?

									# Print a numbered list of the headlines returned for channel 1.
									for num, (title, _link) in enumerate(SinaNews(1), start=1):
									    print(f'No{num}:', title)

									No1: 外媒：*****

									No2: 日媒：******

									......

									......

內容已馬賽克！！！

首次做爬蟲，為了方便下手找一個不用破解網頁的某新聞網站，下載網頁就能直接取得內容。其中的國際、國內和軍事新聞三個網頁作內容源，requests.get下載網頁後，分析所得html文本，所有<a href=...>標記帶日期的剛好就是所需要的。

爬取正文

然後再根據url下載正文網頁，分析可知id='article'的<div>層就是正文所在位置，.get_text()是取得文本的關鍵函數，然後適當做一些格式處理：

				?

									>>> def NewsDownload(url):

									    html = get(url)

									    html.encoding='uft-8'

									    soup = bs(html.text,'html.parser')

									    text = soup.find('div',id='article').get_text().strip()

									    text = text.replace('點(diǎn)擊進(jìn)入專題：','相關(guān)專題：')

									    text = text.replace('　　','\n　　')

									    while '\n\n\n' in text:

									        text = text.replace('\n\n\n','\n\n')

									    return text 

									>>> url = 'https://******/w/2021-09-29/doc-iktzqtyt8811588.shtml'

									>>> NewsDownload(url)

									'原標(biāo)題：******************************************************'

									>>>

界面代碼

使用內置的圖形界面庫 tkinter 控件 Text、Listbox、Scrollbar、Button。設置基本屬性、放置位置、綁定命令，然後調試到程序完工！

源代碼 News.pyw：其中涉及的網站名稱已馬賽克！

				?

									from requests import get

									from bs4 import BeautifulSoup as bs

									from datetime import datetime as dt

									from os import path

									import tkinter as tk 

									def Today(style=1):
									    """Format today's local date as text.

									    With style == 1 (the default) the result is 'YYYY-MM-DD' with
									    zero-padded month and day; for any other value it is the unpadded
									    Chinese form 'M月D日'.
									    """
									    now = dt.today()
									    if style == 1:
									        return f'{now.year}-{now.month:02}-{now.day:02}'
									    return f'{now.month}月{now.day}日'

									def SinaNews(style=1):
									    """Collect today's headline links from one news channel page.

									    style selects the page: 1 appends 'world' to the base URL, 2 appends
									    'china', and any other value uses the separate military-news URL.
									    Returns a list of (link text, href) tuples, one for every <a> tag
									    that has an href and whose markup contains the date string produced
									    by Today().
									    """
									    url1 = 'http://news.****.com.cn/'
									    if style == 1:
									        url1 += 'world'
									    elif style == 2:
									        url1 += 'china'
									    else:
									        url1 = 'https://mil.****.com.cn/'
									    # A timeout keeps a stalled connection from freezing the GUI forever.
									    resp = get(url1, timeout=10)
									    # Previously misspelled as 'uft-8', which is not a valid codec name.
									    resp.encoding = 'utf-8'
									    soup = bs(resp.text, 'html.parser')
									    today = Today()  # computed once instead of once per tag
									    # href=True skips anchors without an href, which used to raise KeyError.
									    return [(a.text, a['href'])
									            for a in soup.find_all('a', href=True)
									            if today in str(a)]

									def NewsList(i):
									    """Reload the headline list for channel i and show its first article.

									    Rebinds the module-level ``news`` to SinaNews(i), refills the
									    ``tList`` listbox with numbered titles, clears the ``tText`` pane
									    and, when at least one headline was found, selects and displays the
									    first one.
									    """
									    global news
									    news = SinaNews(i)
									    tList.delete(0, tk.END)
									    for num, item in enumerate(news, start=1):
									        tList.insert(tk.END, f'{num:03} {item[0]}')
									    # The text pane is kept disabled (read-only); unlock it only to wipe it.
									    tText.config(state=tk.NORMAL)
									    tText.delete('1.0', tk.END)  # Tk text indices start at line 1
									    tText.config(state=tk.DISABLED)
									    # With no headlines, NewsShow(0) used to fail on news[0] (IndexError).
									    if news:
									        tList.select_set(0)  # same initial selection SetControl makes
									        NewsShow(0)

									def NewsList1():
									    """Button callback: reload the list with NewsList(1)."""
									    NewsList(1)

									def NewsList2():
									    """Button callback: reload the list with NewsList(2)."""
									    NewsList(2)

									def NewsList3():
									    """Button callback: reload the list with NewsList(3)."""
									    NewsList(3)

									def NewsShow(idx):
									    """Fetch the article for one headline and show it in ``tText``.

									    Called with the literal 0 to show the first entry of ``news``. Any
									    other argument -- including the event object Tk passes to the
									    double-click binding -- means "show the row currently selected in
									    ``tList``". Does nothing when no row is selected or ``news`` has no
									    such entry.
									    """
									    if idx != 0:
									        picked = tList.curselection()
									        if not picked:
									            return  # curselection() is empty when nothing is selected
									        idx = picked[0]
									    if idx >= len(news):
									        return  # e.g. the channel returned no headlines at all
									    title, url = news[idx][0], news[idx][1]
									    html = get(url, timeout=10)  # do not freeze the GUI forever
									    # Previously misspelled as 'uft-8', which is not a valid codec name.
									    html.encoding = 'utf-8'
									    soup = bs(html.text, 'html.parser')
									    article = soup.find('div', id='article')
									    # find() returns None when the page has no <div id="article">.
									    text = article.get_text().strip() if article is not None else ''
									    # Stray '(diǎn)'/'(jìn)'/'(guān)' fragments removed from these literals.
									    # NOTE(review): the first literal must match the label text on the
									    # live page exactly -- confirm the wording/script against the site.
									    text = text.replace('點擊進入專題：', '相關專題：')
									    text = text.replace('　　', '\n　　')
									    while '\n\n\n' in text:
									        text = text.replace('\n\n\n', '\n\n')
									    # The pane is kept disabled (read-only); unlock it only for the update.
									    tText.config(state=tk.NORMAL)
									    tText.delete('1.0', tk.END)  # Tk text indices start at line 1
									    tText.insert(tk.END, title + '\n\n' + text)
									    tText.config(state=tk.DISABLED)

									def InitWindow(self,W,H):
									    """Configure the main window, build its widgets and run the Tk loop.

									    self is the Tk root window; W and H are the window width and height
									    in pixels. The window is placed at x offset 8 and y offset
									    (screen height - H - 100), made non-resizable and always-on-top, and
									    titled with bTitle[0]. The call returns only when mainloop() does.
									    """
									    Y = self.winfo_screenheight()
									    self.geometry(f'{W}x{H}+8+{Y-H-100}')
									    icoFile = 'favicon.ico'
									    if path.exists(icoFile):
									        # Was win.iconbitmap(...): use the window passed in, not the global.
									        self.iconbitmap(icoFile)
									    self.resizable(False, False)
									    self.wm_attributes('-topmost', True)
									    self.title(bTitle[0])
									    SetControl()
									    self.update()
									    self.mainloop()

									def SetControl():
									    """Create and place all widgets inside the global window ``win``.

									    Builds the headline listbox (filled from the global ``news``) with its
									    scrollbar, the three channel buttons labelled bTitle[1..3], and the
									    read-only article pane with its scrollbar. Publishes the listbox and
									    text widget as the globals ``tList`` and ``tText``, shows the first
									    article and binds double-click on the list to NewsShow.
									    """
									    global tList, tText

									    # Headline list plus scrollbar.
									    listBar = tk.Scrollbar(win, orient=tk.VERTICAL)
									    listBar.place(x=450, y=320, height=300)
									    tList = tk.Listbox(win, selectmode=tk.BROWSE,
									                       yscrollcommand=listBar.set)
									    listBar.config(command=tList.yview)
									    for num, entry in enumerate(news, start=1):
									        tList.insert(tk.END, f'{num:03} {entry[0]}')
									    tList.place(x=15, y=320, width=435, height=300)
									    tList.select_set(0)
									    tList.focus()

									    # Channel buttons, spaced 100 px apart from left to right.
									    btnW, btnH = 70, 35      # button width and height
									    btnX, btnY = 95, 270     # coordinates of the first button
									    for slot, handler in enumerate((NewsList1, NewsList2, NewsList3)):
									        btn = tk.Button(win, text=bTitle[slot + 1], command=handler)
									        btn.place(x=btnX + 100 * slot, y=btnY, width=btnW, height=btnH)

									    # Article pane plus scrollbar.
									    textBar = tk.Scrollbar(win, orient=tk.VERTICAL)
									    textBar.place(x=450, y=10, height=240)
									    tText = tk.Text(win, yscrollcommand=textBar.set)
									    textBar.config(command=tText.yview)
									    tText.place(x=15, y=10, width=435, height=240)
									    tText.config(
									        state=tk.DISABLED,
									        bg='azure',
									        font=('宋體', '14'),
									    )

									    NewsShow(0)
									    tList.bind("<Double-Button-1>", NewsShow)

									if __name__=='__main__':
									    # Entry point: create the root window, load the default channel's
									    # headlines, then hand over to InitWindow.
									    win = tk.Tk()
									    # Index 0 is the window title; 1-3 are the channel button captions.
									    # Stray '(guó)' / '(nèi)' fragments removed so the captions read cleanly.
									    bTitle = ('今日新聞','國際新聞','國內新聞','軍事新聞')
									    news = SinaNews()
									    InitWindow(win,480,640)

奉上全部代碼，在此就不作詳細分析了，如有需要請留言討論。我的使用環境 Win7+Python3.8.8 下可以無錯運行！文中涉及網站名稱已打上馬賽克，猜不出名字的可以私下裡問我。

軟件編譯

使用pyinstaller.exe編譯成單個運行文件，注意源碼文件的後綴名應該用.pyw，否則會有cmd黑窗口出現。還有一個小知識點，任意網站的Logo圖標icon文件，一般都能在根目錄裡下載到，即：
http(s)://websiteurl.com(.cn)/favicon.ico

編譯命令如下：