用 requests.get 获取到的原始内容中可以搜到 t_con 这个字符串,但是经过 BeautifulSoup 处理后,结果里却搜不到 t_con 了,这是怎么回事呢?
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
def get_info_from(url, parser="html.parser", timeout=10):
    """Download *url* and return it parsed as a ``BeautifulSoup`` tree.

    Args:
        url: Address of the page to fetch.
        parser: Name of the Beautiful Soup parser backend. Defaults to the
            standard-library ``"html.parser"``. With ``"lxml"`` the markup
            containing ``t_con`` was found in the raw response text but was
            missing from the parsed tree.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The parsed document as a ``BeautifulSoup`` object.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    headers = {
        # Present a desktop browser User-Agent with the request.
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
        ),
    }
    # Without a timeout requests may wait on an unresponsive server forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 decoding instead of relying on the encoding requests infers.
    response.encoding = "utf-8"
    # Searching response.text for "t_con" succeeds, but the tree built by the
    # "lxml" parser no longer contained it. Parsers recover from non-standard
    # markup differently, so the stdlib parser is the default here.
    # NOTE(review): presumably lxml discards that section while repairing the
    # page's markup -- confirm against the live page.
    return BeautifulSoup(response.text, parser)
if __name__ == "__main__":
    # Sample forum listing page used to exercise get_info_from by hand.
    # The kw value is a percent-encoded UTF-8 forum name; pn=0 is presumably
    # the first page -- verify against the site.
    test_url = (
        "http://tieba.baidu.com/f"
        "?kw=%E4%B8%BA%E7%9F%A5%E7%AC%94%E8%AE%B0"
        "&ie=utf-8&pn=0"
    )
    get_info_from(test_url)