You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
<details>

```bash
$ python export_csdn_mds.py
export_csdn_mds.py:39: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.
The code that caused this warning is on line 39 of the file export_csdn_mds.py. To get rid of this warning, pass the additional argument 'features="lxml"' to the BeautifulSoup constructor.
soup = BeautifulSoup(item_html.text)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
Traceback (most recent call last):
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 360, in _error_catcher
yield
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 669, in read_chunked
chunk = self._handle_chunk(amt)
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 624, in _handle_chunk
returned_chunk = self._fp._safe_read(self.chunk_left)
File "G:\python-3.7.1\lib\http\client.py", line 610, in _safe_read
chunk = self.fp.read(min(amt, MAXAMOUNT))
File "G:\python-3.7.1\lib\socket.py", line 589, in readinto
return self._sock.recv_into(b)
File "G:\python-3.7.1\lib\ssl.py", line 1052, in recv_into
return self.read(nbytes, buffer)
File "G:\python-3.7.1\lib\ssl.py", line 911, in read
return self._sslobj.read(len, buffer)
socket.timeout: The read operation timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "G:\python-3.7.1\lib\site-packages\requests\models.py", line 750, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 490, in stream
for line in self.read_chunked(amt, decode_content=decode_content):
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 694, in read_chunked
self._original_response.close()
File "G:\python-3.7.1\lib\contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 365, in _error_catcher
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='blog.csdn.net', port=443): Read timed out.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "export_csdn_mds.py", line 171, in <module>
start_spider(username)
File "export_csdn_mds.py", line 161, in start_spider
CrawlingItemBlog(base_url, articleid)
File "export_csdn_mds.py", line 26, in CrawlingItemBlog
item_html = request_get(url)
File "export_csdn_mds.py", line 18, in request_get
response = requests.get(url, headers=headers, timeout=3)
File "G:\python-3.7.1\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "G:\python-3.7.1\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "G:\python-3.7.1\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "G:\python-3.7.1\lib\site-packages\requests\sessions.py", line 686, in send
r.content
File "G:\python-3.7.1\lib\site-packages\requests\models.py", line 828, in content
self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
File "G:\python-3.7.1\lib\site-packages\requests\models.py", line 757, in generate
raise ConnectionError(e)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='blog.csdn.net', port=443): Read timed out.
```
</details>
The text was updated successfully, but these errors were encountered:
建议:使用英文的冒号代替中文的冒号。
原文截图:
2. 好像被禁了,有个443:`urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='blog.csdn.net', port=443): Read timed out.`
详细错误信息:

```bash
$ python export_csdn_mds.py
export_csdn_mds.py:39: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.
The code that caused this warning is on line 39 of the file export_csdn_mds.py. To get rid of this warning, pass the additional argument 'features="lxml"' to the BeautifulSoup constructor.
soup = BeautifulSoup(item_html.text)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
Traceback (most recent call last):
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 360, in _error_catcher
yield
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 669, in read_chunked
chunk = self._handle_chunk(amt)
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 624, in _handle_chunk
returned_chunk = self._fp._safe_read(self.chunk_left)
File "G:\python-3.7.1\lib\http\client.py", line 610, in _safe_read
chunk = self.fp.read(min(amt, MAXAMOUNT))
File "G:\python-3.7.1\lib\socket.py", line 589, in readinto
return self._sock.recv_into(b)
File "G:\python-3.7.1\lib\ssl.py", line 1052, in recv_into
return self.read(nbytes, buffer)
File "G:\python-3.7.1\lib\ssl.py", line 911, in read
return self._sslobj.read(len, buffer)
socket.timeout: The read operation timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "G:\python-3.7.1\lib\site-packages\requests\models.py", line 750, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 490, in stream
for line in self.read_chunked(amt, decode_content=decode_content):
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 694, in read_chunked
self._original_response.close()
File "G:\python-3.7.1\lib\contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "G:\python-3.7.1\lib\site-packages\urllib3\response.py", line 365, in _error_catcher
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='blog.csdn.net', port=443): Read timed out.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "export_csdn_mds.py", line 171, in <module>
start_spider(username)
File "export_csdn_mds.py", line 161, in start_spider
CrawlingItemBlog(base_url, articleid)
File "export_csdn_mds.py", line 26, in CrawlingItemBlog
item_html = request_get(url)
File "export_csdn_mds.py", line 18, in request_get
response = requests.get(url, headers=headers, timeout=3)
File "G:\python-3.7.1\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "G:\python-3.7.1\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "G:\python-3.7.1\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "G:\python-3.7.1\lib\site-packages\requests\sessions.py", line 686, in send
r.content
File "G:\python-3.7.1\lib\site-packages\requests\models.py", line 828, in content
self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
File "G:\python-3.7.1\lib\site-packages\requests\models.py", line 757, in generate
raise ConnectionError(e)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='blog.csdn.net', port=443): Read timed out.
```
The text was updated successfully, but these errors were encountered: