import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
# Query the Naver News search Open API for a user-supplied keyword and save
# each result's title and summary to a local UTF-8 text file.
# NOTE(review): the X-Naver-Client-Id / X-Naver-Client-Secret header values
# below are placeholders — fill in real API credentials before running.

# Build the request URL: sort by similarity, start at result 1, 100 results.
defaultURL = 'https://openapi.naver.com/v1/search/news.xml?'
sort = '&sort=sim'
start = '&start=1'
display = '&display=100'
# quote_plus() percent-encodes the query so non-ASCII (e.g. Korean) input is
# safe inside the URL. input() already returns str — no str() wrapper needed.
query = '&query=' + urllib.parse.quote_plus(input("검색어: "))
fullURL = defaultURL + sort + start + display + query
print(fullURL)

headers = {
'Host' : 'openapi.naver.com' ,
'User-Agent' : 'curl/7.43.0',
'Accept' : '*/*',
'Content-Type' : 'application/xml',
'X-Naver-Client-Id' : 'Naver Client Id',
'X-Naver-Client-Secret' : 'Naver Client Secret '
}
req = urllib.request.Request(fullURL, headers=headers)
# Use context managers so the HTTP response and the output file are closed
# even when an exception is raised mid-way. (The original opened the file
# before the network call and leaked the handle if urlopen() failed.)
with urllib.request.urlopen(req) as f:
    resultXML = f.read()
xmlsoup = BeautifulSoup(resultXML, 'html.parser')
items = xmlsoup.find_all('item')
with open("C:\\Python34\\naver_news.txt", "w", encoding='utf-8') as file:
    for item in items:
        file.write('-----------------------------------------\n')
        file.write('뉴스제목 : ' + item.title.get_text(strip=True) + '\n')
        file.write('요약내용 : ' + item.description.get_text(strip=True) + '\n')
        file.write('\n')
'Python' 카테고리의 다른 글
[네이버] 웹문서 섹션 크롤링 (0) | 2017.01.09 |
---|---|
[네이버] 파이썬 네이버 카페 크롤링 (4) | 2017.01.09 |
[파이썬3] beautiful soup 예제 (0) | 2017.01.06 |
[파이썬3] lxml 설치하기 (0) | 2017.01.06 |
[디시인사이드] 김소혜 게시판 댓글 웹 크롤링 (0) | 2017.01.06 |
댓글