본문 바로가기
Python

[네이버] 웹문서 섹션 크롤링

by 퍼포먼스마케팅코더 2017. 1. 9.
반응형

import urllib.request

import urllib.parse

from bs4 import BeautifulSoup


# Query the Naver "web document" search API (XML flavour) for a keyword typed
# by the user and dump title / description / link of every result to a text
# file, one result per line.

defaultURL = 'https://openapi.naver.com/v1/search/webkr.xml?'
start = '&start=1'
display = '&display=100'
# quote_plus percent-encodes the keyword (spaces become '+') so it is safe
# to embed in the query string.
query = '&query=' + urllib.parse.quote_plus(str(input("Keyword: ")))

fullURL = defaultURL + start + display + query
print(fullURL)

headers = {
    'Host': 'openapi.naver.com',
    'User-Agent': 'curl/7.49.1',
    'Accept': '*/*',
    'Content-Type': 'application/xml',
    # Placeholders: replace with the credentials issued for your application.
    'X-Naver-Client-Id': 'Naver Client Id',
    'X-Naver-Client-Secret': 'Naver Client Secret',
}


def _tag_text(tag):
    """Return the stripped text of *tag*, or '' when the tag is missing."""
    return tag.get_text(strip=True) if tag is not None else ''


def _link_text(item):
    """Return the URL carried by the <link> child of *item*.

    With 'html.parser', <link> is handled as a void (self-closing) HTML
    element, so the URL text is not nested inside the tag but follows it as
    a sibling string.  Try the tag's own text first and fall back to that
    sibling so the link is not silently written as an empty string.
    """
    link = item.link
    if link is None:
        return ''
    text = link.get_text(strip=True)
    if not text and isinstance(link.next_sibling, str):
        text = link.next_sibling.strip()
    return text


req = urllib.request.Request(fullURL, headers=headers)

# Fetch before touching the output file: if the request fails, the previous
# output is not truncated and no file handle is left open.
with urllib.request.urlopen(req) as f:
    resultXML = f.read()

xmlsoup = BeautifulSoup(resultXML, 'html.parser')
items = xmlsoup.find_all('item')

# The context manager closes the file even if writing raises.
with open("C:\\Python34\\naver_web_4.txt", "w", encoding='utf-8') as file:
    for item in items:
        # One write per result; the field separators are kept exactly as in
        # the original output format (a literal backslash before each label).
        file.write(
            '웹문서제목 : ' + _tag_text(item.title)
            + '\\웹문서내용 : ' + _tag_text(item.description)
            + '\\웹문서링크 : ' + _link_text(item) + '\n'
        )


반응형

댓글