본문 바로가기
Python

[네이버] 섹션 크롤링3

by 퍼포먼스마케팅코더 2017. 1. 18.
반응형

#네이버 키워드 URL 만들기


from bs4 import BeautifulSoup

import urllib.parse

import urllib.request


# Read the search keywords, one per line. No encoding is passed here, so the
# platform default encoding is used for this input file.
with open("C:/Users/eyeden-FF14/Desktop/Auction/all_keywords.txt", 'r') as f:
    keyword = f.readlines()


# Fixed prefix/suffix of a Naver search URL; the percent-encoded keyword is
# inserted between them as the value of the `query` parameter.
Naver_first_URL = 'https://search.naver.com/search.naver?where=nexearch&query='

Naver_behind_URL = '&sm=top_hty&fbm=0&ie=utf8'


# Build the Naver search URL for every keyword and write one URL per line.
# The `with` block closes the output file even if a write fails part-way.
with open("C:/Users/eyeden-FF14/Desktop/Naver_URL_List.txt", 'w', encoding='utf-8') as f1:
    for i in keyword:
        a = i.replace("\n", "")

        if not a.strip():
            # A blank line would yield a search URL with an empty query.
            continue

        # Percent-encode the keyword so non-ASCII text is valid inside a URL.
        c = urllib.parse.quote(a)

        URL = Naver_first_URL + c + Naver_behind_URL

        f1.write(URL + '\n')


#네이버 검색결과 섹션 크롤링

import urllib.request
import urllib.parse
from bs4 import BeautifulSoup

import urllib.error  # noqa: E402 -- documents the error types handled below

# Only the first MAX_URLS entries of the URL list are crawled in one run.
MAX_URLS = 6475
# Seconds to wait on one request before giving up, so a single stalled
# connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 30

# Load the previously generated search URLs (one per line).
with open("C:/Users/eyeden-FF14/Desktop/Naver_URL_List.txt", 'r', encoding='utf-8') as file:
    urls = file.readlines()

# For every URL, fetch the result page and record each <h2> element as a
# line of the form "<url>@<h2 markup>". The `with` block guarantees the
# output file is closed even if the crawl stops early.
with open("C:/Users/eyeden-FF14/Desktop/Naver_section_outcome.txt", 'w', encoding='utf-8') as outcome:
    for page in urls[0:MAX_URLS]:
        url = page.replace("\n", "")
        if not url.strip():
            # urlopen() raises ValueError on an empty URL; skip blank lines.
            continue
        try:
            # The response is closed as soon as its body has been read.
            with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT) as f:
                html = f.read()
        except OSError as exc:
            # urllib.error.URLError / HTTPError and socket timeouts are all
            # OSError subclasses. Report and move on so that one bad URL
            # does not abort the rest of the crawl.
            print("Skipping {}: {}".format(url, exc))
            continue
        bs = BeautifulSoup(html, 'html.parser')
        sections = bs.find_all("h2")
        for i in sections:
            outcome.write(str(url) + '@' + str(i) + '\n')


반응형

댓글