Crawling

네이버 뉴스 url 크롤링_naver_news_url_crawl

km1n 2021. 12. 30. 16:32

1. 시작에 앞서

pwd
# 라이브러리 import
import pandas as pd
import numpy as np

from selenium import webdriver  # 라이브러리(모듈) 가져오라
from selenium.webdriver import ActionChains as AC
import chromedriver_autoinstaller
from tqdm import tqdm
from tqdm.notebook import tqdm
import re
from time import sleep
import time

# 워닝 무시
import warnings
warnings.filterwarnings('ignore')

 

# Search keyword to collect data for; it is substituted into the news search URL.

keyword = "삼성전자"
keyword

 

2. 네이버뉴스들 url 수집

# Launch a Chrome window and open the Naver news search results for `keyword`.
from urllib.parse import quote_plus

# install() downloads a chromedriver matching the local Chrome and adds its
# directory to PATH, so Chrome() can locate the driver on its own. Passing the
# path positionally breaks on Selenium 4.10+, where the first positional
# argument of Chrome() is `options`, not the executable path.
chrome_path = chromedriver_autoinstaller.install()
driver = webdriver.Chrome()

# Percent-encode the keyword so spaces, '&', '#', etc. cannot corrupt the query string.
driver.get("https://search.naver.com/search.naver?where=news&sm=tab_jum&query={}".format(quote_plus(keyword)))
time.sleep(2)  # give the results page time to load

 

# Collect the URLs of the "네이버뉴스" (Naver News) links on page 1 and save them to CSV.

# find_elements_by_link_text() was removed in Selenium 4.3. The generic
# find_elements(by, value) form works on both Selenium 3 and 4; the string
# "link text" is the value of selenium's By.LINK_TEXT locator constant.
things = driver.find_elements("link text", '네이버뉴스')

# One href per matched anchor element.
url_list = [thing.get_attribute('href') for thing in things]
print(len(url_list))
url_list
df = pd.DataFrame({"url": url_list})
df.to_csv('navernews_urls.csv')