# 네이버 쇼핑 크롤링1_제목, url, 카테고리

# Step 0. Load the required modules and libraries.
import os  # operating-system utilities
import sys  # interpreter/system utilities
import time  # pauses between requests to the server, usually 1 second
from urllib.parse import quote  # percent-encodes the search keyword for the URL

import chromedriver_autoinstaller
import numpy as np  # NumPy: numeric and matrix data library
import pandas as pd  # pandas: data-analysis library
from bs4 import BeautifulSoup  # HTML parsing / pre-processing
from selenium import webdriver  # web-browser automation
from selenium.webdriver.common.by import By  # locator strategies for find_element(s)
from selenium.webdriver.common.keys import Keys
from tqdm import tqdm
from tqdm import tqdm_notebook  # shows for-loop progress as a percentage gauge

# Suppress all warnings for the rest of the script.
import warnings
warnings.filterwarnings('ignore')

# Ask interactively for the keyword to search on Naver Shopping.
query_txt = input('1.크롤링할 키워드는 무엇입니까?: ')

query_txt  # notebook echo of the entered keyword; no effect when run as a script

# install() downloads a chromedriver matching the local Chrome and adds it to
# PATH, so Chrome() is created without a positional path argument (newer
# Selenium releases no longer accept the driver path positionally).
chrome_path = chromedriver_autoinstaller.install()
driver = webdriver.Chrome()

# Open the Naver Shopping search results. The keyword is percent-encoded so
# that characters such as '&', '#' or spaces cannot corrupt the query string.
driver.get('https://search.shopping.naver.com/search/all?query={}&cat_id=&frm=NVSHATC'.format(quote(query_txt, safe='')))
time.sleep(2)  # pause 2 seconds before interacting with the page

# Re-sort the results by clicking the "리뷰 많은순" link.
# find_element(By.LINK_TEXT, ...) replaces the removed find_element_by_link_text.
driver.find_element(By.LINK_TEXT, "리뷰 많은순").click()

def scroll_down(driver, pause=1):
    """Scroll the page down once, then wait.

    Args:
        driver: Object exposing ``execute_script`` (the Selenium WebDriver
            in this script).
        pause: Seconds to sleep after issuing the scroll. Defaults to 1,
            the delay the script has always used.
    """
    # The y offset is a very large fixed value, presumably chosen to exceed
    # any realistic page height so the scroll lands at the bottom.
    driver.execute_script("window.scrollTo(0, 19431049)")
    time.sleep(pause)

# n: number of scroll passes to perform.
n = 1
# A counted for-loop replaces the manual while/counter bookkeeping.
for _ in range(n):
    scroll_down(driver)

# Collect the product link elements. The same elements supply both the URL
# (href attribute) and the title (visible text).
# find_elements(By..., ...) replaces the removed find_elements_by_* helpers.
things = driver.find_elements(By.CSS_SELECTOR, ".basicList_link__1MaTN")

# Build the URL list.
url_list = [thing.get_attribute('href') for thing in things]

# Build the title list.
title_list = [thing.text for thing in things]

# Collect the category text of each listing.
things2 = driver.find_elements(By.CLASS_NAME, "basicList_depth__2QIie")

category_list = [thing.text for thing in things2]
category_list  # notebook echo; no effect when run as a script

import re
# Pattern anchored at the start of the category text; the group captures
# whatever follows the fixed prefix.
regex = '패션의류/여성의류(.+)'

# Probe only the first category. Guard against an empty list so a page with
# no category elements yields None instead of raising IndexError.
m = re.match(regex, category_list[0]) if category_list else None

# Report how many product URLs were collected.
print(len(url_list))
url_list  # notebook echo; no effect when run as a script

# Assemble the table. NOTE(review): the DataFrame constructor raises
# ValueError if the three lists differ in length - confirm that every
# listing on the page has a category element.
df = pd.DataFrame({"url": url_list, "title": title_list, "category": category_list})

df  # notebook echo; no effect when run as a script

# utf-8-sig writes a BOM, presumably so spreadsheet tools detect the
# encoding of the Korean text.
df.to_csv("네이버쇼핑_url.csv", encoding='utf-8-sig')

# to_excel no longer accepts an ``encoding`` argument in current pandas,
# so it is omitted here.
df.to_excel("네이버쇼핑_url.xlsx")

저작자표시 (새창열림)

'Crawling' 카테고리의 다른 글

웹 페이지 스크레이핑 (0)	2022.01.05
네이버 쇼핑 크롤링2_평점, 댓글수, (0)	2022.01.05
urllib, BeautifulSoup 으로 잡코리아 크롤링 (0)	2022.01.04
2. 네이버 블로그 내용 크롤링하기 (0)	2022.01.03
urllib.request 이용해 네이버뉴스 크롤링 (0)	2022.01.01

왕초보 우당탕탕 고군분투 코딩배우기

네이버 쇼핑 크롤링1_제목, url, 카테고리

'Crawling' 카테고리의 다른 글

티스토리툴바

네이버 쇼핑 크롤링1_제목, url, 카테고리

'Crawling' 카테고리의 다른 글

'Crawling' Related Articles

티스토리툴바