From 175d4fc52ea8c184aa916d239f0dc38f271a0508 Mon Sep 17 00:00:00 2001
From: Shivam9091 <72157255+Shivam9091@users.noreply.github.com>
Date: Thu, 1 Oct 2020 03:13:23 -0700
Subject: [PATCH] improve spider

---
Review notes (this area, between the "---" marker and the diff, is ignored
by `git am` and does not become part of the commit message):

* Purpose of the patch: enable the per-table `DROP TABLE IF EXISTS`
  statements that were commented out with `#`, so each table section can be
  re-run on its own, and mark the WMYX section header with `##` like the
  other section headers in the file.

* The `) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;`
  terminators are deliberately left untouched. An earlier revision of this
  patch split them into `);` plus a separate `ENGINE=...;` line. Table
  options are only valid as part of CREATE TABLE, so a statement that starts
  with `ENGINE=` is a MySQL syntax error, and the tables would also have
  been created without their explicit engine/charset options.

* `Tbl_YiXia_Video`.`detail_page` keeps `DEFAULT ''`. The earlier revision
  changed it to '0'; every other VARCHAR column in this schema defaults to
  the empty string, and a second "no value" sentinel for a URL column would
  make emptiness checks inconsistent.

* Whitespace-only edits from the earlier revision were dropped
  (`set names utf8mb4 ;`, the removed blank line before the
  `Tbl_Huajiao_Live` section, the indented ` );` in the WMYX hunk).

* This patch was rebuilt from a copy whose line breaks and indentation had
  been collapsed. Old-file line numbers come from the original hunk headers;
  the indentation of context lines and the exact placement of blank context
  lines are a best-effort reconstruction. If a hunk does not match, apply
  with `git apply --ignore-space-change` or regenerate from the working tree.

* The `index` header line was removed because the post-image blob id of the
  earlier revision no longer describes this content. The pre-image blob was
  967928e.

 spiderWanghong/db.sql | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/spiderWanghong/db.sql b/spiderWanghong/db.sql
--- a/spiderWanghong/db.sql
+++ b/spiderWanghong/db.sql
@@ -12,7 +12,7 @@ CREATE TABLE `user` (
     PRIMARY KEY (`id`)
 );
 
-#DROP TABLE IF EXISTS `Tbl_Huajiao_Live`;
+DROP TABLE IF EXISTS `Tbl_Huajiao_Live`;
 CREATE TABLE `Tbl_Huajiao_Live` (
     `FLiveId` INT UNSIGNED NOT NULL,
     `FUserId` INT UNSIGNED NOT NULL,
@@ -28,7 +28,7 @@ CREATE TABLE `Tbl_Huajiao_Live` (
     PRIMARY KEY (`FLiveId`)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
 
-#DROP TABLE IF EXISTS `Tbl_Huajiao_User`;
+DROP TABLE IF EXISTS `Tbl_Huajiao_User`;
 CREATE TABLE `Tbl_Huajiao_User` (
     `FUserId` INT UNSIGNED NOT NULL,
     `FUserName` VARCHAR(255) NOT NULL DEFAULT '' COMMENT '昵称',
@@ -44,7 +44,7 @@ CREATE TABLE `Tbl_Huajiao_User` (
 
 
 ## 主播汇总表
-#DROP TABLE IF EXISTS `Tbl_Actor`;
+DROP TABLE IF EXISTS `Tbl_Actor`;
 CREATE TABLE `Tbl_Actor` (
     `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
     `uid` VARCHAR(30) NOT NULL DEFAULT '' COMMENT '唯一标识用户',
@@ -59,8 +59,8 @@ CREATE TABLE `Tbl_Actor` (
     UNIQUE INDEX `INDEX_uid_pid` (`uid`, `pid`)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
 
-# 沃米优选主播表
-#DROP TABLE IF EXISTS `Tbl_WMYX_Actor`;
+## 沃米优选主播表
+DROP TABLE IF EXISTS `Tbl_WMYX_Actor`;
 CREATE TABLE `Tbl_WMYX_Actor` (
     `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
     `uuid` VARCHAR(30) NOT NULL DEFAULT '' COMMENT '沃米优选唯一id',
@@ -83,7 +83,7 @@ CREATE TABLE `Tbl_WMYX_Actor` (
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
 
 ## 一下主播表
-#DROP TABLE IF EXISTS `Tbl_YiXia_Actor`;
+DROP TABLE IF EXISTS `Tbl_YiXia_Actor`;
 CREATE TABLE `Tbl_YiXia_Actor` (
     `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
     `uid` VARCHAR(30) NOT NULL DEFAULT '' COMMENT '唯一标识用户',
@@ -103,7 +103,7 @@ CREATE TABLE `Tbl_YiXia_Actor` (
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
 
 ## 一下视频表
-#DROP TABLE IF EXISTS `Tbl_YiXia_Video`;
+DROP TABLE IF EXISTS `Tbl_YiXia_Video`;
 CREATE TABLE `Tbl_YiXia_Video` (
     `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
     `scid` VARCHAR(30) NOT NULL DEFAULT '' COMMENT '唯一标识视频',