创建 Spring Boot 项目
创建项目时勾选以下依赖：Spring Web、MySQL Driver、Lombok
添加 WebMagic 依赖
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
| <!-- WebMagic core module. -->
<!-- NOTE(review): the slf4j-log4j12 binding is excluded, presumably so it does not clash with the logging binding the Spring Boot starters already provide; confirm. -->
<dependency>
    <groupId>us.codecraft</groupId>
    <artifactId>webmagic-core</artifactId>
    <version>0.10.0</version>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<!-- WebMagic extension module, declared at the same version as webmagic-core. -->
<dependency>
    <groupId>us.codecraft</groupId>
    <artifactId>webmagic-extension</artifactId>
    <version>0.10.0</version>
</dependency>
<!-- NOTE(review): Guava 16.0 is a very old release. Check whether the WebMagic 0.10.0 artifacts already pull in a newer Guava transitively before pinning this version; TODO confirm. -->
<dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
    <version>16.0</version>
</dependency>
|
添加mybatis-plus依赖
1 2 3 4 5 6 7 8 9 10 11
| <!-- MyBatis-Plus Spring Boot starter. -->
<dependency>
    <groupId>com.baomidou</groupId>
    <artifactId>mybatis-plus-boot-starter</artifactId>
    <version>3.5.5</version>
</dependency>
<!-- NOTE(review): mybatis-spring is declared explicitly, presumably to override the version the starter brings in transitively; confirm it is required for the Spring Boot version in use. -->
<dependency>
    <groupId>org.mybatis</groupId>
    <artifactId>mybatis-spring</artifactId>
    <version>3.0.3</version>
</dependency>
|
配置application.yml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# Embedded web server settings.
server:
  port: 8080

spring:
  main:
    # Lets Spring resolve circular bean references instead of failing at startup.
    allow-circular-references: true
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
    # Replace {database} with the actual schema name before running.
    # NOTE(review): useSSL=false disables TLS to MySQL; acceptable for local development only.
    url: jdbc:mysql://localhost:3306/{database}?serverTimezone=Asia/Shanghai&useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull&useSSL=false&allowPublicKeyRetrieval=true
    username: root
    # Placeholder credential; supply the real password from a secure source.
    password: j

mybatis-plus:
  type-aliases-package: com.bangumi_crawler.pojo
  mapper-locations: "classpath*:/mapper/**/*.xml"
  configuration:
    # Column names are mapped to properties as-is (no snake_case to camelCase conversion).
    map-underscore-to-camel-case: false
    cache-enabled: false
  global-config:
    db-config:
      update-strategy: not_null
|
手动获取 Bean 的工具类（用于在非 Spring 管理的对象中获取 Bean）
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
|
/**
 * Static accessor for beans held by the Spring {@link ApplicationContext}.
 *
 * <p>Spring calls {@link #setApplicationContext(ApplicationContext)} on this component,
 * which stores the context in a static field. The static {@code getBean} methods then
 * look beans up in that stored context, so they can be called from code that is not
 * itself managed by Spring.
 */
@Component
public class BeanUtils implements ApplicationContextAware {

    /** Context captured from Spring; stays {@code null} until the callback has run. */
    protected static ApplicationContext applicationContext;

    /**
     * Stores the application context the first time it is supplied.
     * Later calls leave the already stored context untouched.
     *
     * @param arg0 the application context provided by Spring
     * @throws BeansException declared by the {@link ApplicationContextAware} contract
     */
    @Override
    public void setApplicationContext(ApplicationContext arg0) throws BeansException {
        if (applicationContext == null) {
            applicationContext = arg0;
        }
    }

    /**
     * Looks up a bean by its name.
     *
     * @param name the bean name
     * @return the bean instance registered under {@code name}
     * @throws IllegalStateException if the application context has not been set yet
     */
    public static Object getBean(String name) {
        return requireContext().getBean(name);
    }

    /**
     * Looks up a bean by its type.
     *
     * @param clazz the required bean type
     * @param <T> the bean type
     * @return the bean instance matching {@code clazz}
     * @throws IllegalStateException if the application context has not been set yet
     */
    public static <T> T getBean(Class<T> clazz) {
        return requireContext().getBean(clazz);
    }

    /**
     * Returns the stored context, failing with a descriptive error rather than a bare
     * NullPointerException when a lookup happens before the context was captured.
     */
    private static ApplicationContext requireContext() {
        ApplicationContext context = applicationContext;
        if (context == null) {
            throw new IllegalStateException(
                    "ApplicationContext is not initialized yet; BeanUtils.getBean was called too early");
        }
        return context;
    }
}
|
爬虫任务模板（Pipeline 与 Processor）
1 2 3 4 5 6 7 8 9 10 11 12 13 14
|
/**
 * Template {@link Pipeline} registered as a Spring component.
 *
 * <p>The handler is deliberately left blank; fill in {@link #process(ResultItems, Task)}
 * with whatever should happen to the extracted results.
 */
@Component
public class BangumiPipeline implements Pipeline {

    /**
     * Receives the results handed to this pipeline. Currently does nothing.
     *
     * @param resultItems the results passed to the pipeline
     * @param task the task the results belong to
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        // Template placeholder: no handling implemented yet.
    }
}
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
|
/**
 * Template WebMagic {@link PageProcessor} that is also a Spring component and starts
 * its own crawl on a fixed schedule.
 *
 * <p>{@link #process(Page)} is the page-parsing hook (left empty in this template);
 * {@link #process()} is the scheduled entry point that builds and runs the spider.
 */
@Component
public class Processor implements PageProcessor {

    /**
     * Crawl settings returned by {@link #getSite()}: UTF-8 charset, sleep time 1,
     * timeout 10000, retry sleep time 3000 and 3 retries. Per WebMagic's Site API the
     * time values are in milliseconds.
     */
    private final Site site = Site.me()
            .setCharset("UTF-8")
            .setSleepTime(1)
            .setTimeOut(1000 * 10)
            .setRetrySleepTime(3000)
            .setRetryTimes(3);

    /** Start URL of the crawl; empty in this template and must be filled in. */
    private String url = "";

    /** Pipeline bean injected by Spring and attached to every spider run. */
    @Autowired
    private Pipeline pipeline;

    /**
     * Page-parsing hook. Empty in this template.
     *
     * @param page the downloaded page
     */
    @Override
    public void process(Page page) {
    }

    /**
     * @return the crawl settings used by this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Scheduled entry point: builds a spider and runs it synchronously.
     *
     * <p>The spider is created with {@code this} rather than {@code new Processor()}.
     * An instance created with {@code new} is not managed by Spring, so any
     * {@code @Autowired} field used inside {@link #process(Page)} would be {@code null}.
     */
    @Scheduled(initialDelay = 1000, fixedDelay = 100 * 1000)
    public void process() {
        HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
        Spider.create(this)
                .addUrl(url)
                // In-memory queue with a Bloom-filter duplicate remover sized for 1,000,000 entries.
                .setScheduler(new QueueScheduler()
                        .setDuplicateRemover(new BloomFilterDuplicateRemover(1000000)))
                .thread(10)
                .setDownloader(httpClientDownloader)
                .addPipeline(pipeline)
                // run() blocks until the crawl finishes.
                .run();
    }
}
|
启动类开启定时任务
在启动类（带有 @SpringBootApplication 注解的 Application 类）上添加 @EnableScheduling 注解，否则 @Scheduled 标注的方法不会被调度执行
完结撒花