Skip to content

Commit

Permalink
Fix timeout bug and add a few command-line parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
mic4126 committed Aug 14, 2021
1 parent 22f0e46 commit ecd4214
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 53 deletions.
26 changes: 26 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
// Launches whichever file is currently open, passing a sample -url argument.
{
"type": "java",
"name": "Launch Current File",
"request": "launch",
"mainClass": "${file}",
"args": [
"-url",
"https://www.esjzone.cc/detail/1591450546.html"
]
},
// Launches com.example.App from the "demo" project with an empty argument string.
{
"type": "java",
"name": "Launch App",
"request": "launch",
"mainClass": "com.example.App",
"projectName": "demo",
"args": ""
}
]
}
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@
<artifactId>selenium-chrome-driver</artifactId>
<version>3.141.59</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-cli/commons-cli -->
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.4</version>
</dependency>

</dependencies>
<build>
Expand Down
155 changes: 102 additions & 53 deletions src/main/java/com/example/App.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,83 +2,132 @@

import java.io.File;
import java.io.FileWriter;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.concurrent.TimeUnit;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;

/**
 * Command-line tool that opens an ESJ Zone catalog page in Chrome through Selenium,
 * collects the chapter links found under the "chapterList" element, and saves each
 * chapter's HTML and plain text to ".html" and ".txt" files named after the chapter.
 *
 * NOTE(review): this listing is a rendered commit diff. Lines removed by the commit and
 * lines added by it appear interleaved without +/- markers, so several declarations
 * (url, sc, links, the regex constants) show up twice and the text is not compilable
 * as shown.
 */
public class App {
/**
 * Program entry point.
 *
 * @param args command-line arguments; the options registered below are -d, -t and -url
 */
public static void main(String[] args) {
System.out.print("Paste ESJ link:");
Scanner sc = new Scanner(System.in);
String url = sc.nextLine();

final String catalog_regex = "http(s)?:\\/\\/www\\.esjzone\\.cc\\/detail\\/\\d+\\.html";
final String novel_regex = "https:\\/\\/www\\.esjzone\\.cc\\/forum\\/\\d+\\/\\d+\\.html";
ArrayList<String> links = new ArrayList<>();

if (url.matches(catalog_regex)) {
WebDriver driver = new ChromeDriver();
driver.get(url);
List<WebElement> els = driver.findElements(By.xpath("//*[@id=\"chapterList\"]/a"));
for (WebElement webElement : els) {
String href = webElement.getAttribute("href");
if (href.matches(novel_regex)) {
links.add(href);
}
// create Options object
Options options = new Options();

// register the -d flag, the -t wait option and the -url option
// NOTE(review): "t" is built with the two-argument Option constructor and without
// hasArg(); if that constructor declares an option that takes no argument (as the
// Commons CLI 1.4 docs state), getOptionValue("t", "1000") below would never see a
// user-supplied value - confirm.
options.addOption("d", false, "create directory by novel name");
options.addOption(new Option("t", "milliseconds wait before loading next page"));
options.addOption(Option.builder("url").hasArg().required(false).build());
// delay in milliseconds passed to Thread.sleep(wait) after each chapter
int wait = 1000;
CommandLineParser parser = new DefaultParser();
try {
CommandLine cmd = parser.parse(options, args);
try {
wait = Integer.parseInt(cmd.getOptionValue("t", "1000"));
} catch (NumberFormatException e) {
// non-numeric -t value: report it and keep the 1000 ms default assigned above
System.err.println("please input decimal number");
}
// take the URL from -url when present, otherwise prompt for it on standard input
String url;
Scanner sc;
if (!cmd.hasOption("url")) {

System.out.print("Paste ESJ link:");
sc = new Scanner(System.in);
url = sc.nextLine();
} else {
url = cmd.getOptionValue("url");
}

for (String link : links) {
driver.get(link);
String ch_name = driver.findElement(By.xpath("/html/body/div[3]/section/div/div[1]/h2")).getText();
System.out.println("Now backup: "+ch_name);
String content = driver.findElement(By.xpath("//*[contains(@class,'forum-content')]"))
.getAttribute("innerHTML");
String text = driver.findElement(By.xpath("//*[contains(@class,'forum-content')]")).getText();
File novel_html = new File(ch_name + ".html");
File novel_txt = new File(ch_name + ".txt");
try {
if (novel_html.createNewFile()) {
FileWriter fw = new FileWriter(novel_html);
fw.write(content);
fw.close();
// catalog page pattern: http or https, /detail/<digits>.html
final String catalog_regex = "http(s)?:\\/\\/www\\.esjzone\\.cc\\/detail\\/\\d+\\.html";
// chapter page pattern: https only, /forum/<digits>/<digits>.html
final String novel_regex = "https:\\/\\/www\\.esjzone\\.cc\\/forum\\/\\d+\\/\\d+\\.html";
ArrayList<String> links = new ArrayList<>();

// default output location: the current working directory
String save_folder = System.getProperty("user.dir");

if (url.matches(catalog_regex)) {
WebDriver driver = new ChromeDriver();
// implicit wait of up to 5 minutes for element lookups
driver.manage().timeouts().implicitlyWait(5, TimeUnit.MINUTES);
driver.get(url);
// keep only the chapter-list anchors whose href matches the chapter page pattern
List<WebElement> els = driver.findElements(By.xpath("//*[@id=\"chapterList\"]/a"));
for (WebElement webElement : els) {
String href = webElement.getAttribute("href");
if (href.matches(novel_regex)) {
links.add(href);
}

} catch (Exception e) {
// TODO: handle exception
} finally {
}

// with -d, create a directory named after the h2 text read from the catalog page
// and save the chapter files into it
if (cmd.hasOption("d")) {
String name = driver.findElement(By.xpath("/html/body/div[3]/section/div/div[1]/div[1]/div[2]/h2"))
.getText();
File folder = new File(name);
// NOTE(review): File.mkdir() returns false when the directory already exists, so a
// re-run with -d returns here without a message and before driver.quit() is
// reached - confirm this is intended.
if (!folder.mkdir() || !folder.exists()) {
return;
}
save_folder = folder.getAbsolutePath();
}

try {
if (novel_txt.createNewFile()) {
FileWriter fw = new FileWriter(novel_txt);
fw.write(text);
fw.close();
// visit every collected chapter link and write its HTML and text to save_folder
for (String link : links) {
driver.get(link);
String ch_name = driver.findElement(By.xpath("/html/body/div[3]/section/div/div[1]/h2")).getText();
System.out.println("Now backup: " + ch_name);
String content = driver.findElement(By.xpath("//*[contains(@class,'forum-content')]"))
.getAttribute("innerHTML");
String text = driver.findElement(By.xpath("//*[contains(@class,'forum-content')]")).getText();
File novel_html = new File(save_folder + File.separator + ch_name + ".html");
File novel_txt = new File(save_folder + File.separator + ch_name + ".txt");
try {
// createNewFile() returns true only when it actually creates the file, so an
// already existing chapter file is left untouched
if (novel_html.createNewFile()) {
FileWriter fw = new FileWriter(novel_html);
fw.write(content);
fw.close();
}

// NOTE(review): any failure while writing the .html file is silently ignored here
} catch (Exception e) {

} finally {

}

} catch (Exception e) {
// TODO: handle exception
} finally {

}
try {
if (novel_txt.createNewFile()) {
FileWriter fw = new FileWriter(novel_txt);
fw.write(text);
fw.close();
}

try {
Thread.sleep(10000);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (Exception e) {
// TODO: handle exception
} finally {

}

// wait the configured number of milliseconds before loading the next chapter
try {
Thread.sleep(wait);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
driver.quit();
} else {
System.err.println("Incorrct url.");
}

} else {
System.err.println("Incorrct url.");
} catch (ParseException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
return;
}

sc.close();
}
}

0 comments on commit ecd4214

Please sign in to comment.