-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.go
136 lines (135 loc) · 3.85 KB
/
util.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"log"
"net/http"
"strings"
"sync"
)
// getHTMLPage downloads url and parses the response body into a goquery
// document.
//
// It returns nil when the request fails, when the server answers with any
// status other than 200, or when the body cannot be parsed. Callers must
// check the result for nil before using it.
func getHTMLPage(url string) *goquery.Document {
	res, err := http.Get(url)
	if err != nil {
		println("Error")
		return nil
	}
	// Release the body on every path, including the early returns below;
	// without this each request leaks its underlying connection.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		println("Error res status")
		return nil
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil
	}
	return doc
}
// getAllFileInformation scrapes every file linked from one listing page of a
// category.
//
// For each anchor matched by ".lib-grid .top a" in doc it starts a goroutine
// running getFileInformation on the absolute file URL, passing results,
// category and errors through unchanged, and then blocks until all of those
// goroutines have finished.
//
// A nil doc (which getHTMLPage returns on failure) is treated as an empty
// page, and anchors without an href are skipped.
func (files *Files) getAllFileInformation(doc *goquery.Document, results chan<- File, category string, errors chan<- error) {
	if doc == nil {
		// Calling Find on a nil document would panic; there is nothing to scrape.
		return
	}

	var wg sync.WaitGroup
	// Collect the list of files shown for one category on one page.
	doc.Find(".lib-grid .top a").Each(func(_ int, s *goquery.Selection) {
		fileLink, ok := s.Attr("href")
		if !ok || fileLink == "" {
			// Without an href the URL would be the bare site root, not a file page.
			return
		}
		wg.Add(1)
		go files.getFileInformation("https://hocmai.vn"+fileLink, results, category, &wg, errors)
	})
	wg.Wait()
}
//func main() {
// //results :=make(chan Category,100)
// errors :=make(chan error,1000)
// files := newFiles()
// files.getAllFileInformation(getHTMLPage("https://hocmai.vn/kho-tai-lieu/list.php?category=204"),"ád",errors)
//}
// getNextUrl returns the absolute URL of the next listing page, or "" when
// there is none.
//
// The link is read from the href of the element matching
// ".paging a:last-child" and appended to the kho-tai-lieu base URL. A nil doc
// (which getHTMLPage returns on failure) also yields "".
func (files *Files) getNextUrl(doc *goquery.Document) string {
	if doc == nil {
		// Calling Find on a nil document would panic.
		return ""
	}

	// The "exists" flag is not needed: a missing href and an empty href are
	// both reported as the empty string and handled identically below.
	nextPageLink, _ := doc.Find(".paging a:last-child").Attr("href")
	if nextPageLink == "" {
		println("End of Category")
		return ""
	}
	return "https://hocmai.vn/kho-tai-lieu/" + nextPageLink
}
//func main() {
// files := newFiles()
// fmt.Println(files.getNextUrl(getHTMLPage("https://hocmai.vn/kho-tai-lieu/list.php?type=category&category=301&page=0")))
//}
// filesMu serializes updates to a shared Files value. getAllFileInformation
// starts one getFileInformation goroutine per link on the same receiver, so
// the counter and slice updates below would otherwise race.
var filesMu sync.Mutex

// metaFieldValue returns the trimmed text between the first and second ":"
// of a "label: value" string, or "" when text contains no ":".
func metaFieldValue(text string) string {
	parts := strings.Split(text, ":")
	if len(parts) < 2 {
		return ""
	}
	return strings.TrimSpace(parts[1])
}

// getFileInformation scrapes the detail page at fileLink and publishes the
// result.
//
// It reads the title and the first five ".lib-meta" list items (page count,
// views, downloads, author, date). The first four meta values are reduced to
// the text after their "label:" prefix; the date text is used as scraped.
// Every empty field is reported on errors. Only when all fields are present
// is a File built, appended to files.List (also bumping files.TotalPages and
// setting files.CategoryName) and sent on results.
//
// The File ID is the part of fileLink after the first "?"; a link without a
// query string is reported on errors as "ID is Empty" instead of panicking.
// wg.Done is always called on return.
func (files *Files) getFileInformation(fileLink string, results chan<- File, category string, wg *sync.WaitGroup, errors chan<- error) {
	defer wg.Done()

	// Read the information of a single file.
	res := getHTMLPage(fileLink)
	if res == nil {
		errors <- fmt.Errorf("Page not found")
		return
	}

	title := res.Find(".lib-section .head h4").Text()
	// A missing element yields "", which metaFieldValue maps to "" rather
	// than indexing past the end of the split result.
	numberPage := metaFieldValue(res.Find(".lib-meta ul li:first-child span").Text())
	numberViewed := metaFieldValue(res.Find(".lib-meta ul li:nth-child(2) span").Text())
	numberDownloaded := metaFieldValue(res.Find(".lib-meta ul li:nth-child(3) span").Text())
	author := metaFieldValue(res.Find(".lib-meta ul li:nth-child(4) span").Text())
	date := res.Find(".lib-meta ul li:nth-child(5) span").Text()

	// Report every missing field, not just the first one.
	required := []struct{ name, value string }{
		{"Author", author},
		{"Title", title},
		{"numberPage", numberPage},
		{"numberViewed", numberViewed},
		{"numberDownloaded", numberDownloaded},
		{"date", date},
	}
	complete := true
	for _, field := range required {
		if field.value == "" {
			errors <- fmt.Errorf("%s is Empty", field.name)
			complete = false
		}
	}
	if !complete {
		return
	}

	linkParts := strings.Split(fileLink, "?")
	if len(linkParts) < 2 {
		errors <- fmt.Errorf("ID is Empty")
		return
	}

	file := File{
		CategoryName:     strings.TrimSpace(category),
		ID:               linkParts[1],
		Title:            title,
		numberPage:       numberPage,
		numberViewed:     numberViewed,
		numberDownloaded: numberDownloaded,
		Author:           author,
		Date:             date,
	}

	filesMu.Lock()
	files.TotalPages++
	files.CategoryName = category
	files.List = append(files.List, file)
	filesMu.Unlock()

	// Send outside the lock so a slow receiver cannot stall other workers.
	results <- file
}
// checkError reports a non-nil err twice: once through the built-in print
// with an "Error: " prefix, and once through the standard logger. A nil err
// is ignored. It never stops the program.
func checkError(err error) {
	if err == nil {
		return
	}
	message := "Error: " + err.Error()
	print(message)
	log.Println(err)
}
//func main() {
// var wg sync.WaitGroup
// var category string
// category = "ss"
// files := newFiles()
// wg.Add(1)
// files.getFileInformation("https://hocmai.vn/kho-tai-lieu/read.php?id=14595",category,&wg)
// files.getNextUrl(getHTMLPage("https://hocmai.vn/kho-tai-lieu/list.php?category=244"))
//}