Counting different kinds of line terminator characters in files
Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
2
down vote
favorite
Can someone take a look if this code piece is ok. I'm most interested whether the goroutines are used correctly, but also general Go best practices.
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
)
type Endings struct
crlf, lf uint
func check(e error)
if e != nil
panic(e)
func getFileList(searchDir string) string
fileList := string
filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
fileList = append(fileList, path)
return nil
)
return fileList
func getFileEndings(filename string, c chan Endings)
fileContent, err := ioutil.ReadFile(filename)
check(err)
c <- countEndings(string(fileContent))
func countEndings(s string) Endings
crlf := regexp.MustCompile("rn")
lf := regexp.MustCompile("n")
x := len(crlf.FindAllStringIndex(s, -1))
y := len(lf.FindAllStringIndex(s, -1))
return Endingsuint(x), uint(y - x)
func splitIntoChunks(arr string, chunkSize int) string
if chunkSize <= 0
panic("chunk size too small")
if len(arr) <= chunkSize
return stringarr
numChunks := int(len(arr)/chunkSize) + 1
chunks := make(string, numChunks)
for i := 0; i < numChunks; i++
l := i * chunkSize
u := l + chunkSize
if u > len(arr)
u = len(arr)
chunks[i] = arr[l:u]
return chunks
func main()
searchDir := os.Args[1]
c := make(chan Endings)
chunkSize := 1000
fileList := getFileList(searchDir)
count := Endings0, 0
for _, chunk := range splitIntoChunks(fileList, chunkSize)
for _, file := range chunk
go getFileEndings(file, c)
for _ = range chunk
result := <-c
count.crlf += result.crlf
count.lf += result.lf
fmt.Printf("crlf: %d, lf: %dn", count.crlf, count.lf)
regex file go
add a comment |
up vote
2
down vote
favorite
Can someone take a look if this code piece is ok. I'm most interested whether the goroutines are used correctly, but also general Go best practices.
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
)
type Endings struct
crlf, lf uint
func check(e error)
if e != nil
panic(e)
func getFileList(searchDir string) string
fileList := string
filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
fileList = append(fileList, path)
return nil
)
return fileList
func getFileEndings(filename string, c chan Endings)
fileContent, err := ioutil.ReadFile(filename)
check(err)
c <- countEndings(string(fileContent))
func countEndings(s string) Endings
crlf := regexp.MustCompile("rn")
lf := regexp.MustCompile("n")
x := len(crlf.FindAllStringIndex(s, -1))
y := len(lf.FindAllStringIndex(s, -1))
return Endingsuint(x), uint(y - x)
func splitIntoChunks(arr string, chunkSize int) string
if chunkSize <= 0
panic("chunk size too small")
if len(arr) <= chunkSize
return stringarr
numChunks := int(len(arr)/chunkSize) + 1
chunks := make(string, numChunks)
for i := 0; i < numChunks; i++
l := i * chunkSize
u := l + chunkSize
if u > len(arr)
u = len(arr)
chunks[i] = arr[l:u]
return chunks
func main()
searchDir := os.Args[1]
c := make(chan Endings)
chunkSize := 1000
fileList := getFileList(searchDir)
count := Endings0, 0
for _, chunk := range splitIntoChunks(fileList, chunkSize)
for _, file := range chunk
go getFileEndings(file, c)
for _ = range chunk
result := <-c
count.crlf += result.crlf
count.lf += result.lf
fmt.Printf("crlf: %d, lf: %dn", count.crlf, count.lf)
regex file go
add a comment |
up vote
2
down vote
favorite
up vote
2
down vote
favorite
Can someone take a look if this code piece is ok. I'm most interested whether the goroutines are used correctly, but also general Go best practices.
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
)
type Endings struct
crlf, lf uint
func check(e error)
if e != nil
panic(e)
func getFileList(searchDir string) string
fileList := string
filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
fileList = append(fileList, path)
return nil
)
return fileList
func getFileEndings(filename string, c chan Endings)
fileContent, err := ioutil.ReadFile(filename)
check(err)
c <- countEndings(string(fileContent))
func countEndings(s string) Endings
crlf := regexp.MustCompile("rn")
lf := regexp.MustCompile("n")
x := len(crlf.FindAllStringIndex(s, -1))
y := len(lf.FindAllStringIndex(s, -1))
return Endingsuint(x), uint(y - x)
func splitIntoChunks(arr string, chunkSize int) string
if chunkSize <= 0
panic("chunk size too small")
if len(arr) <= chunkSize
return stringarr
numChunks := int(len(arr)/chunkSize) + 1
chunks := make(string, numChunks)
for i := 0; i < numChunks; i++
l := i * chunkSize
u := l + chunkSize
if u > len(arr)
u = len(arr)
chunks[i] = arr[l:u]
return chunks
func main()
searchDir := os.Args[1]
c := make(chan Endings)
chunkSize := 1000
fileList := getFileList(searchDir)
count := Endings0, 0
for _, chunk := range splitIntoChunks(fileList, chunkSize)
for _, file := range chunk
go getFileEndings(file, c)
for _ = range chunk
result := <-c
count.crlf += result.crlf
count.lf += result.lf
fmt.Printf("crlf: %d, lf: %dn", count.crlf, count.lf)
regex file go
Can someone take a look if this code piece is ok. I'm most interested whether the goroutines are used correctly, but also general Go best practices.
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
)
type Endings struct
crlf, lf uint
func check(e error)
if e != nil
panic(e)
func getFileList(searchDir string) string
fileList := string
filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
fileList = append(fileList, path)
return nil
)
return fileList
func getFileEndings(filename string, c chan Endings)
fileContent, err := ioutil.ReadFile(filename)
check(err)
c <- countEndings(string(fileContent))
func countEndings(s string) Endings
crlf := regexp.MustCompile("rn")
lf := regexp.MustCompile("n")
x := len(crlf.FindAllStringIndex(s, -1))
y := len(lf.FindAllStringIndex(s, -1))
return Endingsuint(x), uint(y - x)
func splitIntoChunks(arr string, chunkSize int) string
if chunkSize <= 0
panic("chunk size too small")
if len(arr) <= chunkSize
return stringarr
numChunks := int(len(arr)/chunkSize) + 1
chunks := make(string, numChunks)
for i := 0; i < numChunks; i++
l := i * chunkSize
u := l + chunkSize
if u > len(arr)
u = len(arr)
chunks[i] = arr[l:u]
return chunks
func main()
searchDir := os.Args[1]
c := make(chan Endings)
chunkSize := 1000
fileList := getFileList(searchDir)
count := Endings0, 0
for _, chunk := range splitIntoChunks(fileList, chunkSize)
for _, file := range chunk
go getFileEndings(file, c)
for _ = range chunk
result := <-c
count.crlf += result.crlf
count.lf += result.lf
fmt.Printf("crlf: %d, lf: %dn", count.crlf, count.lf)
regex file go
edited Mar 2 at 22:41
asked Mar 1 at 14:57
Chris
20516
20516
add a comment |
add a comment |
1 Answer
1
active
oldest
votes
up vote
1
down vote
accepted
Your program works fine and you are correctly using goroutines, but the code isn't very idiomatic.
Producer/Consumer architecture
What we have here is a typical producer/consumer scenario. The producer is filepath.Walk()
which returns a list of files, and the consumers are goroutines executing regex
on file content.
So instead of the splitIntoChunks()
method, we could just have a chan string
: the producer sends file names to the channel, and the consumers loop over it and parse the file content.
This has two main advantages:
- code is easier to read
- producer and consumer can work in parallel
So the code would look like
results := make(chan Endings, 2) // chan to get the results of each worker
var wg sync.WaitGroup
wg.Add(2)
files := make(chan string, 1000) // channel containing file list
for i := 0; i < 2; i++
go run(files, results, &wg) // start the producers
filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
files <- path // feed the channel
return nil
)
close(files)
wg.Wait()
close(results)
Work on byte rather than on string
This is general advice regarding performance: always prefer working on byte
instead of working on string
to avoid extra allocations.
The regex package has methods to work on string or on byte slice, so instead of
fileContent, err := ioutil.ReadFile(filename)
c <- countEndings(string(fileContent))
...
x := len(crlf.FindAllStringIndex(s, -1))
we can just use
fileContent, err := ioutil.ReadFile(fileName)
...
x := len(crlf.FindAllIndex(fileContent, -1))
Don't panic
The code shouldn't panic on every error. For example, if the user running the program doesn't have permission to read a file in searchDir
, the program should not crash but rather log the error
so avoid method like this:
func check(e error)
if e != nil
panic(e)
instead handle the error locally:
fileContent, err := ioutil.ReadFile(fileName)
if err != nil
fmt.Printf("fail to read file %s: %vn", fileName, err)
Other consideration
It's better to define regex as global var:
var (
crlf = regexp.MustCompile("rn")
lf = regexp.MustCompile("n")
)
It's also better to use the flag
package to parse arguments instead of relying on os.Args
Here is the final version of the code:
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"sync"
)
var (
crlf = regexp.MustCompile("rn")
lf = regexp.MustCompile("n")
)
type Endings struct
crlf, lf uint
func (e *Endings) countEndings(fileName string)
c, err := ioutil.ReadFile(fileName)
if err != nil
fmt.Printf("fail to read file %s: %vn", fileName, err)
x := len(crlf.FindAllIndex(c, -1))
y := len(lf.FindAllIndex(c, -1))
e.crlf += uint(x)
e.lf += uint(y - x)
func run(files <-chan string, results chan<- Endings, wg *sync.WaitGroup)
e := &Endings
for f := range files
e.countEndings(f)
results <- *e
wg.Done()
func main()
searchDir := flag.String("dir", "dir", "directory to search")
nWorker := flag.Int("n", 2, "number of worker")
flag.Parse()
results := make(chan Endings, *nWorker)
var wg sync.WaitGroup
wg.Add(*nWorker)
files := make(chan string, 1000)
for i := 0; i < *nWorker; i++
go run(files, results, &wg)
filepath.Walk(*searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
files <- path
return nil
)
close(files)
wg.Wait()
close(results)
count := &Endings
for e := range results
count.crlf += e.crlf
count.lf += e.lf
fmt.Printf("crlf: %d, lf: %dn", count.crlf, count.lf)
Performance
The new code is slightly faster:
old:
$ time ./test ~/go/src/golang.org
crlf: 6231, lf: 1589462
./test ~/go/src/golang.org 1,78s user 0,23s system 167% cpu 1,201 total
new:
$ time ./test -dir ~/go/src/golang.org -n 6
crlf: 6231, lf: 1589462
./test -dir ~/go/src/golang.org -n 6 1,48s user 0,18s system 181% cpu 0,911 total
add a comment |
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
1
down vote
accepted
Your program works fine and you are correctly using goroutines, but the code isn't very idiomatic.
Producer/Consumer architecture
What we have here is a typical producer/consumer scenario. The producer is filepath.Walk()
which returns a list of files, and the consumers are goroutines executing regex
on file content.
So instead of the splitIntoChunks()
method, we could just have a chan string
: the producer sends file names to the channel, and the consumers loop over it and parse the file content.
This has two main advantages:
- code is easier to read
- producer and consumer can work in parallel
So the code would look like
results := make(chan Endings, 2) // chan to get the results of each worker
var wg sync.WaitGroup
wg.Add(2)
files := make(chan string, 1000) // channel containing file list
for i := 0; i < 2; i++
go run(files, results, &wg) // start the producers
filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
files <- path // feed the channel
return nil
)
close(files)
wg.Wait()
close(results)
Work on byte rather than on string
This is general advice regarding performance: always prefer working on byte
instead of working on string
to avoid extra allocations.
The regex package has methods to work on string or on byte slice, so instead of
fileContent, err := ioutil.ReadFile(filename)
c <- countEndings(string(fileContent))
...
x := len(crlf.FindAllStringIndex(s, -1))
we can just use
fileContent, err := ioutil.ReadFile(fileName)
...
x := len(crlf.FindAllIndex(fileContent, -1))
Don't panic
The code shouldn't panic on every error. For example, if the user running the program doesn't have permission to read a file in searchDir
, the program should not crash but rather log the error
so avoid method like this:
func check(e error)
if e != nil
panic(e)
instead handle the error locally:
fileContent, err := ioutil.ReadFile(fileName)
if err != nil
fmt.Printf("fail to read file %s: %vn", fileName, err)
Other consideration
It's better to define regex as global var:
var (
crlf = regexp.MustCompile("rn")
lf = regexp.MustCompile("n")
)
It's also better to use the flag
package to parse arguments instead of relying on os.Args
Here is the final version of the code:
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"sync"
)
var (
crlf = regexp.MustCompile("rn")
lf = regexp.MustCompile("n")
)
type Endings struct
crlf, lf uint
func (e *Endings) countEndings(fileName string)
c, err := ioutil.ReadFile(fileName)
if err != nil
fmt.Printf("fail to read file %s: %vn", fileName, err)
x := len(crlf.FindAllIndex(c, -1))
y := len(lf.FindAllIndex(c, -1))
e.crlf += uint(x)
e.lf += uint(y - x)
func run(files <-chan string, results chan<- Endings, wg *sync.WaitGroup)
e := &Endings
for f := range files
e.countEndings(f)
results <- *e
wg.Done()
func main()
searchDir := flag.String("dir", "dir", "directory to search")
nWorker := flag.Int("n", 2, "number of worker")
flag.Parse()
results := make(chan Endings, *nWorker)
var wg sync.WaitGroup
wg.Add(*nWorker)
files := make(chan string, 1000)
for i := 0; i < *nWorker; i++
go run(files, results, &wg)
filepath.Walk(*searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
files <- path
return nil
)
close(files)
wg.Wait()
close(results)
count := &Endings
for e := range results
count.crlf += e.crlf
count.lf += e.lf
fmt.Printf("crlf: %d, lf: %dn", count.crlf, count.lf)
Performance
The new code is slightly faster:
old:
$ time ./test ~/go/src/golang.org
crlf: 6231, lf: 1589462
./test ~/go/src/golang.org 1,78s user 0,23s system 167% cpu 1,201 total
new:
$ time ./test -dir ~/go/src/golang.org -n 6
crlf: 6231, lf: 1589462
./test -dir ~/go/src/golang.org -n 6 1,48s user 0,18s system 181% cpu 0,911 total
add a comment |
up vote
1
down vote
accepted
Your program works fine and you are correctly using goroutines, but the code isn't very idiomatic.
Producer/Consumer architecture
What we have here is a typical producer/consumer scenario. The producer is filepath.Walk()
which returns a list of files, and the consumers are goroutines executing regex
on file content.
So instead of the splitIntoChunks()
method, we could just have a chan string
: the producer sends file names to the channel, and the consumers loop over it and parse the file content.
This has two main advantages:
- code is easier to read
- producer and consumer can work in parallel
So the code would look like
results := make(chan Endings, 2) // chan to get the results of each worker
var wg sync.WaitGroup
wg.Add(2)
files := make(chan string, 1000) // channel containing file list
for i := 0; i < 2; i++
go run(files, results, &wg) // start the producers
filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
files <- path // feed the channel
return nil
)
close(files)
wg.Wait()
close(results)
Work on byte rather than on string
This is general advice regarding performance: always prefer working on byte
instead of working on string
to avoid extra allocations.
The regex package has methods to work on string or on byte slice, so instead of
fileContent, err := ioutil.ReadFile(filename)
c <- countEndings(string(fileContent))
...
x := len(crlf.FindAllStringIndex(s, -1))
we can just use
fileContent, err := ioutil.ReadFile(fileName)
...
x := len(crlf.FindAllIndex(fileContent, -1))
Don't panic
The code shouldn't panic on every error. For example, if the user running the program doesn't have permission to read a file in searchDir
, the program should not crash but rather log the error
so avoid method like this:
func check(e error)
if e != nil
panic(e)
instead handle the error locally:
fileContent, err := ioutil.ReadFile(fileName)
if err != nil
fmt.Printf("fail to read file %s: %vn", fileName, err)
Other consideration
It's better to define regex as global var:
var (
crlf = regexp.MustCompile("rn")
lf = regexp.MustCompile("n")
)
It's also better to use the flag
package to parse arguments instead of relying on os.Args
Here is the final version of the code:
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"sync"
)
var (
crlf = regexp.MustCompile("rn")
lf = regexp.MustCompile("n")
)
type Endings struct
crlf, lf uint
func (e *Endings) countEndings(fileName string)
c, err := ioutil.ReadFile(fileName)
if err != nil
fmt.Printf("fail to read file %s: %vn", fileName, err)
x := len(crlf.FindAllIndex(c, -1))
y := len(lf.FindAllIndex(c, -1))
e.crlf += uint(x)
e.lf += uint(y - x)
func run(files <-chan string, results chan<- Endings, wg *sync.WaitGroup)
e := &Endings
for f := range files
e.countEndings(f)
results <- *e
wg.Done()
func main()
searchDir := flag.String("dir", "dir", "directory to search")
nWorker := flag.Int("n", 2, "number of worker")
flag.Parse()
results := make(chan Endings, *nWorker)
var wg sync.WaitGroup
wg.Add(*nWorker)
files := make(chan string, 1000)
for i := 0; i < *nWorker; i++
go run(files, results, &wg)
filepath.Walk(*searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
files <- path
return nil
)
close(files)
wg.Wait()
close(results)
count := &Endings
for e := range results
count.crlf += e.crlf
count.lf += e.lf
fmt.Printf("crlf: %d, lf: %dn", count.crlf, count.lf)
Performance
The new code is slightly faster:
old:
$ time ./test ~/go/src/golang.org
crlf: 6231, lf: 1589462
./test ~/go/src/golang.org 1,78s user 0,23s system 167% cpu 1,201 total
new:
$ time ./test -dir ~/go/src/golang.org -n 6
crlf: 6231, lf: 1589462
./test -dir ~/go/src/golang.org -n 6 1,48s user 0,18s system 181% cpu 0,911 total
add a comment |
up vote
1
down vote
accepted
up vote
1
down vote
accepted
Your program works fine and you are correctly using goroutines, but the code isn't very idiomatic.
Producer/Consumer architecture
What we have here is a typical producer/consumer scenario. The producer is filepath.Walk()
which returns a list of files, and the consumers are goroutines executing regex
on file content.
So instead of the splitIntoChunks()
method, we could just have a chan string
: the producer sends file names to the channel, and the consumers loop over it and parse the file content.
This has two main advantages:
- code is easier to read
- producer and consumer can work in parallel
So the code would look like
results := make(chan Endings, 2) // chan to get the results of each worker
var wg sync.WaitGroup
wg.Add(2)
files := make(chan string, 1000) // channel containing file list
for i := 0; i < 2; i++
go run(files, results, &wg) // start the producers
filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
files <- path // feed the channel
return nil
)
close(files)
wg.Wait()
close(results)
Work on byte rather than on string
This is general advice regarding performance: always prefer working on byte
instead of working on string
to avoid extra allocations.
The regex package has methods to work on string or on byte slice, so instead of
fileContent, err := ioutil.ReadFile(filename)
c <- countEndings(string(fileContent))
...
x := len(crlf.FindAllStringIndex(s, -1))
we can just use
fileContent, err := ioutil.ReadFile(fileName)
...
x := len(crlf.FindAllIndex(fileContent, -1))
Don't panic
The code shouldn't panic on every error. For example, if the user running the program doesn't have permission to read a file in searchDir
, the program should not crash but rather log the error
so avoid method like this:
func check(e error)
if e != nil
panic(e)
instead handle the error locally:
fileContent, err := ioutil.ReadFile(fileName)
if err != nil
fmt.Printf("fail to read file %s: %vn", fileName, err)
Other consideration
It's better to define regex as global var:
var (
crlf = regexp.MustCompile("rn")
lf = regexp.MustCompile("n")
)
It's also better to use the flag
package to parse arguments instead of relying on os.Args
Here is the final version of the code:
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"sync"
)
var (
crlf = regexp.MustCompile("rn")
lf = regexp.MustCompile("n")
)
type Endings struct
crlf, lf uint
func (e *Endings) countEndings(fileName string)
c, err := ioutil.ReadFile(fileName)
if err != nil
fmt.Printf("fail to read file %s: %vn", fileName, err)
x := len(crlf.FindAllIndex(c, -1))
y := len(lf.FindAllIndex(c, -1))
e.crlf += uint(x)
e.lf += uint(y - x)
func run(files <-chan string, results chan<- Endings, wg *sync.WaitGroup)
e := &Endings
for f := range files
e.countEndings(f)
results <- *e
wg.Done()
func main()
searchDir := flag.String("dir", "dir", "directory to search")
nWorker := flag.Int("n", 2, "number of worker")
flag.Parse()
results := make(chan Endings, *nWorker)
var wg sync.WaitGroup
wg.Add(*nWorker)
files := make(chan string, 1000)
for i := 0; i < *nWorker; i++
go run(files, results, &wg)
filepath.Walk(*searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
files <- path
return nil
)
close(files)
wg.Wait()
close(results)
count := &Endings
for e := range results
count.crlf += e.crlf
count.lf += e.lf
fmt.Printf("crlf: %d, lf: %dn", count.crlf, count.lf)
Performance
The new code is slightly faster:
old:
$ time ./test ~/go/src/golang.org
crlf: 6231, lf: 1589462
./test ~/go/src/golang.org 1,78s user 0,23s system 167% cpu 1,201 total
new:
$ time ./test -dir ~/go/src/golang.org -n 6
crlf: 6231, lf: 1589462
./test -dir ~/go/src/golang.org -n 6 1,48s user 0,18s system 181% cpu 0,911 total
Your program works fine and you are correctly using goroutines, but the code isn't very idiomatic.
Producer/Consumer architecture
What we have here is a typical producer/consumer scenario. The producer is filepath.Walk()
which returns a list of files, and the consumers are goroutines executing regex
on file content.
So instead of the splitIntoChunks()
method, we could just have a chan string
: the producer sends file names to the channel, and the consumers loop over it and parse the file content.
This has two main advantages:
- code is easier to read
- producer and consumer can work in parallel
So the code would look like
results := make(chan Endings, 2) // chan to get the results of each worker
var wg sync.WaitGroup
wg.Add(2)
files := make(chan string, 1000) // channel containing file list
for i := 0; i < 2; i++
go run(files, results, &wg) // start the producers
filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
files <- path // feed the channel
return nil
)
close(files)
wg.Wait()
close(results)
Work on byte rather than on string
This is general advice regarding performance: always prefer working on byte
instead of working on string
to avoid extra allocations.
The regex package has methods to work on string or on byte slice, so instead of
fileContent, err := ioutil.ReadFile(filename)
c <- countEndings(string(fileContent))
...
x := len(crlf.FindAllStringIndex(s, -1))
we can just use
fileContent, err := ioutil.ReadFile(fileName)
...
x := len(crlf.FindAllIndex(fileContent, -1))
Don't panic
The code shouldn't panic on every error. For example, if the user running the program doesn't have permission to read a file in searchDir
, the program should not crash but rather log the error
so avoid method like this:
func check(e error)
if e != nil
panic(e)
instead handle the error locally:
fileContent, err := ioutil.ReadFile(fileName)
if err != nil
fmt.Printf("fail to read file %s: %vn", fileName, err)
Other consideration
It's better to define regex as global var:
var (
crlf = regexp.MustCompile("rn")
lf = regexp.MustCompile("n")
)
It's also better to use the flag
package to parse arguments instead of relying on os.Args
Here is the final version of the code:
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"sync"
)
var (
crlf = regexp.MustCompile("rn")
lf = regexp.MustCompile("n")
)
type Endings struct
crlf, lf uint
func (e *Endings) countEndings(fileName string)
c, err := ioutil.ReadFile(fileName)
if err != nil
fmt.Printf("fail to read file %s: %vn", fileName, err)
x := len(crlf.FindAllIndex(c, -1))
y := len(lf.FindAllIndex(c, -1))
e.crlf += uint(x)
e.lf += uint(y - x)
func run(files <-chan string, results chan<- Endings, wg *sync.WaitGroup)
e := &Endings
for f := range files
e.countEndings(f)
results <- *e
wg.Done()
func main()
searchDir := flag.String("dir", "dir", "directory to search")
nWorker := flag.Int("n", 2, "number of worker")
flag.Parse()
results := make(chan Endings, *nWorker)
var wg sync.WaitGroup
wg.Add(*nWorker)
files := make(chan string, 1000)
for i := 0; i < *nWorker; i++
go run(files, results, &wg)
filepath.Walk(*searchDir, func(path string, f os.FileInfo, err error) error
if !f.IsDir()
files <- path
return nil
)
close(files)
wg.Wait()
close(results)
count := &Endings
for e := range results
count.crlf += e.crlf
count.lf += e.lf
fmt.Printf("crlf: %d, lf: %dn", count.crlf, count.lf)
Performance
The new code is slightly faster:
old:
$ time ./test ~/go/src/golang.org
crlf: 6231, lf: 1589462
./test ~/go/src/golang.org 1,78s user 0,23s system 167% cpu 1,201 total
new:
$ time ./test -dir ~/go/src/golang.org -n 6
crlf: 6231, lf: 1589462
./test -dir ~/go/src/golang.org -n 6 1,48s user 0,18s system 181% cpu 0,911 total
edited Mar 21 at 6:47
answered Mar 20 at 10:59
felix
67829
67829
add a comment |
add a comment |
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f188616%2fcounting-different-kinds-of-line-terminator-characters-in-files%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password