csv 3.3.0 → 3.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +45 -0
- data/doc/csv/recipes/filtering.rdoc +85 -17
- data/doc/csv/recipes/generating.rdoc +1 -1
- data/doc/csv/recipes/parsing.rdoc +12 -3
- data/lib/csv/core_ext/array.rb +1 -1
- data/lib/csv/core_ext/string.rb +1 -1
- data/lib/csv/fields_converter.rb +8 -1
- data/lib/csv/parser.rb +16 -21
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +1 -2
- data/lib/csv.rb +152 -17
- metadata +7 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 810bc9a744fd02ede6fd069e5fa11c42307533f070f096b0614466a62c37bf1d
|
4
|
+
data.tar.gz: a69e61cc7f0e81f00a8e1d22a33bd90e43e7859444c866393dda01bb61426630
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 69d5f90a13cb9f4411a2d4b127211cabd060889f3f6f55de72061cda4994b62160ca264a6e7601639f056d1c0a148120a24c4b1385e10e66e2425adb01d7be73
|
7
|
+
data.tar.gz: b0d2bf9c262db845b5a8f7af157ef127d5856585af3bb0ea1dd420b2aee114ad6cdf870f246f6f6a80ebaac30634cab3745fe37669799e559d0dfb89e2d7b3ed
|
data/NEWS.md
CHANGED
@@ -1,5 +1,50 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.3.1 - 2024-12-15
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* `CSV.open`: Changed to detect BOM by default. Note that this isn't
|
8
|
+
enabled on Windows because Ruby may have a bug. See also:
|
9
|
+
https://bugs.ruby-lang.org/issues/20526
|
10
|
+
* GH-301
|
11
|
+
* Reported by Junichi Ito
|
12
|
+
|
13
|
+
* Improved performance.
|
14
|
+
* GH-311
|
15
|
+
* GH-312
|
16
|
+
* Patch by Vladimir Kochnev
|
17
|
+
|
18
|
+
* `CSV.open`: Added support for `StringIO` as an input.
|
19
|
+
* GH-300
|
20
|
+
* GH-302
|
21
|
+
* Patch by Marcelo
|
22
|
+
|
23
|
+
* Added a built-in time converter. You can use it by `converters:
|
24
|
+
:time`.
|
25
|
+
* GH-313
|
26
|
+
* Patch by Bart de Water
|
27
|
+
|
28
|
+
* Added `CSV::TSV` for tab-separated values.
|
29
|
+
* GH-272
|
30
|
+
* GH-319
|
31
|
+
* Reported by kojix2
|
32
|
+
* Patch by Jas
|
33
|
+
|
34
|
+
### Thanks
|
35
|
+
|
36
|
+
* Junichi Ito
|
37
|
+
|
38
|
+
* Vladimir Kochnev
|
39
|
+
|
40
|
+
* Marcelo
|
41
|
+
|
42
|
+
* Bart de Water
|
43
|
+
|
44
|
+
* kojix2
|
45
|
+
|
46
|
+
* Jas
|
47
|
+
|
3
48
|
## 3.3.0 - 2024-03-22
|
4
49
|
|
5
50
|
### Fixes
|
@@ -11,16 +11,20 @@ All code snippets on this page assume that the following has been executed:
|
|
11
11
|
|
12
12
|
- {Source and Output Formats}[#label-Source+and+Output+Formats]
|
13
13
|
- {Filtering String to String}[#label-Filtering+String+to+String]
|
14
|
-
- {Recipe: Filter String to String
|
14
|
+
- {Recipe: Filter String to String parsing Headers}[#label-Recipe-3A+Filter+String+to+String+parsing+Headers]
|
15
|
+
- {Recipe: Filter String to String parsing and writing Headers}[#label-Recipe-3A+Filter+String+to+String+parsing+and+writing+Headers]
|
15
16
|
- {Recipe: Filter String to String Without Headers}[#label-Recipe-3A+Filter+String+to+String+Without+Headers]
|
16
17
|
- {Filtering String to IO Stream}[#label-Filtering+String+to+IO+Stream]
|
17
|
-
- {Recipe: Filter String to IO Stream
|
18
|
+
- {Recipe: Filter String to IO Stream parsing Headers}[#label-Recipe-3A+Filter+String+to+IO+Stream+parsing+Headers]
|
19
|
+
- {Recipe: Filter String to IO Stream parsing and writing Headers}[#label-Recipe-3A+Filter+String+to+IO+Stream+parsing+and+writing+Headers]
|
18
20
|
- {Recipe: Filter String to IO Stream Without Headers}[#label-Recipe-3A+Filter+String+to+IO+Stream+Without+Headers]
|
19
21
|
- {Filtering IO Stream to String}[#label-Filtering+IO+Stream+to+String]
|
20
|
-
- {Recipe: Filter IO Stream to String
|
22
|
+
- {Recipe: Filter IO Stream to String parsing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+String+parsing+Headers]
|
23
|
+
- {Recipe: Filter IO Stream to String parsing and writing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+String+parsing+and+writing+Headers]
|
21
24
|
- {Recipe: Filter IO Stream to String Without Headers}[#label-Recipe-3A+Filter+IO+Stream+to+String+Without+Headers]
|
22
25
|
- {Filtering IO Stream to IO Stream}[#label-Filtering+IO+Stream+to+IO+Stream]
|
23
|
-
- {Recipe: Filter IO Stream to IO Stream
|
26
|
+
- {Recipe: Filter IO Stream to IO Stream parsing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+IO+Stream+parsing+Headers]
|
27
|
+
- {Recipe: Filter IO Stream to IO Stream parsing and writing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+IO+Stream+parsing+and+writing+Headers]
|
24
28
|
- {Recipe: Filter IO Stream to IO Stream Without Headers}[#label-Recipe-3A+Filter+IO+Stream+to+IO+Stream+Without+Headers]
|
25
29
|
|
26
30
|
=== Source and Output Formats
|
@@ -33,14 +37,27 @@ The input and output \CSV data may be any mixture of \Strings and \IO streams.
|
|
33
37
|
|
34
38
|
You can filter one \String to another, with or without headers.
|
35
39
|
|
36
|
-
===== Recipe: Filter \String to \String
|
40
|
+
===== Recipe: Filter \String to \String parsing Headers
|
37
41
|
|
38
42
|
Use class method CSV.filter with option +headers+ to filter a \String to another \String:
|
39
43
|
in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
40
44
|
out_string = ''
|
41
45
|
CSV.filter(in_string, out_string, headers: true) do |row|
|
42
|
-
row[
|
43
|
-
row[
|
46
|
+
row['Name'] = row['Name'].upcase
|
47
|
+
row['Value'] *= 4
|
48
|
+
end
|
49
|
+
out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n"
|
50
|
+
|
51
|
+
===== Recipe: Filter \String to \String parsing and writing Headers
|
52
|
+
|
53
|
+
Use class method CSV.filter with options +headers+ and +out_write_headers+ to filter a \String to another \String, including the header row:
|
54
|
+
in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
55
|
+
out_string = ''
|
56
|
+
CSV.filter(in_string, out_string, headers: true, out_write_headers: true) do |row|
|
57
|
+
unless row.is_a?(Array)
|
58
|
+
row['Name'] = row['Name'].upcase
|
59
|
+
row['Value'] *= 4
|
60
|
+
end
|
44
61
|
end
|
45
62
|
out_string # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n"
|
46
63
|
|
@@ -59,15 +76,30 @@ Use class method CSV.filter without option +headers+ to filter a \String to anot
|
|
59
76
|
|
60
77
|
You can filter a \String to an \IO stream, with or without headers.
|
61
78
|
|
62
|
-
===== Recipe: Filter \String to \IO Stream
|
79
|
+
===== Recipe: Filter \String to \IO Stream parsing Headers
|
63
80
|
|
64
81
|
Use class method CSV.filter with option +headers+ to filter a \String to an \IO stream:
|
65
82
|
in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
66
83
|
path = 't.csv'
|
67
84
|
File.open(path, 'w') do |out_io|
|
68
85
|
CSV.filter(in_string, out_io, headers: true) do |row|
|
69
|
-
row[
|
70
|
-
row[
|
86
|
+
row['Name'] = row['Name'].upcase
|
87
|
+
row['Value'] *= 4
|
88
|
+
end
|
89
|
+
end
|
90
|
+
p File.read(path) # => "FOO,0000\nBAR,1111\nBAZ,2222\n"
|
91
|
+
|
92
|
+
===== Recipe: Filter \String to \IO Stream parsing and writing Headers
|
93
|
+
|
94
|
+
Use class method CSV.filter with options +headers+ and +out_write_headers+ to filter a \String to an \IO stream, including the header row:
|
95
|
+
in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
96
|
+
path = 't.csv'
|
97
|
+
File.open(path, 'w') do |out_io|
|
98
|
+
CSV.filter(in_string, out_io, headers: true, out_write_headers: true ) do |row|
|
99
|
+
unless row.is_a?(Array)
|
100
|
+
row['Name'] = row['Name'].upcase
|
101
|
+
row['Value'] *= 4
|
102
|
+
end
|
71
103
|
end
|
72
104
|
end
|
73
105
|
p File.read(path) # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n"
|
@@ -89,17 +121,34 @@ Use class method CSV.filter without option +headers+ to filter a \String to an \
|
|
89
121
|
|
90
122
|
You can filter an \IO stream to a \String, with or without headers.
|
91
123
|
|
92
|
-
===== Recipe: Filter \IO Stream to \String
|
124
|
+
===== Recipe: Filter \IO Stream to \String parsing Headers
|
93
125
|
|
94
126
|
Use class method CSV.filter with option +headers+ to filter an \IO stream to a \String:
|
95
127
|
in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
96
128
|
path = 't.csv'
|
97
129
|
File.write(path, in_string)
|
98
130
|
out_string = ''
|
99
|
-
File.open(path
|
131
|
+
File.open(path) do |in_io|
|
100
132
|
CSV.filter(in_io, out_string, headers: true) do |row|
|
101
|
-
row[
|
102
|
-
row[
|
133
|
+
row['Name'] = row['Name'].upcase
|
134
|
+
row['Value'] *= 4
|
135
|
+
end
|
136
|
+
end
|
137
|
+
out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n"
|
138
|
+
|
139
|
+
===== Recipe: Filter \IO Stream to \String parsing and writing Headers
|
140
|
+
|
141
|
+
Use class method CSV.filter with options +headers+ and +out_write_headers+ to filter an \IO stream to a \String, including the header row:
|
142
|
+
in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
143
|
+
path = 't.csv'
|
144
|
+
File.write(path, in_string)
|
145
|
+
out_string = ''
|
146
|
+
File.open(path) do |in_io|
|
147
|
+
CSV.filter(in_io, out_string, headers: true, out_write_headers: true) do |row|
|
148
|
+
unless row.is_a?(Array)
|
149
|
+
row['Name'] = row['Name'].upcase
|
150
|
+
row['Value'] *= 4
|
151
|
+
end
|
103
152
|
end
|
104
153
|
end
|
105
154
|
out_string # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n"
|
@@ -123,7 +172,7 @@ Use class method CSV.filter without option +headers+ to filter an \IO stream to
|
|
123
172
|
|
124
173
|
You can filter an \IO stream to another \IO stream, with or without headers.
|
125
174
|
|
126
|
-
===== Recipe: Filter \IO Stream to \IO Stream
|
175
|
+
===== Recipe: Filter \IO Stream to \IO Stream parsing Headers
|
127
176
|
|
128
177
|
Use class method CSV.filter with option +headers+ to filter an \IO stream to another \IO stream:
|
129
178
|
in_path = 't.csv'
|
@@ -133,8 +182,27 @@ Use class method CSV.filter with option +headers+ to filter an \IO stream to ano
|
|
133
182
|
File.open(in_path) do |in_io|
|
134
183
|
File.open(out_path, 'w') do |out_io|
|
135
184
|
CSV.filter(in_io, out_io, headers: true) do |row|
|
136
|
-
row[
|
137
|
-
row[
|
185
|
+
row['Name'] = row['Name'].upcase
|
186
|
+
row['Value'] *= 4
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
p File.read(out_path) # => "FOO,0000\nBAR,1111\nBAZ,2222\n"
|
191
|
+
|
192
|
+
===== Recipe: Filter \IO Stream to \IO Stream parsing and writing Headers
|
193
|
+
|
194
|
+
Use class method CSV.filter with options +headers+ and +out_write_headers+ to filter an \IO stream to another \IO stream, including the header row:
|
195
|
+
in_path = 't.csv'
|
196
|
+
in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
197
|
+
File.write(in_path, in_string)
|
198
|
+
out_path = 'u.csv'
|
199
|
+
File.open(in_path) do |in_io|
|
200
|
+
File.open(out_path, 'w') do |out_io|
|
201
|
+
CSV.filter(in_io, out_io, headers: true, out_write_headers: true) do |row|
|
202
|
+
unless row.is_a?(Array)
|
203
|
+
row['Name'] = row['Name'].upcase
|
204
|
+
row['Value'] *= 4
|
205
|
+
end
|
138
206
|
end
|
139
207
|
end
|
140
208
|
end
|
@@ -165,7 +165,7 @@ This example defines and uses two custom write converters to strip and upcase ge
|
|
165
165
|
=== RFC 4180 Compliance
|
166
166
|
|
167
167
|
By default, \CSV generates data that is compliant with
|
168
|
-
{RFC 4180}[https://
|
168
|
+
{RFC 4180}[https://www.rfc-editor.org/rfc/rfc4180]
|
169
169
|
with respect to:
|
170
170
|
- Column separator.
|
171
171
|
- Quote character.
|
@@ -45,6 +45,7 @@ All code snippets on this page assume that the following has been executed:
|
|
45
45
|
- {Recipe: Convert Fields to Numerics}[#label-Recipe-3A+Convert+Fields+to+Numerics]
|
46
46
|
- {Recipe: Convert Fields to Dates}[#label-Recipe-3A+Convert+Fields+to+Dates]
|
47
47
|
- {Recipe: Convert Fields to DateTimes}[#label-Recipe-3A+Convert+Fields+to+DateTimes]
|
48
|
+
- {Recipe: Convert Fields to Times}[#label-Recipe-3A+Convert+Fields+to+Times]
|
48
49
|
- {Recipe: Convert Assorted Fields to Objects}[#label-Recipe-3A+Convert+Assorted+Fields+to+Objects]
|
49
50
|
- {Recipe: Convert Fields to Other Objects}[#label-Recipe-3A+Convert+Fields+to+Other+Objects]
|
50
51
|
- {Recipe: Filter Field Strings}[#label-Recipe-3A+Filter+Field+Strings]
|
@@ -110,7 +111,7 @@ You can parse \CSV data from a \File, with or without headers.
|
|
110
111
|
|
111
112
|
===== Recipe: Parse from \File with Headers
|
112
113
|
|
113
|
-
Use
|
114
|
+
Use class method CSV.read with option +headers+ to read a file all at once:
|
114
115
|
string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
115
116
|
path = 't.csv'
|
116
117
|
File.write(path, string)
|
@@ -191,7 +192,7 @@ Output:
|
|
191
192
|
=== RFC 4180 Compliance
|
192
193
|
|
193
194
|
By default, \CSV parses data that is compliant with
|
194
|
-
{RFC 4180}[https://
|
195
|
+
{RFC 4180}[https://www.rfc-editor.org/rfc/rfc4180]
|
195
196
|
with respect to:
|
196
197
|
- Row separator.
|
197
198
|
- Column separator.
|
@@ -339,6 +340,7 @@ There are built-in field converters for converting to objects of certain classes
|
|
339
340
|
- \Integer
|
340
341
|
- \Date
|
341
342
|
- \DateTime
|
343
|
+
- \Time
|
342
344
|
|
343
345
|
Other built-in field converters include:
|
344
346
|
- +:numeric+: converts to \Integer and \Float.
|
@@ -381,6 +383,13 @@ Convert fields to \DateTime objects using built-in converter +:date_time+:
|
|
381
383
|
parsed = CSV.parse(source, headers: true, converters: :date_time)
|
382
384
|
parsed.map {|row| row['DateTime'].class} # => [DateTime, DateTime, DateTime]
|
383
385
|
|
386
|
+
===== Recipe: Convert Fields to Times
|
387
|
+
|
388
|
+
Convert fields to \Time objects using built-in converter +:time+:
|
389
|
+
source = "Name,Time\nfoo,2001-02-03\nbar,2001-02-04\nbaz,2020-05-07T14:59:00-05:00\n"
|
390
|
+
parsed = CSV.parse(source, headers: true, converters: :time)
|
391
|
+
parsed.map {|row| row['Time'].class} # => [Time, Time, Time]
|
392
|
+
|
384
393
|
===== Recipe: Convert Assorted Fields to Objects
|
385
394
|
|
386
395
|
Convert assorted fields to objects using built-in converter +:all+:
|
@@ -542,4 +551,4 @@ Output:
|
|
542
551
|
#<struct CSV::FieldInfo index=0, line=2, header=nil>
|
543
552
|
#<struct CSV::FieldInfo index=1, line=2, header=nil>
|
544
553
|
#<struct CSV::FieldInfo index=0, line=3, header=nil>
|
545
|
-
#<struct CSV::FieldInfo index=1, line=3, header=nil>
|
554
|
+
#<struct CSV::FieldInfo index=1, line=3, header=nil>
|
data/lib/csv/core_ext/array.rb
CHANGED
data/lib/csv/core_ext/string.rb
CHANGED
data/lib/csv/fields_converter.rb
CHANGED
@@ -4,6 +4,13 @@ class CSV
|
|
4
4
|
# Note: Don't use this class directly. This is an internal class.
|
5
5
|
class FieldsConverter
|
6
6
|
include Enumerable
|
7
|
+
|
8
|
+
NO_QUOTED_FIELDS = [] # :nodoc:
|
9
|
+
def NO_QUOTED_FIELDS.[](_index)
|
10
|
+
false
|
11
|
+
end
|
12
|
+
NO_QUOTED_FIELDS.freeze
|
13
|
+
|
7
14
|
#
|
8
15
|
# A CSV::FieldsConverter is a data structure for storing the
|
9
16
|
# fields converter properties to be passed as a parameter
|
@@ -44,7 +51,7 @@ class CSV
|
|
44
51
|
@converters.empty?
|
45
52
|
end
|
46
53
|
|
47
|
-
def convert(fields, headers, lineno, quoted_fields)
|
54
|
+
def convert(fields, headers, lineno, quoted_fields=NO_QUOTED_FIELDS)
|
48
55
|
return fields unless need_convert?
|
49
56
|
|
50
57
|
fields.collect.with_index do |field, index|
|
data/lib/csv/parser.rb
CHANGED
@@ -409,13 +409,7 @@ class CSV
|
|
409
409
|
|
410
410
|
begin
|
411
411
|
@scanner ||= build_scanner
|
412
|
-
|
413
|
-
parse_no_quote(&block)
|
414
|
-
elsif @need_robust_parsing
|
415
|
-
parse_quotable_robust(&block)
|
416
|
-
else
|
417
|
-
parse_quotable_loose(&block)
|
418
|
-
end
|
412
|
+
__send__(@parse_method, &block)
|
419
413
|
rescue InvalidEncoding
|
420
414
|
if @scanner
|
421
415
|
ignore_broken_line
|
@@ -459,7 +453,6 @@ class CSV
|
|
459
453
|
end
|
460
454
|
|
461
455
|
def prepare_variable
|
462
|
-
@need_robust_parsing = false
|
463
456
|
@encoding = @options[:encoding]
|
464
457
|
liberal_parsing = @options[:liberal_parsing]
|
465
458
|
if liberal_parsing
|
@@ -472,7 +465,6 @@ class CSV
|
|
472
465
|
@double_quote_outside_quote = false
|
473
466
|
@backslash_quote = false
|
474
467
|
end
|
475
|
-
@need_robust_parsing = true
|
476
468
|
else
|
477
469
|
@liberal_parsing = false
|
478
470
|
@backslash_quote = false
|
@@ -554,7 +546,6 @@ class CSV
|
|
554
546
|
@rstrip_value = Regexp.new(@escaped_strip +
|
555
547
|
"+\\z".encode(@encoding))
|
556
548
|
end
|
557
|
-
@need_robust_parsing = true
|
558
549
|
elsif @strip
|
559
550
|
strip_values = " \t\f\v"
|
560
551
|
@escaped_strip = strip_values.encode(@encoding)
|
@@ -562,7 +553,6 @@ class CSV
|
|
562
553
|
@strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
|
563
554
|
@rstrip_value = Regexp.new("[#{strip_values}]+\\z".encode(@encoding))
|
564
555
|
end
|
565
|
-
@need_robust_parsing = true
|
566
556
|
end
|
567
557
|
end
|
568
558
|
|
@@ -767,7 +757,7 @@ class CSV
|
|
767
757
|
case headers
|
768
758
|
when Array
|
769
759
|
@raw_headers = headers
|
770
|
-
quoted_fields =
|
760
|
+
quoted_fields = FieldsConverter::NO_QUOTED_FIELDS
|
771
761
|
@use_headers = true
|
772
762
|
when String
|
773
763
|
@raw_headers, quoted_fields = parse_headers(headers)
|
@@ -808,6 +798,13 @@ class CSV
|
|
808
798
|
|
809
799
|
def prepare_parser
|
810
800
|
@may_quoted = may_quoted?
|
801
|
+
if @quote_character.nil?
|
802
|
+
@parse_method = :parse_no_quote
|
803
|
+
elsif @liberal_parsing or @strip
|
804
|
+
@parse_method = :parse_quotable_robust
|
805
|
+
else
|
806
|
+
@parse_method = :parse_quotable_loose
|
807
|
+
end
|
811
808
|
end
|
812
809
|
|
813
810
|
def may_quoted?
|
@@ -944,11 +941,9 @@ class CSV
|
|
944
941
|
if line.empty?
|
945
942
|
next if @skip_blanks
|
946
943
|
row = []
|
947
|
-
quoted_fields = []
|
948
944
|
else
|
949
945
|
line = strip_value(line)
|
950
946
|
row = line.split(@split_column_separator, -1)
|
951
|
-
quoted_fields = [false] * row.size
|
952
947
|
if @max_field_size
|
953
948
|
row.each do |column|
|
954
949
|
validate_field_size(column)
|
@@ -962,7 +957,7 @@ class CSV
|
|
962
957
|
end
|
963
958
|
end
|
964
959
|
@last_line = original_line
|
965
|
-
emit_row(row,
|
960
|
+
emit_row(row, &block)
|
966
961
|
end
|
967
962
|
end
|
968
963
|
|
@@ -984,10 +979,10 @@ class CSV
|
|
984
979
|
next
|
985
980
|
end
|
986
981
|
row = []
|
987
|
-
quoted_fields =
|
982
|
+
quoted_fields = FieldsConverter::NO_QUOTED_FIELDS
|
988
983
|
elsif line.include?(@cr) or line.include?(@lf)
|
989
984
|
@scanner.keep_back
|
990
|
-
@
|
985
|
+
@parse_method = :parse_quotable_robust
|
991
986
|
return parse_quotable_robust(&block)
|
992
987
|
else
|
993
988
|
row = line.split(@split_column_separator, -1)
|
@@ -1011,7 +1006,7 @@ class CSV
|
|
1011
1006
|
row[i] = column[1..-2]
|
1012
1007
|
else
|
1013
1008
|
@scanner.keep_back
|
1014
|
-
@
|
1009
|
+
@parse_method = :parse_quotable_robust
|
1015
1010
|
return parse_quotable_robust(&block)
|
1016
1011
|
end
|
1017
1012
|
validate_field_size(row[i])
|
@@ -1046,13 +1041,13 @@ class CSV
|
|
1046
1041
|
quoted_fields << @quoted_column_value
|
1047
1042
|
elsif parse_row_end
|
1048
1043
|
if row.empty? and value.nil?
|
1049
|
-
emit_row(
|
1044
|
+
emit_row(row, &block) unless @skip_blanks
|
1050
1045
|
else
|
1051
1046
|
row << value
|
1052
1047
|
quoted_fields << @quoted_column_value
|
1053
1048
|
emit_row(row, quoted_fields, &block)
|
1054
1049
|
row = []
|
1055
|
-
quoted_fields
|
1050
|
+
quoted_fields.clear
|
1056
1051
|
end
|
1057
1052
|
skip_needless_lines
|
1058
1053
|
start_row
|
@@ -1257,7 +1252,7 @@ class CSV
|
|
1257
1252
|
@scanner.keep_start
|
1258
1253
|
end
|
1259
1254
|
|
1260
|
-
def emit_row(row, quoted_fields, &block)
|
1255
|
+
def emit_row(row, quoted_fields=FieldsConverter::NO_QUOTED_FIELDS, &block)
|
1261
1256
|
@lineno += 1
|
1262
1257
|
|
1263
1258
|
raw_row = row
|
data/lib/csv/version.rb
CHANGED
data/lib/csv/writer.rb
CHANGED
data/lib/csv.rb
CHANGED
@@ -91,6 +91,7 @@
|
|
91
91
|
|
92
92
|
require "forwardable"
|
93
93
|
require "date"
|
94
|
+
require "time"
|
94
95
|
require "stringio"
|
95
96
|
|
96
97
|
require_relative "csv/fields_converter"
|
@@ -521,6 +522,7 @@ require_relative "csv/writer"
|
|
521
522
|
# - <tt>:float</tt>: converts each \String-embedded float into a true \Float.
|
522
523
|
# - <tt>:date</tt>: converts each \String-embedded date into a true \Date.
|
523
524
|
# - <tt>:date_time</tt>: converts each \String-embedded date-time into a true \DateTime
|
525
|
+
# - <tt>:time</tt>: converts each \String-embedded time into a true \Time
|
524
526
|
# .
|
525
527
|
# This example creates a converter proc, then stores it:
|
526
528
|
# strip_converter = proc {|field| field.strip }
|
@@ -631,6 +633,7 @@ require_relative "csv/writer"
|
|
631
633
|
# [:numeric, [:integer, :float]]
|
632
634
|
# [:date, Proc]
|
633
635
|
# [:date_time, Proc]
|
636
|
+
# [:time, Proc]
|
634
637
|
# [:all, [:date_time, :numeric]]
|
635
638
|
#
|
636
639
|
# Each of these converters transcodes values to UTF-8 before attempting conversion.
|
@@ -675,6 +678,15 @@ require_relative "csv/writer"
|
|
675
678
|
# csv = CSV.parse_line(data, converters: :date_time)
|
676
679
|
# csv # => [#<DateTime: 2020-05-07T14:59:00-05:00 ((2458977j,71940s,0n),-18000s,2299161j)>, "x"]
|
677
680
|
#
|
681
|
+
# Converter +time+ converts each field that Time::parse accepts:
|
682
|
+
# data = '2020-05-07T14:59:00-05:00,x'
|
683
|
+
# # Without the converter
|
684
|
+
# csv = CSV.parse_line(data)
|
685
|
+
# csv # => ["2020-05-07T14:59:00-05:00", "x"]
|
686
|
+
# # With the converter
|
687
|
+
# csv = CSV.parse_line(data, converters: :time)
|
688
|
+
# csv # => [2020-05-07 14:59:00 -0500, "x"]
|
689
|
+
#
|
678
690
|
# Converter +:all+ converts with both +:date_time+ and +:numeric+.
|
679
691
|
#
|
680
692
|
# As seen above, method #convert adds \converters to a \CSV instance,
|
@@ -871,10 +883,10 @@ class CSV
|
|
871
883
|
# A Regexp used to find and convert some common Date formats.
|
872
884
|
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
|
873
885
|
\d{4}-\d{2}-\d{2} )\z /x
|
874
|
-
# A Regexp used to find and convert some common
|
886
|
+
# A Regexp used to find and convert some common (Date)Time formats.
|
875
887
|
DateTimeMatcher =
|
876
888
|
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
|
877
|
-
# ISO-8601 and RFC-3339 (space instead of T) recognized by
|
889
|
+
# ISO-8601 and RFC-3339 (space instead of T) recognized by (Date)Time.parse
|
878
890
|
\d{4}-\d{2}-\d{2}
|
879
891
|
(?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
880
892
|
)\z /x
|
@@ -912,6 +924,14 @@ class CSV
|
|
912
924
|
f
|
913
925
|
end
|
914
926
|
},
|
927
|
+
time: lambda { |f|
|
928
|
+
begin
|
929
|
+
e = f.encode(ConverterEncoding)
|
930
|
+
e.match?(DateTimeMatcher) ? Time.parse(e) : f
|
931
|
+
rescue # encoding conversion or parse errors
|
932
|
+
f
|
933
|
+
end
|
934
|
+
},
|
915
935
|
all: [:date_time, :numeric],
|
916
936
|
}
|
917
937
|
|
@@ -1198,7 +1218,44 @@ class CSV
|
|
1198
1218
|
# * Argument +in_string_or_io+ must be a \String or an \IO stream.
|
1199
1219
|
# * Argument +out_string_or_io+ must be a \String or an \IO stream.
|
1200
1220
|
# * Arguments <tt>**options</tt> must be keyword options.
|
1201
|
-
#
|
1221
|
+
#
|
1222
|
+
# - Each option defined as an {option for parsing}[#class-CSV-label-Options+for+Parsing]
|
1223
|
+
# is used for parsing the filter input.
|
1224
|
+
# - Each option defined as an {option for generating}[#class-CSV-label-Options+for+Generating]
|
1225
|
+
# is used for generating the filter output.
|
1226
|
+
#
|
1227
|
+
# However, there are three options that may be used for both parsing and generating:
|
1228
|
+
# +col_sep+, +quote_char+, and +row_sep+.
|
1229
|
+
#
|
1230
|
+
# Therefore for method +filter+ (and method +filter+ only),
|
1231
|
+
# there are special options that allow these parsing and generating options
|
1232
|
+
# to be specified separately:
|
1233
|
+
#
|
1234
|
+
# - Options +input_col_sep+ and +output_col_sep+
|
1235
|
+
# (and their aliases +in_col_sep+ and +out_col_sep+)
|
1236
|
+
# specify the column separators for parsing and generating.
|
1237
|
+
# - Options +input_quote_char+ and +output_quote_char+
|
1238
|
+
# (and their aliases +in_quote_char+ and +out_quote_char+)
|
1239
|
+
# specify the quote characters for parsing and generating.
|
1240
|
+
# - Options +input_row_sep+ and +output_row_sep+
|
1241
|
+
# (and their aliases +in_row_sep+ and +out_row_sep+)
|
1242
|
+
# specify the row separators for parsing and generating.
|
1243
|
+
#
|
1244
|
+
# Example options (for column separators):
|
1245
|
+
#
|
1246
|
+
# CSV.filter # Default for both parsing and generating.
|
1247
|
+
# CSV.filter(in_col_sep: ';') # ';' for parsing, default for generating.
|
1248
|
+
# CSV.filter(out_col_sep: '|') # Default for parsing, '|' for generating.
|
1249
|
+
# CSV.filter(in_col_sep: ';', out_col_sep: '|') # ';' for parsing, '|' for generating.
|
1250
|
+
#
|
1251
|
+
# Note that for a special option (e.g., +input_col_sep+)
|
1252
|
+
# and its corresponding "regular" option (e.g., +col_sep+),
|
1253
|
+
# the two are mutually overriding.
|
1254
|
+
#
|
1255
|
+
# Another example (possibly surprising):
|
1256
|
+
#
|
1257
|
+
# CSV.filter(in_col_sep: ';', col_sep: '|') # '|' for both parsing(!) and generating.
|
1258
|
+
#
|
1202
1259
|
def filter(input=nil, output=nil, **options)
|
1203
1260
|
# parse options for input, output, or both
|
1204
1261
|
in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value}
|
@@ -1508,10 +1565,8 @@ class CSV
|
|
1508
1565
|
|
1509
1566
|
#
|
1510
1567
|
# :call-seq:
|
1511
|
-
# open(
|
1512
|
-
# open(
|
1513
|
-
# open(file_path, mode = "rb", **options ) { |csv| ... } -> object
|
1514
|
-
# open(io, mode = "rb", **options ) { |csv| ... } -> object
|
1568
|
+
# open(path_or_io, mode = "rb", **options ) -> new_csv
|
1569
|
+
# open(path_or_io, mode = "rb", **options ) { |csv| ... } -> object
|
1515
1570
|
#
|
1516
1571
|
# possible options elements:
|
1517
1572
|
# keyword form:
|
@@ -1520,7 +1575,7 @@ class CSV
|
|
1520
1575
|
# :undef => :replace # replace undefined conversion
|
1521
1576
|
# :replace => string # replacement string ("?" or "\uFFFD" if not specified)
|
1522
1577
|
#
|
1523
|
-
# * Argument +
|
1578
|
+
# * Argument +path_or_io+ must be a file path or an \IO stream.
|
1524
1579
|
# :include: ../doc/csv/arguments/io.rdoc
|
1525
1580
|
# * Argument +mode+, if given, must be a \File mode.
|
1526
1581
|
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
|
@@ -1544,6 +1599,9 @@ class CSV
|
|
1544
1599
|
# path = 't.csv'
|
1545
1600
|
# File.write(path, string)
|
1546
1601
|
#
|
1602
|
+
# string_io = StringIO.new
|
1603
|
+
# string_io << "foo,0\nbar,1\nbaz,2\n"
|
1604
|
+
#
|
1547
1605
|
# ---
|
1548
1606
|
#
|
1549
1607
|
# With no block given, returns a new \CSV object.
|
@@ -1556,6 +1614,9 @@ class CSV
|
|
1556
1614
|
# csv = CSV.open(File.open(path))
|
1557
1615
|
# csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1558
1616
|
#
|
1617
|
+
# Create a \CSV object using a \StringIO:
|
1618
|
+
# csv = CSV.open(string_io)
|
1619
|
+
# csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1559
1620
|
# ---
|
1560
1621
|
#
|
1561
1622
|
# With a block given, calls the block with the created \CSV object;
|
@@ -1573,15 +1634,25 @@ class CSV
|
|
1573
1634
|
# Output:
|
1574
1635
|
# #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1575
1636
|
#
|
1637
|
+
# Using a \StringIO:
|
1638
|
+
# csv = CSV.open(string_io) {|csv| p csv}
|
1639
|
+
# csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1640
|
+
# Output:
|
1641
|
+
# #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1576
1642
|
# ---
|
1577
1643
|
#
|
1578
1644
|
# Raises an exception if the argument is not a \String object or \IO object:
|
1579
1645
|
# # Raises TypeError (no implicit conversion of Symbol into String)
|
1580
1646
|
# CSV.open(:foo)
|
1581
|
-
def open(
|
1647
|
+
def open(filename_or_io, mode="r", **options)
|
1582
1648
|
# wrap a File opened with the remaining +args+ with no newline
|
1583
1649
|
# decorator
|
1584
|
-
file_opts =
|
1650
|
+
file_opts = {}
|
1651
|
+
may_enable_bom_detection_automatically(filename_or_io,
|
1652
|
+
mode,
|
1653
|
+
options,
|
1654
|
+
file_opts)
|
1655
|
+
file_opts.merge!(options)
|
1585
1656
|
unless file_opts.key?(:newline)
|
1586
1657
|
file_opts[:universal_newline] ||= false
|
1587
1658
|
end
|
@@ -1590,14 +1661,19 @@ class CSV
|
|
1590
1661
|
options.delete(:replace)
|
1591
1662
|
options.delete_if {|k, _| /newline\z/.match?(k)}
|
1592
1663
|
|
1593
|
-
|
1594
|
-
f =
|
1595
|
-
|
1596
|
-
|
1597
|
-
|
1598
|
-
|
1599
|
-
|
1664
|
+
if filename_or_io.is_a?(StringIO)
|
1665
|
+
f = create_stringio(filename_or_io.string, mode, **file_opts)
|
1666
|
+
else
|
1667
|
+
begin
|
1668
|
+
f = File.open(filename_or_io, mode, **file_opts)
|
1669
|
+
rescue ArgumentError => e
|
1670
|
+
raise unless /needs binmode/.match?(e.message) and mode == "r"
|
1671
|
+
mode = "rb"
|
1672
|
+
file_opts = {encoding: Encoding.default_external}.merge(file_opts)
|
1673
|
+
retry
|
1674
|
+
end
|
1600
1675
|
end
|
1676
|
+
|
1601
1677
|
begin
|
1602
1678
|
csv = new(f, **options)
|
1603
1679
|
rescue Exception
|
@@ -1729,6 +1805,23 @@ class CSV
|
|
1729
1805
|
# Raises an exception if the argument is not a \String object or \IO object:
|
1730
1806
|
# # Raises NoMethodError (undefined method `close' for :foo:Symbol)
|
1731
1807
|
# CSV.parse(:foo)
|
1808
|
+
#
|
1809
|
+
# ---
|
1810
|
+
#
|
1811
|
+
# Please check whether your text contains a \BOM or not. CSV.parse will not remove
|
1812
|
+
# \BOM automatically. You might want to remove \BOM before calling CSV.parse:
|
1813
|
+
# # remove BOM on calling File.open
|
1814
|
+
# File.open(path, encoding: 'bom|utf-8') do |file|
|
1815
|
+
# CSV.parse(file, headers: true) do |row|
|
1816
|
+
# # you can get value by column name because BOM is removed
|
1817
|
+
# p row['Name']
|
1818
|
+
# end
|
1819
|
+
# end
|
1820
|
+
#
|
1821
|
+
# Output:
|
1822
|
+
# # "foo"
|
1823
|
+
# # "bar"
|
1824
|
+
# # "baz"
|
1732
1825
|
def parse(str, **options, &block)
|
1733
1826
|
csv = new(str, **options)
|
1734
1827
|
|
@@ -1862,6 +1955,42 @@ class CSV
|
|
1862
1955
|
options = default_options.merge(options)
|
1863
1956
|
read(path, **options)
|
1864
1957
|
end
|
1958
|
+
|
1959
|
+
ON_WINDOWS = /mingw|mswin/.match?(RUBY_PLATFORM)
|
1960
|
+
private_constant :ON_WINDOWS
|
1961
|
+
|
1962
|
+
private
|
1963
|
+
def may_enable_bom_detection_automatically(filename_or_io,
|
1964
|
+
mode,
|
1965
|
+
options,
|
1966
|
+
file_opts)
|
1967
|
+
if filename_or_io.is_a?(StringIO)
|
1968
|
+
# Support to StringIO was dropped for Ruby 2.6 and earlier without BOM support:
|
1969
|
+
# https://github.com/ruby/stringio/pull/47
|
1970
|
+
return if RUBY_VERSION < "2.7"
|
1971
|
+
else
|
1972
|
+
# "bom|utf-8" may be buggy on Windows:
|
1973
|
+
# https://bugs.ruby-lang.org/issues/20526
|
1974
|
+
return if ON_WINDOWS
|
1975
|
+
end
|
1976
|
+
return unless Encoding.default_external == Encoding::UTF_8
|
1977
|
+
return if options.key?(:encoding)
|
1978
|
+
return if options.key?(:external_encoding)
|
1979
|
+
return if mode.include?(":")
|
1980
|
+
file_opts[:encoding] = "bom|utf-8"
|
1981
|
+
end
|
1982
|
+
|
1983
|
+
if RUBY_VERSION < "2.7"
|
1984
|
+
def create_stringio(str, mode, opts)
|
1985
|
+
opts.delete_if {|k, _| k == :universal_newline or DEFAULT_OPTIONS.key?(k)}
|
1986
|
+
raise ArgumentError, "Unsupported options parsing StringIO: #{opts.keys}" unless opts.empty?
|
1987
|
+
StringIO.new(str, mode)
|
1988
|
+
end
|
1989
|
+
else
|
1990
|
+
def create_stringio(str, mode, opts)
|
1991
|
+
StringIO.new(str, mode, **opts)
|
1992
|
+
end
|
1993
|
+
end
|
1865
1994
|
end
|
1866
1995
|
|
1867
1996
|
# :call-seq:
|
@@ -2000,6 +2129,12 @@ class CSV
|
|
2000
2129
|
writer if @writer_options[:write_headers]
|
2001
2130
|
end
|
2002
2131
|
|
2132
|
+
class TSV < CSV
|
2133
|
+
def initialize(data, **options)
|
2134
|
+
super(data, **({col_sep: "\t"}.merge(options)))
|
2135
|
+
end
|
2136
|
+
end
|
2137
|
+
|
2003
2138
|
# :call-seq:
|
2004
2139
|
# csv.col_sep -> string
|
2005
2140
|
#
|
metadata
CHANGED
@@ -1,71 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.
|
4
|
+
version: 3.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Edward Gray II
|
8
8
|
- Kouhei Sutou
|
9
|
+
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2024-
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: benchmark_driver
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: test-unit
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: 3.4.8
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: 3.4.8
|
12
|
+
date: 2024-12-15 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
69
14
|
description: The CSV library provides a complete interface to CSV files and data.
|
70
15
|
It offers tools to enable you to read and write to and from Strings or IO objects,
|
71
16
|
as needed.
|
@@ -127,6 +72,7 @@ licenses:
|
|
127
72
|
- Ruby
|
128
73
|
- BSD-2-Clause
|
129
74
|
metadata: {}
|
75
|
+
post_install_message:
|
130
76
|
rdoc_options:
|
131
77
|
- "--main"
|
132
78
|
- README.md
|
@@ -143,7 +89,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
143
89
|
- !ruby/object:Gem::Version
|
144
90
|
version: '0'
|
145
91
|
requirements: []
|
146
|
-
rubygems_version: 3.
|
92
|
+
rubygems_version: 3.5.22
|
93
|
+
signing_key:
|
147
94
|
specification_version: 4
|
148
95
|
summary: CSV Reading and Writing
|
149
96
|
test_files: []
|