Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Owen Corrigan
ml-data
Commits
a9d44455
Commit
a9d44455
authored
Feb 25, 2022
by
Owen Corrigan
Browse files
Convert fairface annotations to ndjson
parent
9f6ce804
Changes
3
Hide whitespace changes
Inline
Side-by-side
dvc.lock
View file @
a9d44455
...
...
@@ -11,3 +11,18 @@ stages:
md5: e98b6274966ebeca27c3f0debfba5fda.dir
size: 4240651
nfiles: 2
fairface_annotations_to_ndjson:
cmd: python3 scripts/fairface_to_ndjson.py data/fairface/raw/fairface_label_train.csv
data/fairface/raw/fairface_label_val.csv data/fairface/processed/annotations.json
deps:
- path: data/fairface/raw
md5: e98b6274966ebeca27c3f0debfba5fda.dir
size: 4240651
nfiles: 2
- path: scripts/fairface_to_ndjson.py
md5: d1be196bac8fc6d1a4aa5df21a1d66be
size: 1321
outs:
- path: data/fairface/processed/annotations.json
md5: f47ca7827cd5d7c57d7291d3a941265d
size: 19132174
dvc.yaml
View file @
a9d44455
...
...
@@ -5,3 +5,11 @@ stages:
-
scripts/download_fairface.sh
outs
:
-
data/fairface/raw/
fairface_annotations_to_ndjson
:
cmd
:
python3 scripts/fairface_to_ndjson.py data/fairface/raw/fairface_label_train.csv
data/fairface/raw/fairface_label_val.csv data/fairface/processed/annotations.json
deps
:
-
data/fairface/raw
-
scripts/fairface_to_ndjson.py
outs
:
-
data/fairface/processed/annotations.json
scripts/fairface_to_ndjson.py
0 → 100644
View file @
a9d44455
import
argparse
import
datetime
import
json
import
math
import
pandas
as
pd
def median_age(s):
    """Map an age-bracket label to a single representative integer age.

    The open-ended label ``"more than 70"`` maps to 75. Any other label must
    have the form ``"<low>-<high>"`` (for example ``"20-29"``) and maps to the
    midpoint of the two bounds, rounded up to the next integer.

    Args:
        s: Age-bracket label. Leading and trailing whitespace is ignored.

    Returns:
        The representative age as an ``int``.

    Raises:
        ValueError: If the label is neither ``"more than 70"`` nor exactly
            two integers separated by a single ``"-"``.
    """
    label = s.strip()
    if label == "more than 70":
        return 75
    # Deliberately not named ``min``/``max``: that would shadow the builtins.
    low, high = [int(bound) for bound in label.split("-")]
    return math.ceil((low + high) / 2)
if __name__ == "__main__":
    # Command-line entry point: merge the train and val annotation CSVs into
    # one newline-delimited JSON file, one JSON object per annotation row.
    parser = argparse.ArgumentParser(
        description="Convert annotations to a single ndjson file."
    )
    parser.add_argument(
        "train", type=argparse.FileType("r"), help="train annotation file"
    )
    parser.add_argument(
        "val", type=argparse.FileType("r"), help="val annotation file"
    )
    parser.add_argument("out", type=argparse.FileType("w"), help="ndjson file")
    args = parser.parse_args()

    # argparse.FileType opens the handles but never closes them; close the
    # inputs explicitly as soon as they have been read.
    with args.train, args.val:
        train = pd.read_csv(args.train)
        val = pd.read_csv(args.val)

    train["legacy_splits"] = "train"
    # rename val to test for consistency
    val["legacy_splits"] = "test"

    # combine into single dataframe
    df = (
        pd.concat([train, val])
        .drop(["service_test"], axis=1)
        .reset_index(drop=True)
    )

    # Every record gets the same hard-coded image size.
    # NOTE(review): presumably the resolution of the source images - confirm.
    df["image_width"] = 224
    df["image_height"] = 224

    # One UTC timestamp shared by all records. datetime.utcnow() is
    # deprecated, so take an aware "now" and strip tzinfo to keep the emitted
    # string format (no UTC offset suffix) identical to what utcnow() gave.
    ingested_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    df["ingestion_timestamp"] = ingested_at.isoformat(
        sep=" ", timespec="milliseconds"
    )

    # Replace each age-bracket label with a single integer age.
    df["age"] = df["age"].apply(median_age)

    records = df.to_dict(orient="records")
    # Closing the output handle guarantees the data is flushed to disk.
    with args.out:
        args.out.writelines(json.dumps(record) + "\n" for record in records)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment