Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F28211266
Example output of just the huwiki models.
No One
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Authored By
Halfak
Feb 13 2019, 8:34 PM
2019-02-13 20:34:27 (UTC+0)
Size
3 KB
Referenced Files
None
Subscribers
None
Example output of just the huwiki models.
View Options
$ ./utility generate_make --config test-config
# This file is built automatically using cg.py file and Makefile.j2
# Any change you make on this file will be lost in the next run.
# Remove target files after command failure.
.DELETE_ON_ERROR:
models: \
huwiki_models
tuning_reports: \
huwiki_tuning_reports
touch:
touch datasets/*
touch models/*
include Makefile.manual
############################# Hungarian Wikipedia ################################
datasets/huwiki.human_labeled_revisions.raw.5k_2016.json:
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/huwiki/12/ > $@
datasets/huwiki.sampled_revisions.40k_2016.json:
wget -qO- http://quarry.wmflabs.org/run/79645/output/0/json-lines?download=true > $@
datasets/huwiki.autolabeled_revisions.40k_2016.json: \
datasets/huwiki.sampled_revisions.40k_2016.json
cat $< | \
./utility autolabel --host=https://hu.wikipedia.org \
--trusted-groups=sysop,oversight,trusted,bot,rollbacker,checkuser,abusefilter,bureaucrat,editor,templateeditor,interface-editor \
--trusted-edits=1000 \
--revert-radius=3 \
--revert-window=48 \
--verbose > $@
datasets/huwiki.revisions_for_review.5k_2016.json: \
datasets/huwiki.autolabeled_revisions.40k_2016.json
( \
cat datasets/huwiki.autolabeled_revisions.40k_2016.json | grep '"needs_review": (true|"True") | \
shuf -n 2500; \
cat datasets/huwiki.autolabeled_revisions.40k_2016.json | grep '"needs_review": (false|"False") | \
shuf -n 2500 \
) | shuf > $@
datasets/huwiki.labeled_revisions.40k_2016.json: \
datasets/huwiki.autolabeled_revisions.40k_2016.json \
datasets/huwiki.human_labeled_revisions.5k_2016.json ./utility merge_labels $^ > $@
datasets/huwiki.labeled_revisions.w_cache.40k_2016.json: \
datasets/huwiki.labeled_revisions.20k_2016.json
revscoring extract \
editquality.feature_lists.huwiki.damaging \
editquality.feature_lists.huwiki.goodfaith \
--host https://hu.wikipedia.org \
--extractors $(max_extractors) \
--verbose > $@
tuning_reports/huwiki.damaging.md: \
datasets/huwiki.labeled_revisions.w_cache.40k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.huwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight $(damaging_label_weight) \
--pop-rate "true=0.01" \
--pop-rate "false=0.99" \
--center --scale \
--cv-timeout 60 \
--debug > $@
models/huwiki.damaging.gradient_boosting.model: \
datasets/huwiki.labeled_revisions.w_cache.40k_2016.json
cat $< | \
revscoring cv_train
damaging \
--version=$(damaging_major_minor). \
-p 'learning_rate=0.01' \
-p 'max_depth=7' \
-p 'max_features="log2"' \
-p 'n_estimators=700' \
--label-weight $(damaging_label_weight) \
--pop-rate "true=0.01" \
--pop-rate "false=0.99" \
--center --scale > $@
revscoring model_info $@ > model_info/huwiki.damaging.md
tuning_reports/huwiki.goodfaith.md: \
datasets/huwiki.labeled_revisions.w_cache.40k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.huwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight $(goodfaith_label_weight) \
--pop-rate "true=0.99" \
--pop-rate "false=0.010000000000000009" \
--center --scale \
--cv-timeout 60 \
--debug > $@
models/huwiki.goodfaith.gradient_boosting.model: \
datasets/huwiki.labeled_revisions.w_cache.40k_2016.json
cat $< | \
revscoring cv_train
goodfaith \
--version=$(goodfaith_major_minor). \
-p 'learning_rate=0.01' \
-p 'max_depth=7' \
-p 'max_features="log2"' \
-p 'n_estimators=700' \
--label-weight $(goodfaith_label_weight) \
--pop-rate "true=0.99" \
--pop-rate "false=0.010000000000000009" \
--center --scale > $@
revscoring model_info $@ > model_info/huwiki.goodfaith.md
huwiki_models: \
models/huwiki.goodfaith.gradient_boosting.model \
models/huwiki.damaging.gradient_boosting.model
huwiki_tuning_reports: \
tuning_reports/huwiki.goodfaith.md \
tuning_reports/huwiki.damaging.md
File Metadata
Details
Attached
Mime Type
text/plain; charset=utf-8
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
7097574
Default Alt Text
Example output of just the huwiki models. (3 KB)
Attached To
Mode
P8080 Example output of just the huwiki models.
Attached
Detach File
Event Timeline
Log In to Comment