Page MenuHomePhabricator
Paste P8080

Example output of just the huwiki models.
ActivePublic

Authored by Halfak on Feb 13 2019, 8:34 PM.
$ ./utility generate_make --config test-config
# This file is built automatically using cg.py file and Makefile.j2
# Any change you make on this file will be lost in the next run.
# Remove target files after command failure.
.DELETE_ON_ERROR:
models: \
huwiki_models
tuning_reports: \
huwiki_tuning_reports
touch:
touch datasets/*
touch models/*
include Makefile.manual
############################# Hungarian Wikipedia ################################
datasets/huwiki.human_labeled_revisions.raw.5k_2016.json:
./utility fetch_labels \
https://labels.wmflabs.org/campaigns/huwiki/12/ > $@
datasets/huwiki.sampled_revisions.40k_2016.json:
wget -qO- http://quarry.wmflabs.org/run/79645/output/0/json-lines?download=true > $@
datasets/huwiki.autolabeled_revisions.40k_2016.json: \
datasets/huwiki.sampled_revisions.40k_2016.json
cat $< | \
./utility autolabel --host=https://hu.wikipedia.org \
--trusted-groups=sysop,oversight,trusted,bot,rollbacker,checkuser,abusefilter,bureaucrat,editor,templateeditor,interface-editor \
--trusted-edits=1000 \
--revert-radius=3 \
--revert-window=48 \
--verbose > $@
datasets/huwiki.revisions_for_review.5k_2016.json: \
datasets/huwiki.autolabeled_revisions.40k_2016.json
( \
cat datasets/huwiki.autolabeled_revisions.40k_2016.json | grep '"needs_review": (true|"True") | \
shuf -n 2500; \
cat datasets/huwiki.autolabeled_revisions.40k_2016.json | grep '"needs_review": (false|"False") | \
shuf -n 2500 \
) | shuf > $@
datasets/huwiki.labeled_revisions.40k_2016.json: \
datasets/huwiki.autolabeled_revisions.40k_2016.json \
datasets/huwiki.human_labeled_revisions.5k_2016.json ./utility merge_labels $^ > $@
datasets/huwiki.labeled_revisions.w_cache.40k_2016.json: \
datasets/huwiki.labeled_revisions.20k_2016.json
revscoring extract \
editquality.feature_lists.huwiki.damaging \
editquality.feature_lists.huwiki.goodfaith \
--host https://hu.wikipedia.org \
--extractors $(max_extractors) \
--verbose > $@
tuning_reports/huwiki.damaging.md: \
datasets/huwiki.labeled_revisions.w_cache.40k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.huwiki.damaging \
damaging \
roc_auc.labels.true \
--label-weight $(damaging_label_weight) \
--pop-rate "true=0.01" \
--pop-rate "false=0.99" \
--center --scale \
--cv-timeout 60 \
--debug > $@
models/huwiki.damaging.gradient_boosting.model: \
datasets/huwiki.labeled_revisions.w_cache.40k_2016.json
cat $< | \
revscoring cv_train
damaging \
--version=$(damaging_major_minor). \
-p 'learning_rate=0.01' \
-p 'max_depth=7' \
-p 'max_features="log2"' \
-p 'n_estimators=700' \
--label-weight $(damaging_label_weight) \
--pop-rate "true=0.01" \
--pop-rate "false=0.99" \
--center --scale > $@
revscoring model_info $@ > model_info/huwiki.damaging.md
tuning_reports/huwiki.goodfaith.md: \
datasets/huwiki.labeled_revisions.w_cache.40k_2016.json
cat $< | \
revscoring tune \
config/classifiers.params.yaml \
editquality.feature_lists.huwiki.goodfaith \
goodfaith \
roc_auc.labels.true \
--label-weight $(goodfaith_label_weight) \
--pop-rate "true=0.99" \
--pop-rate "false=0.010000000000000009" \
--center --scale \
--cv-timeout 60 \
--debug > $@
models/huwiki.goodfaith.gradient_boosting.model: \
datasets/huwiki.labeled_revisions.w_cache.40k_2016.json
cat $< | \
revscoring cv_train
goodfaith \
--version=$(goodfaith_major_minor). \
-p 'learning_rate=0.01' \
-p 'max_depth=7' \
-p 'max_features="log2"' \
-p 'n_estimators=700' \
--label-weight $(goodfaith_label_weight) \
--pop-rate "true=0.99" \
--pop-rate "false=0.010000000000000009" \
--center --scale > $@
revscoring model_info $@ > model_info/huwiki.goodfaith.md
huwiki_models: \
models/huwiki.goodfaith.gradient_boosting.model \
models/huwiki.damaging.gradient_boosting.model
huwiki_tuning_reports: \
tuning_reports/huwiki.goodfaith.md \
tuning_reports/huwiki.damaging.md