Page MenuHomePhabricator

WDQ CONSTRUCT times out
Open, Needs TriagePublic

Description

The query below returns orgs related to prestigious science award winners.
It is moderately complex and returns 2365 rows covering about 2048 distinct ?org values (i.e. almost no Cartesian product).

# Organizations related to winners of prestigious science awards, one row per
# organization and optional-value combination.
# Written for the Wikidata Query Service: it relies on named subqueries
# (with {...} as %NAME / include %NAME) and on the wikibase:label service,
# neither of which is part of standard SPARQL 1.1.
# Relative IRIs built below with uri(concat(...)) are resolved against this base.
base         <http://trr.ontotext.com/resource/>
prefix trr:  <http://trr.ontotext.com/resource/ontology/>
# p: is used in the property paths of %ORG; declared explicitly so the query
# does not depend on prefixes predefined by the endpoint.
prefix p:    <http://www.wikidata.org/prop/>
prefix ps:   <http://www.wikidata.org/prop/statement/>
prefix pq:   <http://www.wikidata.org/prop/qualifier/>
prefix wd:   <http://www.wikidata.org/entity/>
prefix wdt:  <http://www.wikidata.org/prop/direct/>
prefix bd:   <http://www.bigdata.com/rdf#>
prefix wikibase: <http://wikiba.se/ontology#>

select ?orgId ?GRID ?orgLabel ?officialName ?orgDescription ?countryLabel ?locationLabel ?year ?officialWebsite ?orgURL ?identifierWD ?identifierGRID ?sourceURL ?linkWD ?linkGRID
# Step 1: science awards (including subclasses) that have a review score.
with {select distinct ?award {
  ?award wdt:P31/wdt:P279* wd:Q11448906. # science award
  ?award wdt:P444 []. # review score
}} as %AWARD
# Step 2: people who received at least one of those awards.
with {select distinct ?person {
  include %AWARD
  ?person wdt:P166 ?award.
}} as %PERSON
# Step 3: organizations linked to those people through any of the paths below.
with {select distinct ?org {
  include %PERSON
  ?person
    wdt:P108      | # employer
    wdt:P436      | # member of (learned society)
    wdt:P69       | # educated at
    p:P512/pq:P69 | # academic degree / educated at
    p:P166/pq:P1416 # won award / affiliation. The award on this path is not restricted to %AWARD (no correct union with include %AWARD was found), so it may not be a notable award
  ?org.
  filter not exists {?org wdt:P31/wdt:P279* wd:Q170584} # not a project
}} as %ORG {
  include %ORG
  # Every attribute is optional, so an organization is kept even when it lacks some of them.
  optional {?org wdt:P1448 ?officialName}
  optional {?org wdt:P17 ?country}
  optional {?org wdt:P131 ?location} # located in administrative territorial entity
  optional {?org wdt:P580|wdt:P571 ?date bind(year(?date) as ?year)} # start time | inception
  optional {?org wdt:P856 ?officialWebsite}
  optional {?org wdt:P2427 ?GRID}
  # Local id = entity IRI with the wd: namespace stripped.
  bind(strafter(str(?org),str(wd:)) as ?orgId)
  bind(uri(concat("organization/Wikidata/",          ?orgId)) as ?orgURL)
  bind(uri(concat("source/Wikidata/",                ?orgId)) as ?sourceURL)
  bind(uri(concat("identifier/Wikidata/",            ?orgId)) as ?identifierWD)
  # The two GRID binds stay unbound when the organization has no ?GRID.
  bind(uri(concat("identifier/GRID/",                ?GRID))  as ?identifierGRID)
  bind(uri(concat("https://www.wikidata.org/wiki/",  ?orgId)) as ?linkWD)
  bind(uri(concat("https://www.grid.ac/institutes/", ?GRID))  as ?linkGRID)
  # Supplies ?orgLabel, ?orgDescription, ?countryLabel and ?locationLabel,
  # trying the listed languages in order.
  service wikibase:label {bd:serviceParam wikibase:language "en,fr,it,de,nl"}
}

I want to insert the orgs into another repo but can't use a federated INSERT query because of T211107.
So I tried to use CONSTRUCT to get the data as Turtle:

# CONSTRUCT variant of the organization query: instead of a table it emits
# trr: RDF, with one Organization node plus its Source and Identifier nodes
# per organization.
# Written for the Wikidata Query Service: it relies on named subqueries
# (with {...} as %NAME / include %NAME) and on the wikibase:label service.
# Relative IRIs built below with uri(concat(...)) are resolved against this base.
base         <http://trr.ontotext.com/resource/>
prefix trr:  <http://trr.ontotext.com/resource/ontology/>
prefix xsd:  <http://www.w3.org/2001/XMLSchema#>
# p: is used in the property paths of %ORG; declared explicitly so the query
# does not depend on prefixes predefined by the endpoint.
prefix p:    <http://www.wikidata.org/prop/>
prefix ps:   <http://www.wikidata.org/prop/statement/>
prefix pq:   <http://www.wikidata.org/prop/qualifier/>
prefix wd:   <http://www.wikidata.org/entity/>
prefix wdt:  <http://www.wikidata.org/prop/direct/>
prefix bd:   <http://www.bigdata.com/rdf#>
prefix wikibase: <http://wikiba.se/ontology#>

# Template triples whose variables are unbound for a given solution
# (e.g. the GRID identifier of an organization without ?GRID) are not emitted.
construct {
  ?org_URL   a trr:Organization;
    trr:name           ?orgLabel;
    trr:altName        ?officialName;
    trr:description    ?orgDescription;
    trr:country        ?countryLabel;
    trr:location       ?locationLabel;
    trr:startDate      ?YEAR;
    trr:webLink        ?official_Website;
    trr:identifier     ?identifier_WD, ?identifier_GRID;
    trr:source         ?source_URL;
    trr:status         "raw".
  ?source_URL        a trr:Source;
    trr:src            "Wikidata";
    trr:webLink        ?link_WD;
    trr:semanticLink   ?org.
  ?identifier_WD     a trr:Identifier;
    trr:type           "Wikidata";
    trr:id             ?orgId;
    trr:webLink        ?link_WD;
    trr:semanticLink   ?org.
  ?identifier_GRID    a trr:Identifier;
    trr:type           "GRID";
    trr:id             ?GRID;
    trr:webLink        ?link_GRID;
    trr:source         ?source_URL.
}
# Step 1: science awards (including subclasses) that have a review score.
with {select distinct ?award {
  ?award wdt:P31/wdt:P279* wd:Q11448906. # science award
  ?award wdt:P444 []. # review score
}} as %AWARD
# Step 2: people who received at least one of those awards.
with {select distinct ?person {
  include %AWARD
  ?person wdt:P166 ?award.
}} as %PERSON
# Step 3: organizations linked to those people through any of the paths below.
with {select distinct ?org {
  include %PERSON
  ?person
    wdt:P108      | # employer
    wdt:P436      | # member of (learned society)
    wdt:P69       | # educated at
    p:P512/pq:P69 | # academic degree / educated at
    p:P166/pq:P1416 # won award / affiliation. The award on this path is not restricted to %AWARD (no correct union with include %AWARD was found), so it may not be a notable award
  ?org.
  filter not exists {?org wdt:P31/wdt:P279* wd:Q170584} # not a project
}} as %ORG {
  include %ORG
  # Every attribute is optional, so an organization is kept even when it lacks some of them.
  optional {?org wdt:P1448 ?officialName}
  optional {?org wdt:P17 ?country}
  optional {?org wdt:P131 ?location} # located in administrative territorial entity
  optional {?org wdt:P580|wdt:P571 ?date bind(year(?date) as ?year)} # start time | inception
  optional {?org wdt:P856 ?officialWebsite}
  optional {?org wdt:P2427 ?GRID}
  # Local id = entity IRI with the wd: namespace stripped.
  bind(strafter(str(?org),str(wd:)) as ?orgId)
  # uri() already yields IRIs, so the results are bound straight to the
  # variables used in the construct template (no second uri() pass needed).
  bind(uri(concat("organization/Wikidata/",          ?orgId)) as ?org_URL)
  bind(uri(concat("source/Wikidata/",                ?orgId)) as ?source_URL)
  bind(uri(concat("identifier/Wikidata/",            ?orgId)) as ?identifier_WD)
  bind(uri(concat("identifier/GRID/",                ?GRID))  as ?identifier_GRID)
  bind(uri(concat("https://www.wikidata.org/wiki/",  ?orgId)) as ?link_WD)
  bind(uri(concat("https://www.grid.ac/institutes/", ?GRID))  as ?link_GRID)
  bind(uri(?officialWebsite) as ?official_Website)
  # year() returns an integer, but strdt() only accepts a plain string as its
  # first argument; without str() the call errors and ?YEAR stays unbound.
  # NOTE(review): years with fewer than four digits would not be valid
  # xsd:gYear lexical forms - confirm whether such dates occur in the data.
  bind(strdt(str(?year),xsd:gYear) as ?YEAR)
  # Supplies ?orgLabel, ?orgDescription, ?countryLabel and ?locationLabel,
  # trying the listed languages in order.
  service wikibase:label {bd:serviceParam wikibase:language "en,fr,it,de,nl"}
}

The WDQ UI returns only 15k triples (and I can't save Turtle, see T211177).
The WDQ endpoint https://query.wikidata.org/sparql returns format text/turtle, but again it's incomplete (only 209 trr:Organization instead of 2048).
So I'm forced to save the first query as CSV and then RDFize it locally using this tarql query:

# tarql mapping: turns the CSV saved from the SELECT query into trr: RDF.
# Each CSV column is available as a variable of the same name
# (?orgId, ?GRID, ?orgLabel, ..., ?linkGRID); cell values arrive as plain
# strings, so IRIs and typed literals have to be rebuilt in the where clause.
# Template triples whose variables are unbound for a row are not emitted.
prefix trr:  <http://trr.ontotext.com/resource/ontology/>
prefix xsd:  <http://www.w3.org/2001/XMLSchema#>
construct {
  ?org_URL   a trr:Organization;
    trr:name           ?orgLabel;
    trr:altName        ?officialName;
    trr:description    ?orgDescription;
    trr:country        ?countryLabel;
    trr:location       ?locationLabel;
    trr:startDate      ?YEAR;
    trr:webLink        ?official_Website;
    trr:identifier     ?identifier_WD, ?identifier_GRID;
    trr:source         ?source_URL;
    trr:status         "raw".
  ?source_URL        a trr:Source;
    trr:src            "Wikidata";
    trr:webLink        ?link_WD;
    trr:semanticLink   ?org.
  ?identifier_WD     a trr:Identifier;
    trr:type           "Wikidata";
    trr:id             ?orgId;
    trr:webLink        ?link_WD;
    trr:semanticLink   ?org.
  ?identifier_GRID    a trr:Identifier;
    trr:type           "GRID";
    trr:id             ?GRID;
    trr:webLink        ?link_GRID;
    trr:source         ?source_URL.
} where {
  # The CSV has no ?org column (the SELECT query projects ?orgId, not ?org),
  # so the Wikidata entity IRI is rebuilt here; otherwise every
  # trr:semanticLink triple would be silently dropped.
  bind(uri(concat("http://www.wikidata.org/entity/", ?orgId)) as ?org)
  # Convert the string-valued columns back into IRIs.
  bind(uri(?officialWebsite) as ?official_Website)
  bind(uri(?orgURL         ) as ?org_URL         )
  bind(uri(?identifierWD   ) as ?identifier_WD   )
  bind(uri(?identifierGRID ) as ?identifier_GRID )
  bind(uri(?sourceURL      ) as ?source_URL      )
  bind(uri(?linkWD         ) as ?link_WD         )
  bind(uri(?linkGRID       ) as ?link_GRID       )
  # ?year is a string here, which is what strdt() expects.
  bind(strdt(?year,xsd:gYear) as ?YEAR)
}

The result is 42197 triples and 2049 orgs (trr:Organization).
(Let me know if you'd like the tarql and counting commands for testing)

Event Timeline

Restricted Application added a project: Wikidata. · View Herald TranscriptDec 5 2018, 8:03 AM
Restricted Application added a subscriber: Aklapper. · View Herald Transcript
Acka47 added a subscriber: Acka47.Feb 6 2019, 1:54 PM

I can confirm that the CONSTRUCT query times out. Also, it does not return an error message, so at first you think everything is fine. I am running into a similar problem as Vladimir with the following query, run with curl on the command line (see also this GitHub comment and the one afterwards):

$ curl -H "Accept: text/turtle" -G "https://query.wikidata.org/sparql" --data-urlencode query='
# Builds SKOS concepts in the nwbib-spatial scheme for places located in
# wd:Q1198 that have one of the listed administrative types.
# The whole query sits inside a single-quoted shell string, so it must not
# contain a single-quote character.
# All prefixes are declared so the query does not depend on endpoint defaults.
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX bd: <http://www.bigdata.com/rdf#>
CONSTRUCT {
    ?lobidURI a skos:Concept ;
    skos:inScheme <http://purl.org/lobid/nwbib-spatial> ;
    skos:prefLabel ?wikidataURILabel ;
    foaf:focus ?wikidataURI ;
    skos:notation ?QID ;
    skos:broader ?broaderURI .
  }
WHERE {
 {
   # Location group: reachable from wd:Q1198 through wdt:P131 (zero or more steps),
   # or linked to it by any P131 statement.
      { ?wikidataURI wdt:P131* wd:Q1198 . }
   UNION
      { ?wikidataURI p:P131 [ ps:P131 wd:Q1198 ] . }
   # There is no UNION at this point, so the location group above is joined with
   # the type group below: a place must match one alternative from each group.
   # NOTE(review): presumably intentional - confirm.
      { ?wikidataURI p:P31 [ ps:P31 wd:Q829277 ] . } # administrative region (Regierungsbezirk) in NRW
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q106658 ] . } # rural district (Landkreis) in Germany
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q5283531 ] . } # rural district (Landkreis) in Prussia
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q262166 ] . } # municipality (Gemeinde) in Germany
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q22865 ] . } # independent city (kreisfreie Stadt) in Germany
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q253019 ]. } # locality (Ortsteil)
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q2983893 ]. } # city quarter (Stadtteil)
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q42744322 ]. } # urban municipality (Stadtgemeinde) of Germany
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q134626 ]. } # district seat (Kreisstadt)
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q448801 ]. } # major district town (Große Kreisstadt)
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q1548518 ]. } # large district-affiliated town (Große kreisangehörige Stadt)
  UNION
       { ?wikidataURI p:P31 [ ps:P31 wd:Q54935786 ]. } # medium district-affiliated town (Mittlere kreisangehörige Stadt)
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q1852178 ] . } # city quarter of Düsseldorf
  UNION
      { ?wikidataURI p:P31 [ ps:P31 wd:Q15632166 ] . } # city quarter of Cologne
  UNION
     { ?wikidataURI p:P31 [ps:P31 wd:Q1780389 ] . } # special-type municipal association (currently only "Städteregion Aachen")
   UNION
   { ?wikidataURI wdt:P31/wdt:P279*  wd:Q4286337 . } # city district (Stadtbezirk); comment out for Geocache
  # Parent entity, if any; places without one are kept and get no skos:broader.
  OPTIONAL  { ?wikidataURI wdt:P131 ?broader . }
 }
# FILTER (?wikidataURI in (wd:Q1295))
 # Filter out former districts that share a name with current districts.
 # Same exclusions as the original chain of != tests, with the repeated
 # wd:Q1787449 listed once.
 FILTER (?wikidataURI NOT IN (wd:Q1787449, wd:Q16500124, wd:Q1465811,
        wd:Q16832627, wd:Q1113210, wd:Q19288281, wd:Q1662807,
        wd:Q1351319))
 # The Q-id is the part of the entity IRI after "entity/".
 BIND (STRAFTER (STR(?wikidataURI),"entity/") AS ?QID)
 BIND (STRAFTER (STR(?broader),"entity/") AS ?broaderQID)
 BIND (URI(CONCAT ("http://purl.org/lobid/nwbib-spatial#", ?QID)) AS ?lobidURI)
 # Stays unbound when ?broader is unbound, so no skos:broader triple is emitted.
 BIND (URI(CONCAT ("http://purl.org/lobid/nwbib-spatial#", ?broaderQID)) AS ?broaderURI)
 # Supplies ?wikidataURILabel in German.
 SERVICE wikibase:label {  bd:serviceParam wikibase:language "de" }
}'

I only get back the fully constructed statements for around 1650 resources while for the other 6671 resources I only get the skos:prefLabel statement.

Restricted Application added a subscriber: Liuxinyu970226. · View Herald TranscriptMay 21 2019, 1:24 PM

Independently of the above, is there a way to use CONSTRUCT ?

I haven't figured out how it works and when it could be useful.

Use CONSTRUCT when you want to get an RDF graph out of the repo, especially when you want the data to be shaped differently or to use ontology terms different from the original.