#!/usr/bin/env python
"""Jaspwr : Universal Feed Parser to SPARQL JSON results

Input  : any feed (ideally Atom)
Output : as http://www.w3.org/TR/rdf-sparql-json-res/

Uses http://feedparser.org/
    Mark Pilgrim (http://diveintomark.org/) et al
    Universal Feed Parser
    Requires: Python 2.1 or later
    Recommended: Python 2.3 or later

This module itself additionally uses the standard ``json`` module for
string escaping, so it needs Python 2.6+ or Python 3.

Blame danbri.
"""

__version__ = "0.1"
__license__ = "take your pick"
__author__ = "Danny Ayers "
__contributors__ = ["Dan Brickley "]

import json
import sys

try:
    import feedparser
except ImportError:
    # Only the command-line entry point calls feedparser; the Jaspwr class
    # works on any mapping shaped like feedparser's result, so a missing
    # module must not prevent importing the class.
    feedparser = None

try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3


def _to_text(value):
    """Return *value* as a text (unicode) string without raising on non-ASCII.

    Text is returned unchanged, byte strings are decoded as UTF-8 (undecodable
    bytes are replaced), anything else goes through the text constructor.
    """
    if isinstance(value, _text_type):
        return value
    if isinstance(value, bytes):
        return value.decode('utf-8', 'replace')
    return _text_type(value)


class Jaspwr:
    """Serialize a parsed feed as a SPARQL Query Results JSON document.

    Every feed entry becomes one solution in ``results.bindings``.  Each
    solution repeats the feed-level terms, followed by the entry-level terms
    and the terms of the entry's ``content`` elements.  Variable names are
    ``<label><result_prefix_separator><term>``, e.g. ``feed.title`` or
    ``entry.content.value``.

    Terms listed in ``uri_terms`` are typed ``uri``, all others ``literal``.
    A term missing from the feed data is emitted with an empty value.

    NOTE: an entry with several ``content`` elements yields the same
    ``entry.content.*`` key more than once inside one solution.
    """

    # Terms read from the feed, from each entry and from each content element.
    feed_terms = ['id', 'title', 'link', 'subtitle', 'updated']
    entry_terms = ['id', 'title', 'link', 'published', 'updated', 'summary']
    content_terms = ['type', 'base', 'language', 'value']
    # Terms whose binding type is "uri" instead of "literal".
    uri_terms = ['id', 'link', 'base']

    # Joins the label parts of a variable name ("entry" + "." + "title").
    result_prefix_separator = "."

    # Fixed fragments of the output document.
    head_prefix = '\n "head": { \"vars\": [ "'
    head_suffix = '" ] \n} ,'
    head_separator = '" , "'
    results_prefix = '\n "results": { "distinct": true , "ordered": false ,'
    results_suffix = '}'
    bindings_prefix = '\n "bindings": ['
    # Closes one solution object and opens the next one.
    bindings_separator = '\n} ,\n{\n '
    bindings_suffix = '\n]'

    def write(self, feed_data):
        """Return the SPARQL JSON results document for *feed_data*.

        *feed_data* must offer ``feed_data['feed']`` (a mapping) and
        ``feed_data.entries`` (a sequence of mappings), as the object
        returned by ``feedparser.parse`` does.  The document is also kept
        in ``self._json``.
        """
        self._json = '{'
        self._write_head(feed_data)
        self._write_results(feed_data)
        self._json = self._json + '}'
        return self._json

    def _write_head(self, feed_data):
        """Append the "head" member listing every variable name."""
        sep = self.result_prefix_separator
        content_label = 'entry' + sep + 'content'
        qualified_terms = (
            ['feed' + sep + term for term in self.feed_terms]
            + ['entry' + sep + term for term in self.entry_terms]
            + [content_label + sep + term for term in self.content_terms]
        )
        self._json = (self._json + self.head_prefix
                      + self.head_separator.join(qualified_terms)
                      + self.head_suffix)

    def _write_results(self, feed_data):
        """Append the "results" member, including all bindings."""
        self._json = self._json + self.results_prefix
        self._write_bindings(feed_data)
        self._json = self._json + self.results_suffix

    def _write_bindings(self, feed_data):
        """Append the "bindings" array, one solution object per entry."""
        solutions = [self._solution_body(feed_data['feed'], entry)
                     for entry in feed_data.entries]
        self._json = self._json + self.bindings_prefix
        if solutions:
            # bindings_separator only closes/opens objects *between*
            # solutions, so the first '{' and the last '}' are added here.
            # Joining (instead of appending and stripping afterwards) leaves
            # no dangling comma in front of a closing brace.
            self._json = (self._json + '\n{'
                          + self.bindings_separator.join(solutions)
                          + '\n}')
        self._json = self._json + self.bindings_suffix

    def _solution_body(self, feed, entry):
        """Return the comma-joined bindings of one entry, without braces."""
        content_label = 'entry' + self.result_prefix_separator + 'content'
        bindings = [self._get_binding(term, feed, 'feed')
                    for term in self.feed_terms]
        bindings.extend(self._get_binding(term, entry, 'entry')
                        for term in self.entry_terms)
        # Entries without content are normal; they simply get no
        # entry.content.* bindings.  Nothing is printed, because stdout
        # carries the JSON document.
        try:
            contents = list(entry['content'])
        except (KeyError, IndexError, TypeError):
            contents = []
        for term in self.content_terms:
            for content in contents:
                bindings.append(
                    self._get_binding(term, content, content_label))
        return ' ,'.join(bindings)

    def _get_binding(self, term, dict, label):
        """Return one ``"label.term": { "type": ..., "value": ... }`` member.

        *dict* is the mapping to read *term* from (the parameter name is
        kept for compatibility although it shadows the builtin).  A missing
        or unusable value results in an empty string value.
        """
        if term in self.uri_terms:
            binding_type = 'uri'
        else:
            binding_type = 'literal'
        # may have missing values or data structures here
        try:
            value = self.escape(dict[term])
        except (KeyError, IndexError, TypeError, ValueError):
            value = ''
        return ('\n "' + label + self.result_prefix_separator + term + '":'
                + ' { "type": "' + binding_type
                + '" , "value": "' + value + '" }')

    def escape(self, string):
        """Return *string* escaped for use inside a JSON string literal.

        Besides quote, backslash and the short escapes, all remaining
        control characters and all non-ASCII characters are written as
        ``\\uXXXX``, so the result is always pure ASCII.  The surrounding
        quotes are not included.
        """
        return json.dumps(_to_text(string))[1:-1]


if __name__ == '__main__':
    if not sys.argv[1:]:
        print("\nUsage example :")
        print("python jaspwr.py http://feedparser.org/docs/examples/atom10.xml\n")
        sys.exit(0)
    elif feedparser is None:
        sys.stderr.write("jaspwr: the feedparser module is required "
                         "(http://feedparser.org/)\n")
        sys.exit(1)
    else:
        d = feedparser.parse(sys.argv[1])
        print(Jaspwr().write(d))