<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>Ken Reisman</title>
	<atom:link href="http://kenreisman.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://kenreisman.com</link>
	<description></description>
	<lastBuildDate>Mon, 30 Jan 2012 13:08:11 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='kenreisman.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://0.gravatar.com/blavatar/22f8ed4b6f5c7d9846aa19e8b91965ac?s=96&#038;d=http%3A%2F%2Fs2.wp.com%2Fi%2Fbuttonw-com.png</url>
		<title>Ken Reisman</title>
		<link>http://kenreisman.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://kenreisman.com/osd.xml" title="Ken Reisman" />
	<atom:link rel='hub' href='http://kenreisman.com/?pushpress=hub'/>
		<item>
		<title>Test post from Blogo for preview</title>
		<link>http://kenreisman.com/2012/01/30/test-post-from-blogo-for-preview/</link>
		<comments>http://kenreisman.com/2012/01/30/test-post-from-blogo-for-preview/#comments</comments>
		<pubDate>Mon, 30 Jan 2012 13:08:06 +0000</pubDate>
		<dc:creator>Ken Reisman</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">https://kreisman.wordpress.com/?p=109</guid>
		<description><![CDATA[This is a test post sent by Blogo in order to generate a preview template. It should be deleted shortly.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=kenreisman.com&amp;blog=6009169&amp;post=109&amp;subd=kreisman&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>This is a test post sent by Blogo in order to generate a preview template. It should be deleted shortly.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/kreisman.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/kreisman.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/kreisman.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/kreisman.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/kreisman.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/kreisman.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/kreisman.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/kreisman.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/kreisman.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/kreisman.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/kreisman.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/kreisman.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/kreisman.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/kreisman.wordpress.com/109/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=kenreisman.com&amp;blog=6009169&amp;post=109&amp;subd=kreisman&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://kenreisman.com/2012/01/30/test-post-from-blogo-for-preview/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">ken</media:title>
		</media:content>
	</item>
		<item>
		<title>Machine learning as meta-programming</title>
		<link>http://kenreisman.com/2009/03/31/what-can-machines-really-learn-1/</link>
		<comments>http://kenreisman.com/2009/03/31/what-can-machines-really-learn-1/#comments</comments>
		<pubDate>Tue, 31 Mar 2009 18:38:19 +0000</pubDate>
		<dc:creator>Ken Reisman</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[artificial intelligence]]></category>
		<category><![CDATA[human computation]]></category>
		<category><![CDATA[machine learning]]></category>

		<guid isPermaLink="false">http://kreisman.wordpress.com/?p=58</guid>
		<description><![CDATA[&#160; When I began experimenting with machine learning as an undergrad in 1995, I was immediately taken with the possibilities. Contemplating biological evolution, it’s hard not to be awed that Darwin’s “endless forms most beautiful and most wonderful” have evolved from a blind evolutionary process, without invention from a conscious designer. Similarly, with machine learning, [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=kenreisman.com&amp;blog=6009169&amp;post=58&amp;subd=kreisman&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>&nbsp;</p>
<p class="MsoNormal">When I began experimenting with machine learning as an undergrad in 1995, I was immediately taken with the possibilities. Contemplating biological evolution, it’s hard not to be awed that Darwin’s “endless forms most beautiful and most wonderful” have evolved from a blind evolutionary process, without invention from a conscious designer. Similarly, with machine learning, there is the enticing prospect that programmers won’t actually have to solve the hardest problems in engineering and artificial intelligence. Rather, with an appropriately advanced theory of machine learning (and machine evolution), we’ll evolve and train machines to solve these difficult problems on their own. On their own! The very thought is powerful, and magical, and maybe a little heretical too.</p>
<p class="MsoNormal">Despite my continuing enthusiasm for machine learning, I hear from skeptics all the time. Some are my friends. They are understandably burned out by overzealous claims about machine intelligence (e.g., the first chess playing computers, Doug Lenat’s Cyc Project, the mid 80’s literature on neural networks, the early 90’s literature on artificial life, Ray Kurzweil’s books, and so on). Time after time, we hear about some new AI research program that promises to introduce intelligent machines to the world, only to have these fizzle out or hit a plateau after a few years.</p>
<p class="MsoNormal">Well, I’m sorry to say, the age of <em>truly</em> intelligent machines is not near. We are still many decades (if not many centuries) away from producing machines that can hold a good conversation, contemplate their own existence, or watch Stephen Colbert and have a genuine laugh. Nope, no conscious robots anytime soon. Humans are still much smarter than machines.</p>
<p class="MsoNormal">Still, I’m as enthusiastic as ever about machine learning because we are at the point where machines can learn autonomously to solve many large, interesting problems. Machine learning thrives on big data sets and lots of CPU cycles to crunch them. Thanks to the internet and cloud computing we now have more of both than ever. If you’re an AI skeptic, then I encourage you to stop worrying about conscious robots, and start thinking about the more immediate, practical applications of machine learning. There are many.</p>
<p class="MsoNormal">With machine learning, the programmer becomes a <em>meta-</em>programmer; instead of finding and coding the solution to a problem, she (1) gathers an appropriate data set, and (2) codes an appropriate learning algorithm that (if all goes well) will learn to solve the problem by examining this data set. For many programming challenges, this is more feasible and cost-effective than finding and coding the solution directly.</p>
<p class="MsoNormal">Being a meta-programmer in this sense requires different skills than traditional programming. You need an understanding of the practical requirements and limitations of various learning algorithms (though not necessarily how to implement them, since open source implementations of many algorithms are available). You also have to be crafty about gathering training data. Can you scrape data from the web? Can you assemble a training set using <a href="https://www.mturk.com/mturk/welcome">Mechanical Turk</a> or other <a href="http://en.wikipedia.org/wiki/Human_computation">human computation</a> services? If you run a web service, can you collect the data from your users? Is a free or licensable data set already available? To a large extent, meta-programming through machine learning is about clever data acquisition; gathering the richest data set you can with as little time and money as possible.</p>
<p class="MsoNormal"><em><br />
</em></p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/kreisman.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/kreisman.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/kreisman.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/kreisman.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/kreisman.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/kreisman.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/kreisman.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/kreisman.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/kreisman.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/kreisman.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/kreisman.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/kreisman.wordpress.com/58/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/kreisman.wordpress.com/58/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/kreisman.wordpress.com/58/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=kenreisman.com&amp;blog=6009169&amp;post=58&amp;subd=kreisman&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://kenreisman.com/2009/03/31/what-can-machines-really-learn-1/feed/</wfw:commentRss>
		<slash:comments>4</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">ken</media:title>
		</media:content>
	</item>
		<item>
		<title>Wanted: a universal tool for summarizing opinions</title>
		<link>http://kenreisman.com/2009/03/27/opinion-synthesis/</link>
		<comments>http://kenreisman.com/2009/03/27/opinion-synthesis/#comments</comments>
		<pubDate>Fri, 27 Mar 2009 06:23:12 +0000</pubDate>
		<dc:creator>Ken Reisman</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[human computation]]></category>
		<category><![CDATA[opinion mining]]></category>
		<category><![CDATA[opinions]]></category>
		<category><![CDATA[summarization]]></category>

		<guid isPermaLink="false">http://kreisman.wordpress.com/?p=39</guid>
		<description><![CDATA[I don&#8217;t know how, but at some point in the last two years I became obsessed with the problem of summarizing opinion information on the web: there are just too many opinions for our limited minds to absorb, many of them valuable, and we need help making sense of them all. For me, the solution is [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=kenreisman.com&amp;blog=6009169&amp;post=39&amp;subd=kreisman&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>I don&#8217;t know how, but at some point in the last two years I became obsessed with the problem of summarizing opinion information on the web: there are just too many opinions for our limited minds to absorb, many of them valuable, and we need help making sense of them all. For me, the solution is a summarization tool &#8212; something that can scan through pages of opinions, and report the overall <em>gist</em> or <em>consensus </em>in just a few lines (or perhaps, with a pithy visualization).</p>
<div>
<p>Why do I care so much about opinion summarization? Because I believe strongly in &#8220;wisdom of the crowd&#8221; effects &#8212;  that we can often get better solutions to problems by synthesizing information from individuals &#8212; and yet, I believe that we sometimes lack good mechanisms for information synthesis. Wikis have proven to be an excellent way to synthesize factual information. Numerical averaging works reasonably well to synthesize opinions about numerical values. Voting and surveys help us synthesize opinions about decisions when there are a small, discrete number of alternatives. And there are other solutions too, such as prediction markets, Digg-style voting, etc.</p>
<p>Yet I believe we are missing good mechanisms for synthesizing <em>qualitative opinions. </em>By that, I mean opinions about people&#8217;s feelings, attitudes, likes, dislikes, desires, reasons, etc. What do people think about Obama? Well, there are generally two ways to find out. First, if you check the <a href="http://www.gallup.com/poll/112006/Gallup-Daily-Confidence-Obama.aspx">presidential approval polls</a> you&#8217;ll see that nearly 70% of Americans are currently confident in Obama as a president. That&#8217;s definitely a synthesis of opinions, but it leaves out all the details: it says nothing about <em>why</em> Americans like Obama, <em>what</em> they like or dislike about him, and more importantly whether <em>you</em> should approve of Obama or not. The second major way to find out what people think is to listen to or read opinions from a variety of sources (op-ed articles, twitter, blogs, your friends, pundits on TV, etc.). There are a ton of opinions on Obama out there, roughly 10 created every minute on Twitter alone, but it would take an inordinate amount of time to read them.</p>
<p>Wouldn&#8217;t it be nice if you could click a button and immediately get a summary of all those opinions? For example, click a button and have your computer report that the biggest single topic of discussion is Obama&#8217;s economic plan, and that though most Americans are supportive, many of them are apprehensive about the massive federal debt that will be created.  (Ok, I admit that&#8217;s a fictional example, those are just my opinions). If you had this button, you could take a stack of opinions on any topic and immediately get the <em>gist</em> of what people have said, the points they agreed on, and the points where they disagreed.</p>
<p>Last year, I launched Pluribo with my friend Samidh Chakrabarti as a modest initial solution to the problem. Pluribo is an NLP-driven tool to summarize user reviews for certain product categories. We began with electronics reviews and it worked pretty well. We then tried to expand to other categories and we started to run into problems. Yes, our solution worked for other categories, but  each category required so much calibration and training that it turned out to be inefficient to cover all categories, one by one. A hotel version of Pluribo is in the works, but for the most part I have become convinced that a fundamentally different solution is necessary. Rather than beginning with an algorithm for one category and then trying to extend the approach to other categories, I&#8217;m now interested in algorithms that are domain-neutral from the start.</p>
<p>What I want is a <em>universal</em> tool for opinion synthesis, a tool that can accept multiple written opinions in any format, on any topic, and automatically provide a coherent summary. I do think this is possible. I am convinced the NLP technology we developed at Pluribo is not the right foundation for this, and that a fundamentally different approach is needed. Perhaps the new solution doesn&#8217;t use much NLP at all. Perhaps it mainly uses human computation to effectively decompose the tasks of aggregating, analyzing and synthesizing opinions; or perhaps there is a relatively simple machine learning solution that merely requires massive amounts of training data. Of course, what we consider to be a solution depends a lot on what we consider an acceptable summary to be.</p>
<p>I&#8217;m working on a human computation approach to qualitative summarization now. The approach is expensive to run, but it is domain neutral and could make sense for certain applications such as helping government agencies listen to their constituents, or helping brands listen to their customers. I&#8217;m curious to see where it will lead.</p></div>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/kreisman.wordpress.com/39/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/kreisman.wordpress.com/39/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/kreisman.wordpress.com/39/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/kreisman.wordpress.com/39/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/kreisman.wordpress.com/39/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/kreisman.wordpress.com/39/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/kreisman.wordpress.com/39/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/kreisman.wordpress.com/39/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/kreisman.wordpress.com/39/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/kreisman.wordpress.com/39/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/kreisman.wordpress.com/39/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/kreisman.wordpress.com/39/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/kreisman.wordpress.com/39/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/kreisman.wordpress.com/39/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=kenreisman.com&amp;blog=6009169&amp;post=39&amp;subd=kreisman&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://kenreisman.com/2009/03/27/opinion-synthesis/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">ken</media:title>
		</media:content>
	</item>
		<item>
		<title>The semantic web: smart data for dumb machines</title>
		<link>http://kenreisman.com/2009/01/13/semantic_web/</link>
		<comments>http://kenreisman.com/2009/01/13/semantic_web/#comments</comments>
		<pubDate>Tue, 13 Jan 2009 19:21:11 +0000</pubDate>
		<dc:creator>Ken Reisman</dc:creator>
				<category><![CDATA[Intelligent Systems]]></category>
		<category><![CDATA[Semantic Web]]></category>
		<category><![CDATA[nlp]]></category>
		<category><![CDATA[opencalais]]></category>

		<guid isPermaLink="false">http://kreisman.wordpress.com/?p=20</guid>
		<description><![CDATA[As someone who does a lot of work with natural language understanding and &#8220;semantic&#8221; technology, people frequently ask me how my work fits in with the  Semantic Web. The answer is that there is little overlap: the semantic web is about data standards for structured documents, whereas I develop NLP &#38; machine learning technology to take [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=kenreisman.com&amp;blog=6009169&amp;post=20&amp;subd=kreisman&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>As someone who does a lot of work with natural language understanding and &#8220;semantic&#8221; technology, people frequently ask me how my work fits in with the  Semantic Web. The answer is that there is little overlap: the semantic web is about data standards for structured documents, whereas I develop NLP &amp; machine learning technology to take<em> unstructured documents </em>and <em>turn them into structured documents</em>. My work is &#8220;semantic&#8221; in a different (and arguably deeper) sense &#8212; it&#8217;s about teaching machines to parse and, in a rudimentary way, to understand natural language.</p>
<p>For anyone confused about what the semantic web really is, here is my take:</p>
<h3>Motivation behind the semantic web</h3>
<p>The World Wide Web is a universal format for putting human readable documents on the internet. It has been a revolutionary step in human knowledge, allowing humans to share knowledge on any topic, and to perform all kinds of daily activities from shopping to travel reservations. In 2001, Tim Berners-Lee, the inventor of the World Wide Web, co-authored a hugely influential article which argued that the next major step in the evolution of the internet would be to develop common data standards so that machines can share information from multiple web sites in the way that humans can. Berners-Lee and his co-authors called this “the semantic web” (in fact, he had discussed these ideas as early as 1994). The idea has not quite taken off, but it has been slowly gaining currency in the tech community ever since (there seems to be a lot of discussion about it lately, though Google Trends <a href="http://www.google.com/trends?q=semantic+web&amp;ctab=0&amp;geo=all&amp;date=all&amp;sort=0" target="_blank">suggests otherwise</a>).</p>
<p>The motivation behind the semantic web is, basically, that machines are not very smart. When I make travel plans online for a meeting in Chicago, I combine information from many different sites. I’ll check my calendar at Google for the best days to go, look up flight schedules and fares at Expedia to find a cheap flight on those days, check the address of the meeting using gmail, read hotel reviews at TripAdvisor to find the best accommodations near that address, check pricing and availability on those hotels using Hotels.com, and then book everything. It’s great that the internet allows me to do this, but, in fact, it’s a lot of work. I have to consult five different sites to complete the task, and make all sorts of decisions and inferences along the way.</p>
<p>It would save a tremendous amount of time if we could delegate complex scheduling tasks like this to a computer, but there are two major challenges. For one thing, the information on these different websites (e.g., calendar dates, flight schedules, hotel reviews, street addresses, etc) is easy for a human to understand, but difficult for a computer. When I see the words “Park Hyatt Chicago” at TripAdvisor, I know that this is a hotel and that the user review next to it gives me information about the quality of the hotel. Moreover, when I see the same name at Hotels.com with “$779.00” next to it, I know that these two sites are referring to the same hotel. I can see this immediately, without even thinking about it. Most computer programs aren’t so gifted. When they scan this page at TripAdvisor, they see a lot of words. They don’t know what the words refer to (or even that “Park Hyatt Chicago” refers to one entity, not three). They don’t know what a user review is, or which words on the page are part of a user review rather than part of an adjacent advertisement. They may not even know that the TripAdvisor page and the Hotels.com page both refer to the same entity.</p>
<p>There is another challenge too. Even if we explicitly tell the computer that “Park Hyatt Chicago” refers to a hotel, that “5/5 stars” and “I enjoyed my stay” are part of a user review, and that $779.00 is the cost per night, the computer may not know how to reason with all of this information. It doesn’t know how to use the review and price information to make a good decision. It doesn’t even know that 5/5 stars is a very good thing, or that $779/night is far too expensive for most travelers. It doesn’t know how to combine the information from these two sites in order to infer the best value for your money.</p>
<p>How do we overcome these problems? The ideal solution would be to build machines that are genuinely smart, machines that can actually read and understand natural language. Smart systems like this have been the driving goal of artificial intelligence researchers for decades (so-called “strong” AI). Powerful and suggestive AI techniques are continually being developed, but we haven’t arrived at this ambitious goal quite yet.</p>
<p>The semantic web is an alternative. Rather than programming computers to be smart interpreters of ambiguous data, we make the data clearer and easier to interpret. To help computers understand the information on a page, the semantic web incorporates standards so that publishers of a web site can carefully structure their information in a simple machine readable format; entities like hotels, addresses, user reviews, and prices are all clearly identified. To help computers reason with this structured information, the semantic web incorporates standards so that publishers can explicitly tell computers what inferences they can make with the data.</p>
<h3>Challenges for the semantic web</h3>
<p>The semantic web obviates the need to solve very difficult AI problems, but it has challenges of its own. First, for the potential of the semantic web to be realized, its demanding data standards must become widely adopted. So far this hasn’t happened, though there has been progress. Second, publishers can mark up data that is already well organized in databases (prices, product ids, dates, etc), but a great deal of information on the internet is and always will be in natural language format (news and blog articles, product reviews, emails and messages, etc.). It’s not clear how to incorporate this data into the semantic web vision. To encode this data using proper semantic web standards, we could either do so by hand (which is extremely cumbersome) or we’d have to write programs to parse and translate this natural language text into a suitable machine format (but if we had such a program, then we wouldn’t need the semantic web at all). Third, the semantic web represents an old, and arguably outdated vision, for how intelligent systems should reason. The semantic web standards are built around symbolic representations of entities, their relationships, and applicable rules for inference. Having this information is certainly better than nothing, but it’s not clear how much real-world activity and decision-making can be incorporated into this framework. Many computer scientists and philosophers have argued that symbolic approaches to AI and reasoning are severely limited (a common term of derision is GOFAI, or “Good Old-Fashioned Artificial Intelligence”).</p>
<p>Most likely, semantic web standards will prove more useful for sharing data than for sharing rules of reasoning and inference. Better organization of data can only be a good thing, but it’s important to see that the semantic web is mainly about smart data, not smart machines. If it does take off, it will only be a prelude to a web that is built with smarter machines (ones with more robust and flexible capabilities for learning, parsing text, and representing knowledge).</p>
<h3>Alternatives</h3>
<p>The semantic web has been slow in development, but that hasn’t stopped people from building programs that have “semantic web”–like functionality. One trend has been the proliferation of data APIs. APIs make structured data available for consumption by other machines, but they don’t typically conform to semantic web standards. It turns out to be quite difficult to foster agreement on any but the loosest data standards, such as XML (which is a data formatting standard, but not a data content standard). APIs make it possible to create mashups of data from many sites, but they don’t make it trivial—most APIs speak their own idiosyncratic language, and it takes work to connect a lot of different APIs together.</p>
<p>Another trend in this direction has been the development of programs that parse free text. The driving assumption behind the semantic web is that <em>to enable computers to perform complex tasks on the internet, it will be easier to get publishers to agree on common data standards than to build machines that can parse unstructured data</em>. It’s not clear that this is right. Oddly, for some types of data, it has been easier to build smarter machines. <a href="http://www.opencalais.com/">OpenCalais</a>, now owned by Thomson Reuters, is a promising software service in this area. OpenCalais scans any piece of text and automatically extracts entities, facts, and events (including the entity “Park Hyatt Chicago”).  It even marks them up using the semantic web standard RDF. In a way, OpenCalais acts like a translator between plain html and the shiny new world of the semantic web. And in a way, as this kind of smart NLP technology becomes more widespread, it seems to undermine the need for the semantic web itself.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/kreisman.wordpress.com/20/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/kreisman.wordpress.com/20/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/kreisman.wordpress.com/20/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/kreisman.wordpress.com/20/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/kreisman.wordpress.com/20/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/kreisman.wordpress.com/20/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/kreisman.wordpress.com/20/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/kreisman.wordpress.com/20/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/kreisman.wordpress.com/20/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/kreisman.wordpress.com/20/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/kreisman.wordpress.com/20/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/kreisman.wordpress.com/20/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/kreisman.wordpress.com/20/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/kreisman.wordpress.com/20/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=kenreisman.com&amp;blog=6009169&amp;post=20&amp;subd=kreisman&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://kenreisman.com/2009/01/13/semantic_web/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">ken</media:title>
		</media:content>
	</item>
		<item>
		<title>Late adopter</title>
		<link>http://kenreisman.com/2008/12/31/late-adopter/</link>
		<comments>http://kenreisman.com/2008/12/31/late-adopter/#comments</comments>
		<pubDate>Wed, 31 Dec 2008 16:48:50 +0000</pubDate>
		<dc:creator>Ken Reisman</dc:creator>
				<category><![CDATA[Intelligent Systems]]></category>
		<category><![CDATA[artificial intelligence]]></category>
		<category><![CDATA[evolution]]></category>
		<category><![CDATA[www]]></category>

		<guid isPermaLink="false"></guid>
		<description><![CDATA[Is 2009 too late to start a new blog, something with more than 140 characters per post?  I like to consider myself an early adopter of technology, but somehow it&#8217;s taken me until 2009 to get going. I&#8217;ve been working with NLP and machine learning lately, and I&#8217;ve had a flurry of thoughts on how to [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=kenreisman.com&amp;blog=6009169&amp;post=1&amp;subd=kreisman&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Is 2009 too late to start a new blog, something with more than 140 characters per post?  I like to consider myself an early adopter of technology, but somehow it&#8217;s taken me until 2009 to get going.</p>
<p>I&#8217;ve been working with NLP and machine learning lately, and I&#8217;ve had a flurry of thoughts on how to use these technologies to build a smarter web, a web that seems to think for itself.  Building intelligent systems is an incremental process (whether done by human hand, or the hand of evolution). Between systems with no intelligence at all and those that think, there are many possibilities with intermediate complexity and intermediate intelligence. These systems do not &#8220;understand&#8221; or self-reflect in any interesting sense, but they are based on many of the same building blocks and principles as human cognition: statistical learning, distributed architecture, fuzzy representations, adaptive heuristics, developmental assembly, evolutionary design. I&#8217;m less interested in far-ranging predictions about strong AI than in these intermediate systems that we can put on the web right now.</p>
<p>Pluribo was an example. There is so much user generated content on the web. I wanted to build a system that could scan the thousands of user comments about a product or topic, and <em>generalize</em> or <em>summarize</em> this content much like a human editor would. Pluribo is imperfect and it doesn&#8217;t generalize through the same cognitive process as a human, but it does generalize. I can imagine a series of modifications which would make the system more robust and naturalistic over time.</p>
<p>There are many of these systems I want to build, and I can&#8217;t realize all of them (not enough time, not enough money, and many of them probably aren&#8217;t viable anyway). But I can work on some of them, and I&#8217;ll occasionally write about the others here.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/kreisman.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/kreisman.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/kreisman.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/kreisman.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/kreisman.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/kreisman.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/kreisman.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/kreisman.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/kreisman.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/kreisman.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/kreisman.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/kreisman.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/kreisman.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/kreisman.wordpress.com/1/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=kenreisman.com&amp;blog=6009169&amp;post=1&amp;subd=kreisman&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://kenreisman.com/2008/12/31/late-adopter/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">ken</media:title>
		</media:content>
	</item>
	</channel>
</rss>
