Your AI pair programmer
GitHub Copilot uses the OpenAI Codex to suggest code and entire functions in real-time, right from your editor.
#!/usr/bin/env ts-node
import { fetch } from "fetch-h2";
// Determine whether the sentiment of text is positive
// Use a web service
/**
 * Determines whether the sentiment of `text` is positive by POSTing it to the
 * text-processing.com sentiment web service.
 *
 * @param text - The text to classify.
 * @returns `true` when the service's `label` is `"pos"`, otherwise `false`.
 */
async function isPositive(text: string): Promise<boolean> {
  const response = await fetch(`http://text-processing.com/api/sentiment/`, {
    method: "POST",
    // Form-encode the value so characters such as `&`, `=`, `+` and `%` in
    // the text are transmitted as data instead of being read as form syntax.
    body: new URLSearchParams({ text }).toString(),
    headers: {
      "Content-Type": "application/x-www-form-urlencoded",
    },
  });
  const json = await response.json();
  return json.label === "pos";
}
package main
// CategorySummary holds one row of the per-category aggregate computed over
// the tasks table.
type CategorySummary struct {
	Title    string  // filled from the category column
	Tasks    int     // filled from COUNT(category)
	AvgValue float64 // filled from AVG(value)
}
// createTables issues the CREATE TABLE statement for the tasks table on db.
// Both the result and the error returned by db.Exec are discarded.
func createTables(db *sql.DB) {
	const createTasksTable = "CREATE TABLE tasks (id INTEGER PRIMARY KEY, title TEXT, value INTEGER, category TEXT)"
	_, _ = db.Exec(createTasksTable)
}
// createCategorySummaries returns one CategorySummary per category in the
// tasks table, carrying the category name, the number of tasks in it and the
// average task value.
//
// It returns a nil slice and the error if the query, a row scan, or the row
// iteration itself fails.
func createCategorySummaries(db *sql.DB) ([]CategorySummary, error) {
	rows, err := db.Query("SELECT category, COUNT(category), AVG(value) FROM tasks GROUP BY category")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var summaries []CategorySummary
	for rows.Next() {
		var summary CategorySummary
		if err := rows.Scan(&summary.Title, &summary.Tasks, &summary.AvgValue); err != nil {
			return nil, err
		}
		summaries = append(summaries, summary)
	}
	// rows.Next returns false both at the end of the result set and when
	// iteration fails; rows.Err tells the two apart so a failure is not
	// reported as a short but successful result.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return summaries, nil
}
import datetime
def parse_expenses(expenses_string):
    """Parse the list of expenses and return the list of triples (date, value, currency).

    Ignore blank lines and lines starting with #.
    Parse the date using datetime.
    Fields may be separated by any run of whitespace, and lines may be indented.

    Example expenses_string:
        2016-01-02 -34.01 USD
        2016-01-03 2.59 DKK
        2016-01-03 -2.72 EUR

    Raises ValueError if a non-comment line does not have exactly three
    fields, or if its date or value cannot be parsed.
    """
    expenses = []
    for line in expenses_string.splitlines():
        # Strip first so indented entries and indented comments are recognised.
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        # split() with no argument tolerates tabs and repeated spaces, which
        # split(" ") would turn into empty fields.
        date, value, currency = line.split()
        expenses.append((datetime.datetime.strptime(date, "%Y-%m-%d"),
                         float(value),
                         currency))
    return expenses
# Creates the shipping_addresses table: seven string columns for contact and
# address details, indexed foreign-key references to shipping_method and
# customer, and non-null timestamps.
class CreateShippingAddresses < ActiveRecord::Migration
  def change
    create_table :shipping_addresses do |t|
      # String columns, declared in the order they should appear in the table.
      %i[name address city zip state phone email].each do |column|
        t.string column
      end

      %i[shipping_method customer].each do |owner|
        t.references owner, index: true, foreign_key: true
      end

      t.timestamps null: false
    end
  end
end
Trained on billions of lines of code, GitHub Copilot turns natural language prompts into coding suggestions across dozens of languages.
Focus on solving bigger problems
Spend less time creating boilerplate and repetitive code patterns, and more time on what matters: building great software. Write a comment describing the logic you want and GitHub Copilot will immediately suggest code to implement the solution.
package main
// Run records the outcome of a single run.
type Run struct {
	Time    int // in milliseconds
	Results string
	Failed  bool
}

// Get average runtime of successful runs in seconds
//
// Runs with Failed set are excluded from both the total and the count. When
// there are no successful runs (including an empty or nil slice) the result
// is 0 rather than the NaN that dividing 0 by 0 would produce.
func averageRuntimeInSeconds(runs []Run) float64 {
	var totalTime, successfulRuns int
	for _, run := range runs {
		if run.Failed {
			continue
		}
		totalTime += run.Time
		successfulRuns++
	}
	if successfulRuns == 0 {
		return 0
	}
	// Run.Time is in milliseconds; divide by 1000 to report seconds.
	return float64(totalTime) / float64(successfulRuns) / 1000
}
# A course with enrollments and assignments, and through them users and
# submissions. Name, start/end date, term and year are all required.
class Course < ApplicationRecord
  has_many :enrollments, dependent: :destroy
  # NOTE(review): students and teachers are declared identically (both go
  # through :enrollments with source: :user) -- confirm whether a role scope
  # is meant to distinguish them.
  has_many :students, through: :enrollments, source: :user
  has_many :teachers, through: :enrollments, source: :user
  has_many :assignments, dependent: :destroy
  has_many :submissions, through: :assignments

  validates :name, :start_date, :end_date, :term, :year, presence: true

  class << self
    # Looks a course up by name via Course.find_by.
    def find_by_name(name)
      Course.find_by(name: name)
    end
  end
end
// The same duration (3600 seconds) expressed in successively larger units.
// Each value is derived from the previous one; a month is taken as 30 days
// and a year as 12 such months.
const seconds = 3600,
  minutes = seconds / 60,
  hours = minutes / 60,
  days = hours / 24,
  weeks = days / 7,
  months = days / 30,
  years = months / 12
import static org.junit.Assert.*;
import org.junit.Test;
public class IsPrimeTest {
// Math.isPrime(int) returns whether the given number is prime or not
@Test
public void testIsPrime() {
    // The first ten primes; each one must be reported as prime.
    int[] primes = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29};
    for (int prime : primes) {
        assertTrue(Math.isPrime(prime));
    }
}
Get AI-based suggestions, just for you
Visual Studio Code
def max_sum_slice(xs):
    """Return the largest sum of any contiguous slice of xs.

    The empty slice is allowed, so the result is 0 when xs is empty or no
    slice has a positive sum.
    """
    best = running = 0
    for value in xs:
        candidate = running + value
        # A non-positive running sum can never help a later slice: restart at 0.
        running = candidate if candidate > 0 else 0
        if running > best:
            best = running
    return best
Visual Studio Code
def max_sum_slice(xs):
    """Return the largest sum over the contiguous slices of xs.

    The empty slice counts, so the result is never negative; a falsy xs
    (such as an empty list) yields 0 immediately.
    """
    if not xs:
        return 0
    best_total = tail_total = 0
    for number in xs:
        extended = tail_total + number
        # Best sum of a slice ending at this element, floored at 0.
        tail_total = extended if extended > 0 else 0
        best_total = tail_total if tail_total > best_total else best_total
    return best_total
Visual Studio Code
def max_sum_slice(xs):
    """Find the contiguous slice of xs with the largest sum.

    Returns a tuple (begin, end, max_so_far) where xs[begin:end + 1] is the
    first slice that reaches the maximum sum max_so_far (end is inclusive).
    When no slice has a positive sum, including when xs is empty, the result
    is (0, 0, 0) and the indices carry no meaning.
    """
    max_ending = max_so_far = 0
    begin = end = 0
    # Start of the slice currently being extended. It is tracked separately
    # from `begin` so the reported start only moves when a new best is found.
    current_begin = 0
    for i, x in enumerate(xs):
        if max_ending + x > 0:
            max_ending += x
        else:
            max_ending = 0
            # xs[i] is discarded, so the next candidate slice starts after it.
            current_begin = i + 1
        if max_ending > max_so_far:
            max_so_far = max_ending
            begin, end = current_begin, i
    return begin, end, max_so_far
Visual Studio Code
def max_sum_slice(xs):
    """Return the maximum sum of a contiguous slice of xs (0 if none is positive).

    A falsy xs returns 0 without iterating.
    """
    if not xs:
        return 0
    running, peak = 0, 0
    for item in xs:
        # Extend the current slice, or drop it once its sum would go negative.
        running = max(0, running + item)
        if running > peak:
            peak = running
    return peak
Visual Studio Code
def max_sum_slice(xs):
    """Find the contiguous slice of xs with the largest sum.

    Returns (start_index, end_index, max_sum) where
    xs[start_index:end_index + 1] is the first slice that reaches the maximum
    sum (end_index is inclusive). When no slice has a positive sum, including
    when xs is empty, the result is (0, 0, 0) and the indices carry no meaning.
    """
    max_sum = 0
    max_sum_so_far = 0  # sum of the candidate slice ending at the current index
    start_index = 0
    end_index = 0
    candidate_start = 0  # where the candidate slice begins
    for i in range(len(xs)):
        max_sum_so_far += xs[i]
        if max_sum_so_far > max_sum:
            # New best: record the bounds of the candidate that produced it.
            max_sum = max_sum_so_far
            start_index = candidate_start
            end_index = i
        elif max_sum_so_far < 0:
            # A negative prefix can only hurt; start afresh after xs[i].
            max_sum_so_far = 0
            candidate_start = i + 1
    return (start_index, end_index, max_sum)
Visual Studio Code
def max_sum_slice(xs):
    """Return the largest sum of a contiguous slice of xs.

    The empty slice counts, so the result is 0 for an empty xs or when no
    slice has a positive sum.
    """
    if len(xs) == 0:
        return 0
    overall = suffix = 0
    for x in xs:
        # Best sum of a slice ending at x, never below the empty slice's 0.
        suffix = max(0, suffix + x)
        overall = max(overall, suffix)
    return overall
Visual Studio Code
def max_sum_slice(xs):
    """Find the contiguous slice of xs with the largest sum.

    Returns (best_sum, best_start, best_end) where
    xs[best_start:best_end + 1] is the first slice that reaches best_sum
    (best_end is inclusive). When no slice has a positive sum, including when
    xs is empty, the result is (0, None, None).
    """
    best_sum, best_start, best_end = 0, None, None
    current_sum, current_start = 0, 0
    for i, x in enumerate(xs):
        current_sum += x
        if current_sum <= 0:
            # The slice ending here cannot improve a later one; restart after it.
            current_sum, current_start = 0, i + 1
        elif current_sum > best_sum:
            best_sum, best_start, best_end = current_sum, current_start, i
    return best_sum, best_start, best_end
Visual Studio Code
def max_sum_slice(xs):
    """
    Return a pair of indices (i, j) for xs, or None when xs is empty.

    Inputs of length one and two are answered directly: (0, 0) for a single
    element, and for two elements (0, 1) when xs[0] >= xs[1], otherwise (1, 0).
    Longer inputs are delegated to max_sum_slice_helper(xs, 0, len(xs) - 1).
    """
    size = len(xs)
    if size == 0:
        return None
    if size == 1:
        return (0, 0)
    if size == 2:
        return (0, 1) if xs[0] >= xs[1] else (1, 0)
    return max_sum_slice_helper(xs, 0, size - 1)
Keep flying with your favorite editor
Code confidently in unfamiliar territory
Whether you’re working in a new language or framework, or just learning to code, GitHub Copilot can help you find your way. Tackle a bug, or learn how to use a new framework without spending most of your time spelunking through the docs or searching the web.
const token = process.env["TWITTER_BEARER_TOKEN"]

/**
 * Requests a user's timeline from the Twitter v1.1 `user_timeline` endpoint,
 * authenticating with the bearer token from TWITTER_BEARER_TOKEN.
 *
 * @param screenName - Handle of the user whose timeline is requested.
 * @param count - Number of tweets to request.
 * @returns A promise for the parsed JSON response body.
 */
const fetchTweetsFromUser = async (screenName, count) => {
  // Build the query with URLSearchParams so characters such as `&`, `=` or
  // `#` in the arguments are percent-encoded instead of altering the query.
  const query = new URLSearchParams({
    screen_name: String(screenName),
    count: String(count),
  })
  const response = await fetch(
    `https://api.twitter.com/1.1/statuses/user_timeline.json?${query}`,
    {
      headers: {
        Authorization: `Bearer ${token}`,
      },
    }
  )
  return response.json()
}
import tweepy, os # secrets in environment variables
def fetch_tweets_from_user(user_name):
    """Fetch tweets from the timeline of the user with the given screen name.

    Requests 200 tweets with retweets excluded. Credentials are read from the
    TWITTER_KEY, TWITTER_SECRET, TWITTER_TOKEN and TWITTER_TOKEN_SECRET
    environment variables; a missing variable raises KeyError.
    """
    # authentication
    auth = tweepy.OAuthHandler(os.environ['TWITTER_KEY'], os.environ['TWITTER_SECRET'])
    auth.set_access_token(os.environ['TWITTER_TOKEN'], os.environ['TWITTER_TOKEN_SECRET'])
    api = tweepy.API(auth)
    # fetch tweets
    # Pass the function's own parameter; the name `user` is not defined here.
    tweets = api.user_timeline(screen_name=user_name, count=200, include_rts=False)
    return tweets
require 'twitter'
# Builds a Twitter REST client from the TWITTER_* environment variables and
# returns the result of user_timeline for the given handle.
def fetch_tweets_from_user(handle)
  client = Twitter::REST::Client.new do |settings|
    settings.consumer_key = ENV['TWITTER_CONSUMER_KEY']
    settings.consumer_secret = ENV['TWITTER_CONSUMER_SECRET']
    settings.access_token = ENV['TWITTER_ACCESS_TOKEN']
    settings.access_token_secret = ENV['TWITTER_ACCESS_TOKEN_SECRET']
  end

  client.user_timeline(handle)
end
const token = process.env["TWITTER_BEARER_TOKEN"]

/**
 * Requests 20 tweets from a user's timeline (Twitter v1.1 `user_timeline`
 * endpoint) and reduces each one to its id, text, creation time and a small
 * summary of its author.
 *
 * @param userName - Handle of the user whose timeline is requested.
 * @returns A promise for the array of reduced tweet objects.
 */
const fetchTweetsFromUser = (userName: string) => {
  // Percent-encode the handle so characters such as `&` or `#` cannot add or
  // truncate query parameters.
  const url = `https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=${encodeURIComponent(userName)}&count=20`
  return fetch(url, {
    headers: {
      "Authorization": `Bearer ${token}`
    }
  })
    .then(res => res.json())
    .then(tweets => tweets.map(tweet => ({
      id: tweet.id,
      text: tweet.text,
      created_at: tweet.created_at,
      user: {
        id: tweet.user.id,
        name: tweet.user.name,
        screen_name: tweet.user.screen_name,
        profile_image_url: tweet.user.profile_image_url
      }
    })))
}
package main
// apiKey is the bearer token sent with Twitter API requests, read from the
// TWITTER_BEARER_TOKEN environment variable.
var apiKey = os.Getenv("TWITTER_BEARER_TOKEN")

// Tweet holds the single field decoded from each timeline entry.
type Tweet = struct{ Text string }

// fetchTweetsFromUser requests 200 tweets from the given user's timeline via
// the Twitter v1.1 user_timeline endpoint and decodes the JSON response.
//
// It returns an error if the request cannot be built or sent, if the response
// status is not 200 OK, or if the body cannot be decoded.
func fetchTweetsFromUser(user string) ([]Tweet, error) {
	req, err := http.NewRequest("GET", "https://api.twitter.com/1.1/statuses/user_timeline.json", nil)
	if err != nil {
		return nil, err
	}
	// Set the parameters through the request's query values so that user is
	// percent-encoded instead of being spliced into the URL verbatim.
	query := req.URL.Query()
	query.Set("screen_name", user)
	query.Set("count", "200")
	req.URL.RawQuery = query.Encode()
	req.Header.Set("Authorization", "Bearer "+apiKey)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("bad status: %d", resp.StatusCode)
	}

	var tweets []Tweet
	if err := json.NewDecoder(resp.Body).Decode(&tweets); err != nil {
		return nil, err
	}
	return tweets, nil
}
import d3 from "d3"
/**
 * Appends an <svg> of the given height and width to the #scatterplot element
 * and draws one circle of radius 5 per datum, positioned at (d.x, d.y).
 */
const drawScatterplot = (data, height, width) => {
  const container = d3.select("#scatterplot")
  const svg = container.append("svg")
  svg.attr("height", height)
  svg.attr("width", width)

  // One <circle> for every datum that does not yet have an element.
  const circles = svg.selectAll("circle").data(data).enter().append("circle")
  circles.attr("cx", point => point.x)
  circles.attr("cy", point => point.y)
  circles.attr("r", 5)
}
import matplotlib.pyplot as plt
def draw_scatterplot(x_values, y_values):
    """Show a scatter plot of y_values against x_values.

    Points are drawn with marker size 20; the title and axis labels are fixed.
    """
    marker_size = 20
    plt.scatter(x_values, y_values, s=marker_size)
    # Apply the fixed title and axis labels in turn.
    for set_text, text in (
        (plt.title, "Scatter Plot"),
        (plt.xlabel, "x values"),
        (plt.ylabel, "y values"),
    ):
        set_text(text)
    plt.show()
require 'gruff'
# Renders x_values against y_values as a Gruff scatter chart (constructed with
# 400) and writes it to plot.png.
# NOTE(review): the chart title is hard-coded to "GCD" -- confirm that is intended.
def draw_scatterplot(x_values, y_values)
  chart = Gruff::Scatter.new(400).tap do |plot|
    plot.title = "GCD"
    plot.x_axis_label = "X"
    plot.y_axis_label = "Y"
    plot.data('data', x_values, y_values)
  end
  chart.write("plot.png")
end
import d3 from "d3"
const drawScatterplot = (
data: Array<{ x: number; y: number; size: number }>,
x: d3.ScaleLinear<number, number>,
y: d3.ScaleLinear<number, number>,
svg: d3.Selection<SVGGElement, unknown, HTMLElement, any>,
margin: { top: number; bottom: number; left: number; right: number }
) => {
const width = svg.attr("width") - margin.left - margin.right
const height = svg.attr("height") - margin.top - margin.bottom
svg.selectAll("*").remove()
const g = svg.append("g")
.attr("transform", `translate(${margin.left}, ${margin.top})`)
const xAxis = g
.append("g")
.attr("transform", `translate(0, ${height})`)
.call(d3.axisBottom(x))
...
/**
 * Returns a memoized version of `fn`: results are cached per argument list,
 * keyed by the JSON serialization of the arguments.
 *
 * Falsy results (0, "", false, null, undefined, NaN) are cached like any
 * other value, so `fn` runs at most once per distinct key.
 */
const memoize = fn => {
  const cache = new Map();
  return (...args) => {
    const key = JSON.stringify(args);
    // Test for presence of the key rather than truthiness of the value, so a
    // cached falsy result is not recomputed on every call.
    if (!cache.has(key)) {
      cache.set(key, fn(...args));
    }
    return cache.get(key);
  };
}
def memoize(func):
    """Return a wrapper around func that caches results by positional arguments.

    The argument tuple is used as a dictionary key, so the arguments must be
    hashable. Keyword arguments are not accepted by the wrapper.
    """
    results = {}

    def wrapper(*args):
        # Compute on the first call for these arguments, then serve from cache.
        if args not in results:
            results[args] = func(*args)
        return results[args]

    return wrapper
# Returns a lambda that wraps func and caches its results by argument list.
#
# nil and false results are cached like any other value, so func is called at
# most once for each distinct argument list.
def memoize(func)
  cache = {}
  lambda do |*args|
    # Hash#fetch checks for the key itself; `||=` would treat a cached nil or
    # false as missing and call func again.
    cache.fetch(args) { cache[args] = func.call(*args) }
  end
end
/**
 * Returns a memoized version of `fn`: results are cached per argument list,
 * keyed by the JSON serialization of the arguments.
 *
 * Falsy results (0, "", false, null, undefined, NaN) are cached like any
 * other value, so `fn` runs at most once per distinct key.
 *
 * @param fn - The function to memoize.
 * @returns A function that takes the same arguments as `fn`.
 */
const memoize = (fn: Function): Function => {
  const cache = new Map<string, unknown>();
  return (...args: any[]): any => {
    const key = JSON.stringify(args);
    // Test for presence of the key rather than truthiness of the value, so a
    // cached falsy result is not recomputed on every call.
    if (!cache.has(key)) {
      cache.set(key, fn(...args));
    }
    return cache.get(key);
  };
}
package main
// Memoize wraps fn so that each distinct argument is computed only once;
// later calls with the same argument return the stored result.
func Memoize(fn func(int) int) func(int) int {
	results := map[int]int{}
	return func(n int) int {
		value, found := results[n]
		if !found {
			value = fn(n)
			results[n] = value
		}
		return value
	}
}
const https = require('https')
const parseString = require('xml2js').parseString
const apiKey = process.env["GOODREADS_API_KEY"]

/**
 * Looks a book up by title through the Goodreads `book/title.xml` endpoint
 * and resolves with the `average_rating` value of the first book in the
 * parsed response.
 *
 * The promise rejects if the request or response stream fails, if the XML
 * cannot be parsed, or if the parsed document lacks the expected
 * GoodreadsResponse.book[0] structure.
 */
const getRating = title => {
  return new Promise((resolve, reject) => {
    // Percent-encode the title so spaces, `&` and similar characters stay
    // inside the `title` parameter.
    const url = `https://www.goodreads.com/book/title.xml?key=${apiKey}&title=${encodeURIComponent(title)}`
    const request = https.get(url, res => {
      let xml = ''
      res.setEncoding('utf8')
      res.on('data', chunk => {
        xml += chunk
      })
      res.on('error', reject)
      res.on('end', () => {
        parseString(xml, (err, result) => {
          if (err) {
            reject(err)
            return
          }
          try {
            resolve(result.GoodreadsResponse.book[0].average_rating)
          } catch (lookupError) {
            // Unexpected document shape: reject instead of throwing inside
            // the parser callback, where the promise could never settle.
            reject(lookupError)
          }
        })
      })
    })
    // Without this handler a connection failure would leave the promise
    // pending forever.
    request.on('error', reject)
  })
}
import os, requests, json, bs4
key = os.environ['GOODREADS_API_KEY']


def get_rating(title):
    """Get the average rating of the book from GoodReads, and return a float.

    The book is looked up by title through the book/title.xml endpoint and the
    text of the first average_rating element in the response is converted to
    a float.
    """
    # Pass the query through `params` so requests percent-encodes the title
    # (spaces, '&', '#', ...) instead of it being spliced into the URL as-is.
    response = requests.get(
        'https://www.goodreads.com/book/title.xml',
        params={'key': key, 'title': title},
    )
    soup = bs4.BeautifulSoup(response.text, 'xml')
    rating = soup.find('average_rating').text
    return float(rating)
require 'goodreads'
# Looks a book up on Goodreads by its title and returns the average_rating
# field of the response.
#
# NOTE(review): api_key and api_secret are still the literal placeholders
# "YOUR_API_KEY" / "YOUR_API_SECRET"; supply real credentials before use.
def get_rating_of_book_by_title(title)
  client = Goodreads::Client.new(
    api_key: "YOUR_API_KEY",
    api_secret: "YOUR_API_SECRET"
  )
  # Client#book looks a book up by its Goodreads id; title lookups go through
  # book_by_title.
  book = client.book_by_title(title)
  book.average_rating
end
const apiKey = process.env["GOODREADS_API_KEY"]

/**
 * Looks a book up by title through the Goodreads `book/title.xml` endpoint
 * and resolves with the text content of the first `average_rating` element.
 *
 * Any failure (network error, missing element) is logged with `console.log`
 * and the promise then resolves with `undefined`.
 *
 * @param title - Title of the book to look up.
 */
const getRating = (title: string) => {
  // Percent-encode the title so spaces, `&` and similar characters stay
  // inside the `title` parameter.
  return fetch(`https://www.goodreads.com/book/title.xml?key=${apiKey}&title=${encodeURIComponent(title)}`)
    .then(res => res.text())
    .then(res => {
      const parser = new DOMParser();
      const xml = parser.parseFromString(res, "text/xml");
      const rating = xml.getElementsByTagName("average_rating")[0].textContent;
      return rating;
    })
    .catch(error => console.log(error));
}
package main
import (
	"fmt"
	"log"
	"net/http"
	"net/url"
	"os"
	"strconv"

	"github.com/beevik/etree"
)
var (
	// apiKey is the Goodreads API key, read from GOODREADS_API_KEY.
	apiKey = os.Getenv("GOODREADS_API_KEY")
)

// GetRating gets the average rating of a book using the Goodreads API.
//
// The book is looked up by title through /book/title.xml and the text of the
// first average_rating element in the response is parsed as a float64. An
// error is returned if the request fails, the response status is not 200 OK,
// the body cannot be read as XML, the element is missing, or its text is not
// a number.
func GetRating(title string) (float64, error) {
	u := url.URL{
		Scheme: "https",
		Host:   "www.goodreads.com",
		Path:   "/book/title.xml",
		RawQuery: url.Values{
			"key":   {apiKey},
			"title": {title},
		}.Encode(),
	}
	resp, err := http.Get(u.String())
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()
	// Do not try to interpret an error response as a book document.
	if resp.StatusCode != http.StatusOK {
		return 0, fmt.Errorf("goodreads: unexpected status %s", resp.Status)
	}
	doc := etree.NewDocument()
	if _, err := doc.ReadFrom(resp.Body); err != nil {
		return 0, err
	}
	ratings := doc.FindElements("//average_rating")
	if len(ratings) == 0 {
		// Report the missing element explicitly; returning 0 with a nil error
		// would be indistinguishable from a real rating of zero.
		return 0, fmt.Errorf("goodreads: no average_rating element in response for title %q", title)
	}
	rating, err := strconv.ParseFloat(ratings[0].Text(), 64)
	if err != nil {
		return 0, err
	}
	return rating, nil
}
Flight Reports
Hundreds of engineers, including our own, use GitHub Copilot every day.
This is the single most mind-blowing application of machine learning I’ve ever seen.
Mike Krieger // Co-founder, Instagram
GitHub Copilot works shockingly well. I will never develop software without it again.
Lars Gyrup Brink Nielsen
I was stunned when I started writing Clojure with GitHub Copilot and it filled an idiomatic namespace require, just like I was going to write it.
Gunnika Batra // Senior Analyst
Trying to code in an unfamiliar language by googling everything is like navigating a foreign country with just a phrasebook. Using GitHub Copilot is like hiring an interpreter.
Harri Edwards // OpenAI
Don't fly solo
Enjoy a 60-day free trial then $10/month or $100/year per user
Learn about GitHub Copilot Terms and Conditions
GitHub Copilot for companies
Frequently asked questions
General
What is GitHub Copilot?
GitHub Copilot is an AI pair programmer that helps you write code faster and with less work. It draws context from comments and code to suggest individual lines and whole functions instantly. GitHub Copilot is powered by Codex, a generative pretrained language model created by OpenAI. It is available as an extension for Visual Studio Code, Visual Studio, Neovim, and the JetBrains suite of integrated development environments (IDEs).
GitHub Copilot is not intended for non-coding tasks like data generation and natural language generation, such as question answering. Your use of GitHub Copilot is subject to the GitHub Terms for Additional Products and Features.
How does GitHub Copilot work?
OpenAI Codex was trained on publicly available source code and natural language, so it works for both programming and human languages. The GitHub Copilot extension sends your comments and code to the GitHub Copilot service, and it relies on context, as described in Privacy below - i.e., file content both in the file you are editing, as well as neighboring or related files within a project. It may also collect the URLs of repositories or file paths to identify relevant context. The comments and code along with context are then used by OpenAI Codex to synthesize and suggest individual lines and whole functions.
What data has GitHub Copilot been trained on?
GitHub Copilot is powered by Codex, a generative pretrained AI model created by OpenAI. It has been trained on natural language text and source code from publicly available sources, including code in public repositories on GitHub.
Does GitHub Copilot write perfect code?
In a recent evaluation, we found that users accepted on average 26% of all completions shown by GitHub Copilot. We also found that on average more than 27% of developers’ code files were generated by GitHub Copilot, and in certain languages like Python that goes up to 40%. However, GitHub Copilot does not write perfect code. It is designed to generate the best code possible given the context it has access to, but it doesn’t test the code it suggests so the code may not always work, or even make sense. GitHub Copilot can only hold a very limited context, so it may not make use of helpful functions defined elsewhere in your project or even in the same file. And it may suggest old or deprecated uses of libraries and languages. When converting comments written in non-English to code, there may be performance disparities when compared to English. For suggested code, certain languages like Python, JavaScript, TypeScript, and Go might perform better compared to other programming languages.
Like any other code, code suggested by GitHub Copilot should be carefully tested, reviewed, and vetted. As the developer, you are always in charge.
Will GitHub Copilot help me write code for a new platform?
GitHub Copilot is trained on public code. When a new library, framework, or API is released, there is less public code available for the model to learn from. That reduces GitHub Copilot’s ability to provide suggestions for the new codebase. As more examples enter the public space, we integrate them into the training set and suggestion relevance improves. In the future, we will provide ways to highlight newer APIs and samples to raise their relevance in GitHub Copilot’s suggestions.
How do I get the most out of GitHub Copilot?
GitHub Copilot works best when you divide your code into small functions, use meaningful names for function parameters, and write good docstrings and comments as you go. It also seems to do best when it’s helping you navigate unfamiliar libraries or frameworks.
How can I contribute?
By using GitHub Copilot and sharing your feedback in the feedback forum, you help to improve GitHub Copilot. Please also report incidents (e.g., offensive output, code vulnerabilities, apparent personal information in code generation) directly to copilot-safety@github.com so that we can improve our safeguards. GitHub takes safety and security very seriously and we are committed to continually improving.
Human oversight
Can GitHub Copilot introduce insecure code in its suggestions?
Public code may contain insecure coding patterns, bugs, or references to outdated APIs or idioms. When GitHub Copilot synthesizes code suggestions based on this data, it can also synthesize code that contains these undesirable patterns. This is something we care a lot about at GitHub, and in recent years we’ve provided tools such as GitHub Actions, Dependabot, and CodeQL to open source projects to help improve code quality. Of course, you should always use GitHub Copilot together with good testing and code review practices and security tools, as well as your own judgment.
Does GitHub own the code generated by GitHub Copilot?
GitHub Copilot is a tool, like a compiler or a pen. GitHub does not own the suggestions GitHub Copilot generates. The code you write with GitHub Copilot’s help belongs to you, and you are responsible for it. We recommend that you carefully test, review, and vet the code before pushing it to production, as you would with any code you write that incorporates material you did not independently originate.
Does GitHub Copilot recite code from the training set?
The vast majority of the code that GitHub Copilot suggests has never been seen before. Our latest internal research shows that about 1% of the time, a suggestion may contain code snippets longer than ~150 characters that match the training set. Previous research showed that many of these cases happen when GitHub Copilot is unable to glean sufficient context from the code you are writing, or when there is a common, perhaps even universal, solution to the problem.
What can I do to reduce GitHub Copilot’s suggestion of code that matches public code?
We built a filter to help detect and suppress the rare instances where a GitHub Copilot suggestion contains code that matches public code on GitHub. You have the choice to turn that filter on or off during setup. With the filter on, GitHub Copilot checks code suggestions with its surrounding code for matches or near matches (ignoring whitespace) against public code on GitHub of about 150 characters. If there is a match, the suggestion will not be shown to you. We plan on continuing to evolve this approach and welcome feedback and comment.
Other than the filter, what other measures can I take to assess code suggested by GitHub Copilot?
You should take the same precautions as you would with any code you write that uses material you did not independently originate. These include rigorous testing, IP scanning, and checking for security vulnerabilities. You should make sure your IDE or editor does not automatically compile or run generated code before you review it.
Fairness and broader impact
Will different people experience different quality of service from GitHub Copilot?
Given public sources are predominantly in English, GitHub Copilot will likely work less well in scenarios where natural language prompts provided by the developer are not in English and/or are grammatically incorrect. Therefore, non-English speakers might experience a lower quality of service.
Additionally, inexperienced developers may struggle to use GitHub Copilot to effectively generate code, and their lack of experience might inhibit their capability to effectively review and edit suggestions made by GitHub Copilot. Finally, we are conducting internal testing of GitHub Copilot’s ease of use by developers with disabilities and working to ensure that GitHub Copilot is accessible to all developers. Please feel free to share your feedback on GitHub Copilot accessibility in our feedback forum.
We acknowledge that fairness and inclusivity in code generation systems are important emerging research areas. We are working with experts, including Microsoft’s Office of Responsible AI, in an effort to advance GitHub Copilot’s responsible AI practices. We will also review new research and learn from feedback we receive to improve GitHub Copilot such that it is usable by a wide range of developers and provides similar quality of service to people with different backgrounds.
Does GitHub Copilot produce offensive outputs?
GitHub Copilot includes filters to block offensive language in the prompts and to avoid synthesizing suggestions in sensitive contexts. We continue to work on improving the filter system to more intelligently detect and remove offensive outputs. However, due to the novel space of code safety, GitHub Copilot may sometimes produce undesired output. If you see offensive outputs, please report them directly to copilot-safety@github.com so that we can improve our safeguards. GitHub takes this challenge very seriously and we are committed to addressing it.
How will advanced code generation tools like GitHub Copilot affect developer jobs?
Bringing in more intelligent systems has the potential to bring enormous change to the developer experience. We do not expect GitHub Copilot to replace developers. Rather, we expect GitHub Copilot to partner with developers, augment their capabilities, and enable them to be more productive, reduce manual tasks, and help them focus on interesting work. We also believe that GitHub Copilot has the potential to lower barriers to entry, enabling more people to explore software development, and join the next generation of developers. We are working to test these hypotheses with both internal and external research.
Privacy
How can I control the use of my data collected by Copilot?
GitHub Copilot gives you certain choices about how it uses the data it collects. User engagement data, including pseudonymous identifiers and general usage data, is required for the use of GitHub Copilot and will continue to be collected, processed, and shared with Microsoft and OpenAI as you use GitHub Copilot. You can choose whether your code snippets are collected and retained by GitHub and further processed and shared with Microsoft and OpenAI by adjusting your user settings. Additional information about the types of telemetry collected and processed by GitHub Copilot can be found in What data does GitHub Copilot collect? below.
You can also request deletion of GitHub Copilot data associated with your GitHub identity by filling out a support ticket. Please note that future data collection will occur with continued use of GitHub Copilot, but you can control whether your code snippets are collected, processed, and retained in telemetry in your Copilot user settings.
What data does GitHub Copilot collect?
GitHub Copilot relies on file content and additional data to work. It collects data both to provide the service and saves some of the data to perform further analysis and enable improvements. Please see below for more details on how your telemetry data is used and shared.
User Engagement Data
When you use GitHub Copilot, it will collect usage information about events generated when interacting with the IDE or editor. These events include user edit actions like completions accepted and dismissed, and error and general usage data to identify metrics like latency and feature engagement. This information may include personal data, such as pseudonymous identifiers.
Code Snippets Data
Depending on your preferred telemetry settings, GitHub Copilot may also collect and retain the following, collectively referred to as “code snippets”: source code that you are editing, related files and other files open in the same IDE or editor, URLs of repositories and file paths.
How is the transmitted data protected?
We know that user edit actions, source code snippets, and URLs of repositories and file paths are sensitive data. Consequently, several measures of protection are applied, including:
- The transmitted data is encrypted in transit and at rest
- Access is strictly controlled. The data can only be accessed by (1) named GitHub personnel working on the GitHub Copilot team or on the GitHub platform health team, (2) Microsoft personnel working on or with the GitHub Copilot team, and (3) OpenAI personnel who work on GitHub Copilot
- Role-based access controls and multi-factor authentication are required for personnel accessing code snippet data
Does GitHub Copilot ever output personal data?
Because GitHub Copilot was trained on publicly available code, its training set included public personal data included in that code. From our internal testing, we found it to be rare that GitHub Copilot suggestions included personal data verbatim from the training set. In some cases, the model will suggest what appears to be personal data – email addresses, phone numbers, etc. – but is actually fictitious information synthesized from patterns in training data. For example, when one of our engineers prompted GitHub Copilot with, “My name is Mona and my birthdate is,” GitHub Copilot suggested a random, fictitious date of “December 12,” which is not Mona’s actual birthdate. We have implemented a filter that blocks emails when shown in standard formats, but it’s still possible to get the model to suggest this sort of content if you try hard enough. We will keep improving the filter system to be more intelligent to detect and remove more personal data from the suggestions.
Where can I learn more about GitHub Privacy and data protection?
For more information on how GitHub processes and uses personal data, please see our Privacy Statement.