Features

Actions Packages Security Codespaces Copilot Code review Issues Discussions

Your AI pair programmer

GitHub Copilot uses the OpenAI Codex to suggest code and entire functions in real-time, right from your editor.

#!/usr/bin/env ts-node

import { fetch } from "fetch-h2";

// Determine whether the sentiment of text is positive

// Use a web service

/**
 * Determine whether the sentiment of `text` is positive.
 *
 * POSTs the text as a form-encoded body to the text-processing.com sentiment
 * endpoint and reports whether the `label` field of the JSON reply is "pos".
 *
 * @param text - The text to classify.
 * @returns A promise resolving to `true` when the returned label is "pos".
 */
async function isPositive(text: string): Promise<boolean> {
  const response = await fetch(`http://text-processing.com/api/sentiment/`, {
    method: "POST",
    // Percent-encode the payload: a raw "&", "=", "+" or "%" in the text would
    // otherwise corrupt the application/x-www-form-urlencoded body.
    body: `text=${encodeURIComponent(text)}`,
    headers: {
      "Content-Type": "application/x-www-form-urlencoded",
    },
  });
  const json = await response.json();
  return json.label === "pos";
}

Copilot

package main

// CategorySummary holds the aggregate values reported for one task category.
type CategorySummary struct {
    Title    string  // category name
    Tasks    int     // number of tasks in the category
    AvgValue float64 // average task value in the category
}

// createTables issues the CREATE TABLE statement for the tasks table
// (id, title, value, category) on db.
func createTables(db *sql.DB) {
    // NOTE(review): the result and error returned by Exec are discarded, and the
    // signature offers no way to report a failure to the caller.
    db.Exec("CREATE TABLE tasks (id INTEGER PRIMARY KEY, title TEXT, value INTEGER, category TEXT)")
}

func createCategorySummaries(db *sql.DB) ([]CategorySummary, error) {

    var summaries []CategorySummary

    rows, err := db.Query("SELECT category, COUNT(category), AVG(value) FROM tasks GROUP BY category")

    if err != nil {

        return nil, err

    defer rows.Close()

    for rows.Next() {

        var summary CategorySummary

        err := rows.Scan(&summary.Title, &summary.Tasks, &summary.AvgValue)

        if err != nil {

            return nil, err

        summaries = append(summaries, summary)

    return summaries, nil

Copilot

import datetime

def parse_expenses(expenses_string):
    """Parse the list of expenses and return the list of triples (date, value, currency).

    Blank lines and lines starting with # are ignored. Leading and trailing
    whitespace is stripped and fields may be separated by any run of
    whitespace, so indented input such as the example below parses correctly.
    The date is parsed using datetime.

    Example expenses_string:
        2016-01-02 -34.01 USD
        2016-01-03 2.59 DKK
        2016-01-03 -2.72 EUR

    Args:
        expenses_string: Text with one expense per line.

    Returns:
        A list of (datetime.datetime, float, str) tuples in input order.

    Raises:
        ValueError: If a line does not have exactly three fields, or its date
            or value cannot be parsed.
    """
    expenses = []
    for line in expenses_string.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        # split() without an argument collapses runs of whitespace; split(" ")
        # would yield empty fields and break the three-way unpacking.
        date, value, currency = line.split()
        expenses.append((datetime.datetime.strptime(date, "%Y-%m-%d"),
                         float(value),
                         currency))
    return expenses

Copilot

# Migration that creates the shipping_addresses table: seven string columns,
# indexed foreign-key references to shipping_method and customer, and
# non-null timestamps.
class CreateShippingAddresses < ActiveRecord::Migration
  def change
    create_table :shipping_addresses do |t|
      # Plain string columns, declared in the same order as before.
      %i[name address city zip state phone email].each do |column|
        t.string column
      end

      # Both references are indexed and carry a foreign key.
      %i[shipping_method customer].each do |owner|
        t.references owner, index: true, foreign_key: true
      end

      t.timestamps null: false
    end
  end
end

Copilot

Trained on billions of lines of code, GitHub Copilot turns natural language prompts into coding suggestions across dozens of languages.

Focus on solving bigger problems

Spend less time creating boilerplate and repetitive code patterns, and more time on what matters: building great software. Write a comment describing the logic you want and GitHub Copilot will immediately suggest code to implement the solution.

package main

// Run records the outcome of a single run.
type Run struct {
    Time    int // in milliseconds
    Results string
    Failed  bool
}

// averageRuntimeInSeconds returns the average runtime of successful runs in
// seconds. Failed runs are excluded from both the total and the count.
// It returns 0 when runs contains no successful run, instead of the NaN that
// dividing zero by zero would produce.
func averageRuntimeInSeconds(runs []Run) float64 {
    var totalTime int
    var successfulRuns int
    for _, run := range runs {
        if run.Failed {
            continue
        }
        totalTime += run.Time
        successfulRuns++
    }
    if successfulRuns == 0 {
        return 0
    }
    // Time is in milliseconds; divide by 1000 to convert to seconds.
    return float64(totalTime) / float64(successfulRuns) / 1000
}

Copilot

# A course with enrollments, assignments and the records reached through them.
class Course < ApplicationRecord
  # Enrollments and assignments are destroyed together with the course.
  has_many :enrollments, dependent: :destroy
  has_many :students, through: :enrollments, source: :user
  has_many :teachers, through: :enrollments, source: :user
  has_many :assignments, dependent: :destroy
  has_many :submissions, through: :assignments

  # Every descriptive attribute must be present.
  validates :name, :start_date, :end_date, :term, :year, presence: true

  class << self
    # Look up a course by its name via Course.find_by.
    def find_by_name(name)
      Course.find_by(name: name)
    end
  end
end

Copilot

// Chain of constants, each derived from the previous one by division.
// Starting from 3600 the values are: minutes = 60, hours = 1, days = 1/24,
// weeks = 1/168, months = 1/720, years = 1/8640.
// NOTE(review): the names read like unit conversions of 3600 seconds, but
// `minutes` is then divided again to get `hours`, and so on — confirm intent.
const seconds = 3600
const minutes = seconds / 60
const hours = minutes / 60
const days = hours / 24
const weeks = days / 7
// A month is taken as 30 days.
const months = days / 30
const years = months / 12

Copilot

import static org.junit.Assert.*;

import org.junit.Test;

public class IsPrimeTest {

  // Math.isPrime(int) returns whether the given number is prime or not

  @Test

  public void testIsPrime() {

    assertTrue(Math.isPrime(2));

    assertTrue(Math.isPrime(3));

    assertTrue(Math.isPrime(5));

    assertTrue(Math.isPrime(7));

    assertTrue(Math.isPrime(11));

    assertTrue(Math.isPrime(13));

    assertTrue(Math.isPrime(17));

    assertTrue(Math.isPrime(19));

    assertTrue(Math.isPrime(23));

    assertTrue(Math.isPrime(29));

Copilot

Get AI-based suggestions, just for you

GitHub Copilot shares recommendations based on the project's context and style conventions. Quickly cycle through lines of code, complete function suggestions, and decide which to accept, reject, or edit.

Visual Studio Code

def max_sum_slice(xs):
  """Return the largest sum over all contiguous slices of xs.

  The empty slice counts, so the result is never below 0 (it is 0 for an
  empty or all-negative input).
  """
  running = best = 0
  for value in xs:
      candidate = running + value
      # Drop the running slice once it stops contributing a positive sum.
      running = candidate if candidate > 0 else 0
      if running > best:
          best = running
  return best

Copilot

Visual Studio Code

def max_sum_slice(xs):
  """Return the maximum sum of a contiguous slice of xs, or 0 if xs is empty.

  The running sum is floored at 0, so an all-negative input also yields 0.
  """
  if not xs:
      return 0
  tail_sum = 0   # best sum of a slice ending at the current element
  top_sum = 0    # best sum seen anywhere so far
  for item in xs:
      extended = tail_sum + item
      tail_sum = extended if extended > 0 else 0
      if tail_sum > top_sum:
          top_sum = tail_sum
  return top_sum

Copilot

Visual Studio Code

def max_sum_slice(xs):
  """Find the contiguous slice of xs with the largest sum.

  Returns a tuple (begin, end, max_so_far), where xs[begin:end + 1] is the
  first slice reaching the maximum sum max_so_far (both indices inclusive).
  If no slice has a positive sum (including empty xs), the result is
  (0, 0, 0) and the indices carry no meaning.
  """
  max_ending = max_so_far = 0
  begin = end = 0
  # Start of the slice currently being extended. It is kept separate from
  # `begin` so that a later reset cannot overwrite the start of the best slice
  # already recorded.
  candidate_begin = 0
  for i, x in enumerate(xs):
      if max_ending + x > 0:
          max_ending += x
      else:
          # Element i ends a non-positive run; the next candidate starts after it.
          max_ending = 0
          candidate_begin = i + 1
      if max_ending > max_so_far:
          max_so_far = max_ending
          begin = candidate_begin
          end = i
  return begin, end, max_so_far

Copilot

Visual Studio Code

def max_sum_slice(xs):
  """Return the largest contiguous-slice sum of xs (0 for empty input).

  Negative running totals are reset to 0, so the result is never negative.
  """
  if not xs:
      return 0
  here, overall = 0, 0
  for element in xs:
      total = here + element
      here = total if total > 0 else 0
      if here > overall:
          overall = here
  return overall

Copilot

Visual Studio Code

def max_sum_slice(xs):
  """Find the contiguous slice of xs with the largest sum.

  Returns a tuple (start_index, end_index, max_sum), where
  xs[start_index:end_index + 1] is the first slice reaching max_sum (both
  indices inclusive). If no slice has a positive sum (including empty xs),
  the result is (0, 0, 0) and the indices carry no meaning.
  """
  max_sum = 0
  running_sum = 0
  start_index = 0
  end_index = 0
  # Start of the slice currently being accumulated. The recorded start_index is
  # only updated from it when a new maximum is found, so the returned indices
  # always describe the slice that produced max_sum.
  run_start = 0
  for i in range(len(xs)):
      running_sum += xs[i]
      if running_sum < 0:
          # A negative prefix can never help; restart after element i.
          running_sum = 0
          run_start = i + 1
      elif running_sum > max_sum:
          max_sum = running_sum
          start_index = run_start
          end_index = i
  return (start_index, end_index, max_sum)

Copilot

Visual Studio Code

def max_sum_slice(xs):
  """Return the maximum sum of any contiguous slice of xs.

  Returns 0 for empty input; the running sum is floored at 0, so the result
  is never negative.
  """
  if len(xs) == 0:
      return 0
  # The original kept two trackers of the best sum that always held the same
  # value; a single one gives the same result.
  best = 0
  ending_here = 0
  for x in xs:
      grown = ending_here + x
      ending_here = grown if grown > 0 else 0
      if ending_here > best:
          best = ending_here
  return best

Copilot

Visual Studio Code

def max_sum_slice(xs):
  """Find the contiguous slice of xs with the largest sum.

  Returns a tuple (best_sum, best_start, best_end), where
  xs[best_start:best_end + 1] is the first slice reaching best_sum (both
  indices inclusive). If no slice has a positive sum (including empty xs),
  the result is (0, None, None).

  The previous implementation added every element to best_sum, so it
  returned the sum of the whole list rather than the best slice.
  """
  best_sum, best_start, best_end = 0, None, None
  running_sum, running_start = 0, 0
  for i, x in enumerate(xs):
      running_sum += x
      if running_sum <= 0:
          # A non-positive prefix cannot improve any later slice; restart after i.
          running_sum, running_start = 0, i + 1
      elif running_sum > best_sum:
          best_sum, best_start, best_end = running_sum, running_start, i
  return best_sum, best_start, best_end

Copilot

Visual Studio Code

def max_sum_slice(xs):
  """
  Return a pair of indices (i, j) describing a maximum-sum slice of xs.

  Returns None for empty input, (0, 0) for a single element, and for two
  elements (0, 1) when xs[0] >= xs[1], otherwise (1, 0). Longer inputs are
  delegated to max_sum_slice_helper(xs, 0, len(xs) - 1).

  NOTE(review): the exact meaning of (i, j) cannot be determined from this
  block, and the two-element result (1, 0) has i > j -- confirm against
  max_sum_slice_helper, which is not defined here.
  """
  # The docstring must be indented with the body; at column 0 it made the
  # whole definition an IndentationError.
  if len(xs) == 0:
      return None
  elif len(xs) == 1:
      return (0,0)
  elif len(xs) == 2:
      return (0,1) if xs[0] >= xs[1] else (1,0)
  else:
      return max_sum_slice_helper(xs,0,len(xs)-1)

Copilot

Keep flying with your favorite editor

GitHub Copilot integrates directly into your editor including Neovim, JetBrains IDEs, Visual Studio, and Visual Studio Code—and is fast enough to use as you type.

Code confidently in unfamiliar territory

Whether you’re working in a new language or framework, or just learning to code, GitHub Copilot can help you find your way. Tackle a bug, or learn how to use a new framework without spending most of your time spelunking through the docs or searching the web.

const token = process.env["TWITTER_BEARER_TOKEN"]

/**
 * Fetch the timeline of a Twitter user from the v1.1 user_timeline endpoint.
 *
 * @param screenName - Screen name of the user whose tweets are requested.
 * @param count - Value sent as the `count` query parameter.
 * @returns A promise resolving to the parsed JSON response body.
 */
const fetchTweetsFromUser = async (screenName, count) => {
  // Encode both values so characters such as "&" or "#" cannot alter the query.
  const response = await fetch(
    `https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=${encodeURIComponent(screenName)}&count=${encodeURIComponent(count)}`,
    {
      // The bearer token comes from the module-level `token` constant.
      headers: {
        Authorization: `Bearer ${token}`,
      },
    }
  )
  const json = await response.json()
  return json
}

Copilot

import tweepy, os # secrets in environment variables

def fetch_tweets_from_user(user_name):
    """Fetch up to 200 tweets (retweets excluded) from a user's timeline.

    Credentials are read from the TWITTER_KEY, TWITTER_SECRET, TWITTER_TOKEN
    and TWITTER_TOKEN_SECRET environment variables.

    Args:
        user_name: Screen name of the account whose timeline is requested.

    Returns:
        The value returned by tweepy's ``API.user_timeline``.

    Raises:
        KeyError: If one of the required environment variables is not set.
    """
    # authentication
    auth = tweepy.OAuthHandler(os.environ['TWITTER_KEY'], os.environ['TWITTER_SECRET'])
    auth.set_access_token(os.environ['TWITTER_TOKEN'], os.environ['TWITTER_TOKEN_SECRET'])
    api = tweepy.API(auth)
    # fetch tweets
    # The parameter is `user_name`; the previous code passed an undefined name
    # `user`, which raised NameError on every call.
    tweets = api.user_timeline(screen_name=user_name, count=200, include_rts=False)
    return tweets

Copilot

require 'twitter'

# Build a Twitter REST client from the TWITTER_* environment variables and
# return the result of user_timeline for the given handle.
def fetch_tweets_from_user(handle)
  client = Twitter::REST::Client.new do |settings|
    settings.consumer_key        = ENV['TWITTER_CONSUMER_KEY']
    settings.consumer_secret     = ENV['TWITTER_CONSUMER_SECRET']
    settings.access_token        = ENV['TWITTER_ACCESS_TOKEN']
    settings.access_token_secret = ENV['TWITTER_ACCESS_TOKEN_SECRET']
  end

  # The timeline is the method's return value; no temporary is needed.
  client.user_timeline(handle)
end

Copilot

const token = process.env["TWITTER_BEARER_TOKEN"]

/**
 * Fetch the 20 most recent tweets of a user and reduce each one to a small
 * object with id, text, created_at and a trimmed `user` record.
 *
 * @param userName - Screen name of the user whose timeline is requested.
 * @returns A promise resolving to the array of reduced tweet objects.
 */
const fetchTweetsFromUser = (userName: string) => {
  // Encode the name so characters such as "&" or "#" cannot alter the query.
  const url = `https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=${encodeURIComponent(userName)}&count=20`
  return fetch(url, {
    headers: {
      "Authorization": `Bearer ${token}`
    }
  })
    .then(res => res.json())
    .then(tweets => tweets.map(tweet => ({
      id: tweet.id,
      text: tweet.text,
      created_at: tweet.created_at,
      user: {
        id: tweet.user.id,
        name: tweet.user.name,
        screen_name: tweet.user.screen_name,
        profile_image_url: tweet.user.profile_image_url
      }
    })))
}

Copilot

package main

var apiKey = os.Getenv("TWITTER_BEARER_TOKEN")

type Tweet = struct{ Text string }

func fetchTweetsFromUser(user string) ([]Tweet, error) {

    url := "https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=" + user + "&count=200"

    req, err := http.NewRequest("GET", url, nil)

    if err != nil {

        return nil, err

    req.Header.Set("Authorization", "Bearer "+apiKey)

    resp, err := http.DefaultClient.Do(req)

    if err != nil {

        return nil, err

    defer resp.Body.Close()

    if resp.StatusCode != 200 {

        return nil, fmt.Errorf("bad status: %d", resp.StatusCode)

    var tweets []Tweet

    if err := json.NewDecoder(resp.Body).Decode(&tweets); err != nil {

        return nil, err

    return tweets, nil

Copilot

import d3 from "d3"

/**
 * Append an SVG of the given size to the element with id "scatterplot" and
 * draw one circle of radius 5 per datum.
 *
 * @param data - Array of objects whose `x` and `y` properties are used
 *   directly as the circle centre coordinates.
 * @param height - Value for the SVG `height` attribute.
 * @param width - Value for the SVG `width` attribute.
 */
const drawScatterplot = (data, height, width) => {
  const svg = d3.select("#scatterplot")
    .append("svg")
    .attr("height", height)
    .attr("width", width)
  // Data join: create a circle for every datum that has no element yet.
  svg.selectAll("circle")
    .data(data)
    .enter()
    .append("circle")
    .attr("cx", d => d.x)
    .attr("cy", d => d.y)
    .attr("r", 5)
}

Copilot

import matplotlib.pyplot as plt

def draw_scatterplot(x_values, y_values):
    """Draw a scatter plot of y_values against x_values and show it.

    Points are drawn with marker size 20; the figure is titled
    "Scatter Plot" with axis labels "x values" and "y values".

    Args:
        x_values: Values passed as the x coordinates to plt.scatter.
        y_values: Values passed as the y coordinates to plt.scatter.
    """
    plt.scatter(x_values, y_values, s=20)
    plt.title("Scatter Plot")
    plt.xlabel("x values")
    plt.ylabel("y values")
    # Displays the figure via pyplot's show().
    plt.show()

Copilot

require 'gruff'

# Plot y_values against x_values as a single series named 'data' with
# Gruff::Scatter (constructed with 400) and write the chart to "plot.png".
def draw_scatterplot(x_values, y_values)
  g = Gruff::Scatter.new(400)
  # NOTE(review): the title "GCD" looks unrelated to a generic scatter plot — confirm.
  g.title = "GCD"
  g.x_axis_label = "X"
  g.y_axis_label = "Y"
  g.data('data', x_values, y_values)
  g.write("plot.png")
end

Copilot

import d3 from "d3"

const drawScatterplot = (

  data: Array<{ x: number; y: number; size: number }>,

  x: d3.ScaleLinear<number, number>,

  y: d3.ScaleLinear<number, number>,

  svg: d3.Selection<SVGGElement, unknown, HTMLElement, any>,

  margin: { top: number; bottom: number; left: number; right: number }

) => {

  const width = svg.attr("width") - margin.left - margin.right

  const height = svg.attr("height") - margin.top - margin.bottom

  svg.selectAll("*").remove()

  const g = svg.append("g")

    .attr("transform", `translate(${margin.left}, ${margin.top})`)

  const xAxis = g

    .append("g")

    .attr("transform", `translate(0, ${height})`)

    .call(d3.axisBottom(x))

...

Copilot

/**
 * Wrap `fn` so that repeated calls with the same arguments reuse the first
 * result instead of calling `fn` again.
 *
 * Arguments are compared by their JSON.stringify serialization, so they must
 * be JSON-serializable to be distinguished reliably.
 *
 * @param fn - The function to memoize.
 * @returns A function with the same call signature that caches results.
 */
const memoize = fn => {
  const cache = new Map();
  return (...args) => {
    const key = JSON.stringify(args);
    // Test for presence rather than truthiness: `cache[key] || fn(...)` called
    // fn again whenever the cached result was 0, "", false, null or undefined.
    if (!cache.has(key)) {
      cache.set(key, fn(...args));
    }
    return cache.get(key);
  };
};

Copilot

def memoize(func):
    """Return a wrapper around func that caches results by positional arguments.

    The first call with a given argument tuple invokes func and stores the
    result; later calls with equal arguments return the stored value.

    Args:
        func: The function to wrap. Its arguments must be hashable, because
            the argument tuple is used as a dictionary key.

    Returns:
        A function accepting the same positional arguments as func.
    """
    cache = {}

    def wrapper(*args):
        if args not in cache:
            cache[args] = func(*args)
        return cache[args]

    return wrapper

Copilot

# Wrap a callable so that repeated calls with the same arguments reuse the
# first result. Returns a lambda taking the same arguments as +func+.
def memoize(func)
  cache = {}
  lambda do |*args|
    # fetch with a block only computes when the key is absent. `||=` would call
    # func again whenever the cached result was nil or false.
    cache.fetch(args) { cache[args] = func.call(*args) }
  end
end

Copilot

/**
 * Wrap `fn` so that repeated calls with the same arguments reuse the first
 * result instead of calling `fn` again.
 *
 * Arguments are compared by their JSON.stringify serialization.
 *
 * @param fn - The function to memoize.
 * @returns A function that caches the results of `fn` per argument list.
 */
const memoize = (fn: Function): Function => {
  const cache: { [key: string]: any } = {};
  return (...args: any[]): any => {
    const key = JSON.stringify(args);
    // Check for the key itself: a truthiness test on cache[key] recomputed
    // results such as 0, "", false or null on every call.
    if (Object.prototype.hasOwnProperty.call(cache, key)) {
      return cache[key];
    }
    const result = fn(...args);
    cache[key] = result;
    return result;
  };
};

Copilot

package main

  // Memoize returns a function that caches the results of fn per argument:
  // fn is called at most once for each distinct n, and later calls with the
  // same n return the stored value.
  func Memoize(fn func(int) int) func(int) int {
      cache := make(map[int]int)
      return func(n int) int {
          if v, ok := cache[n]; ok {
              return v
          }
          // Keep the result in a local so it is stored and returned without a
          // second map lookup.
          v := fn(n)
          cache[n] = v
          return v
      }
  }

Copilot

const https = require('https')

const parseString = require('xml2js').parseString

const apiKey = process.env["GOODREADS_API_KEY"]

/**
 * Look up a book by title in the Goodreads XML API and resolve with the
 * `average_rating` value of the first book in the parsed response.
 *
 * @param title - Title of the book to look up.
 * @returns A promise that resolves with the parsed average_rating value and
 *   rejects on a request error or an XML parse error.
 */
const getRating = title => {
  return new Promise((resolve, reject) => {
    // Encode the title so spaces, "&" and "#" cannot break the query string.
    https.get(`https://www.goodreads.com/book/title.xml?key=${apiKey}&title=${encodeURIComponent(title)}`, res => {
      let xml = ''
      res.setEncoding('utf8')
      res.on('data', chunk => {
        xml += chunk
      })
      res.on('end', () => {
        parseString(xml, (err, result) => {
          if (err) {
            reject(err)
          } else {
            resolve(result.GoodreadsResponse.book[0].average_rating)
          }
        })
      })
    // Without an 'error' listener a failed request would leave the promise
    // pending forever.
    }).on('error', reject)
  })
}

Copilot

import os, requests, json, bs4

key = os.environ['GOODREADS_API_KEY']

def get_rating(title):
    """Get the average rating of the book from GoodReads, and return a float.

    Args:
        title: Title of the book to look up.

    Returns:
        The text of the first ``average_rating`` element in the XML
        response, converted to float.

    Raises:
        AttributeError: If the response contains no ``average_rating`` element.
        ValueError: If the element's text is not a valid float.
    """
    # Pass the query through `params` so requests percent-encodes the key and
    # title. Concatenating the raw title broke on "&", "#" and similar characters.
    response = requests.get(
        'https://www.goodreads.com/book/title.xml',
        params={'key': key, 'title': title},
    )
    soup = bs4.BeautifulSoup(response.text, 'xml')
    rating = soup.find('average_rating').text
    return float(rating)

Copilot

require 'goodreads'

# Look up a book by title through the Goodreads client and return its
# average rating.
def get_rating_of_book_by_title(title)
  # NOTE(review): the credentials are placeholder literals; replace them with
  # real values (for example from the environment) before use.
  client = Goodreads::Client.new(
    api_key: "YOUR_API_KEY",
    api_secret: "YOUR_API_SECRET"
  )
  # `book` looks a book up by Goodreads id; a title needs `book_by_title`.
  book = client.book_by_title(title)
  # The response exposes the rating as `average_rating`.
  book.average_rating
end

Copilot

const apiKey = process.env["GOODREADS_API_KEY"]

/**
 * Look up a book by title in the Goodreads XML API and resolve with the text
 * content of the first `average_rating` element of the response.
 *
 * Errors are logged with console.log, in which case the promise resolves to
 * undefined.
 *
 * @param title - Title of the book to look up.
 * @returns A promise resolving to the rating text.
 */
const getRating = (title: string) => {
  // Encode the title so spaces, "&" and "#" cannot break the query string.
  return fetch(`https://www.goodreads.com/book/title.xml?key=${apiKey}&title=${encodeURIComponent(title)}`)
    .then(res => res.text())
    .then(res => {
      const parser = new DOMParser();
      const xml = parser.parseFromString(res, "text/xml");
      const rating = xml.getElementsByTagName("average_rating")[0].textContent;
      return rating;
    })
    .catch(error => console.log(error));
}

Copilot

package main

import ("log"; "net/http"; "net/url"; "os"; "strconv"; "github.com/beevik/etree")

var (

    apiKey = os.Getenv("GOODREADS_API_KEY")

// GetRating gets the average rating of a book using the Goodreads API.

func GetRating(title string) (float64, error) {

    u := url.URL{

        Scheme: "https",

        Host:   "www.goodreads.com",

        Path:   "/book/title.xml",

        RawQuery: url.Values{

            "key":   {apiKey},

            "title": {title},

    }.Encode(),

  resp, err := http.Get(u.String())

  if err != nil {

      return 0, err

  defer resp.Body.Close()

  doc := etree.NewDocument()

  if _, err := doc.ReadFrom(resp.Body); err != nil {

      return 0, err

  ratings := doc.FindElements("//average_rating")

  if len(ratings) == 0 {

      return 0, nil

  rating, err := strconv.ParseFloat(ratings[0].Text(), 64)

  if err != nil {

      return 0, err

  return rating, nil

Copilot

Flight Reports

Hundreds of engineers, including our own, use GitHub Copilot every day.

This is the single most mind-blowing application of machine learning I’ve ever seen.

Mike Krieger // Co-founder, Instagram

”

GitHub Copilot works shockingly well. I will never develop software without it again.

Lars Gyrup Brink Nielsen

”

I was stunned when I started writing Clojure with GitHub Copilot and it filled an idiomatic namespace require, just like I was going to write it.

Gunnika Batra // Senior Analyst

”

Trying to code in an unfamiliar language by googling everything is like navigating a foreign country with just a phrasebook. Using GitHub Copilot is like hiring an interpreter.

Harri Edwards // OpenAI

Don't fly solo

Enjoy a 60-day free trial then $10/month or $100/year per user

Start my free trial Explore docs

Learn about GitHub Copilot Terms and Conditions

Coming later this year

GitHub Copilot for companies

Join the waitlist

Frequently asked questions

General

What is GitHub Copilot?

GitHub Copilot is an AI pair programmer that helps you write code faster and with less work. It draws context from comments and code to suggest individual lines and whole functions instantly. GitHub Copilot is powered by Codex, a generative pretrained language model created by OpenAI. It is available as an extension for Visual Studio Code, Visual Studio, Neovim, and the JetBrains suite of integrated development environments (IDEs).

GitHub Copilot is not intended for non-coding tasks such as data generation or natural language generation (for example, question answering). Your use of GitHub Copilot is subject to the GitHub Terms for Additional Products and Features.

How does GitHub Copilot work?

OpenAI Codex was trained on publicly available source code and natural language, so it works for both programming and human languages. The GitHub Copilot extension sends your comments and code to the GitHub Copilot service, and it relies on context, as described in Privacy below - i.e., file content both in the file you are editing, as well as neighboring or related files within a project. It may also collect the URLs of repositories or file paths to identify relevant context. The comments and code along with context are then used by OpenAI Codex to synthesize and suggest individual lines and whole functions.

What data has GitHub Copilot been trained on?

GitHub Copilot is powered by Codex, a generative pretrained AI model created by OpenAI. It has been trained on natural language text and source code from publicly available sources, including code in public repositories on GitHub.

Does GitHub Copilot write perfect code?

In a recent evaluation, we found that users accepted on average 26% of all completions shown by GitHub Copilot. We also found that on average more than 27% of developers’ code files were generated by GitHub Copilot, and in certain languages like Python that goes up to 40%. However, GitHub Copilot does not write perfect code. It is designed to generate the best code possible given the context it has access to, but it doesn’t test the code it suggests so the code may not always work, or even make sense. GitHub Copilot can only hold a very limited context, so it may not make use of helpful functions defined elsewhere in your project or even in the same file. And it may suggest old or deprecated uses of libraries and languages. When converting comments written in non-English to code, there may be performance disparities when compared to English. For suggested code, certain languages like Python, JavaScript, TypeScript, and Go might perform better compared to other programming languages.

Like any other code, code suggested by GitHub Copilot should be carefully tested, reviewed, and vetted. As the developer, you are always in charge.

Will GitHub Copilot help me write code for a new platform?

GitHub Copilot is trained on public code. When a new library, framework, or API is released, there is less public code available for the model to learn from. That reduces GitHub Copilot’s ability to provide suggestions for the new codebase. As more examples enter the public space, we integrate them into the training set and suggestion relevance improves. In the future, we will provide ways to highlight newer APIs and samples to raise their relevance in GitHub Copilot’s suggestions.

How do I get the most out of GitHub Copilot?

GitHub Copilot works best when you divide your code into small functions, use meaningful names for function parameters, and write good docstrings and comments as you go. It also seems to do best when it’s helping you navigate unfamiliar libraries or frameworks.

How can I contribute?

By using GitHub Copilot and sharing your feedback in the feedback forum, you help to improve GitHub Copilot. Please also report incidents (e.g., offensive output, code vulnerabilities, apparent personal information in code generation) directly to copilot-safety@github.com so that we can improve our safeguards. GitHub takes safety and security very seriously and we are committed to continually improving.

Human oversight

Can GitHub Copilot introduce insecure code in its suggestions?

Public code may contain insecure coding patterns, bugs, or references to outdated APIs or idioms. When GitHub Copilot synthesizes code suggestions based on this data, it can also synthesize code that contains these undesirable patterns. This is something we care a lot about at GitHub, and in recent years we’ve provided tools such as GitHub Actions, Dependabot, and CodeQL to open source projects to help improve code quality. Of course, you should always use GitHub Copilot together with good testing and code review practices and security tools, as well as your own judgment.

Does GitHub own the code generated by GitHub Copilot?

GitHub Copilot is a tool, like a compiler or a pen. GitHub does not own the suggestions GitHub Copilot generates. The code you write with GitHub Copilot’s help belongs to you, and you are responsible for it. We recommend that you carefully test, review, and vet the code before pushing it to production, as you would with any code you write that incorporates material you did not independently originate.

Does GitHub Copilot recite code from the training set?

The vast majority of the code that GitHub Copilot suggests has never been seen before. Our latest internal research shows that about 1% of the time, a suggestion may contain some code snippets longer than ~150 characters that match the training set. Previous research showed that many of these cases happen when GitHub Copilot is unable to glean sufficient context from the code you are writing, or when there is a common, perhaps even universal, solution to the problem.

What can I do to reduce GitHub Copilot’s suggestion of code that matches public code?

We built a filter to help detect and suppress the rare instances where a GitHub Copilot suggestion contains code that matches public code on GitHub. You have the choice to turn that filter on or off during setup. With the filter on, GitHub Copilot checks code suggestions with its surrounding code for matches or near matches (ignoring whitespace) against public code on GitHub of about 150 characters. If there is a match, the suggestion will not be shown to you. We plan on continuing to evolve this approach and welcome feedback and comment.

Other than the filter, what other measures can I take to assess code suggested by GitHub Copilot?

You should take the same precautions as you would with any code you write that uses material you did not independently originate. These include rigorous testing, IP scanning, and checking for security vulnerabilities. You should make sure your IDE or editor does not automatically compile or run generated code before you review it.

Fairness and broader impact

Will different people experience different quality of service from GitHub Copilot?

Given public sources are predominantly in English, GitHub Copilot will likely work less well in scenarios where natural language prompts provided by the developer are not in English and/or are grammatically incorrect. Therefore, non-English speakers might experience a lower quality of service.

Additionally, inexperienced developers may struggle to use GitHub Copilot to effectively generate code, and their lack of experience might inhibit their capability to effectively review and edit suggestions made by GitHub Copilot. Finally, we are conducting internal testing of GitHub Copilot’s ease of use by developers with disabilities and working to ensure that GitHub Copilot is accessible to all developers. Please feel free to share your feedback on GitHub Copilot accessibility in our feedback forum.

We acknowledge that fairness and inclusivity in code generation systems are important emerging research areas. We are working with experts, including Microsoft’s Office of Responsible AI, in an effort to advance GitHub Copilot’s responsible AI practices. We will also review new research and learn from feedback we receive to improve GitHub Copilot such that it is usable by a wide range of developers and provides similar quality of service to people with different backgrounds.

Does GitHub Copilot produce offensive outputs?

GitHub Copilot includes filters to block offensive language in the prompts and to avoid synthesizing suggestions in sensitive contexts. We continue to work on improving the filter system to more intelligently detect and remove offensive outputs. However, due to the novel space of code safety, GitHub Copilot may sometimes produce undesired output. If you see offensive outputs, please report them directly to copilot-safety@github.com so that we can improve our safeguards. GitHub takes this challenge very seriously and we are committed to addressing it.

How will advanced code generation tools like GitHub Copilot affect developer jobs?

Bringing in more intelligent systems has the potential to bring enormous change to the developer experience. We do not expect GitHub Copilot to replace developers. Rather, we expect GitHub Copilot to partner with developers, augment their capabilities, and enable them to be more productive, reduce manual tasks, and help them focus on interesting work. We also believe that GitHub Copilot has the potential to lower barriers to entry, enabling more people to explore software development, and join the next generation of developers. We are working to test these hypotheses with both internal and external research.

Privacy

How can I control the use of my data collected by Copilot?

GitHub Copilot gives you certain choices about how it uses the data it collects. User engagement data, including pseudonymous identifiers and general usage data, is required for the use of GitHub Copilot and will continue to be collected, processed, and shared with Microsoft and OpenAI as you use GitHub Copilot. You can choose whether your code snippets are collected and retained by GitHub and further processed and shared with Microsoft and OpenAI by adjusting your user settings. Additional information about the types of telemetry collected and processed by GitHub Copilot can be found in What data does GitHub Copilot collect? below.

You can also request deletion of GitHub Copilot data associated with your GitHub identity by filling out a support ticket. Please note that future data collection will occur with continued use of GitHub Copilot, but you can control whether your code snippets are collected, processed, and retained in telemetry in your Copilot user settings.

What data does GitHub Copilot collect?

GitHub Copilot relies on file content and additional data to work. It collects data both to provide the service and saves some of the data to perform further analysis and enable improvements. Please see below for more details on how your telemetry data is used and shared.

User Engagement Data

When you use GitHub Copilot it will collect usage information about events generated when interacting with the IDE or editor. These events include user edit actions like completions accepted and dismissed, and error and general usage data to identify metrics like latency and features engagement. This information may include personal data, such as pseudonymous identifiers.

Code Snippets Data

Depending on your preferred telemetry settings, GitHub Copilot may also collect and retain the following, collectively referred to as “code snippets”: source code that you are editing, related files and other files open in the same IDE or editor, URLs of repositories, and file paths.

How is GitHub Copilot telemetry data used and shared?

Telemetry, including code snippets (as detailed in What data does GitHub Copilot collect?), is used by GitHub, Microsoft, and OpenAI to improve GitHub Copilot and related services and to conduct product and academic research about developers.

Telemetry uses may include:

Directly improving GitHub Copilot, including assessing different strategies in processing and predicting which suggestions users may find helpful
Developing and improving closely related developer products and services from GitHub, Microsoft, and OpenAI
Investigating and detecting potential abuse of GitHub Copilot
Conducting experiments and research related to developers and their use of developer tools and services
Evaluating GitHub Copilot, e.g., by measuring the positive impact it has on the user
Improving the underlying code generation models, e.g., by providing positive and negative examples
Fine tuning ranking and sorting algorithms and prompt crafting

When processing code snippets, we take the protection measures described below in How is the transmitted data protected? and follow responsible practices in accordance with our Privacy Statement so that the use of your telemetry data to improve these models does not result in this data being shared with other GitHub Copilot users.

How is the transmitted data protected?

We know that user edit actions, source code snippets, and URLs of repositories and file paths are sensitive data. Consequently, several measures of protection are applied, including:

The transmitted data is encrypted in transit and at rest
Access is strictly controlled. The data can only be accessed by (1) named GitHub personnel working on the GitHub Copilot team or on the GitHub platform health team, (2) Microsoft personnel working on or with the GitHub Copilot team, and (3) OpenAI personnel who work on GitHub Copilot
Role-based access controls and multi-factor authentication are required for personnel accessing code snippet data

Will my private code be shared with other users?

No. We use data, including information about which suggestions users accept or reject, to improve the model. We follow responsible practices in accordance with our Privacy Statement to ensure that your code snippets will not be used as suggested code for other users of GitHub Copilot.

Does GitHub Copilot ever output personal data?

Because GitHub Copilot was trained on publicly available code, its training set included public personal data included in that code. From our internal testing, we found it to be rare that GitHub Copilot suggestions included personal data verbatim from the training set. In some cases, the model will suggest what appears to be personal data – email addresses, phone numbers, etc. – but is actually fictitious information synthesized from patterns in training data. For example, when one of our engineers prompted GitHub Copilot with, “My name is Mona and my birthdate is,” GitHub Copilot suggested a random, fictitious date of “December 12,” which is not Mona’s actual birthdate. We have implemented a filter that blocks emails when shown in standard formats, but it’s still possible to get the model to suggest this sort of content if you try hard enough. We will keep improving the filter system to be more intelligent to detect and remove more personal data from the suggestions.

Where can I learn more about GitHub Privacy and data protection?

For more information on how GitHub processes and uses personal data, please see our Privacy Statement.